# Embeddings with langchain-google-firestore

## Install libraries, import, and set up environment

In [8]:
%%writefile requirements.txt
langchain-google-firestore
google-cloud-firestore
langchain-text-splitters

Overwriting requirements.txt


In [9]:
%pip install --user -qr requirements.txt

Note: you may need to restart the kernel to use updated packages.


In [1]:
from langchain_google_firestore import FirestoreVectorStore
from langchain_google_vertexai import VertexAIEmbeddings
from google.cloud import firestore
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [18]:
PROJECT_ID=!gcloud config get-value project
PROJECT_ID=PROJECT_ID[0]
PROJECT_ID

'erschmid-test-291318'

## Create embeddings from file

In [13]:
herodotus_histories = ""
with open("/home/jupyter/MyHerodotus/data/history.mb.txt", "r") as f:
    herodotus_histories = f.read()

In [15]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
texts = text_splitter.create_documents([herodotus_histories])

In [21]:
embeddings_llm = VertexAIEmbeddings(
    project=PROJECT_ID,
    location="us-west1",
    model_name="textembedding-gecko@latest"
)

In [20]:
client  = firestore.Client(project=PROJECT_ID, database="embedding-db")

In [22]:
vector_store = FirestoreVectorStore.from_documents(
    client=client,
    collection="Histories",
    documents=texts,
    embedding=embeddings_llm)

In [28]:
!(gcloud firestore indexes composite create --project=erschmid-test-291318 \
  --database="embedding-db" \
  --collection-group=Histories \
  --query-scope=COLLECTION \
  --field-config=vector-config='{"dimension":"768","flat": "{}"}',field-path=embedding)

Create request issued
Waiting for operation [projects/erschmid-test-291318/databases/embedding-db/ope
rations/S0U0aFhqT2dBQ0lDDCoDIDVjNTllMzM0Zjc5Ny1jMTFiLTFmNDQtMGMyNy04MGY3ZGE5ZSQ
ac2VuaWxlcGlwCQpBEg] to complete...done.                                       
Created index [CICAgOjXh4EK].


## Query the vector store

In [38]:
doc = vector_store.similarity_search("Mycenae", k=4)
doc

[Document(metadata={'reference': {'path': 'Histories/KJbmEMTGDSAn3fXrSdzN', 'firestore_type': 'document_reference'}, 'embedding': {'values': [0.00026718771550804377, -0.0006115688593126833, -0.04475724324584007, 0.032995786517858505, -0.0019528967095538974, -0.010361099615693092, 0.03273545578122139, -0.002580843633040786, 0.028879767283797264, 0.03579668700695038, 0.023989783599972725, -0.024440215900540352, -0.046862050890922546, -0.028072623535990715, -0.01967662386596203, 0.06356358528137207, 0.000945216859690845, 0.025882497429847717, -0.030239678919315338, -0.0667070746421814, -0.05471836030483246, 0.0008609961369074881, 0.009911967441439629, -0.04735606163740158, -0.00908174179494381, -0.03295475244522095, -0.01040920615196228, -0.03833555430173874, -0.018144866451621056, -0.021037815138697624, -0.08148606866598129, 0.017405841499567032, -0.05816338211297989, -0.06157497689127922, -0.047898873686790466, -0.027285011485219002, 0.05081131309270859, -0.00555255776271224, 0.03552310

## Create image embeddings

In [34]:
image_embeddings_llm = VertexAIEmbeddings(
    project=PROJECT_ID,
    location="us-west1",
    model_name="multimodalembedding@001"
)

In [36]:
client  = firestore.Client(project=PROJECT_ID, database="image-embedding-db")

In [None]:
vector_store = FirestoreVectorStore.from_documents(
    client=client,
    collection="Histories",
    documents=texts,
    embedding=image_embeddings_llm)

In [None]:
vector_store.add_images([
    "gs://myherodotus/images/athens_acropolis.jpg",
    "gs://myherodotus/images/crete_knossos.jpg",
    "gs://myherodotus/images/epidauros_theatre.jpg",
    "gs://myherodotus/images/mycenae_lion_gate.jpg",
    "gs://myherodotus/images/nafplion_palamidi.jpg",
    "gs://myherodotus/images/santorini_thera.jpg",
    "gs://myherodotus/images/sparta_tomb_of_leonidas.jpg",
])

## Sources

+ [Blog post](https://atamel.dev/posts/2024/10-09_firestore_text_embedding_search/)
+ [`langchain-google-firestore-python` repo](https://github.com/googleapis/langchain-google-firestore-python)
+ [langchain-google-firestore docs](https://cloud.google.com/python/docs/reference/langchain-google-firestore/latest)
+ [`langchain_text_splitters` docs](https://api.python.langchain.com/en/latest/character/langchain_text_splitters.character.RecursiveCharacterTextSplitter.html)
+ [langchain_core.vectorstores docs](https://api.python.langchain.com/en/latest/vectorstores/langchain_core.vectorstores.VectorStore.html)
