In [1]:
# @title Region { display-mode: "form" }
# BigQuery location. Later cells pass this value as `location` to both the
# BigQuery client and the vector store, so the two always agree.
REGION = "US"  # @param {type: "string"}

In [2]:
# @title Dataset and Table { display-mode: "form" }
# DATASET is the BigQuery dataset a later cell creates with `exists_ok=True`
# (so an existing dataset is reused); TABLE is the table name handed to the
# vector store as `table_name`.
DATASET = "my_langchain_dataset"  # @param {type: "string"}
TABLE = "doc_and_vectors"  # @param {type: "string"}

In [4]:
# @title Project { display-mode: "form" }
# Google Cloud project ID. Later cells pass it to the Vertex AI embeddings,
# the BigQuery client and the vector store. Replace it with your own project
# before running the notebook.
PROJECT_ID = "image-genai"  # @param {type:"string"}

In [5]:
from langchain_google_vertexai import VertexAIEmbeddings

# Vertex AI embedding model. The same object is later handed to the vector
# store and used directly to embed the search query.
embedding = VertexAIEmbeddings(
    project=PROJECT_ID,
    model_name="textembedding-gecko@latest",
)

In [6]:
from google.cloud import bigquery

# BigQuery client bound to the configured project and location.
client = bigquery.Client(location=REGION, project=PROJECT_ID)

# Create the dataset if it is missing; `exists_ok=True` makes this cell safe to
# re-run. Left as the trailing expression so the Dataset repr is displayed.
client.create_dataset(DATASET, exists_ok=True)

Dataset(DatasetReference('image-genai', 'my_langchain_dataset'))

In [16]:
from langchain_community.vectorstores import BigQueryVectorSearch

# DistanceStrategy is defined in langchain_community; import it from there
# instead of the deprecated `langchain.vectorstores.utils` re-export so both
# imports in this cell come from the same package.
from langchain_community.vectorstores.utils import DistanceStrategy

# NOTE(review): BigQueryVectorSearch in langchain_community appears to be
# superseded by the langchain-google-community package; confirm before
# upgrading dependencies.

# Vector store over the configured project/dataset/table. It uses the Vertex AI
# `embedding` object created earlier and cosine distance for similarity.
store = BigQueryVectorSearch(
    project_id=PROJECT_ID,
    dataset_name=DATASET,
    table_name=TABLE,
    location=REGION,
    embedding=embedding,
    distance_strategy=DistanceStrategy.COSINE,
)

In [17]:
# Sample corpus: a mix of fruit and vehicle terms.
all_texts = [
    "Apples",
    "oranges",
    "Cars and airplanes",
    "Pineapple",
    "Train",
    "Banana",
]

# One metadata dict per text, recording its character length under "len".
metadatas = [{"len": len(text)} for text in all_texts]

# Trailing expression: the ids returned by add_texts are displayed as output.
store.add_texts(all_texts, metadatas=metadatas)

['59c5cc3ab6564f9d842dd03cbd8a36dd',
 '0db1af769164464db52737b32776e379',
 '320472815b094fabb5bc0c7cc3e0009b',
 '251d24d5e0de4c40b54a8b730187ea52',
 'e5a4f9d5d890459687be28d8f57a3d50',
 'cfd94490528f42d488631b571fb76c68']

In [19]:
# Embed the natural-language query with the same model used by the store,
# then fetch the four nearest documents by vector.
query = "I'd like a fruit."
query_vector = embedding.embed_query(query)
docs = store.similarity_search_by_vector(
    query_vector,
    k=4,
)
print(docs)

[Document(page_content='Pineapple', metadata={'len': 9, '__id': '251d24d5e0de4c40b54a8b730187ea52'}), Document(page_content='Apples', metadata={'len': 6, '__id': '59c5cc3ab6564f9d842dd03cbd8a36dd'}), Document(page_content='Banana', metadata={'len': 6, '__id': 'cfd94490528f42d488631b571fb76c68'}), Document(page_content='oranges', metadata={'len': 7, '__id': '0db1af769164464db52737b32776e379'})]
