In [43]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings 
from langchain_core.documents import Document
import json
import os
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment;
# GOOGLE_API_KEY (read below) is expected to be supplied this way or by the shell.
load_dotenv()

# Read the raw documents. The context manager closes the file handle as soon as
# parsing finishes instead of leaving it open until garbage collection, and the
# explicit encoding keeps decoding independent of the platform's locale default.
with open("data/docs/documents.json", encoding="utf-8") as documents_file:
    documents_json = json.load(documents_file)

# The model name is kept in its own variable so it can be recorded next to
# every embedding that is produced with it.
embeddings_model_name = "models/embedding-001"
embeddings_model = GoogleGenerativeAIEmbeddings(
    model=embeddings_model_name,
    # Raises KeyError right here if the key is missing from the environment.
    api_key=os.environ["GOOGLE_API_KEY"],
)

In [44]:
# Wrap each raw JSON record in a Document, carrying over its text and metadata.
docs = [
    Document(page_content=record["page_content"], metadata=record["metadata"])
    for record in documents_json
]

In [None]:
# Collect the text of every record, in the same order as documents_json.
texts = [
    record["page_content"]
    for record in documents_json
]

# Embed all texts in a single call; the result is consumed pairwise with
# documents_json later on, so the order of `texts` matters.
embeddings = embeddings_model.embed_documents(texts)

[[-0.002561260014772415, 0.004651861265301704, -0.006319968495517969, 0.041564472019672394, 0.05573377013206482, 0.03451664000749588, 0.04410780593752861, 0.0007556002819910645, 0.015481526032090187, 0.03723793476819992, -0.02207607962191105, -0.01647445373237133, -0.022981248795986176, 0.00831670593470335, 0.015804056078195572, -0.07811958342790604, -0.003934910986572504, 0.04765526205301285, -0.0008038444211706519, 0.006577260792255402, 0.022651640698313713, -0.007846054621040821, -0.008183703757822514, -0.04779662564396858, -0.03798665106296539, -0.02656390890479088, -0.012496856972575188, -0.08151742070913315, -0.02896464802324772, 0.006231401115655899, -0.056888118386268616, 0.028096789494156837, -0.03993210569024086, 0.028676258400082588, 0.029394211247563362, -0.029117049649357796, -0.005781660322099924, -0.018695378676056862, 0.01596338115632534, 0.020902886986732483, -0.025986840948462486, -0.05633864179253578, -0.05047985911369324, -0.00041341205360367894, -0.0127119775861501

In [None]:
# Attach each embedding vector to its source record and persist the result.

# zip() stops at the shorter input, so a length mismatch would silently drop
# records and write an incomplete file. Fail loudly instead.
if len(embeddings) != len(documents_json):
    raise ValueError(
        f"Got {len(embeddings)} embeddings for {len(documents_json)} documents"
    )

# Build new dicts rather than adding a key to the entries of documents_json:
# the loaded input stays untouched, so this cell can be re-run (or the raw
# records reused) without embeddings leaking into earlier data.
embedded_docs = [
    {
        **doc_json,
        "embedding_metadata": {
            "embedding": embedding,
            # Record which model produced the vector alongside the vector itself.
            "embedding_model": embeddings_model_name,
        },
    }
    for embedding, doc_json in zip(embeddings, documents_json)
]

with open("data/docs/gemini_embedded_docs.json", "w", encoding="utf-8") as f:
    json.dump(embedded_docs, f)