In [None]:
###Building Embeddings with Gemini ######

In [1]:
%pip install google-generativeai

Note: you may need to restart the kernel to use updated packages.


In [8]:
%pip install python-dotenv

Note: you may need to restart the kernel to use updated packages.


In [12]:
###configure gemini api key ####
import google.generativeai as genai
from dotenv import load_dotenv
import os

# Load environment variables via python-dotenv before reading the key.
load_dotenv()

# Configure the Gemini SDK with whatever is stored under "gemini_key"
# (None is passed through when the variable is not set).
genai.configure(api_key=os.getenv("gemini_key"))

In [11]:
def get_gemini_embedding(text, model="models/gemini-embedding-exp-03-07"):
    """Return the Gemini embedding for ``text``.

    Args:
        text: Content to embed; forwarded unchanged to ``genai.embed_content``.
        model: Name of the Gemini embedding model to call. Defaults to the
            model this notebook originally hard-coded, so existing
            single-argument calls behave exactly as before.

    Returns:
        The value stored under the ``"embedding"`` key of the API response.
    """
    response = genai.embed_content(model=model, content=text)
    return response["embedding"]

# Small sample corpus of short factual sentences to embed.
documents = [
    "The Eiffel Tower is located in Paris.",
    "The Colosseum is in Rome, Italy.",
    "The Taj Mahal is a famous monument in India.",
    "Mount Everest is the highest mountain in the world.",
    "Python is a popular programming language.",
]

# One embedding per document, in the same order, so embeddings[i] belongs to documents[i].
embeddings = list(map(get_gemini_embedding, documents))
print("embeddings:", embeddings)

embeddings: [[-0.014374869, 0.023314737, 0.02095817, -0.05281218, -0.040019937, -0.027528822, -0.01905583, 0.02301112, 0.01459665, -0.03704371, 0.0050854487, 0.00090701535, 0.0027497867, 5.8592686e-05, 0.13110267, -0.00046146743, 0.009326338, 0.0038111731, 0.0056180367, -0.024173645, -0.008126271, 0.0020824946, 0.029302387, -0.011774, -0.021935781, 0.018495733, 0.0011998338, 0.0031212159, 0.032464176, 0.0388313, 0.0030836998, -0.011484932, -0.011864708, 0.016357575, -0.0010674719, 0.006416102, 0.019915415, -0.0073276297, -0.004182217, -0.00092841755, -0.014717298, -0.024058407, 0.0022807773, -0.0064465012, -0.026009332, -0.019843165, 0.0018400602, -0.028546335, 0.017378114, -0.001595628, -0.0010924004, -0.005215876, 0.005695372, -0.16460715, 0.010148166, 0.0038794293, -0.0009364415, 0.025844315, -0.013989137, -0.026747216, -0.012599205, 0.024463631, 0.017147409, 0.00361584, -0.0008483797, -0.00075625366, 0.008153406, 0.007565779, -0.003236341, 0.023695407, 0.02242463, 0.023699269, -0.0

In [None]:
###insert these embeddings in chromadb#######

In [13]:
%pip install chromadb

Note: you may need to restart the kernel to use updated packages.


In [16]:
import chromadb
# Chroma client backed by the local ./chroma_db directory; every collection
# in this notebook is created through it.
client = chromadb.PersistentClient(path="./chroma_db")


In [None]:
####Understanding how collections work in Chroma DB#####

In [19]:
#### Example of collections ###
# get_or_create_collection() reuses "collection3" when it already exists in the
# persistent store, so this cell can be re-run; create_collection() raises when
# the name is already taken.
collection = client.get_or_create_collection(name="collection3")

# Insert sample data (ID, embeddings, metadata).
# upsert() writes new IDs and overwrites existing ones, so records "1"-"3" are
# put back into this known state on every run.
collection.upsert(
    ids=["1", "2", "3"],
    embeddings=[[0.1, 0.2, 0.3], [0.2, 0.3, 0.4], [0.3, 0.4, 0.5]],
    metadatas=[{"name": "Laxmi"}, {"name": "Rathaur"}, {"name": "Test_embedding"}],
)



In [20]:
# Show every collection the client currently holds.
print("Available Collections:", client.list_collections())

Available Collections: [Collection(name=collection1), Collection(name=collection2), Collection(name=collection3)]


In [22]:
# Fetch record "2" by ID. With no include argument the result carries
# metadatas and documents, but embeddings come back as None.
print("Fetching data with ID 2:", collection.get(ids=["2"]))

Fetching data with ID 2: {'ids': ['2'], 'embeddings': None, 'documents': [None], 'uris': None, 'included': ['metadatas', 'documents'], 'data': None, 'metadatas': [{'name': 'Rathaur'}]}


In [23]:
# Fetch record "1" and explicitly request the stored embedding and metadata via include.
print("Fetching data with ID 1:", collection.get(ids=["1"], include=["embeddings", "metadatas"]))

Fetching data with ID 1: {'ids': ['1'], 'embeddings': array([[0.1       , 0.2       , 0.30000001]]), 'documents': None, 'uris': None, 'included': ['embeddings', 'metadatas'], 'data': None, 'metadatas': [{'name': 'Laxmi'}]}


In [24]:
#### Updating record "3": same embedding values, metadata name becomes "RAG" ####
collection.update(
    ids=["3"],
    embeddings=[[0.3, 0.4, 0.5]],
    metadatas=[{"name": "RAG"}],
)

In [25]:
# Re-read record "3" to confirm its metadata name is now "RAG".
print("Fetching data with ID 3:", collection.get(ids=["3"]))

Fetching data with ID 3: {'ids': ['3'], 'embeddings': None, 'documents': [None], 'uris': None, 'included': ['metadatas', 'documents'], 'data': None, 'metadatas': [{'name': 'RAG'}]}


In [26]:
### Deleting the embedding ###
# Remove record "3" from the collection.
collection.delete(ids=["3"])
# Fetching the deleted ID returns empty lists instead of raising.
print("Fetching data with ID 3:", collection.get(ids=["3"]))



Fetching data with ID 3: {'ids': [], 'embeddings': None, 'documents': [], 'uris': None, 'included': ['metadatas', 'documents'], 'data': None, 'metadatas': []}


In [None]:
########End of Collections Overview###############

In [None]:
##### Adding the embeddings to a collection for quick retrieval #####

In [27]:

### Add embeddings to the collection ###

# get_or_create_collection() reuses "collection5" when it already exists in the
# persistent store, so this cell can be re-run; create_collection() raises when
# the name is already taken.
collection2 = client.get_or_create_collection(name="collection5")

# upsert() inserts new IDs and overwrites existing ones, so the records written
# here are reset to the original documents on every run. Records stored under
# other IDs by earlier runs are not touched.
collection2.upsert(
    ids=[str(i) for i in range(len(documents))],  # Unique IDs: "0" .. "len(documents) - 1"
    documents=documents,
    embeddings=embeddings,
)

print("Data added successfully!")


Data added successfully!


In [28]:
##### Search the embeddings for the closest results #####
query_text = "Where is the Eiffel Tower?"
query_embedding = get_gemini_embedding(query_text)

# Ask for the two nearest stored documents together with their distances.
results = collection2.query(
    query_embeddings=[query_embedding],
    include=["documents", "distances"],
    n_results=2,
)

# Index 0 selects the results for the single query embedding sent above.
print("Query:", query_text)
print("Most Similar Result:", results["documents"][0])
print("Distance:", results["distances"][0])

Query: Where is the Eiffel Tower?
Most Similar Result: ['The Eiffel Tower is located in Paris.', 'The Colosseum is in Rome, Italy.']
Distance: [0.32698601484298706, 0.8199817538261414]


In [None]:
####Search done on embeddings ###

In [None]:
### Update an existing embedding and add a new one to the collection ###

In [29]:
# New text for record "0", embedded with the same helper as the original documents.
updated_text = "The Eiffel Tower is one of the most visited landmarks in the world."
updated_embedding = get_gemini_embedding(updated_text)

# Replace the document text and its embedding together for ID "0".
collection2.update(
    embeddings=[updated_embedding],
    documents=[updated_text],
    ids=["0"],
)

print("Data updated successfully!")

Data updated successfully!


In [30]:
# NOTE(review): the sentence carries no unit (330 m?). It is left unchanged
# because editing it would change the embedding and the recorded search output.
tower_ht_text = "Eiffel Tower is 330 tall."

# upsert() rather than add(): re-running this cell overwrites record "6"
# instead of colliding with the copy stored by a previous run.
# Each field is a one-element list, matching the ids list and the other
# add/update calls in this notebook.
collection2.upsert(
    ids=["6"],
    embeddings=[get_gemini_embedding(tower_ht_text)],
    documents=[tower_ht_text],
)

In [31]:
#### Search again, now that record "0" was updated and record "6" was added ####

query_text = "Where is the Eiffel Tower?"
query_embedding = get_gemini_embedding(query_text)

# Same query as before: two nearest documents plus their distances.
results = collection2.query(
    query_embeddings=[query_embedding],
    include=["documents", "distances"],
    n_results=2,
)

# Index 0 selects the results for the single query embedding sent above.
print("Query:", query_text)
print("Most Similar Result:", results["documents"][0])
print("Distance:", results["distances"][0])

Query: Where is the Eiffel Tower?
Most Similar Result: ['Eiffel Tower is 330 tall.', 'The Eiffel Tower is one of the most visited landmarks in the world.']
Distance: [0.5473446846008301, 0.5790139436721802]


In [None]:
####End of search on embeddings ##