In [1]:
import os
import google.generativeai as gen_ai
import chromadb
# from chromadb.config import Settings
from langchain_google_genai import GoogleGenerativeAIEmbeddings


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Initialize ChromaDB
chroma_client = chromadb.PersistentClient(path="local_db")
collection_name = 'new_sentiment'
collection = chroma_client.get_or_create_collection(name=collection_name)


In [3]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Initialize GoogleGenerativeAIEmbedding
embedding_model = GoogleGenerativeAIEmbeddings(
        model='models/embedding-001',
        google_api_key="AIzaSyA_jMrk2zSlIJOUic6v-5zCscQoUUObNOM", 
    )


I0000 00:00:1722863505.887518  107114 config.cc:230] gRPC experiments enabled: call_status_override_on_cancellation, event_engine_dns, event_engine_listener, http2_stats_fix, monitoring_experiment, pick_first_new, trace_record_callops, work_serializer_clears_time_cache


In [4]:
# Load quotes from file
def load_quotes(file_path):
    with open(file_path, 'r') as file:
        quotes = [line.strip() for line in file.readlines() if line.strip()]
    return quotes

quotes = load_quotes('quotes.txt')

## Custom Embedding

In [11]:
import google.generativeai as genai
from chromadb.api.types import Documents, EmbeddingFunction
from typing import List

class GoogleEmbedding(EmbeddingFunction):
    def __init__(self, api_key: str, model_name: str = "models/embedding-001"):
        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel(model_name)

    def __call__(self, texts: Documents) -> List[List[float]]:
        embeddings = []
        for text in texts:
            embedding = self.model.embed_content(
                content=text,
                task_type="retrieval_document",
            )
            embeddings.append(embedding["embedding"])
        return embeddings



In [13]:
# Usage example:
api_key = "AIzaSyA_jMrk2zSlIJOUic6v-5zCscQoUUObNOM"
custom_ef = GoogleEmbedding(api_key)

# Create a Chroma collection with the custom embedding function
import chromadb

client = chromadb.Client()
collection = client.get_or_create_collection(name="my_collection", embedding_function=custom_ef)

# Add documents to the collection
collection.add(
    documents=["This is a sample document", "Another example text"],
    ids=["doc1", "doc2"]
)

# Query the collection
results = collection.query(
    query_texts=["Sample query"],
    n_results=2
)

print(results)

AttributeError: 'GenerativeModel' object has no attribute 'embed_content'

In [7]:
def get_embeddings(sentences, embedding_model):
    embeddings = []
    for sentence in sentences:
        embedding = embedding_model.embed_documents(sentence)
        embeddings.append(embedding)
    return embeddings


In [8]:
ebedded_quotes = get_embeddings(quotes, embedding_model)

In [8]:
ebedded_quotes[0]

[[0.024471649900078773,
  -0.01305623259395361,
  -0.06021108105778694,
  -0.0020621614530682564,
  0.07738668471574783,
  0.023887744173407555,
  0.03119765594601631,
  -0.034495286643505096,
  -0.004100431688129902,
  0.03734447434544563,
  0.000586601032409817,
  0.032102592289447784,
  -0.034391868859529495,
  0.04177775979042053,
  -0.004937065299600363,
  -0.004613683093339205,
  0.02385326661169529,
  0.018540488556027412,
  0.026034804061055183,
  -0.03530442342162132,
  0.014486931264400482,
  0.019572876393795013,
  -0.00313089182600379,
  -0.018203256651759148,
  0.03286775201559067,
  -0.003953968640416861,
  -0.007449539843946695,
  -0.06073935329914093,
  -0.050464123487472534,
  0.008126725442707539,
  -0.05761221796274185,
  0.018344031646847725,
  -0.04777732118964195,
  0.015780625864863396,
  -0.005116638261824846,
  -0.03576986491680145,
  -0.0161280520260334,
  -0.0007497142069041729,
  -0.01135867927223444,
  0.03976156562566757,
  0.01767323724925518,
  -0.018950

In [None]:
# Store embeddings in ChromaDB
for i, (quote, embedding) in enumerate(zip(quotes, ebedded_quotes)):
    collection.add(
        ids=[str(i)],
        documents=[quote],
        metadatas=[{"doc_id": i}]
    )

In [9]:
def store_quotes_in_db(quotes, embedded_quotes):
    ids = []
    documents = []
    embeddings = []

    for idx, (quote, embedding) in enumerate(zip(quotes, embedded_quotes)):
        if not all(isinstance(x, (int, float)) for x in embedding):
            print(f"Embedding at index {idx} has invalid structure: {embedding}")
            raise ValueError(f"Embedding at index {idx} is not a flat list of ints/floats")
        
        documents.append({"quote": quote})
        ids.append(f"id{idx}")
        embeddings.append(embedding)

    collection.add(
        ids=ids, 
        documents=documents, 
        embeddings=embeddings
    )

    print("Data added to ChromaDB collection successfully!")


In [10]:
store_quotes_in_db(quotes, ebedded_quotes)


Embedding at index 0 has invalid structure: [[0.024471649900078773, -0.01305623259395361, -0.06021108105778694, -0.0020621614530682564, 0.07738668471574783, 0.023887744173407555, 0.03119765594601631, -0.034495286643505096, -0.004100431688129902, 0.03734447434544563, 0.000586601032409817, 0.032102592289447784, -0.034391868859529495, 0.04177775979042053, -0.004937065299600363, -0.004613683093339205, 0.02385326661169529, 0.018540488556027412, 0.026034804061055183, -0.03530442342162132, 0.014486931264400482, 0.019572876393795013, -0.00313089182600379, -0.018203256651759148, 0.03286775201559067, -0.003953968640416861, -0.007449539843946695, -0.06073935329914093, -0.050464123487472534, 0.008126725442707539, -0.05761221796274185, 0.018344031646847725, -0.04777732118964195, 0.015780625864863396, -0.005116638261824846, -0.03576986491680145, -0.0161280520260334, -0.0007497142069041729, -0.01135867927223444, 0.03976156562566757, 0.01767323724925518, -0.018950799480080605, -0.04099314659833908, -0

ValueError: Embedding at index 0 is not a flat list of ints/floats

In [2]:
import os
import time
from pydantic import BaseModel
from dotenv import load_dotenv
import google.generativeai as genai
from fastapi import FastAPI, HTTPException
from langchain_community.vectorstores import FAISS
from langchain_google_genai import GoogleGenerativeAIEmbeddings

load_dotenv()
os.getenv("GOOGLE_API_KEY")
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))



app = FastAPI()

# Initialize GoogleGenerativeAIEmbedding
embedding_model = GoogleGenerativeAIEmbeddings(
    model='models/embedding-001',
    # google_api_key="AIzaSyA_jMrk2zSlIJOUic6v-5zCscQoUUObNOM", 
)

In [4]:
res = embedding_model.embed_documents("hi hell")

In [5]:
res

[[0.02902643196284771,
  -0.01834726706147194,
  -0.07433483004570007,
  -0.008478998206555843,
  0.07129383832216263,
  0.01129910908639431,
  0.02824152261018753,
  -0.021319091320037842,
  0.0030015911906957626,
  0.03199739009141922,
  -0.019613204523921013,
  0.0349922701716423,
  -0.03391839936375618,
  0.03200799599289894,
  -0.003912167623639107,
  -0.004259420558810234,
  0.0047530909068882465,
  0.010103954002261162,
  0.04085555300116539,
  -0.02268749102950096,
  0.01576179265975952,
  0.0025741150602698326,
  -0.009621430188417435,
  0.0002575013495516032,
  0.031116677448153496,
  0.00282790744677186,
  -0.01665043644607067,
  -0.056548845022916794,
  -0.05027078092098236,
  -0.0011164176976308227,
  -0.07096169143915176,
  0.0013611565809696913,
  -0.06286182254552841,
  0.015642981976270676,
  -0.0044594332575798035,
  -0.031905051320791245,
  -0.015190220437943935,
  0.0029025250114500523,
  -0.004732009954750538,
  0.038073547184467316,
  0.015366901643574238,
  -0.01

In [6]:
import numpy as np

array = np.array(res)

In [8]:
array.shape

(7, 768)