In [6]:
# !pip install -q -U langchain-chroma

In [10]:
# !pip install langchain-google.genai

In [16]:
from dotenv import load_dotenv
import os
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Load environment variables
load_dotenv()


True

In [14]:
# Get API key
google_api_key = os.getenv("GOOGLE_API_KEY")


In [17]:
embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")


In [20]:
len(embeddings.embed_query("sharyar khan"))

768

In [21]:
from langchain_core.documents import Document


In [42]:
doc1 = Document(
    page_content="LangChain provides a suite of tools for building applications powered by language models.",
    metadata={"author": "Alice", "date": "2025-03-10"}
)

doc2 = Document(
    page_content="Chroma is an open-source embedding database this is best cat i have for efficient retrieval of vector embeddings.",
    metadata={"author": "Bob", "date": "2025-03-11"}
)

doc3 = Document(
    page_content="FastAPI is a modern, fast web framework for building APIs with Python.",
    metadata={"author": "Charlie", "date": "2025-03-12"}
)
documents = [doc1, doc2, doc3]



In [43]:
from langchain_chroma import Chroma

vector_store = Chroma(
    collection_name="example_collection",
    embedding_function=embeddings,
    persist_directory="./chroma_langchain_db",  # Directory to save data locally
)

In [44]:
# Add documents to Chroma
vector_store.add_documents(documents)

print("Documents added successfully!")

Documents added successfully!


In [45]:
query_text = "What is FastAPI?"
# query_embedding = embeddings.embed_query(query_text)

# Retrieve relevant documents
retrieved_docs = vector_store.similarity_search(query_text, k=2)

print("Retrieved Documents:")
for doc in retrieved_docs:
    print(doc.page_content)


Retrieved Documents:
FastAPI is a modern, fast web framework for building APIs with Python.
FastAPI is a modern, fast web framework for building APIs with Python.


In [46]:
import chromadb

# Connect to ChromaDB in the current directory
chroma_client = chromadb.PersistentClient(path="./chroma_langchain_db")

# List all available collections
collections = chroma_client.list_collections()
print(collections)


['example_collection']


In [65]:
from langchain_google_genai import ChatGoogleGenerativeAI


# Initialize the Gemini model correctly
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",  # Use "gemini-pro" or "gemini-pro-vision"
    google_api_key=os.getenv("GOOGLE_API_KEY"),
    temperature=0.5
)

In [62]:
# this is runable function that run and release
from langchain_core.runnables import RunnableLambda

In [None]:

retriver = RunnableLambda(vector_store.similarity_search).bind(k=2)


In [56]:
retriver.batch(["chroma"])[0][0]

Document(id='e806a2fc-b0ea-4d7f-a0db-7ff4836049a5', metadata={'author': 'Bob', 'date': '2025-03-11'}, page_content='Chroma is an open-source embedding database designed for efficient retrieval of vector embeddings.')

In [57]:
from langchain_core.prompts import  ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

In [58]:
message = """ 
    answere the question using the provided context

    {question}

    context:
    {context}
    """


In [59]:
prompt = ChatPromptTemplate.from_messages(["human",message])


## RAG

In [66]:
rag_chain = {"context":retriver, "question":RunnablePassthrough()} | prompt | llm

In [69]:
response = rag_chain.invoke("tell me about cat")
response.content

'Based on the provided context, the documents mention "cat" in the context of Chroma, an open-source embedding database. One document states "this is best cat i have for efficient retrieval of vector embeddings."'