This file contains the implementation of the RAG (retrieval-augmented generation) system using LangChain.

In [None]:
pip install langchain langchain-community langchain-google-genai langchain-chroma python-dotenv pypdf

In [None]:
# Placeholder only: do not put a real key here or commit one to version control.
# NOTE(review): this line just binds a Python variable; the cells below read the
# key from the process environment (os.getenv after load_dotenv()), so the real
# value presumably belongs in the .env file as the trailing comment says.
GOOGLE_API_KEY="your-api-key"  # Set your Google API key in .env file

In [None]:
from dotenv import load_dotenv
from langchain_community.document_loaders import CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings
import os

# Ingestion step: read every CSV in a directory, split the rows into
# overlapping text chunks, embed them with Google Generative AI embeddings and
# store the vectors in a local Chroma database under "chroma_db".

# Load environment variables from a local .env file (if one exists).
load_dotenv()

# Fail fast with an actionable message when the key is missing. The previous
# `os.environ["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY")` was a no-op when
# the key was set and raised an opaque TypeError when it was not, because
# environment values must be str, never None.
if not os.getenv("GOOGLE_API_KEY"):
    raise RuntimeError(
        "GOOGLE_API_KEY is not set. Add it to your .env file or export it "
        "before running this cell."
    )

# Directory that holds the CSV files to index. The RAG_CSV_DIR environment
# variable overrides the original machine-specific default so the notebook can
# run on other machines without editing this cell.
csv_dir = os.getenv(
    "RAG_CSV_DIR",
    "/Users/subashkannan/Desktop/agentic-learning-main/day_2/task_1/data",
)

# Load CSV documents. os.listdir() returns names in arbitrary order, so sort
# them to make the ingestion order reproducible between runs.
documents = []
for filename in sorted(os.listdir(csv_dir)):
    if filename.lower().endswith(".csv"):
        file_path = os.path.join(csv_dir, filename)
        loader = CSVLoader(file_path=file_path)
        documents.extend(loader.load())

# Split the documents into chunks of up to 500 characters with a 100-character
# overlap between neighbouring chunks.
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
chunks = splitter.split_documents(documents)

# Initialize the embedding model
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# Only build the vector store when there is something to index.
if not chunks:
    print("⚠️ No document chunks found. Please check your CSV file.")
else:
    vectordb = Chroma.from_documents(
        documents=chunks,
        embedding=embeddings,
        persist_directory="chroma_db",
    )
    # NOTE(review): with Chroma >= 0.4 data appears to be persisted
    # automatically when persist_directory is given, and persist() is
    # deprecated in langchain_community - confirm against the installed
    # versions before removing this call.
    vectordb.persist()
    print("✅ CSV Embeddings created and stored.")


In [None]:
import os
from dotenv import load_dotenv
from langchain_chroma import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain.chains import RetrievalQA

# Query step: reopen the persisted Chroma store, wire it to a Gemini chat model
# through a RetrievalQA chain, and answer one sample question.

# Pull the API key (and any other settings) from the .env file.
load_dotenv()

# Re-open the on-disk vector store using the same embedding model name that
# this cell passes below, so queries are embedded with "models/embedding-001".
vectordb = Chroma(
    persist_directory="chroma_db",
    embedding_function=GoogleGenerativeAIEmbeddings(
        model="models/embedding-001",
    ),
)

# Retriever that returns the 3 nearest chunks for each query.
retriever = vectordb.as_retriever(search_kwargs=dict(k=3))

# Gemini chat model; temperature is set to 0.
llm = ChatGoogleGenerativeAI(temperature=0, model="gemini-2.0-flash")

# Retrieval-augmented QA chain that also hands back the retrieved documents.
qa_chain = RetrievalQA.from_chain_type(
    retriever=retriever,
    llm=llm,
    return_source_documents=True,
)

# Run a sample question through the chain.
query = "What are the common issues reported in iPhone reviews?"
result = qa_chain.invoke(dict(query=query))

# Show the generated answer, then the documents it was grounded on.
answer_text = result["result"]
source_docs = result["source_documents"]
print("\n🧠 Answer:\n", answer_text)
print("\n📚 Sources:\n", source_docs)
