In [6]:
# ============================================================================
# LangChain RAG Tool Setup - Cell 1: Imports and LLM Configuration
# ============================================================================

# ============================================================================
# Core Imports
# ============================================================================
import os
from dotenv import load_dotenv

# ============================================================================
# Text Processing & Embeddings
# ============================================================================
from langchain_text_splitters import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings

# ============================================================================
# Vector Stores & Retrieval
# ============================================================================
from langchain_community.vectorstores import FAISS
from langchain_core.tools import create_retriever_tool

# ============================================================================
# Language Models (LLMs)
# ============================================================================
from langchain_openai import ChatOpenAI
from langchain_google_genai import ChatGoogleGenerativeAI

# ============================================================================
# Prompts & Agents
# ============================================================================
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
from langchain.agents import create_agent

# ============================================================================
# Memory & Conversation (for RAG applications)
# ============================================================================
from langgraph.checkpoint.memory import InMemorySaver

# Load environment variables from .env file
load_dotenv()

# ============================================================================
# API Key Configuration
# ============================================================================
# Retrieve API keys for Gemini and OpenAI from environment variables
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Validate that both API keys are present
assert GEMINI_API_KEY, "GEMINI_API_KEY is missing. Check your .env file."
assert OPENAI_API_KEY, "OPENAI_API_KEY is missing. Check your .env file."

# Display partial keys to confirm successful loading (for security, only show first 6 chars)
print("Key loaded:", GEMINI_API_KEY[:6] + "..." )
print("OpenAI Key loaded:", OPENAI_API_KEY[:6] + "...")

# ============================================================================
# Initialize Language Models (LLMs)
# ============================================================================
# Initialize Google's Gemini LLM with temperature=0 for deterministic responses
gemini_llm = ChatGoogleGenerativeAI(
    temperature=0,  # Deterministic output (no randomness)
    model="gemini-2.5-flash",  # Using the faster Gemini model
    google_api_key=GEMINI_API_KEY
)

# Initialize OpenAI's GPT-3.5-turbo LLM
openai_llm = ChatOpenAI(
    temperature=0,  # Deterministic output
    model="gpt-3.5-turbo",
    openai_api_key=OPENAI_API_KEY
)

# ============================================================================
# Create a Simple Prompt Template
# ============================================================================
# Define a reusable prompt template for company name generation
prompt = PromptTemplate(
    input_variables=["product"],  # Variable to be filled dynamically
    template="Give me a creative name for a company that makes {product}?",
)

# ============================================================================
# Example 1: LLM Chain (Prompt → LLM)
# ============================================================================
# Create a chain: prompt template piped to OpenAI LLM
chain = prompt | openai_llm

# Execute the chain with a specific product type
response = chain.invoke({"product": "smart home devices"})

# Display the generated company name
print("OpenAI response:", response)

Key loaded: AIzaSy...
OpenAI Key loaded: sk-pro...
OpenAI response: content='"IntelliHome Innovations"' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 7, 'prompt_tokens': 21, 'total_tokens': 28, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'id': 'chatcmpl-CrZD9o5uLDwCLQMKBoCKYrVxewGC9', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None} id='lc_run--019b626b-811f-7fe1-b6eb-b29f74da2d1c-0' usage_metadata={'input_tokens': 21, 'output_tokens': 7, 'total_tokens': 28, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}


In [7]:
# ============================================================================
# LangChain RAG Tool Setup - Cell 2: FAISS Vector Database & Retriever
# ============================================================================

# ============================================================================
# Step 1: Load Document
# ============================================================================
# Read the sample.txt file containing text data to be indexed
print("Loading document from sample.txt...")
with open("sample.txt", "r", encoding="utf-8") as f:
    text_data = f.read()
print("✓ Document loaded successfully\n")

# ============================================================================
# Step 2: Split Text into Chunks
# ============================================================================
# Initialize text splitter to break documents into manageable chunks
# - separator="\n": Split on newline characters
# - chunk_size=300: Each chunk contains ~300 characters
# - chunk_overlap=50: Overlap for context preservation
print("Splitting text into chunks...")
splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=300,
    chunk_overlap=50
)
# Create document objects from text
docs = splitter.create_documents([text_data])
print(f"✓ Created {len(docs)} document chunks\n")

# ============================================================================
# Step 3: Create Embeddings and FAISS Vector Store
# ============================================================================
# Initialize OpenAI embeddings (converts text to vectors)
print("Creating embeddings and FAISS vector store...")
embeddings = OpenAIEmbeddings()

# Create FAISS vector database from documents
vectorstore = FAISS.from_documents(docs, embeddings)
print("✓ FAISS vector store created successfully\n")

# ============================================================================
# Step 4: Create Retriever
# ============================================================================
# Create a retriever that fetches relevant documents
# search_kwargs={"k": 4} means return top 4 most relevant documents
print("Creating retriever...")
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})
print("✓ Retriever configured (returns top 4 documents)\n")

# ============================================================================
# Step 5: Convert Retriever to RAG Tool
# ============================================================================
# Wrap the retriever as a tool for the agent to use
print("Creating RAG tool...")
rag_tool = create_retriever_tool(
    retriever,
    name="rag_qa_tool",
    description="Use this tool to answer questions about LangChain using the sample.txt document"
)
print("✓ RAG tool created successfully\n")

# ============================================================================
# Step 6: Create Agent with RAG Tool
# ============================================================================
# Create modern agent that can use the RAG tool
print("Initializing agent with RAG tool...")
agent = create_agent(
    openai_llm,  # Use OpenAI LLM
    tools=[rag_tool],  # Provide RAG tool
    system_prompt="You are a helpful assistant. Use the RAG tool when needed to answer questions accurately based on the provided documents."
)
print("✓ Agent created successfully\n")

# ============================================================================
# Step 7: Test the RAG Agent
# ============================================================================
print("="*80)
print("TESTING RAG AGENT WITH FAISS VECTOR STORE")
print("="*80 + "\n")

# Test Query 1
query1 = "What is LangChain?"
print(f"Query: {query1}")
result1 = agent.invoke({
    "messages": [{"role": "user", "content": query1}]
})
print(f"Answer: {result1['messages'][-1].content}\n")

# Test Query 2
query2 = "Why do we use LangChain?"
print(f"Query: {query2}")
result2 = agent.invoke({
    "messages": [{"role": "user", "content": query2}]
})
print(f"Answer: {result2['messages'][-1].content}\n")

print("="*80)

Loading document from sample.txt...
✓ Document loaded successfully

Splitting text into chunks...
✓ Created 1 document chunks

Creating embeddings and FAISS vector store...
✓ FAISS vector store created successfully

Creating retriever...
✓ Retriever configured (returns top 4 documents)

Creating RAG tool...
✓ RAG tool created successfully

Initializing agent with RAG tool...
✓ Agent created successfully

TESTING RAG AGENT WITH FAISS VECTOR STORE

Query: What is LangChain?
Answer: LangChain is a framework for building applications with Large Language Models (LLMs). It was created by Harrison Chase and supports RAG, agents, memory, tools, and more. LangChain is commonly used in chatbots, document Q&A, and AI workflow.

Query: Why do we use LangChain?
Answer: LangChain is used as a framework for building applications with Large Language Models (LLMs). It was created by Harrison Chase and supports various features such as RAG, agents, memory, tools, and more. LangChain is commonly used in 