In [1]:
# Install the RAG toolkit
!pip install -qU langchain langchain-core langchain-groq langchain-huggingface langchain-community faiss-cpu sentence-transformers python-dotenv

# Verify installation
import langchain
print(f"✅ LangChain version: {langchain.__version__}")
print("✅ All RAG packages installed!")
print()
print("Your toolkit:")
print("   - LangChain + Groq     --> LLM")
print("   - HuggingFace          --> Embeddings (local)")
print("   - FAISS                --> Vector store (local)")

✅ LangChain version: 1.2.10
✅ All RAG packages installed!

Your toolkit:
   - LangChain + Groq     --> LLM
   - HuggingFace          --> Embeddings (local)
   - FAISS                --> Vector store (local)



[notice] A new release of pip is available: 25.3 -> 26.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import os
from dotenv import load_dotenv
from langchain_groq import ChatGroq

load_dotenv()
groq_api_key = os.getenv("GROQ_API_KEY")

if not groq_api_key:
    print("❌ API key not found! Create .env with: GROQ_API_KEY=your_key")
else:
    print(f"✅ Groq API key loaded: {groq_api_key[:12]}...")

# Initialize LLM — temperature 0 for factual RAG answers
llm = ChatGroq(
    model="llama-3.3-70b-versatile",
    temperature=0,
    groq_api_key=groq_api_key
)

# Quick test
response = llm.invoke("Say 'RAG pipeline ready!' in exactly 3 words.")
print(f"AI: {response.content}")
print()
print("LLM ready. Let's build RAG.")

✅ Groq API key loaded: gsk_Q08MI36d...
AI: RAG pipeline ready

LLM ready. Let's build RAG.


In [3]:
from langchain_core.documents import Document

# Simulate a company knowledge base
documents = [
    # HR Policy
    Document(
        page_content="Annual leave policy: All full-time employees receive 24 days of paid annual leave per year. Leave accrues at 2 days per month. Unused leave up to 10 days can be carried forward to the next year. Leave beyond 10 days expires on March 31st. Employees must submit leave requests at least 3 business days in advance for planned leave. Emergency leave can be applied retroactively within 2 business days. Work from home policy: Employees can work from home up to 3 days per week. Monday and Thursday are mandatory in-office days for team collaboration. New employees in their first 90 days must work from office full-time. Reimbursement policy: Business travel expenses are reimbursed within 15 business days. Hotels are capped at Rs 5000 per night for metro cities. Meal allowance is Rs 1500 per day during travel.",
        metadata={"source": "hr_policies.pdf", "category": "HR"}
    ),
    # Tech Documentation
    Document(
        page_content="Tech stack overview: Our backend is built with Python 3.11 and FastAPI. We use PostgreSQL 15 as our primary database with Redis for caching. The frontend is React 18 with TypeScript. Deployment is through Docker containers on AWS ECS with auto-scaling. CI/CD pipelines run on GitHub Actions. We follow a trunk-based development model with feature flags for gradual rollouts. Code review guidelines: All pull requests require at least 2 approvals before merging. Reviews should be completed within 24 hours. No PR should exceed 400 lines of changed code. All tests must pass in CI before merging.",
        metadata={"source": "tech_docs.md", "category": "Engineering"}
    ),
    # Security
    Document(
        page_content="Security protocols: All API keys and secrets must be stored in AWS Secrets Manager — never in code or environment files in production. Two-factor authentication is mandatory for all company accounts. Passwords must be at least 16 characters. We conduct quarterly security audits and annual penetration testing. Any suspected security breach must be reported to security@company.com within 1 hour.",
        metadata={"source": "security_handbook.pdf", "category": "Engineering"}
    ),
    # Product Information
    Document(
        page_content="Product pricing tiers: The Starter plan costs Rs 999 per month and includes 5 users, 10GB storage, and email support. The Professional plan costs Rs 2999 per month with 25 users, 100GB storage, priority support, and API access. The Enterprise plan is custom-priced and includes unlimited users, unlimited storage, dedicated account manager, SSO integration, and 99.9% SLA guarantee. Refund policy: Customers can request a full refund within 14 days of purchase. After 14 days, a prorated refund is available for annual subscriptions only.",
        metadata={"source": "product_info.pdf", "category": "Product"}
    ),
    # Company Culture
    Document(
        page_content="Company values: We operate on five core values. First, Customer Obsession — every decision starts with the customer. Second, Bias for Action — we prefer moving fast and iterating. Third, Radical Transparency — we share company financials and challenges with all employees quarterly. Fourth, Ownership Mentality — everyone acts like a founder. Fifth, Continuous Learning — every employee gets Rs 50000 per year for courses, books, and conferences. Team rituals: Monday kickoff at 10 AM, Wednesday lunch-and-learn, Friday demos at 4 PM. Monthly town halls with the CEO. Annual hackathon in December.",
        metadata={"source": "culture_handbook.pdf", "category": "Culture"}
    ),
]

print(f"Loaded {len(documents)} documents")
print(f"Categories: {set(d.metadata['category'] for d in documents)}")
print(f"Sources: {set(d.metadata['source'] for d in documents)}")
print()
print("Sample document:")
print(f"   Content: {documents[0].page_content[:80]}...")
print(f"   Metadata: {documents[0].metadata}")

Loaded 5 documents
Categories: {'Product', 'Engineering', 'Culture', 'HR'}
Sources: {'product_info.pdf', 'culture_handbook.pdf', 'security_handbook.pdf', 'hr_policies.pdf', 'tech_docs.md'}

Sample document:
   Content: Annual leave policy: All full-time employees receive 24 days of paid annual leav...
   Metadata: {'source': 'hr_policies.pdf', 'category': 'HR'}


In [4]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
    length_function=len,
    separators=["\n\n", "\n", ". ", " ", ""]
)

chunks = text_splitter.split_documents(documents)

print(f"Original documents: {len(documents)}")
print(f"After splitting:    {len(chunks)} chunks")
print(f"Avg chunk size:     {sum(len(c.page_content) for c in chunks) // len(chunks)} characters")
print()
print("=" * 60)
print()
print("Sample chunks (first 3):")
for i, chunk in enumerate(chunks[:3]):
    print(f"\n--- Chunk {i+1} [{chunk.metadata['source']}] ---")
    print(f"{chunk.page_content[:150]}...")
    print(f"Length: {len(chunk.page_content)} chars")

Original documents: 5
After splitting:    9 chunks
Avg chunk size:     326 characters


Sample chunks (first 3):

--- Chunk 1 [hr_policies.pdf] ---
Annual leave policy: All full-time employees receive 24 days of paid annual leave per year. Leave accrues at 2 days per month. Unused leave up to 10 d...
Length: 473 chars

--- Chunk 2 [hr_policies.pdf] ---
. Monday and Thursday are mandatory in-office days for team collaboration. New employees in their first 90 days must work from office full-time. Reimb...
Length: 337 chars

--- Chunk 3 [tech_docs.md] ---
Tech stack overview: Our backend is built with Python 3.11 and FastAPI. We use PostgreSQL 15 as our primary database with Redis for caching. The front...
Length: 460 chars


In [5]:
from langchain_huggingface import HuggingFaceEmbeddings
import numpy as np

embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={"device": "cpu"}
)

print("✅ Embedding model loaded (runs locally!)")

sample = embeddings.embed_query("What is the leave policy?")
print(f"Embedding dimensions: {len(sample)}")
print(f"First 10 values: {[round(x, 4) for x in sample[:10]]}")

print()
print("=" * 60)
print()
print("Semantic Similarity Demo:")

sentences = [
    "How many vacation days do I get?",
    "What is the annual leave policy?",
    "How do I deploy a Docker container?",
]

vecs = [embeddings.embed_query(s) for s in sentences]

from numpy import dot
from numpy.linalg import norm

def cosine_sim(a, b):
    return dot(a, b) / (norm(a) * norm(b))

print(f"\n   '{sentences[0]}'")
print(f"   vs '{sentences[1]}'")
print(f"   Similarity: {cosine_sim(vecs[0], vecs[1]):.4f}  <-- HIGH (same topic!)")

print(f"\n   '{sentences[0]}'")
print(f"   vs '{sentences[2]}'")
print(f"   Similarity: {cosine_sim(vecs[0], vecs[2]):.4f}  <-- LOW (different topics)")

print()
print("✅ Embeddings capture MEANING, not just keywords!")

✅ Embedding model loaded (runs locally!)
Embedding dimensions: 384
First 10 values: [0.0538, 0.0522, 0.0, 0.036, 0.1113, 0.117, 0.037, -0.1007, -0.0746, 0.006]


Semantic Similarity Demo:

   'How many vacation days do I get?'
   vs 'What is the annual leave policy?'
   Similarity: 0.4222  <-- HIGH (same topic!)

   'How many vacation days do I get?'
   vs 'How do I deploy a Docker container?'
   Similarity: 0.0215  <-- LOW (different topics)

✅ Embeddings capture MEANING, not just keywords!


In [6]:
from langchain_community.vectorstores import FAISS

vectorstore = FAISS.from_documents(chunks, embeddings)

print(f"✅ Vector store built with {len(chunks)} chunks")
print()
print("=" * 60)
print()
print("Testing similarity search:")

query = "How many vacation days do I get per year?"
print(f"\nQuery: '{query}'")

results = vectorstore.similarity_search(query, k=2)

for i, doc in enumerate(results):
    print(f"\nResult {i+1} [Source: {doc.metadata['source']}]:")
    print(f"   {doc.page_content[:200]}...")

print()
print("✅ Found the leave policy — even though we said 'vacation' not 'leave'!")
print("   That's semantic search. Meaning, not keywords.")

✅ Vector store built with 9 chunks


Testing similarity search:

Query: 'How many vacation days do I get per year?'

Result 1 [Source: hr_policies.pdf]:
   Annual leave policy: All full-time employees receive 24 days of paid annual leave per year. Leave accrues at 2 days per month. Unused leave up to 10 days can be carried forward to the next year. Leave...

Result 2 [Source: hr_policies.pdf]:
   . Monday and Thursday are mandatory in-office days for team collaboration. New employees in their first 90 days must work from office full-time. Reimbursement policy: Business travel expenses are reim...

✅ Found the leave policy — even though we said 'vacation' not 'leave'!
   That's semantic search. Meaning, not keywords.


In [7]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

def format_docs(docs):
    formatted = []
    for doc in docs:
        source = doc.metadata.get('source', 'unknown')
        formatted.append(f"[Source: {source}]\n{doc.page_content}")
    return "\n\n".join(formatted)

rag_prompt = ChatPromptTemplate.from_messages([
    ("system", """You are a helpful company assistant. Answer questions using ONLY the provided context.

Rules:
- Answer based ONLY on the context below
- If the answer is not in the context, say: "I don't have that information in my knowledge base."
- Always cite the source document
- Be concise and specific"""),
    ("human", """Context:
{context}

Question: {question}

Answer:""")
])

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | rag_prompt
    | llm
    | StrOutputParser()
)

print("✅ RAG chain built!")
print()
print("Pipeline: Question --> Retrieve --> Prompt --> LLM --> Answer")
print()
print("Ready to answer questions from our knowledge base.")

✅ RAG chain built!

Pipeline: Question --> Retrieve --> Prompt --> LLM --> Answer

Ready to answer questions from our knowledge base.


In [8]:
test_questions = [
    "How many days of annual leave do employees get?",
    "Which days are mandatory to come to office?",
    "What's included in the Professional pricing plan?",
    "When is the CEO's birthday?",
    "How much learning budget does each employee get?",
]

for q in test_questions:
    print(f"Q: {q}")
    answer = rag_chain.invoke(q)
    print(f"A: {answer}")
    print("-" * 60)

Q: How many days of annual leave do employees get?
A: According to the hr_policies.pdf, full-time employees receive 24 days of paid annual leave per year.
------------------------------------------------------------
Q: Which days are mandatory to come to office?
A: Monday and Thursday are mandatory in-office days for team collaboration. [Source: hr_policies.pdf]
------------------------------------------------------------
Q: What's included in the Professional pricing plan?
A: The Professional plan includes 25 users, 100GB storage, priority support, and API access. [Source: product_info.pdf]
------------------------------------------------------------
Q: When is the CEO's birthday?
A: I don't have that information in my knowledge base. [Source: culture_handbook.pdf, product_info.pdf]
------------------------------------------------------------
Q: How much learning budget does each employee get?
A: Each employee gets Rs 50,000 per year for courses, books, and conferences [Source: cultur

In [9]:
def rag_with_sources(question):
    docs = retriever.invoke(question)
    context = format_docs(docs)
    prompt_messages = rag_prompt.format_messages(context=context, question=question)
    answer = llm.invoke(prompt_messages).content
    sources = list(set(f"{d.metadata['source']} ({d.metadata['category']})" for d in docs))
    return answer, sources, docs

question = "What is the code review process and how many approvals are needed?"
answer, sources, docs = rag_with_sources(question)

print(f"Q: {question}")
print(f"\nAnswer:\n{answer}")
print(f"\nSources used:")
for s in sources:
    print(f"   - {s}")

print()
print("=" * 60)

question2 = "What happens to my unused leave days?"
answer2, sources2, _ = rag_with_sources(question2)
print(f"\nQ: {question2}")
print(f"\nAnswer:\n{answer2}")
print(f"\nSources used:")
for s in sources2:
    print(f"   - {s}")

Q: What is the code review process and how many approvals are needed?

Answer:
According to [Source: tech_docs.md], all pull requests require at least 2 approvals before merging. Reviews should be completed within 24 hours.

Sources used:
   - product_info.pdf (Product)
   - tech_docs.md (Engineering)


Q: What happens to my unused leave days?

Answer:
According to the hr_policies.pdf, unused leave up to 10 days can be carried forward to the next year, and leave beyond 10 days expires on March 31st. [Source: hr_policies.pdf]

Sources used:
   - product_info.pdf (Product)
   - hr_policies.pdf (HR)


In [10]:
def interactive_rag():
    print()
    print("=" * 60)
    print("COMPANY KNOWLEDGE BASE — RAG CHATBOT")
    print("=" * 60)
    print()
    print("Ask anything about company policies, tech, product, or culture.")
    print("Type 'quit' to exit.")

    while True:
        user_input = input("\nYou: ").strip()

        if user_input.lower() in ['quit', 'exit', 'bye']:
            print("\nGoodbye!")
            break

        if not user_input:
            continue

        try:
            answer, sources, _ = rag_with_sources(user_input)
            print(f"\nAnswer: {answer}")
            print(f"\nSources: {', '.join(sources)}")
        except Exception as e:
            print(f"\nError: {str(e)}")
            break

print("Starting interactive RAG demo...")
interactive_rag()

Starting interactive RAG demo...

COMPANY KNOWLEDGE BASE — RAG CHATBOT

Ask anything about company policies, tech, product, or culture.
Type 'quit' to exit.

Answer: According to the hr_policies.pdf, the annual leave policy is as follows: 
- All full-time employees receive 24 days of paid annual leave per year.
- Leave accrues at 2 days per month.
- Unused leave up to 10 days can be carried forward to the next year.
- Leave beyond 10 days expires on March 31st.
- Employees must submit leave requests at least 3 business days in advance for planned leave.
- Emergency leave can be applied retroactively within 2 business days. [Source: hr_policies.pdf]

Sources: hr_policies.pdf (HR), culture_handbook.pdf (Culture)

Goodbye!
