In [None]:
# RAG Implementation - Step by Step

This notebook teaches RAG (Retrieval-Augmented Generation) fundamentals through practical implementation.

**What is RAG?**
RAG combines retrieval of relevant documents with language model generation to produce answers grounded in actual data.

**Steps we'll follow:**
1. Install and import dependencies
2. Set up the vector database
3. Load and chunk documents
4. Create retriever
5. Build RAG chain
6. Test with queries

In [None]:
## Step 1: Install Required Packages

In [None]:
import subprocess
import sys

# Packages imported by the later cells of this notebook.
# `langchain-text-splitters` is listed explicitly because the notebook imports
# `langchain_text_splitters` directly rather than relying on it arriving as a
# transitive dependency.
packages = [
    "langchain",
    "langchain-chroma",
    "langchain-openai",
    "langchain-core",
    "langchain-text-splitters",
    "python-dotenv",
    "chromadb",
]

print("Installing packages...\n")

# Track failures so the final status line reflects what actually happened.
failed_packages = []

for package in packages:
    try:
        # Use the kernel's own interpreter so packages land in the environment
        # this notebook is running in.
        subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", package])
    except (subprocess.CalledProcessError, OSError) as exc:
        # A non-zero pip exit (or pip not being runnable at all) is a real
        # failure, not an "already installed" case: pip exits 0 when the
        # requirement is already satisfied.
        failed_packages.append(package)
        print(f"✗ {package} (install failed: {exc})")
    else:
        print(f"✓ {package}")

if failed_packages:
    print(f"\n✗ Setup incomplete; could not install: {', '.join(failed_packages)}")
else:
    print("\n✓ Setup complete!")

## Step 2: Import Libraries

In [None]:
import os

from dotenv import load_dotenv
from langchain_chroma import Chroma
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_text_splitters import CharacterTextSplitter

# Load environment variables from a local .env file (if one exists) into the
# process environment.
load_dotenv()

# Fail fast with an actionable message: the OpenAI clients created in later
# cells are constructed without an explicit key, so they depend on this
# variable being present in the environment.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to a .env file next to this notebook "
        "or export it in your environment before continuing."
    )

print("✓ All imports successful")

## Step 3: Initialize Embeddings Model

Embeddings convert text into numerical vectors that capture semantic meaning.
We'll use OpenAI's text-embedding-3-large model.

In [None]:
# Single source of truth for the embedding model name, so the constructor call
# and the status output below cannot drift apart.
EMBEDDING_MODEL = "text-embedding-3-large"

# Embedding client used by the vector store to turn text into vectors.
embeddings = OpenAIEmbeddings(model=EMBEDDING_MODEL)

print("✓ Embeddings model initialized")
print(f"  Model: {EMBEDDING_MODEL}")
print(f"  Type: {type(embeddings).__name__}")

## Step 4: Create Vector Store (ChromaDB)

ChromaDB is a lightweight vector database. It will store our document embeddings
and allow us to search for similar documents semantically.

In [None]:
# Name of the Chroma collection; defined once so the status output always
# matches the collection that was actually created.
COLLECTION_NAME = "rag_collection"

# No persistence directory is passed here.
# NOTE(review): presumably this makes the store in-memory for the lifetime of
# the kernel — confirm against the langchain-chroma documentation.
vector_store = Chroma(
    collection_name=COLLECTION_NAME,
    embedding_function=embeddings,
)

print("✓ Vector store created")
print("  Database: ChromaDB")
print(f"  Collection: {COLLECTION_NAME}")

## Step 5: Load Document

Read the document that we want to use for our RAG system.

In [None]:
# Read the whole source document into memory.
# The encoding is given explicitly: without it, open() falls back to the
# platform's locale encoding, which can raise or mis-decode non-ASCII
# characters on systems whose default is not UTF-8.
with open("2024_state_of_the_union.txt", encoding="utf-8") as source_file:
    document_text = source_file.read()

print("✓ Document loaded")
print(f"  Total characters: {len(document_text):,}")
# Short preview only; never dump the full document into the cell output.
print(f"  First 200 chars: {document_text[:200]}...")

## Step 6: Split Document into Chunks

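Large documents are split into smaller chunks so that each piece can be embedded and retrieved on its own, and so that only the relevant passages are passed to the LLM.
We use CharacterTextSplitter to create overlapping chunks; the overlap keeps text that straddles a chunk boundary available in both neighbouring chunks.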

In [None]:
# Chunking parameters, defined once so the splitter configuration and the
# status output below always agree.
CHUNK_SIZE = 1000      # target chunk length, measured by len() (characters)
CHUNK_OVERLAP = 200    # characters shared between consecutive chunks

text_splitter = CharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
    length_function=len,
)

# Split the document into chunks; create_documents takes a list of texts.
chunks = text_splitter.create_documents([document_text])

print("✓ Document split into chunks")
print(f"  Number of chunks: {len(chunks)}")
print(f"  Chunk size: {CHUNK_SIZE} characters")
print(f"  Overlap: {CHUNK_OVERLAP} characters")

## Step 7: Add Chunks to Vector Store

Each chunk will be converted to an embedding and stored in the vector database.

In [None]:
# Give every chunk a stable, position-based ID. Without explicit IDs each run
# of this cell gets freshly generated ones, so re-running it in the same
# kernel can store the same chunks again and make retrieval return duplicates.
# With stable IDs a re-run overwrites the existing entries instead
# (langchain-chroma writes documents with an upsert keyed by ID).
chunk_ids = [f"chunk-{index}" for index in range(len(chunks))]

document_ids = vector_store.add_documents(chunks, ids=chunk_ids)

print("✓ Chunks added to vector store")
print(f"  Total documents stored: {len(document_ids)}")
print("  Status: Ready for retrieval")

## Step 8: Initialize the Language Model (LLM)

We'll use GPT-4o-mini from OpenAI for generating answers.

In [None]:
# Chat model name, defined once so the constructor call and the status output
# cannot drift apart.
LLM_MODEL = "gpt-4o-mini"

# temperature=0 asks for the least random sampling so that answers are as
# repeatable as possible across "Restart & Run All" executions; the prompt
# used later asks for answers grounded in the retrieved context only, where
# creative variation is not wanted.
llm = ChatOpenAI(model=LLM_MODEL, temperature=0)

print("✓ LLM initialized")
print(f"  Model: {LLM_MODEL}")

## Step 9: Create a Retriever

A retriever searches the vector store for documents similar to the query.

In [None]:
# Wrap the vector store in a retriever using the library's default search
# settings (no search arguments are overridden here).
retriever = vector_store.as_retriever()

# Status output: two lines emitted with a single call.
print(
    "✓ Retriever created",
    "  Function: Searches vector store for relevant documents",
    sep="\n",
)

## Step 10: Create a Prompt Template

The prompt tells the LLM how to use the retrieved context to answer the query.

In [None]:
# Prompt text assembled line by line. The two placeholders, {context} and
# {query}, are filled in when the template is formatted.
# Note: the first line intentionally keeps its trailing space before the
# newline so the resulting string is unchanged.
prompt_template = (
    "Answer the question based only on the provided context. \n"
    'If the answer is not in the context, say "I don\'t know based on the provided context."\n'
    "\n"
    "Context: {context}\n"
    "\n"
    "Question: {query}\n"
    "\n"
    "Answer:"
)

prompt = PromptTemplate.from_template(prompt_template)

# Plain (non-f) string on purpose: the braces are printed literally.
print("✓ Prompt template created", "  Template variables: {context}, {query}", sep="\n")

## Step 11: Create a Function to Format Documents

This function converts retrieved documents into a single string for the prompt.

In [None]:
def format_docs(docs):
    """Join the text of several documents into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values in their original order, separated by a
        blank line (``"\\n\\n"``). An empty iterable yields an empty string.
    """
    separator = "\n\n"
    page_texts = [document.page_content for document in docs]
    return separator.join(page_texts)


print("✓ Document formatter created")

## Step 12: Build the Complete RAG Chain

Combine all components into a single chain:
1. Retriever fetches relevant documents
2. Formatter converts them to text
3. Prompt template fills in context and query
4. LLM generates the answer
5. Output parser extracts the text

In [None]:
# Step 1-2: look up documents for the incoming query and flatten them to text.
retrieve_context = retriever | format_docs

# Inputs for the prompt: the retrieved context plus the original query, which
# is forwarded unchanged by RunnablePassthrough.
chain_inputs = {"context": retrieve_context, "query": RunnablePassthrough()}

# Step 3-5: fill the prompt, call the LLM, and parse the reply into a string.
rag_chain = chain_inputs | prompt | llm | StrOutputParser()

print(
    "✓ RAG Chain built successfully!",
    "\nChain flow:",
    "  Query → Retriever → Format → Prompt → LLM → Output",
    sep="\n",
)

## Step 13: Test RAG - Query 1 (Within Document)

Test with a question related to the document content.

In [None]:
# Question expected to be answerable from the loaded document.
query1 = "Who invaded Ukraine according to the 2024 State of the Union?"

# Reusable 70-character rule for framing the output.
banner = "=" * 70

print(banner, "TEST QUERY 1", banner, sep="\n")
print(f"\nQuestion: {query1}\n")

# Run the full pipeline: the query string is the chain's only input.
answer1 = rag_chain.invoke(query1)

print("Answer:", answer1, sep="\n")
print("\n" + banner)