In [7]:
import os

from dotenv import load_dotenv

# Pull the variables defined in a .env file into the process environment.
load_dotenv()

# The OpenAI key is read once here and reused by the helper functions in the
# cells below; stop right away with a clear message when it is missing.
openai_api_key = os.environ.get("OPENAI_API_KEY")
if not openai_api_key:
    raise ValueError("OPENAI_API_KEY not found in environment. Please set it in your .env file.")


In [8]:
def do_invoke_llm(prompt):
    """Send a single prompt to the chat model and print its reply.

    Args:
        prompt: Text sent to the model as the content of one human message.

    Returns:
        None. The reply text is printed, prefixed with "LLM Response:".
    """
    from langchain_openai import ChatOpenAI
    from langchain_core.messages import HumanMessage

    # The API key comes from the module-level `openai_api_key`.
    chat_model = ChatOpenAI(openai_api_key=openai_api_key)
    messages = [HumanMessage(content=prompt)]
    reply = chat_model.invoke(messages)
    print("LLM Response:", reply.content)

# Quick smoke test of the helper above with a greeting prompt; the reply is printed.
do_invoke_llm("Hello, LLM! How are you?")

LLM Response: Hello! I'm just a language model, so I don't have feelings, but I'm here to help you with any questions or conversations you'd like to have. How can I assist you today?


# RAG (Retrieval-Augmented Generation) Workshop

This notebook demonstrates the two main steps of RAG:

1. **Offline - Document Storage** 🗄️: Loading, chunking, embedding, and storing documents in a vector database (FAISS)
2. **Online - Information Retrieval** 🔎: Retrieving relevant chunks based on user queries and generating responses

## What is RAG?

RAG combines the power of information retrieval with large language models. Instead of relying solely on the model's training data, RAG allows us to:
- Provide up-to-date information
- Access domain-specific knowledge
- Reduce hallucinations
- Cite sources

✨ Let's walk through each step of the process! 📝


## Step 1: Offline - Document Storage

### 1.1 Document Loading
First, we need to load our document. We'll use a plain-text file, `sample_document.txt` — in this run, the Project Gutenberg edition of *Alice's Adventures in Wonderland*.


In [None]:
def do_load_document(file_path, preview_chars=200):
    """Load a UTF-8 text document and print a short summary of it.

    Args:
        file_path: Path of the text file to read.
        preview_chars: Maximum number of leading characters echoed in the
            preview (default 200).

    Returns:
        The full file contents as a string, or None when the file could not
        be read. In that case a message describing the problem is printed
        instead of an exception being raised.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            content = file.read()
    except FileNotFoundError:
        print(f"❌ Error: File '{file_path}' not found.")
        return None
    except Exception as e:
        # Deliberately best-effort: any other read/decode problem is reported
        # and signalled to the caller through the None return value.
        print(f"❌ Error loading document: {e}")
        return None

    preview = content[:preview_chars]
    # Only show an ellipsis when the preview really is shorter than the document.
    ellipsis = "..." if len(content) > len(preview) else ""

    print("✅ Document loaded successfully!")
    print(f"📄 Document length: {len(content)} characters")
    print(f"🔤 First {preview_chars} characters:\n{preview}{ellipsis}")
    return content



In [2]:
# Load our sample document. do_load_document returns None when the file cannot
# be read, so later cells check document_content before using it.
document_content = do_load_document("sample_document.txt")

✅ Document loaded successfully!
📄 Document length: 163912 characters
🔤 First 200 characters:
The Project Gutenberg eBook of Alice's Adventures in Wonderland
    
This ebook is for the use of anyone anywhere in the United States and
most other parts of the world at no cost and with almost no r...


### 1.2 Document Chunking
Large documents need to be split into smaller chunks for efficient retrieval and to fit within LLM context windows. We'll use LangChain's text splitter to create overlapping chunks.


In [3]:
def do_chunk_text(text, chunk_size=500, chunk_overlap=50):
    """Split text into overlapping chunks for better retrieval.

    Args:
        text: The document text to split.
        chunk_size: Maximum chunk length passed to the splitter; lengths are
            measured with len(), i.e. in characters.
        chunk_overlap: Overlap between neighbouring chunks, passed to the
            splitter.

    Returns:
        A list of chunk strings. The list is empty when the splitter produces
        no chunks (for example for empty input); in that case a warning is
        printed and no statistics are shown.
    """
    from langchain.text_splitter import RecursiveCharacterTextSplitter

    # Create text splitter
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        separators=["\n\n", "\n", ". ", "! ", "? ", " ", ""]
    )

    # Split the text
    chunks = text_splitter.split_text(text)

    # Guard: the average and the preview below both need at least one chunk.
    if not chunks:
        print("⚠️ No chunks were produced from the input text.")
        return chunks

    total_chars = sum(len(chunk) for chunk in chunks)
    first_chunk = chunks[0]
    # Only show an ellipsis when the preview really is truncated.
    ellipsis = "..." if len(first_chunk) > 300 else ""

    print("✅ Text chunked successfully!")
    print(f"📊 Number of chunks: {len(chunks)}")
    print(f"📏 Average chunk size: {total_chars / len(chunks):.0f} characters")
    print(f"\n🔍 First chunk preview:\n{first_chunk[:300]}{ellipsis}")

    return chunks

In [4]:

# Chunking needs the loaded text; document_content is None when loading failed.
if not document_content:
    print("❌ Cannot chunk text - document not loaded")
else:
    text_chunks = do_chunk_text(document_content)


✅ Text chunked successfully!
📊 Number of chunks: 452
📏 Average chunk size: 364 characters

🔍 First chunk preview:
The Project Gutenberg eBook of Alice's Adventures in Wonderland
    
This ebook is for the use of anyone anywhere in the United States and
most other parts of the world at no cost and with almost no restrictions
whatsoever. You may copy it, give it away or re-use it under the terms
of the Project Gu...


### 1.3 Creating Embeddings and Vector Store
Now we'll convert our text chunks into vector embeddings and store them in a FAISS vector database for efficient similarity search.


In [10]:
def do_create_vector_store(chunks):
    """Create embeddings and a FAISS vector store from text chunks.

    Args:
        chunks: Non-empty list of text chunks to embed and index.

    Returns:
        The FAISS vector store built from `chunks`.

    Raises:
        ValueError: If `chunks` is empty.
    """
    from langchain_openai import OpenAIEmbeddings
    from langchain_community.vectorstores import FAISS

    if not chunks:
        raise ValueError("Cannot create a vector store from an empty list of chunks.")

    # Create embeddings model (API key comes from the module-level `openai_api_key`)
    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

    print("🔄 Creating embeddings and vector store...")
    print("⏳ This may take a moment...")

    # Create FAISS vector store, matching what the notebook text describes.
    # NOTE(review): FAISS.from_texts needs the `faiss` package (e.g. faiss-cpu)
    # to be installed - confirm it is available in the workshop environment.
    vector_store = FAISS.from_texts(chunks, embeddings)

    print("✅ Vector store created successfully!")
    print(f"🗃️ Stored {len(chunks)} document chunks")
    # Read the dimensionality from the FAISS index itself instead of issuing
    # an extra embedding request just to measure the vector length.
    print(f"🧮 Each embedding has {vector_store.index.d} dimensions")

    return vector_store


In [12]:
# Build the vector store only when an earlier cell produced non-empty text_chunks.
if 'text_chunks' not in locals() or not text_chunks:
    print("❌ Cannot create vector store - no text chunks available")
else:
    vector_store = do_create_vector_store(text_chunks)


🔄 Creating embeddings and vector store...
⏳ This may take a moment...
✅ Vector store created successfully!
🗃️ Stored 452 document chunks
🧮 Each embedding has 1536 dimensions


## Step 2: Online - Information Retrieval

### 2.1 Similarity Search
Now we can search for relevant chunks based on user queries using vector similarity.


In [13]:
def do_similarity_search(vector_store, query, k=3, preview_chars=300):
    """Search for the most relevant chunks based on the query.

    Args:
        vector_store: Object exposing `similarity_search(query, k=...)` that
            returns documents with a `page_content` attribute.
        query: The question or search text.
        k: Number of chunks to retrieve (default 3).
        preview_chars: Maximum number of characters of each chunk echoed in
            the printed preview (default 300).

    Returns:
        The documents returned by `vector_store.similarity_search`, unchanged.
    """
    print(f"🔍 Searching for: '{query}'")
    print(f"📊 Retrieving top {k} most relevant chunks...\n")

    # Perform similarity search
    relevant_docs = vector_store.similarity_search(query, k=k)

    print(f"✅ Found {len(relevant_docs)} relevant chunks:")
    print("="*80)

    for i, doc in enumerate(relevant_docs, 1):
        text = doc.page_content
        preview = text[:preview_chars]
        # Only show an ellipsis when the chunk really was cut short.
        ellipsis = "..." if len(text) > len(preview) else ""
        print(f"\n🔸 Chunk {i}:")
        print(f"📝 Content: {preview}{ellipsis}")
        print("-"*60)

    return relevant_docs

# Try the search helper with a sample query; this needs the vector_store
# variable created by an earlier cell.
if 'vector_store' not in locals():
    print("❌ Cannot perform search - vector store not available")
else:
    query = "Did they ask alice to drink them?"
    relevant_chunks = do_similarity_search(vector_store, query)


🔍 Searching for: 'Did they ask alice to drink them?'
📊 Retrieving top 3 most relevant chunks...

✅ Found 3 relevant chunks:

🔸 Chunk 1:
📝 Content: It was all very well to say “Drink me,” but the wise little Alice was
not going to do _that_ in a hurry. “No, I’ll look first,” she said,
“and see whether it’s marked ‘_poison_’ or not”; for she had read
several nice little histories about children who had got burnt, and
eaten up by wild beasts and ...
------------------------------------------------------------

🔸 Chunk 2:
📝 Content: There seemed to be no use in waiting by the little door, so she went
back to the table, half hoping she might find another key on it, or at
any rate a book of rules for shutting people up like telescopes: this
time she found a little bottle on it, (“which certainly was not here
before,” said Alice,)...
------------------------------------------------------------

🔸 Chunk 3:
📝 Content: The great question certainly was, what? Alice looked all round her at
the flo

### 2.2 RAG Pipeline - Putting It All Together
Now we'll combine retrieval with generation to create a complete RAG system that can answer questions based on our document.


In [None]:
def do_rag_query(vector_store, query, k=3):
    """Complete RAG pipeline: retrieve relevant chunks and generate response.

    Args:
        vector_store: Object exposing `similarity_search(query, k=...)` that
            returns documents with a `page_content` attribute.
        query: The user's question.
        k: Number of chunks to retrieve as context (default 3).

    Returns:
        A dict with the keys 'query', 'retrieved_chunks' (the retrieved
        documents), 'context' (their text joined by blank lines) and
        'response' (the model's answer text).
    """
    from langchain_openai import ChatOpenAI
    from langchain_core.messages import HumanMessage

    print(f"🎯 RAG Query: '{query}'")
    print("="*80)

    # Step 1: Retrieve relevant chunks
    print("🔍 Step 1: Retrieving relevant information...")
    relevant_docs = vector_store.similarity_search(query, k=k)

    # Combine retrieved content
    context = "\n\n".join([doc.page_content for doc in relevant_docs])

    print(f"✅ Retrieved {len(relevant_docs)} relevant chunks")
    print(f"📄 Total context length: {len(context)} characters")

    # Step 2: Generate response using LLM
    print("\n🤖 Step 2: Generating response...")

    # Build the prompt from explicit pieces so the function's source
    # indentation does not end up inside the text sent to the model.
    prompt = (
        "Based on the following context, please answer the question. "
        "If the answer is not in the context, say so.\n\n"
        f"Context:\n{context}\n\n"
        f"Question:\n{query}\n\n"
        "Answer:"
    )

    # Generate response (API key comes from the module-level `openai_api_key`)
    llm = ChatOpenAI(openai_api_key=openai_api_key, temperature=0)
    response = llm.invoke([HumanMessage(content=prompt)])

    print("✅ Response generated!")
    print("="*80)
    print("🔊 RAG Response:")
    print(response.content)
    print("="*80)

    return {
        'query': query,
        'retrieved_chunks': relevant_docs,
        'context': context,
        'response': response.content
    }

# Run the full RAG pipeline over a list of sample questions; this needs the
# vector_store variable created by an earlier cell.
if 'vector_store' not in locals():
    print("❌ Cannot run RAG pipeline - vector store not available")
else:
    print("🚀 Testing complete RAG pipeline:\n")

    # Add more questions to this list to test several queries in one go
    test_queries = ["Did they ask alice to drink them?"]

    for i, query in enumerate(test_queries, 1):
        print(f"\n{'='*20} Test Query {i} {'='*20}")
        result = do_rag_query(vector_store, query)
        print("\n")


🚀 Testing complete RAG pipeline:


🎯 RAG Query: 'Did they ask alice to drink them?'
🔍 Step 1: Retrieving relevant information...
✅ Retrieved 3 relevant chunks
📄 Total context length: 1350 characters

🤖 Step 2: Generating response...
✅ Response generated!
🔊 RAG Response:
Yes, the bottle had a paper label with the words "DRINK ME" printed on it in large letters.




## Summary

Congratulations! You've successfully implemented a complete RAG system. Here's what we accomplished:

### ✅ What We Built:
1. **Document Loading**: Read a plain-text file (`sample_document.txt`)
2. **Text Chunking**: Split the document into manageable pieces with overlap
3. **Embeddings**: Converted text chunks into vector representations
4. **Vector Store**: Stored embeddings in FAISS for efficient similarity search
5. **Retrieval**: Found relevant chunks based on user queries
6. **Generation**: Combined retrieved context with LLM to generate informed responses

### 🎯 Key Benefits of RAG:
- **Current Information**: Can work with up-to-date documents
- **Domain Expertise**: Provides specialized knowledge beyond training data
- **Reduced Hallucination**: Grounds responses in actual source material
- **Traceable Sources**: Can identify which chunks informed the response

### 🔄 Try Your Own Query:
Modify the test query below to ask your own questions about the loaded document!


In [16]:
# 🎮 Interactive RAG Testing
# Edit custom_query to try your own questions.
# NOTE(review): in the recorded run the loaded document is Alice's Adventures
# in Wonderland, so the default machine-learning question below is answered
# with "not in the context" - consider asking about the loaded text instead.

if 'vector_store' not in locals():
    print("❌ Vector store not available. Please run all previous cells first.")
else:
    # Try your own query here!
    custom_query = "What is the difference between supervised and unsupervised learning?"

    print("🎯 Testing your custom query...")
    result = do_rag_query(vector_store, custom_query)

    # Print the tips block in one call, one entry per line
    print("\n" + "="*60)
    print(
        "💡 Pro Tips:",
        "- Try questions about specific AI/ML concepts from the document",
        "- Ask about applications, challenges, or comparisons",
        "- Notice how the system retrieves relevant chunks first",
        "- Observe how the LLM uses the context to generate responses",
        "="*60,
        sep="\n",
    )


🎯 Testing your custom query...
🎯 RAG Query: 'What is the difference between supervised and unsupervised learning?'
🔍 Step 1: Retrieving relevant information...
✅ Retrieved 3 relevant chunks
📄 Total context length: 1320 characters

🤖 Step 2: Generating response...
✅ Response generated!
🔊 RAG Response:
The answer is not in the context provided.

💡 Pro Tips:
- Try questions about specific AI/ML concepts from the document
- Ask about applications, challenges, or comparisons
- Notice how the system retrieves relevant chunks first
- Observe how the LLM uses the context to generate responses
