# Agentic RAG System for Domain Knowledge QA

This notebook demonstrates the complete RAG system with agentic capabilities.

## Setup and Configuration

Before running this notebook, ensure you have:
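1. Set your Azure OpenAI credentials (e.g. `AZURE_OPENAI_API_KEY` and `AZURE_OPENAI_ENDPOINT`, as read by `src/config.py`) in the `.env` file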
2. Installed dependencies: `pip install -r requirements.txt`
3. Added sample documents to the `documents/` folder

In [1]:
# Import required libraries
import os
import sys
from pathlib import Path
from dotenv import load_dotenv
import json

# Load environment variables from a .env file.
# load_dotenv() returns False when it found nothing to load, so keep the
# result instead of unconditionally reporting success.
env_loaded = load_dotenv()

# Put the working directory first on the import path so the `src` package
# can be imported. The guard keeps this cell idempotent: re-running it does
# not stack duplicate entries at the front of sys.path.
project_root = os.getcwd()
if sys.path[:1] != [project_root]:
    sys.path.insert(0, project_root)

if env_loaded:
    print("✓ Environment variables loaded")
else:
    print("⚠ No .env file found (or it is empty) - relying on variables already set in the environment")

✓ Environment variables loaded


In [None]:
# Import RAG components
from src.config import (
    AZURE_OPENAI_API_KEY, AZURE_OPENAI_ENDPOINT,
    DOCUMENTS_PATH, VECTOR_STORE_PATH,
    CHUNK_SIZE, CHUNK_OVERLAP, TOP_K_DOCUMENTS,
    CHAT_DEPLOYMENT_NAME, EMBEDDING_DEPLOYMENT_NAME,
    CHAT_API_VERSION, EMBEDDING_API_VERSION
)
from src.document_processor import DocumentProcessor
from src.embeddings import EmbeddingManager, FAISSVectorStore
from src.retriever import RAGRetriever
from src.agent import AgenticRAG

# Confirm the imports succeeded and echo the configuration values in use
print(
    "✓ RAG components imported",
    f"  - Documents path: {DOCUMENTS_PATH}",
    f"  - Vector store path: {VECTOR_STORE_PATH}",
    f"  - Chunk size: {CHUNK_SIZE}",
    f"  - Chunk overlap: {CHUNK_OVERLAP}",
    sep="\n",
)

INFO:faiss.loader:Loading faiss with AVX2 support.
INFO:faiss.loader:Successfully loaded faiss with AVX2 support.


✓ RAG components imported
  - Documents path: documents
  - Vector store path: vector_store/faiss_index
  - Chunk size: 1000
  - Chunk overlap: 200


## Step 1: Document Processing

In [None]:
# Initialize the document processor with the chunking parameters from src.config
processor = DocumentProcessor(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP
)

print(f"Loading documents from: {DOCUMENTS_PATH}")
chunks = processor.process_documents(DOCUMENTS_PATH)

print("\n✓ Processing complete!")
print(f"  - Total chunks: {len(chunks)}")

if chunks:
    # Show a sample chunk: each chunk is indexed by 'content' and by
    # 'metadata' -> 'source' below.
    print(f"\nSample chunk from '{chunks[0]['metadata']['source']}':")
    print(f"  Preview: {chunks[0]['content'][:200]}...")
    print(f"  Size: {len(chunks[0]['content'])} characters")
else:
    # Every later step needs at least one chunk, so make the empty case
    # visible here rather than letting it surface as an obscure error later.
    print(f"\n⚠ No chunks were produced from '{DOCUMENTS_PATH}'.")
    print("  Add documents to that folder and re-run this cell before continuing.")

## Step 2: Generate Embeddings and Build Vector Store

In [None]:
# Initialize the embedding manager with the embedding deployment settings from src.config
embedding_manager = EmbeddingManager(
    api_key=AZURE_OPENAI_API_KEY,
    endpoint=AZURE_OPENAI_ENDPOINT,
    deployment_name=EMBEDDING_DEPLOYMENT_NAME,
    api_version=EMBEDDING_API_VERSION
)

texts = [chunk["content"] for chunk in chunks]

# Fail fast with an actionable message: without this guard an empty corpus
# only surfaces as an IndexError on `embeddings[0]` further down.
if not texts:
    raise ValueError(
        f"No chunks to embed - add documents to '{DOCUMENTS_PATH}' and re-run the document processing step."
    )

print("Generating embeddings...")

# Generate embeddings in batches
embeddings = embedding_manager.embed_batch(texts, batch_size=50)

# len() rather than truthiness, so this also works if embed_batch returns an array type
if len(embeddings) == 0:
    raise RuntimeError("embed_batch returned no embeddings for a non-empty list of texts.")

print(f"\n✓ Embeddings generated!")
print(f"  - Number of embeddings: {len(embeddings)}")
print(f"  - Embedding dimension: {len(embeddings[0])}")

In [None]:
# Create the FAISS vector store, configured with VECTOR_STORE_PATH
vector_store = FAISSVectorStore(vector_store_path=VECTOR_STORE_PATH)

# Add every chunk together with its embedding
print("Adding documents to vector store...")
vector_store.add_documents(chunks, embeddings)

# Write the populated store out
print("Saving vector store to disk...")
vector_store.save()

# Report the configured path and the number of stored documents
print(
    f"\n✓ Vector store created and saved!",
    f"  - Path: {VECTOR_STORE_PATH}",
    sep="\n",
)
print(f"  - Size: {len(vector_store.documents)} documents")

## Step 3: Initialize RAG Retriever

In [None]:
# Build the retriever from the embedding manager and the vector store,
# passing TOP_K_DOCUMENTS as its top_k setting
retriever = RAGRetriever(
    embedding_manager=embedding_manager, vector_store=vector_store, top_k=TOP_K_DOCUMENTS
)

print(
    "✓ RAG Retriever initialized",
    f"  - Top-K: {TOP_K_DOCUMENTS}",
    sep="\n",
)

## Step 4: Test Retriever

In [None]:
# Smoke-test the retriever with one sample question
test_query = "What is machine learning?"
print(f"Test Query: {test_query}\n")

retrieved_docs = retriever.retrieve(test_query)
print(f"Retrieved {len(retrieved_docs)} documents:\n")

# Each result is unpacked as (content, similarity, source); numbering starts at 1
for i, (content, similarity, source) in enumerate(retrieved_docs, start=1):
    print(f"[Document {i}] {source} (Similarity: {similarity:.2%})")
    # end="\n\n" leaves the blank separator line after each preview
    print(f"  Preview: {content[:150]}...", end="\n\n")

## Step 5: Initialize Agentic RAG

In [None]:
# Construct the agent from the chat deployment settings and the retriever built above
agent = AgenticRAG(
    api_key=AZURE_OPENAI_API_KEY, endpoint=AZURE_OPENAI_ENDPOINT,
    retriever=retriever,
    deployment_name=CHAT_DEPLOYMENT_NAME, api_version=CHAT_API_VERSION,
    max_iterations=3,
)

print(
    "✓ Agentic RAG System initialized",
    "  - Agent ready for reasoning and generation",
    sep="\n",
)

## Step 6: Run Agentic RAG

In [None]:
# Example 1: a simple question, printed with the answer and the run metadata
query1 = "What are the types of machine learning?"

print(f"Query: {query1}\n", "Running agentic RAG...\n", sep="\n")

result1 = agent.reason(query1)

# The result is read as a mapping with 'answer', 'confidence',
# 'iterations' and 'reasoning_steps' entries
print(f"Answer:\n{result1['answer']}\n", f"---\nMetadata:", sep="\n")
print(f"  - Confidence: {result1['confidence']}%")
print(f"  - Iterations: {result1['iterations']}")
print(f"  - Reasoning steps: {len(result1['reasoning_steps'])}")

In [None]:
# Example 2: a comparison question, printed with the answer and part of the run metadata
query2 = "What is the difference between supervised and unsupervised learning?"

print(f"Query: {query2}\n", "Running agentic RAG...\n", sep="\n")

result2 = agent.reason(query2)

# The result is read as a mapping with 'answer', 'confidence' and 'iterations' entries
print(f"Answer:\n{result2['answer']}\n", f"---\nMetadata:", sep="\n")
print(f"  - Confidence: {result2['confidence']}%")
print(f"  - Iterations: {result2['iterations']}")

In [None]:
# Example 3: a more complex question; only the answer text is printed
query3 = "How do RAG systems improve upon traditional language models?"

print(f"Query: {query3}\n", "Running agentic RAG...\n", sep="\n")

result3 = agent.reason(query3)

print(f"Answer:\n{result3['answer']}")

## Step 7: Multi-turn Conversation

In [None]:
# Send a sequence of related questions through agent.chat, one turn at a time
conversation = [
    "What is overfitting in machine learning?",
    "How can we prevent overfitting?",
    "What is the role of regularization?"
]

for query in conversation:
    # Banner: blank line, rule, the user's message, rule
    print(f"\n{'='*60}", f"User: {query}", f"{'='*60}", sep="\n")

    response = agent.chat(query)

    # end="\n\n" leaves a blank line after each reply
    print(f"\nAgent: {response}", end="\n\n")

## Step 8: Analyze Conversation History

In [None]:
# Get conversation history
history = agent.get_conversation_history()

print(f"Conversation History ({len(history)} messages):\n")

for i, msg in enumerate(history, 1):
    role = msg['role'].upper()
    content = msg['content']
    # Show at most the first 100 characters, marking truncation with "..."
    content_preview = (content[:100] + "...") if len(content) > 100 else content
    print(f"{i}. [{role}] {content_preview}")

    if 'metadata' in msg:
        confidence = msg['metadata'].get('confidence')
        # Only attach the percent sign to an actual value, so a missing
        # confidence prints "N/A" rather than "N/A%" or "None%".
        confidence_text = "N/A" if confidence is None else f"{confidence}%"
        print(f"   Confidence: {confidence_text}")

## Summary

This notebook demonstrated:

1. **Document Processing**: Loading and chunking domain documents
2. **Embedding Generation**: Using an Azure OpenAI embedding deployment
3. **Vector Storage**: FAISS for efficient similarity search
4. **Retrieval**: Context-aware document retrieval
5. **Agentic Reasoning**: Multi-step reasoning with reflection
6. **Answer Generation**: Grounded responses with citations
7. **Multi-turn Conversation**: Maintaining context across turns

### Key Features:
- **Tool Use**: Retriever as callable tool
- **Self-Reflection**: Critic evaluates document relevance
- **Iterative Refinement**: Refines queries if the initial retrieval is insufficient
- **Source Attribution**: Citations in generated answers
- **Minimal Hallucinations**: Grounded in retrieved documents