# In [None]:
# notebooks/week11_rag.ipynb

"""
# Week 11: RAG & Vector Databases
## Healthcare Policy Q&A System

### Learning Objectives
1. Load and process documents for RAG
2. Implement text chunking strategies
3. Create and query vector stores
4. Build complete RAG pipelines
5. Evaluate RAG quality

### Setup
"""

# Cell 1: Setup
import os
import sys
from pathlib import Path
from dotenv import load_dotenv

# The notebook lives in `notebooks/`, so the parent of the working directory
# is taken as the project root (assumes the kernel was started from there).
project_root = Path.cwd().parent
# Put the root first on sys.path so the `src.*` imports in later cells resolve.
sys.path.insert(0, str(project_root))

# Load variables from a .env file, if one is found, into the environment.
load_dotenv()

# Report only whether the key is present; the key itself is never printed.
print("OpenAI API Key:", "✅" if os.getenv("OPENAI_API_KEY") else "❌")


# Cell 2: Create Sample Documents
"""
## Part 1: Document Preparation

First, let's ensure we have policy documents to work with.
"""

# Policy documents live under <project_root>/data/documents; create the
# folder (and any missing parents) so the following cells have a target.
docs_dir = project_root.joinpath("data", "documents")
docs_dir.mkdir(parents=True, exist_ok=True)

# Collect the markdown files already present and list them by file name.
existing_docs = [*docs_dir.glob("*.md")]
print(f"Found {len(existing_docs)} markdown documents")

for doc in existing_docs:
    print("  - " + doc.name)


# Cell 3: Load Documents
"""
## Part 2: Document Loading
"""

from src.llm.rag import DocumentLoader, load_policy_documents

# Point a loader at the documents folder and read everything it finds there.
loader = DocumentLoader(base_path=str(docs_dir))
documents = loader.load_directory()

print(f"\nLoaded {len(documents)} documents")
print(f"Loading stats: {loader.get_stats()}")

# When anything was loaded, show the first document's source and the first
# 500 characters of its content.
if documents:
    first_document = documents[0]
    first_source = first_document.metadata.get("source", "Unknown")
    print("\n--- First Document Preview ---")
    print(f"Source: {first_source}")
    print(f"Content: {first_document.page_content[:500]}...")


# Cell 4: Text Chunking
"""
## Part 3: Text Chunking

Split documents into manageable chunks for embedding.
"""

from src.llm.rag import TextChunker, ChunkingStrategy, analyze_chunks

# Configure a chunker with the recursive strategy (size 1000, overlap 200)
# and split every loaded document with it.
chunker = TextChunker(
    strategy=ChunkingStrategy.RECURSIVE,
    chunk_size=1000,
    chunk_overlap=200,
)
chunks = chunker.chunk_documents(documents)

print(f"Created {len(chunks)} chunks from {len(documents)} documents")
print("\nChunk Analysis:")
analysis = analyze_chunks(chunks)
for stat_name, stat_value in analysis.items():
    print(f"  {stat_name}: {stat_value}")

# Show size, provenance and a 150-character excerpt for the first three chunks.
print("\n--- Sample Chunks ---")
for chunk_number, chunk in enumerate(chunks[:3], start=1):
    chunk_text = chunk.page_content
    chunk_meta = chunk.metadata
    print(f"\nChunk {chunk_number}:")
    print(f"  Size: {len(chunk_text)} chars")
    print(f"  Source: {chunk_meta.get('filename', 'Unknown')}")
    print(f"  Section: {chunk_meta.get('section', 'N/A')}")
    print(f"  Content: {chunk_text[:150]}...")


# Cell 5: Embeddings
"""
## Part 4: Embeddings

Convert text chunks to vector embeddings.
"""

from src.llm.rag import EmbeddingsManager

# Three closely related questions used to sanity-check the embedding model.
sample_texts = [
    "What is the cancellation policy?",
    "How do I reschedule an appointment?",
    "What happens if I miss my appointment?",
]

# OpenAI-backed embeddings manager with caching switched on.
embeddings_manager = EmbeddingsManager(
    provider="openai", model_name="text-embedding-3-small", use_cache=True
)
print(f"Embeddings Model: {embeddings_manager.get_model_info()}")

# Embed the samples and report how many vectors came back and their length.
embeddings = embeddings_manager.embed_texts(sample_texts)
embedding_count = len(embeddings)
print(f"\nGenerated {embedding_count} embeddings")
print(f"Embedding dimension: {len(embeddings[0])}")

# Check similarity
import numpy as np

def cosine_similarity(a, b):
    """Return the cosine similarity of two 1-D vectors.

    Args:
        a: First vector (any sequence or array of numbers).
        b: Second vector, the same length as ``a``.

    Returns:
        The cosine of the angle between ``a`` and ``b`` as a float, or 0.0
        when either vector has zero norm (the angle is undefined there and
        dividing by the zero norm would produce NaN).
    """
    a = np.asarray(a, dtype=float)
    b = np.asarray(b, dtype=float)
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    if denominator == 0.0:
        return 0.0
    return float(np.dot(a, b) / denominator)

# Compare every unordered pair of sample questions exactly once.
print("\nSimilarity between questions:")
text_count = len(sample_texts)
index_pairs = [
    (first, second)
    for first in range(text_count)
    for second in range(first + 1, text_count)
]
for first, second in index_pairs:
    sim = cosine_similarity(embeddings[first], embeddings[second])
    left_label = sample_texts[first][:30]
    right_label = sample_texts[second][:30]
    print(f"  '{left_label}...' vs '{right_label}...': {sim:.3f}")


# Cell 6: Vector Store
"""
## Part 5: Vector Store

Create and query a FAISS vector store.
"""

from src.llm.rag import VectorStoreManager

# FAISS-backed store that embeds through `embeddings_manager`.
vector_store = VectorStoreManager(
    store_type="faiss", embeddings_manager=embeddings_manager
)

# Index the loaded documents. chunk=True with size/overlap is passed through
# (presumably the store splits the documents itself -- confirm in
# VectorStoreManager).
indexing_options = {"chunk": True, "chunk_size": 1000, "chunk_overlap": 200}
vector_store.create_from_documents(documents, **indexing_options)
print(f"Vector store created: {vector_store.get_stats()}")

# Persist the index under a fixed name so it can be reloaded later.
vector_store.save("healthcare_policies")
print("Vector store saved!")


# Cell 7: Basic Search
"""
### Basic Similarity Search
"""

# Plain similarity search: ask for the three closest matches to the query.
query = "What happens if a patient misses their appointment?"
results = vector_store.search(query, k=3)

print(f"Query: {query}\n")
print("Top 3 Results:")
print("-" * 50)

# Number the hits from 1 and show file, section and a 200-character excerpt.
for rank, doc in enumerate(results, start=1):
    hit_meta = doc.metadata
    print(f"\n{rank}. {hit_meta.get('filename', 'Unknown')}")
    print(f"   Section: {hit_meta.get('section', 'N/A')}")
    print(f"   Content: {doc.page_content[:200]}...")


# Cell 8: Search with Scores
"""
### Search with Similarity Scores
"""

# Same query as the basic search, but each hit comes back paired with a score.
results_with_scores = vector_store.search_with_scores(query, k=5)

print(f"Query: {query}\n")
print("Results with scores:")
for doc, score in results_with_scores:
    filename = doc.metadata.get("filename", "Unknown")
    print(f"  Score: {score:.4f} - {filename}")


# Cell 9: MMR Search
"""
### Maximum Marginal Relevance (MMR) Search

MMR provides diverse results, not just the most similar.
"""

# lambda_mult trades relevance against diversity
# (0 = max diversity, 1 = max relevance); 0.5 sits in the middle.
mmr_results = vector_store.mmr_search(query, k=4, fetch_k=10, lambda_mult=0.5)

print(f"Query: {query}\n")
print("MMR Results (diverse):")
for rank, doc in enumerate(mmr_results, start=1):
    section = doc.metadata.get("section", "Unknown section")
    print(f"\n{rank}. {section}")
    print(f"   {doc.page_content[:150]}...")


# Cell 10: RAG Chain
"""
## Part 6: RAG Chains

Build complete question-answering pipelines.
"""

from src.llm.rag.chains import RAGChain, ConversationalRAGChain

# Build a question-answering chain on top of the vector store.
rag = RAGChain(
    vector_store=vector_store,
    temperature=0.2,
    retriever_k=4
)

# Questions used to exercise the chain.
test_questions = [
    "What is the no-show policy?",
    "How many reminders do patients receive before their appointment?",
    "What should staff do for high-risk patients?",
    "Can no-show fees be waived?"
]

print("RAG Chain Responses:")
print("=" * 60)

for question in test_questions:
    result = rag.ask(question, return_sources=True)

    # Answers are truncated to 400 characters to keep the output readable;
    # a missing 'sources' entry is reported as zero documents.
    print(f"\n📝 Q: {question}")
    print(f"\n💬 A: {result['answer'][:400]}...")
    print(f"\n📚 Sources: {len(result.get('sources', []))} documents")
    print("-" * 60)


# Cell 11: Conversational RAG
"""
### Conversational RAG

Maintains context across multiple questions.
"""

# Conversational chain over the same vector store, with max_history=5.
conv_rag = ConversationalRAGChain(
    vector_store=vector_store,
    max_history=5
)

# Each conversation is tracked under its own session id.
session_id = conv_rag.create_session()
print(f"Session created: {session_id}\n")

# Multi-turn conversation: the later questions only make sense in the
# context of the earlier ones.
conversation = [
    "What is the cancellation policy?",
    "What if I need to cancel same-day?",
    "Are there any exceptions to these rules?",
    "How do I appeal a no-show fee?"
]

print("Conversational RAG:")
print("=" * 60)

for question in conversation:
    result = conv_rag.ask(session_id, question)

    print(f"\n👤 User: {question}")
    print(f"\n🤖 Assistant: {result['answer'][:300]}...")

    # Show the rewritten question when the chain reports a non-empty one.
    if result.get('standalone_question'):
        print(f"\n   [Rewritten: {result['standalone_question']}]")

    print("-" * 40)

# Replay the stored history, one line per message, truncated to 100 characters.
print("\n\nConversation History:")
history = conv_rag.get_history(session_id)
for msg in history:
    role = "👤" if msg["role"] == "user" else "🤖"
    print(f"{role}: {msg['content'][:100]}...")


# Cell 12: Citation RAG
"""
### RAG with Citations
"""

from src.llm.rag.chains import CitationRAGChain

# Chain variant whose result carries a list of citations next to the answer.
citation_rag = CitationRAGChain(vector_store=vector_store)
result = citation_rag.ask("What are the consequences of multiple no-shows?")

print("Citation RAG Response:")
print("=" * 60)
print(f"\nAnswer:\n{result['answer']}")

# One line per citation: its number, source file and section.
print("\nCitations:")
for cite in result["citations"]:
    number, filename, section = cite["number"], cite["filename"], cite["section"]
    print(f"  [{number}] {filename} - {section}")


# Cell 13: Advanced Retriever
"""
## Part 7: Advanced Retrieval

Using query expansion and reranking.
"""

from src.llm.rag.retriever import PolicyRetriever, RetrievalConfig

# Retrieval settings: MMR search, top 4 hits, query expansion enabled with
# expansion_count=2.
config = RetrievalConfig(
    search_type="mmr",
    top_k=4,
    use_query_expansion=True,
    expansion_count=2,
)
advanced_retriever = PolicyRetriever(vector_store=vector_store, config=config)

# Try a short keyword-style query. The result exposes the matched documents
# under 'documents' and a text context under 'context'.
query = "transportation help for appointments"
results = advanced_retriever.search_with_context(query)

matched_count = len(results["documents"])
context_preview = results["context"][:500]
print(f"Query: {query}")
print(f"\nExpanded search found {matched_count} documents")
print(f"\nContext preview:\n{context_preview}...")


# Cell 14: RAG Evaluation
"""
## Part 8: RAG Evaluation
"""

from src.llm.rag.evaluation import RAGEvaluator, create_healthcare_golden_set

# Evaluator with a threshold configured for each metric.
evaluator = RAGEvaluator(
    thresholds={
        "faithfulness": 0.7,
        "answer_relevancy": 0.7,
        "context_used": 0.5
    }
)

# Golden set of healthcare questions with reference answers.
golden = create_healthcare_golden_set()
print(f"Golden set has {len(golden.questions)} questions")

# Split the golden set into parallel lists of questions and ground truths.
questions, ground_truths = golden.to_eval_format()

# Limit for demo: evaluate at most the first five questions.
demo_questions = questions[:5]
demo_truths = ground_truths[:5]

# Report the actual sample size; the slice can hold fewer than five entries.
print(f"\nRunning evaluation on {len(demo_questions)} questions...")
evaluator.add_samples_from_chain(rag, demo_questions, demo_truths)

# Run evaluation
results = evaluator.evaluate()

print("\n" + "=" * 60)
print("EVALUATION RESULTS")
print("=" * 60)
print(f"\nEvaluator: {results['evaluator']}")
print(f"Samples: {results['sample_count']}")
print(f"Passed: {results['passed_count']}")
print(f"Pass Rate: {results['pass_rate']:.1%}")

# Per-metric mean/min/max; prints nothing when no 'summary' entry is present.
print("\nMetric Summary:")
for metric, values in results.get('summary', {}).items():
    print(f"  {metric}: mean={values['mean']:.3f}, range=[{values['min']:.3f}, {values['max']:.3f}]")


# Cell 15: Save Evaluation Results
"""
### Save Evaluation Results
"""

from datetime import datetime

# Results are written under <project_root>/evals/rag_eval_results, which is
# created on demand.
eval_dir = project_root.joinpath("evals", "rag_eval_results")
eval_dir.mkdir(parents=True, exist_ok=True)

# Name the file after the current local time, down to the second.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
eval_file = eval_dir / f"eval_{timestamp}.json"

evaluator.save_results(str(eval_file))
print(f"Evaluation saved to: {eval_file}")


# Cell 16: Testing with API
"""
## Part 9: API Testing

Test the RAG endpoints (requires API to be running).
"""

import httpx

# Base URL of the locally running API that serves the RAG endpoints.
API_BASE = "http://localhost:8000/api/v1"


async def test_rag_api():
    """Smoke-test the RAG HTTP endpoints of a locally running API.

    Sends three requests in order -- create the index, ask a question, run a
    search -- and prints a short summary of each response.

    Raises:
        httpx.HTTPStatusError: If an endpoint answers with a 4xx/5xx status.
        httpx.RequestError: If the API cannot be reached or a request times out.
    """
    # httpx gives up after 5 seconds by default; index creation and LLM-backed
    # answers can take longer, so allow a more generous window.
    async with httpx.AsyncClient(timeout=60.0) as client:
        # Create index
        print("Creating index...")
        response = await client.post(
            f"{API_BASE}/rag/index/create",
            params={"documents_path": "data/documents"}
        )
        # Fail with the real HTTP error instead of a confusing KeyError below.
        response.raise_for_status()
        print(f"Index creation: {response.json()}")

        # Ask question
        print("\nAsking question...")
        response = await client.post(
            f"{API_BASE}/rag/ask",
            json={
                "question": "What is the no-show policy?",
                "include_sources": True
            }
        )
        response.raise_for_status()
        print(f"Answer: {response.json()['answer'][:200]}...")

        # Search
        print("\nSearching...")
        response = await client.get(
            f"{API_BASE}/rag/search",
            params={"query": "cancellation", "k": 3}
        )
        response.raise_for_status()
        print(f"Found {response.json()['count']} results")

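# Uncomment to run. Inside Jupyter an event loop is already running, so await
# the coroutine directly:
# await test_rag_api()
# From a plain Python script, use asyncio.run instead:
# import asyncio
# asyncio.run(test_rag_api())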


# Cell 17: Exercises
"""
## Exercises

### Exercise 1: Chunking Comparison
Compare different chunking strategies and their impact on retrieval.
"""

# Your code here:
# chunking_strategies = [
#     {"strategy": "fixed", "size": 500},
#     {"strategy": "recursive", "size": 1000},
#     {"strategy": "markdown", "size": 1000}
# ]
# 
# for config in chunking_strategies:
#     # Create chunker with config
#     # Count chunks
#     # Evaluate retrieval quality
#     pass


"""
### Exercise 2: Custom Evaluation Set
Create your own evaluation questions specific to your use case.
"""

# Your code here:
# custom_golden = GoldenDataset("evals/custom_golden.json")
# 
# custom_golden.add_question(
#     question="...",
#     expected_answer="...",
#     category="..."
# )


"""
### Exercise 3: Hybrid Retrieval
Implement a hybrid retriever that combines keyword and semantic search.
"""

# Your code here:
# class HybridRetriever:
#     def __init__(self, vector_store, keyword_weight=0.3):
#         pass
#     
#     def search(self, query, k=4):
#         # Combine keyword and semantic results
#         pass


# Cell 18: Summary
"""
## Summary

This week you learned:

1. **Document Loading**
   - Load markdown, text, and other documents
   - Extract metadata for better retrieval

2. **Text Chunking**
   - Recursive splitting respects document structure
   - Overlap prevents information loss at boundaries
   - Chunk size affects retrieval precision

3. **Embeddings**
   - Convert text to vectors for similarity search
   - OpenAI and local embedding options
   - Caching for efficiency

4. **Vector Stores**
   - FAISS for fast local similarity search
   - Persistence for reloading indices
   - MMR for diverse results

5. **RAG Chains**
   - Basic Q&A with retrieval
   - Conversational RAG with history
   - Citation-aware responses

6. **Evaluation**
   - Ragas metrics for quality assessment
   - Golden datasets for regression testing
   - Custom evaluation thresholds

## Deliverables

1. ✅ Working document loader
2. ✅ Chunking pipeline
3. ✅ Vector store with FAISS
4. ✅ RAG chain for Q&A
5. ✅ Conversational RAG
6. ✅ Evaluation framework
7. 📝 Complete exercises
8. 📝 Custom golden set
"""

# Closing banner for the notebook.
print("Week 11 Complete! 🎉")


# Cell 19: Stats
"""
### Final Statistics
"""

# Final recap of the pipeline. Each value is computed lazily, right before
# its own line is printed.
print("RAG Pipeline Stats:")
print("-" * 40)
stat_rows = (
    ("Documents loaded", lambda: len(documents)),
    ("Chunks created", lambda: len(chunks)),
    ("Vector store", lambda: vector_store.get_stats()),
    ("Embeddings", lambda: embeddings_manager.get_stats()),
)
for label, compute in stat_rows:
    print(f"{label}: {compute()}")