# Notebook 6: Graph-Enhanced RAG

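This notebook demonstrates:
1. Query rephrasing for better retrieval
2. Anchor document retrieval
3. Neighboring chunk retrieval (graph expansion)
4. Context graph construction
5. Answer composition with citations
6. A comparison against the baseline RAG pipeline

Setup: the cells below read `OPENAI_API_KEY` from the environment (loaded from `.env`) and use `faiss_index/` as the FAISS index path.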

In [None]:
# Import required modules
import os

from dotenv import load_dotenv

from src.graph_rag import (
    rephrase_query,
    retrieve_anchor_documents,
    find_neighboring_chunks,
    build_context_graph,
    compose_answer_with_graph,
    run_graph_rag_pipeline
)
from src.rag_baseline import BaselineRAG

# Load variables from a local .env file into the process environment.
load_dotenv()
print("✓ Imports successful")

# Every later cell passes os.getenv("OPENAI_API_KEY") straight into the
# pipeline functions, so flag a missing key here instead of letting it
# surface as a failure further down. This only warns; it does not stop the run.
if not os.getenv("OPENAI_API_KEY"):
    print("⚠ OPENAI_API_KEY is not set; the cells below pass it to every pipeline call")

## Step 1: Query Rephrasing

In [None]:
# Turn a single question into several rephrased variations
query = "What are the rights of data subjects?"
variations = rephrase_query(query, openai_api_key=os.getenv("OPENAI_API_KEY"))

# Echo the original question, then list each variation numbered from 1
print(f"Original query: {query}")
print(f"\nQuery variations:")
for position, rephrased in enumerate(variations, start=1):
    print(f"  {position}. {rephrased}")

## Step 2: Retrieve Anchor Documents

In [None]:
# Retrieve anchor documents for all query variations
anchors = retrieve_anchor_documents(
    queries=variations,
    faiss_path="faiss_index/",
    openai_api_key=os.getenv("OPENAI_API_KEY"),
    top_k=2
)

print(f"\nRetrieved {len(anchors)} anchor documents")
print("\nAnchor Documents:")
for i, anchor in enumerate(anchors, 1):
    metadata = anchor['metadata']
    # Fall back to '?' for absent fields, the same placeholder the graph
    # display in Step 5 uses for a missing article.
    article = metadata.get('article', '?')
    page = metadata.get('page', '?')

    # Only attribute the anchor to a variation when it actually carries a
    # usable index; defaulting to 0 would silently credit the first variation.
    query_index = anchor.get('query_index')
    try:
        source_query = variations[query_index]
    except (TypeError, IndexError):
        source_query = "(unknown)"

    print(f"\n{i}. Article {article}, Page {page}")
    print(f"   Query: {source_query}")
    print(f"   Content: {anchor['content'][:80]}...")

## Step 3: Find Neighboring Chunks

In [None]:
# Expand around anchor documents
neighbors = find_neighboring_chunks(
    anchor_docs=anchors,
    faiss_path="faiss_index/",
    openai_api_key=os.getenv("OPENAI_API_KEY"),
    expansion_radius=2
)

print(f"\nFound {len(neighbors)} neighboring chunks")
print("\nSample Neighbors:")
# Only the first five neighbors are shown to keep the output short.
for i, neighbor in enumerate(neighbors[:5], 1):
    metadata = neighbor['metadata']
    relation = metadata.get('relation', 'related')
    # Read article/page the same tolerant way as `relation`, so a chunk with
    # incomplete metadata is displayed with '?' instead of raising KeyError.
    article = metadata.get('article', '?')
    page = metadata.get('page', '?')
    print(f"\n{i}. Article {article}, Page {page} ({relation})")
    print(f"   {neighbor['content'][:80]}...")

## Step 4: Build Context Graph

In [None]:
# Assemble the context graph out of the anchor and neighbor chunks
graph = build_context_graph(anchors, neighbors)

print("\nContext Graph:")
print(f"  Nodes: {graph['num_nodes']}")
print(f"  Edges: {graph['num_edges']}")

# Tally the nodes by their 'type' field
anchor_count = len([entry for entry in graph['nodes'] if entry['type'] == 'anchor'])
neighbor_count = len([entry for entry in graph['nodes'] if entry['type'] == 'neighbor'])

print(f"\nNode Types:")
print(f"  Anchor nodes: {anchor_count}")
print(f"  Neighbor nodes: {neighbor_count}")

## Step 5: Visualize Graph Structure (Conceptual)

In [None]:
# Plain-text rendering of the graph: anchor nodes first, then a few edges
print("\nGraph Structure:")
print(f"{'='*60}")

# List every node whose type is 'anchor', labelled by its article
print("\nANCHOR NODES:")
anchor_nodes = (entry for entry in graph['nodes'] if entry['type'] == 'anchor')
for anchor_node in anchor_nodes:
    article = anchor_node['metadata'].get('article', '?')
    print(f"  [A{article}] Article {article}")

# Print at most the first five edges
print("\nEDGES (sample):")
sample_edges = graph['edges'][:5]
for link in sample_edges:
    print(f"  {link['source']} --({link['relation']})--> {link['target']}")

print(f"\n{'='*60}")

## Step 6: Compose Answer from Graph

In [None]:
# Produce the answer for `query` from the context graph built above
result = compose_answer_with_graph(
    query=query,
    context_graph=graph,
    openai_api_key=os.getenv("OPENAI_API_KEY"),
)

# Frame the answer text between two 60-character rules
divider = f"{'='*60}"
print(f"\nQuery: {query}")
print(f"\n{divider}")
print("Answer:")
print(result['answer'])
print(divider)

## Step 7: Analyze Citations

In [None]:
# Numbered list of the citations attached to the answer
print("\nCitations:")
for rank, cite in enumerate(result['citations'], start=1):
    print(f"  {rank}. Article {cite['article']}, Page {cite['page']} ({cite['type']})")

# Graph statistics, printed as label/value rows in a fixed order
print(f"\nGraph Statistics:")
stats = result['graph_stats']
stat_rows = (
    ("Total nodes", 'num_nodes'),
    ("Anchor nodes", 'num_anchors'),
    ("Neighbor nodes", 'num_neighbors'),
    ("Edges", 'num_edges'),
)
for label, key in stat_rows:
    print(f"  {label}: {stats[key]}")

## Step 8: Complete Graph RAG Pipeline

In [None]:
# End-to-end run: a single call covering the steps shown individually above
test_query = "What is the right to data portability?"

pipeline_kwargs = dict(
    query=test_query,
    faiss_path="faiss_index/",
    openai_api_key=os.getenv("OPENAI_API_KEY"),
    top_k=3,
    expansion_radius=2,
)
pipeline_result = run_graph_rag_pipeline(**pipeline_kwargs)

# Report the query details, then the answer framed between two rules
divider = f"{'='*60}"
print(f"\nOriginal Query: {pipeline_result['original_query']}")
print(f"Number of variations: {pipeline_result['num_variations']}")
print(f"\n{divider}")
print("Final Answer:")
print(pipeline_result['answer'])
print(divider)

## Step 9: Compare with Baseline RAG

In [None]:
# Compare Graph RAG vs Baseline RAG on the same question used in Step 8.
# BaselineRAG is imported in the notebook's first cell with the other
# project imports, so all dependencies are visible up front.

# Baseline RAG
baseline_rag = BaselineRAG(
    faiss_path="faiss_index/",
    openai_api_key=os.getenv("OPENAI_API_KEY")
)

baseline_result = baseline_rag.query(test_query)

print("\nComparison:")
print("=" * 60)
print("\nBaseline RAG:")
print(f"  Sources retrieved: {baseline_result['num_sources']}")
print("\nGraph RAG:")
print(f"  Query variations: {pipeline_result['num_variations']}")
print(f"  Graph nodes: {pipeline_result['graph_stats']['num_nodes']}")
print(f"  Context richness: {pipeline_result['graph_stats']['num_edges']} connections")
# Static summary lines: plain strings, since they contain no placeholders.
print("\nGraph RAG provides richer context through:")
print("  - Multiple query perspectives")
print("  - Neighboring chunk expansion")
print("  - Explicit relationship modeling")

## Summary

In this notebook, we:
- ✓ Implemented query rephrasing
- ✓ Retrieved anchor documents
- ✓ Expanded to neighboring chunks
- ✓ Built context graphs
- ✓ Generated answers with rich citations
- ✓ Compared with baseline RAG

Next: Notebook 7 - Responsible AI Testing and LangSmith Tracing