# Graph-RAG vs Vector-RAG Evaluation
This notebook compares the answers and context length of **Stage 1** (vector-only) and **Stage 2** (Neo4j Graph-RAG).
You need to run **11_ingest_graph.py** first so the Neo4j database is populated.

In [None]:
import os, sys, json, math, textwrap, itertools
from pathlib import Path
from dotenv import load_dotenv
load_dotenv()

# Path helpers
# NOTE(review): assumes the notebook is started from a sub-folder of the
# stage-2 directory, so that cwd's parent is the stage-2 folder -- confirm.
BASE = Path.cwd().parent  # stage2 folder
STAGE1 = BASE.parent / 'stage1_vector_rag'
# STAGE1 stays on the path as before; BASE.parent is added as well so the
# package-qualified module names used below can be resolved. The membership
# check keeps sys.path free of duplicates when the cell is re-run.
for _extra_path in (STAGE1, BASE.parent):
    if str(_extra_path) not in sys.path:
        sys.path.append(str(_extra_path))

# The stage module file names start with a digit, so they are not valid
# identifiers and cannot appear in a `from ... import ...` statement (that is
# a SyntaxError). importlib accepts the dotted name as a plain string instead.
import importlib

_build_index = importlib.import_module('stage1_vector_rag.02_build_index')
_graph_rag = importlib.import_module('stage2_neo4j_graph_rag.12_graph_rag')
_vector_rag = importlib.import_module('stage1_vector_rag.03_langgraph_rag')

# re-use models
embeddings = _build_index.embeddings
INDEX_DIR = _build_index.INDEX_DIR
PINECONE_API_KEY = _build_index.PINECONE_API_KEY
graph_retrieve = _graph_rag.retrieve
embed_model = _graph_rag.embed_model
vector_retrieve = _vector_rag.top_k

# Benchmark questions; each one is sent to both the vector retriever and the
# graph retriever.
QUESTIONS = [
    'When was Ada Lovelace born?',
    'Which language did Guido van Rossum create?',
    'Name two sub-fields related to artificial intelligence.',
]

## Compare retrieved context length

In [None]:
from collections import defaultdict, Counter

def token_len(text: str) -> int:
    """Return the number of whitespace-separated words in *text*.

    This is a plain word count (``str.split`` with no separator), not a
    model-tokenizer count; runs of whitespace are treated as one separator
    and an empty or whitespace-only string yields 0.
    """
    words = text.split()
    return len(words)

# For every benchmark question, retrieve context with both pipelines and
# print the word count (per token_len) of each retrieved context.
for q in QUESTIONS:
    # vector only
    # NOTE(review): the unpacking below assumes each hit is a 2-item pair
    # whose first element is the source text -- confirm against stage-1 top_k.
    v_hits = vector_retrieve(q, k=4)
    v_context = '\n'.join(src for src, _ in v_hits)
    # graph: the retriever is called with a state dict holding the question,
    # and the returned state is read through its 'context' entry.
    g_state = graph_retrieve({'question': q})
    g_context = g_state['context']

    # The leading '\n' puts a blank line between the reports of two questions.
    print('\n###', q)
    print('Vector-RAG context words:', token_len(v_context))
    print('Graph-RAG  context words:', token_len(g_context))