# GraphRAG Implementation with Neo4j

This notebook implements a Graph Retrieval Augmented Generation (GraphRAG) system that combines:
1. Vector embeddings for semantic similarity search
2. Knowledge graph in Neo4j for relationship-based context enrichment

The combination provides more comprehensive and accurate responses than either approach alone.

In [None]:
from llama_index.core import VectorStoreIndex, StorageContext, Settings, load_index_from_storage
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import Document
from langchain_core.prompts.chat import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI
from neo4j import GraphDatabase
from dotenv import load_dotenv
import matplotlib.pyplot as plt
import networkx as nx
from tqdm import tqdm
import pandas as pd
import warnings
import spacy
import glob
import json
import uuid
import os
import re

# Suppress warnings
warnings.filterwarnings("ignore")

# Load environment variables from a local .env file, if one exists
load_dotenv()

# The OpenAI clients read OPENAI_API_KEY. Assigning None to os.environ raises
# an opaque TypeError, so only export the key when a value is actually present.
_api_key = os.getenv("API_KEY") or os.getenv("OPENAI_API_KEY")
if _api_key:
    os.environ["OPENAI_API_KEY"] = _api_key
else:
    print("Warning: API_KEY is not set; OpenAI calls will fail until a key is provided")

# NEO4J SETUP
# Each setting can be overridden through the environment (or .env); the
# fallbacks are the values this notebook previously hard-coded.
neo4j_url = os.getenv("NEO4J_URL", "bolt://localhost:7687")
neo4j_username = os.getenv("NEO4J_USERNAME", "neo4j")
neo4j_password = os.getenv("NEO4J_PASSWORD", "123123123")

## Data Loading and Preparation

We'll start by loading the semicolon-delimited CSV files from the `result` directory and preparing the data for our GraphRAG system.

In [None]:
# Load CSV from result directory
def load_csv_data(directory="result"):
    """Read every ``*.csv`` file in *directory* into a single DataFrame.

    Files are parsed as semicolon-separated, UTF-8 (BOM tolerated). A file
    that fails to parse is reported and skipped. Each row of the combined
    frame receives a fresh random UUID string in an ``id`` column.

    Returns:
        The combined DataFrame, or ``None`` when no file could be loaded.
    """
    paths = glob.glob(f'{directory}/*.csv')
    print(f"Found {len(paths)} CSV files in {directory}")

    frames = []
    for path in tqdm(paths, desc="Loading CSV files"):
        try:
            frame = pd.read_csv(path, encoding='utf-8-sig', sep=';')
            print(f"  - {path}: {len(frame)} rows")
            frames.append(frame)
        except Exception as e:
            print(f"Error loading {path}: {e}")

    # Guard clause: nothing was readable
    if not frames:
        print("No data loaded.")
        return None

    combined_df = pd.concat(frames, ignore_index=True)
    print(f"Combined dataset: {len(combined_df)} rows")

    # One random identifier per row
    combined_df['id'] = [str(uuid.uuid4()) for _ in combined_df.index]
    return combined_df

# Read the CSV data (None when nothing could be loaded)
df = load_csv_data()

# Wrap each row in a Document carrying its text, generated id and label
if df is None:
    documents = []
    print("No documents created due to data loading issues")
else:
    documents = [
        Document(text=record['text'], id_=record['id'], metadata={"label": record['label']})
        for _, record in df.iterrows()
    ]
    print(f"Created {len(documents)} documents for indexing")

## Building Vector Index

We'll use LlamaIndex to create a vector index for our documents, which will enable semantic search.

In [None]:
# Embedding model used for both indexing and querying
embed_model = OpenAIEmbedding(model="text-embedding-ada-002")

# Register it as the global default
Settings.embed_model = embed_model

# Sentence-aware splitter: 512-token chunks with a 50-token overlap
text_splitter = SentenceSplitter.from_defaults(chunk_size=512, chunk_overlap=50)

# Nothing to index when data loading produced no documents
if not documents:
    print("Skipping vector index creation due to missing documents")
else:
    # Embed and index the documents
    print("Creating vector index...")
    vector_index = VectorStoreIndex.from_documents(
        documents,
        show_progress=True,
        transformations=[text_splitter],
    )

    # Persist so later cells can reload without re-embedding
    print("Saving vector index to storage...")
    vector_index.storage_context.persist(persist_dir="./storage")

    # Round-trip through disk to confirm the persisted index is readable
    storage_context = StorageContext.from_defaults(persist_dir="./storage")
    loaded_index = load_index_from_storage(storage_context)
    print("Vector index successfully loaded")

## Knowledge Graph Construction with Neo4j

Now we'll build a knowledge graph in Neo4j using the entities and relationships extracted from our documents.

In [None]:
# Initialize Neo4j connection
# One module-level driver is shared by the graph helpers in this notebook;
# each helper opens its own session from it. The driver is never closed here.
driver = GraphDatabase.driver(neo4j_url, auth=(neo4j_username, neo4j_password))

# Function to clear the database
def clear_database():
    """Delete every node (and its relationships) from the Neo4j database."""
    wipe_everything = "MATCH (n) DETACH DELETE n"
    with driver.session() as session:
        session.run(wipe_everything)
        print("Database cleared")

# Create constraints and indexes for better performance
def create_constraints():
    """Create uniqueness constraints for Chunk.id, Entity.name and Category.name.

    Every statement uses ``IF NOT EXISTS``, so calling this repeatedly is safe.
    """
    with driver.session() as session:
        # Chunk nodes are identified by id
        chunk_ids_unique = """
        CREATE CONSTRAINT chunk_id IF NOT EXISTS
        FOR (c:Chunk) REQUIRE c.id IS UNIQUE
        """

        # Entity nodes are identified by name
        entity_names_unique = """
        CREATE CONSTRAINT entity_name IF NOT EXISTS
        FOR (e:Entity) REQUIRE e.name IS UNIQUE
        """

        # Category nodes are identified by name
        category_names_unique = """
        CREATE CONSTRAINT category_name IF NOT EXISTS
        FOR (c:Category) REQUIRE c.name IS UNIQUE
        """

        for statement in (chunk_ids_unique, entity_names_unique, category_names_unique):
            session.run(statement)

        print("Constraints created")

# Function to normalize Vietnamese text
def normalize_text(text):
    """Lower-case *text*, fold Vietnamese letters to plain Latin, drop punctuation.

    After folding, every character that is neither a word character nor
    whitespace is removed, runs of whitespace collapse to a single space, and
    the result is stripped.
    """
    # (accented letters, plain replacement) pairs, applied in this order
    accent_groups = (
        ('àáảãạăằắẳẵặâầấẩẫậ', 'a'),
        ('èéẻẽẹêềếểễệ', 'e'),
        ('ìíỉĩị', 'i'),
        ('òóỏõọôồốổỗộơờớởỡợ', 'o'),
        ('ùúủũụưừứửữự', 'u'),
        ('ỳýỷỹỵ', 'y'),
        ('đ', 'd'),
    )

    folded = text.lower()
    for accented, plain in accent_groups:
        # One translate pass per group replaces the per-character replace() calls
        folded = folded.translate(str.maketrans(accented, plain * len(accented)))

    # Strip punctuation/symbols, then squeeze whitespace
    without_symbols = re.sub(r'[^\w\s]', '', folded)
    return re.sub(r'\s+', ' ', without_symbols).strip()

# Load Vietnamese spaCy model
# NOTE: a load failure is only printed; `nlp` is then left undefined, so any
# later code that references `nlp` directly will raise NameError.
try:
    nlp = spacy.load("vi_core_news_lg")
    print("Loaded Vietnamese spaCy model")
except Exception as e:
    print(f"Error: {e}")

# Function to extract entities using Vietnamese spaCy model
def extract_entities(text):
    """Return candidate entity strings found in *text*.

    Candidates come from three sources:
      * named entities from the module-level spaCy pipeline ``nlp``, when it
        was loaded successfully;
      * every whitespace-separated word;
      * every pair of adjacent words (bigrams), used as a stand-in for noun
        phrases because ``noun_chunks`` is not available for Vietnamese.

    Candidates are stripped, those of two characters or fewer are dropped, and
    duplicates are removed.

    Returns:
        A sorted list of unique strings, so repeated runs over the same text
        give the same order.
    """
    candidates = set()

    # The model cell only prints on failure and may leave `nlp` undefined (or
    # None); fall back to the n-gram candidates instead of raising NameError.
    pipeline = globals().get("nlp")
    if pipeline is not None:
        candidates.update(ent.text for ent in pipeline(text).ents)

    words = text.split()
    candidates.update(words)
    candidates.update(f"{left} {right}" for left, right in zip(words, words[1:]))

    return sorted({c.strip() for c in candidates if len(c.strip()) > 2})

def _parse_relationships(response):
    """Parse the raw LLM reply into a list of well-formed relationship dicts.

    Markdown code fences (with or without a ``json`` tag) are removed before
    parsing. Items that are not dicts with string ``source``, ``target`` and
    ``relationship`` values are discarded.

    Raises:
        ValueError: if the reply is not valid JSON or is not a JSON array.
        TypeError: if *response* is not a string.
    """
    cleaned = response
    if "```" in cleaned:
        cleaned = re.sub(r"```(?:json)?", "", cleaned, flags=re.IGNORECASE)

    parsed = json.loads(cleaned.strip())
    if not isinstance(parsed, list):
        raise ValueError(f"expected a JSON array, got {type(parsed).__name__}")

    required_keys = ("source", "target", "relationship")
    return [
        item for item in parsed
        if isinstance(item, dict)
        and all(isinstance(item.get(key), str) for key in required_keys)
    ]


# Function to extract relationships (verbs) between entities using LLM
def extract_relationships(text, entities):
    """Ask the LLM which of *entities* are related to each other in *text*.

    Args:
        text: The source passage.
        entities: Candidate entity strings; fewer than two means there is
            nothing to relate and no LLM call is made.

    Returns:
        A list of dicts with string ``source``, ``target`` and
        ``relationship`` keys. An empty list is returned when there are fewer
        than two entities or when the reply cannot be parsed.
    """
    if len(entities) < 2:
        return []
    
    # Create a prompt for the LLM to extract relationships
    template = """
    Extract meaningful relationships between entities in the following text. 
    The text is in Vietnamese about university regulations.
    
    Text: {text}
    
    Entities: {entities}
    
    For each pair of entities that have a clear relationship, identify the relationship verb or phrase.
    Format your response as a JSON array of objects with the following structure:
    [
      {{
        "source": "Entity1",
        "target": "Entity2",
        "relationship": "relationship_verb"
      }}
    ]
    
    Only include relationships that are explicitly stated or strongly implied in the text.
    If no clear relationships exist, return an empty array.
    Do not include any explanations, markdown formatting, or code blocks - just return the raw JSON.
    """
    
    prompt = ChatPromptTemplate.from_template(template)
    model = ChatOpenAI(temperature=0, model="gpt-4o-mini")
    chain = prompt | model | StrOutputParser()
    
    response = chain.invoke({
        "text": text,
        "entities": ", ".join(entities)
    })
    
    try:
        return _parse_relationships(response)
    except (ValueError, TypeError) as e:
        # json.JSONDecodeError is a ValueError; a bad reply must not abort the build
        print(f"Failed to parse relationship JSON: {e}\nResponse: {response[:100]}...")
        return []

def _safe_relationship_type(raw):
    """Return a Cypher-safe relationship type derived from *raw*, or None.

    The text is normalized, upper-cased and space-joined with underscores. It
    is accepted only if it is a plain ASCII identifier, because relationship
    types cannot be passed as query parameters and must be interpolated.
    """
    candidate = normalize_text(raw).upper().replace(' ', '_')
    if re.fullmatch(r'[A-Z_][A-Z0-9_]*', candidate):
        return candidate
    return None


# Function to populate Neo4j with chunks, entities, and relationships
def populate_graph(df):
    """Rebuild the Neo4j knowledge graph from *df*.

    The database is cleared first. For every row this creates a ``Chunk``
    node linked to its ``Category``, one ``Entity`` node per extracted entity
    linked from the chunk with ``CONTAINS``, and one relationship per
    LLM-extracted relation between existing entities. Rows that produced
    relationships are also dumped to ``relationships/<chunk_id>.json``.

    Args:
        df: DataFrame with ``text``, ``id`` and ``label`` columns; the
            ``__label__`` marker is removed from labels to get the category.
    """
    # Local import: keeps this block self-contained (neo4j is already a dependency)
    from neo4j.exceptions import Neo4jError

    # Clear existing data and create constraints
    clear_database()
    create_constraints()

    # Create category nodes
    categories = df['label'].str.replace('__label__', '').unique()
    with driver.session() as session:
        for category in categories:
            session.run("""
            MERGE (c:Category {name: $name})
            """, name=category)
        print(f"Created {len(categories)} category nodes")

    # Process each document and add to graph
    for _, row in tqdm(df.iterrows(), total=len(df), desc="Building knowledge graph"):
        text = row['text']
        chunk_id = row['id']
        category = row['label'].replace('__label__', '')

        entities = extract_entities(text)
        relationships = extract_relationships(text, entities)

        # Save the raw extraction to a JSON file for review
        if relationships:
            os.makedirs("relationships", exist_ok=True)
            with open(f"relationships/{chunk_id}.json", "w", encoding="utf-8") as f:
                json.dump(
                    {
                        "chunk_id": chunk_id,
                        "text": text,
                        "category": category,
                        "relationships": relationships,
                    },
                    f,
                    ensure_ascii=False,
                    indent=2,
                )

        with driver.session() as session:
            # Create chunk node
            session.run("""
            MERGE (c:Chunk {id: $id})
            SET c.text = $text,
                c.category = $category
            """, id=chunk_id, text=text, category=category)

            # Connect chunk to category
            session.run("""
            MATCH (chunk:Chunk {id: $chunk_id})
            MATCH (category:Category {name: $category})
            MERGE (chunk)-[:BELONGS_TO]->(category)
            """, chunk_id=chunk_id, category=category)

            # Create all entity nodes and their CONTAINS links in one round-trip
            if entities:
                session.run("""
                MATCH (c:Chunk {id: $chunk_id})
                UNWIND $names AS name
                MERGE (e:Entity {name: name})
                MERGE (c)-[:CONTAINS]->(e)
                """, chunk_id=chunk_id, names=entities)

            # Create relationships between entities
            for rel in relationships:
                # The LLM output is untrusted: skip anything malformed
                if not isinstance(rel, dict):
                    continue
                source = rel.get('source')
                target = rel.get('target')
                raw_type = rel.get('relationship')
                if not all(isinstance(v, str) for v in (source, target, raw_type)):
                    continue
                if len(source) <= 2 or len(target) <= 2 or source == target:
                    continue

                rel_type = _safe_relationship_type(raw_type)
                if rel_type is not None:
                    # rel_type is a validated identifier; backticks also guard keywords
                    query = f"""
                    MATCH (s:Entity {{name: $source}})
                    MATCH (t:Entity {{name: $target}})
                    MERGE (s)-[r:`{rel_type}`]->(t)
                    """
                    try:
                        session.run(query, source=source, target=target)
                        continue
                    except Neo4jError as e:
                        print(f"Falling back to RELATED_TO for '{raw_type}': {e}")

                # Generic relationship keeps the original wording as a property
                session.run("""
                MATCH (s:Entity {name: $source})
                MATCH (t:Entity {name: $target})
                MERGE (s)-[r:RELATED_TO]->(t)
                SET r.type = $relationship
                """, source=source, target=target, relationship=raw_type)

# Populate Neo4j only when the CSV data was loaded
if df is None:
    print("Skipping knowledge graph construction due to missing data")
else:
    populate_graph(df)
    print("Knowledge graph construction completed")

## Graph Exploration and Visualization

Let's explore the knowledge graph to understand its structure and content.

In [None]:
# Function to get graph statistics
def get_graph_stats():
    """Print node counts per label, relationship counts per type, and the ten
    most connected entities.

    Requires the APOC plugin (``apoc.meta.stats``). The top-entity query uses
    a ``COUNT { ... }`` subquery.
    """
    with driver.session() as session:
        # One call yields both maps. relTypesCount is keyed by relationship
        # type, whereas relTypes is keyed by pattern.
        stats = session.run("""
        CALL apoc.meta.stats()
        YIELD labels, relTypesCount
        RETURN labels, relTypesCount
        """).single()
        node_counts = stats['labels']
        rel_counts = stats['relTypesCount']

        print("Graph Statistics:")
        print("=================")

        print("\nNode counts:")
        for label, count in node_counts.items():
            print(f"  {label}: {count}")

        print("\nRelationship counts:")
        for rel, count in rel_counts.items():
            print(f"  {rel}: {count}")

        # Get top entities by connections
        top_entities = session.run("""
        MATCH (e:Entity)
        WITH e, COUNT { (e)--() } AS connections
        ORDER BY connections DESC
        LIMIT 10
        RETURN e.name AS entity, connections
        """).values()

        print("\nTop 10 entities by connections:")
        for entity, connections in top_entities:
            print(f"  {entity}: {connections} connections")

def _node_display_name(node):
    """Return the plot label for a Neo4j node: ``name``, else ``id`` property,
    else the driver's internal node id as a string."""
    if 'name' in node:
        return node['name']
    if 'id' in node:
        return node['id']
    return str(node.id)


# Function to visualize a subgraph around a key entity
def visualize_entity_subgraph(entity_name, depth=1):
    """Plot the neighbourhood of the entity called *entity_name*.

    Args:
        entity_name: Exact ``name`` of the ``Entity`` node to start from.
        depth: Maximum number of hops to follow from the entity. It was
            previously ignored (the query always used two hops); it is now
            honoured.

    Returns:
        The ``networkx.Graph`` that was drawn, or ``None`` when no path was
        found. At most 50 paths are fetched.

    Raises:
        ValueError: if *depth* is smaller than 1.
    """
    # Path-length bounds cannot be query parameters, so coerce to int before
    # interpolating into the Cypher text.
    max_hops = int(depth)
    if max_hops < 1:
        raise ValueError("depth must be a positive integer")

    with driver.session() as session:
        # Get subgraph around the entity
        result = session.run(f"""
        MATCH path = (e:Entity {{name: $name}})-[*1..{max_hops}]-(related)
        RETURN path
        LIMIT 50
        """, name=entity_name).values()

        if not result:
            print(f"No subgraph found for entity: {entity_name}")
            return

        # Convert to NetworkX graph for visualization
        G = nx.Graph()

        for record in result:
            path_obj = record[0]  # Neo4j Path object

            for node in path_obj.nodes:
                # Unlabeled nodes get a placeholder type instead of raising IndexError
                node_type = next(iter(node.labels), 'Unknown')
                G.add_node(_node_display_name(node), type=node_type)

            for rel in path_obj.relationships:
                G.add_edge(
                    _node_display_name(rel.start_node),
                    _node_display_name(rel.end_node),
                    type=rel.type,
                )

        # Visualize the graph
        plt.figure(figsize=(12, 10))

        # Node colors by type; any other type is drawn in gray rather than omitted
        colors = {
            'Entity': 'skyblue',
            'Chunk': 'lightgreen',
            'Category': 'salmon'
        }

        # Get node positions using spring layout
        pos = nx.spring_layout(G, k=0.3, iterations=50)

        # Group nodes by type so each group is drawn once in its own color
        nodes_by_type = {}
        for node_name, data in G.nodes(data=True):
            nodes_by_type.setdefault(data.get('type'), []).append(node_name)

        for node_type, members in nodes_by_type.items():
            nx.draw_networkx_nodes(
                G, pos,
                nodelist=members,
                node_color=colors.get(node_type, 'lightgray'),
                node_size=300,
                alpha=0.8,
            )

        # Draw edges
        nx.draw_networkx_edges(G, pos, width=1, alpha=0.5)

        # Draw labels with smaller font
        nx.draw_networkx_labels(G, pos, font_size=8)

        plt.title(f"Subgraph around '{entity_name}'")
        plt.axis('off')
        plt.tight_layout()
        plt.show()

        return G

# Get graph statistics
# Prints node counts, relationship counts and the ten most connected entities.
get_graph_stats()

# Visualize a subgraph around a key entity - replace with an actual entity from your data
# The name must match an Entity node's `name` exactly; otherwise nothing is drawn.
visualize_entity_subgraph("sinh viên")

## GraphRAG Implementation

Now we'll implement the GraphRAG query system that combines vector retrieval with graph-based context enrichment.

In [None]:
# Function to get graph context for a query
def get_graph_context(query, limit=5, include_chunks=False):
    """Build a textual graph context for *query*.

    An entity matches when its name contains one of the whitespace-separated
    query terms, or when the full query text contains the entity name. Up to
    five relationships to other entities are listed per matched entity.

    Args:
        query: The user's question.
        limit: Maximum number of entities to look up.
        include_chunks: When true, also list up to two chunks that contain
            each entity. Off by default, which keeps the output unchanged and
            skips a lookup whose result was previously discarded.

    Returns:
        The formatted context, or ``"No relevant graph context found."`` when
        no entity matches.
    """
    query_terms = query.split()

    entities_query = """
    MATCH (e:Entity)
    WHERE any(term IN $query_terms WHERE e.name CONTAINS term OR $query_text CONTAINS e.name)
    RETURN e.name AS entity
    LIMIT $limit
    """

    chunks_query = """
    MATCH (e:Entity {name: $entity_name})<-[:CONTAINS]-(c:Chunk)
    RETURN c.text AS chunk_text, c.category AS category
    LIMIT 2
    """

    # The match is undirected, so take source/target from the relationship
    # itself; otherwise incoming edges would be reported backwards.
    rels_query = """
    MATCH (e:Entity {name: $entity_name})-[r]-(:Entity)
    RETURN type(r) AS relationship,
           startNode(r).name AS source,
           endNode(r).name AS target,
           r.type AS rel_type
    LIMIT 5
    """

    with driver.session() as session:
        entities = session.run(
            entities_query,
            query_terms=query_terms,
            query_text=query,
            limit=limit
        ).values()

        if not entities:
            return "No relevant graph context found."

        context_parts = []

        for (entity_name,) in entities:
            entity_context = f"Entity: {entity_name}\n"

            relationships = session.run(rels_query, entity_name=entity_name).values()
            if relationships:
                entity_context += "Relationships:\n"
                for rel_type, source, target, rel_name in relationships:
                    # Generic edges keep the original wording in r.type; fall
                    # back to the type name if that property is missing.
                    if rel_type == "RELATED_TO" and rel_name:
                        relationship = rel_name
                    else:
                        relationship = rel_type.lower().replace('_', ' ')
                    entity_context += f"  - {source or entity_name} {relationship} {target}\n"

            if include_chunks:
                chunks = session.run(chunks_query, entity_name=entity_name).values()
                if chunks:
                    entity_context += "Chunks:\n"
                    for chunk_text, category in chunks:
                        entity_context += f"  - [{category}] {chunk_text}\n"

            context_parts.append(entity_context)

        # Return combined context
        return "\n\n".join(context_parts)

# Function to retrieve chunks using vector search
def get_vector_results(query, top_k=5):
    """Return the *top_k* most similar chunks for *query*.

    The persisted index in ``./storage`` is reloaded on every call. Each
    result is a dict with ``text``, ``score``, ``id`` and ``metadata`` keys.
    """
    storage_context = StorageContext.from_defaults(persist_dir="./storage")
    retriever = load_index_from_storage(storage_context).as_retriever(similarity_top_k=top_k)

    return [
        {
            "text": hit.node.text,
            "score": hit.score,
            "id": hit.node.id_,
            "metadata": hit.node.metadata
        }
        for hit in retriever.retrieve(query)
    ]

# Function to create a chatbot using GraphRAG
def create_graphrag_chatbot():
    """Build the GraphRAG answer chain: prompt -> gpt-4o-mini -> plain string.

    The chain expects ``vector_context``, ``graph_context`` and ``query``.
    """
    # Vietnamese system prompt combining both context sources
    template = """
    Bạn là trợ lý thông minh của Đại học Cần Thơ. Nhiệm vụ của bạn là trả lời câu hỏi về các quy định, 
    thủ tục và chính sách của trường một cách chính xác và hữu ích.
    
    Sử dụng cả thông tin từ tìm kiếm vector và từ đồ thị tri thức để trả lời câu hỏi một cách toàn diện.
    
    THÔNG TIN TỪ VECTOR:
    {vector_context}
    
    THÔNG TIN TỪ ĐỒ THỊ:
    {graph_context}
    
    Câu hỏi: {query}
    
    Yêu cầu:
    1. Trả lời câu hỏi dựa trên thông tin được cung cấp
    2. Nếu không có đủ thông tin, hãy nói rằng bạn không có thông tin đầy đủ
    3. Trình bày câu trả lời rõ ràng, dễ hiểu và có cấu trúc
    4. Nếu có thông tin mâu thuẫn, hãy nêu rõ các khả năng
    
    Trả lời:
    """

    return (
        ChatPromptTemplate.from_template(template)
        | ChatOpenAI(temperature=0, model="gpt-4o-mini")
        | StrOutputParser()
    )

# Function to query the GraphRAG system
def query_graphrag(query):
    """Answer *query* using both vector retrieval and graph context.

    Returns a dict with the ``query``, the LLM ``response``, the raw
    ``vector_results`` and the ``graph_context`` string.
    """
    vector_results = get_vector_results(query, top_k=5)

    # Number the passages from 1 and show their similarity score
    passages = []
    for position, hit in enumerate(vector_results, start=1):
        passages.append(
            f"Đoạn {position} (Điểm tương đồng: {hit['score']:.4f}):\n{hit['text']}"
        )
    vector_context = "\n\n".join(passages)

    graph_context = get_graph_context(query)

    answer = create_graphrag_chatbot().invoke({
        "query": query,
        "vector_context": vector_context,
        "graph_context": graph_context
    })

    return {
        "query": query,
        "response": answer,
        "vector_results": vector_results,
        "graph_context": graph_context
    }

# Test the GraphRAG system with sample queries
# Each query triggers a vector search, a graph lookup and one LLM call.
sample_queries = [
    "Quy định về học phí của trường là gì?",
    "Làm thế nào để xin học bổng?",
    "Quy trình xét tốt nghiệp gồm những bước nào?",
    "Sinh viên cần điều kiện gì để được ở ký túc xá?",
    "Khi nào sinh viên bị cảnh cáo học vụ?"
]

for query in sample_queries:
    print("\n" + "="*80)
    print(f"QUERY: {query}")
    print("="*80)

    result = query_graphrag(query)

    print("\nRESPONSE:")
    print(result["response"])

    # Show only the three best passages, truncated to 150 characters
    print("\nTOP VECTOR RESULTS:")
    for i, res in enumerate(result["vector_results"][:3]):
        print(f"\n--- Result {i+1} (Score: {res['score']:.4f}) ---")
        print(res["text"][:150] + "..." if len(res["text"]) > 150 else res["text"])

    print("\nGRAPH CONTEXT:")
    print(result["graph_context"])

## Interactive ChatBot Interface

Let's create an interactive interface for our GraphRAG chatbot.

In [None]:
def interactive_graphrag_chat():
    """
    Run an interactive GraphRAG chat session in the notebook.

    Enter 'quit', 'exit', or 'q' to end the session; end-of-input or Ctrl-C
    ends it as well. Enter 'debug' to toggle display of the retrieved vector
    passages and graph context. Commands are matched case-insensitively after
    trimming surrounding whitespace, and blank input is ignored rather than
    sent to the model.
    """
    farewell = "\nCảm ơn bạn đã sử dụng chatbot. Chúc bạn một ngày tốt lành!"

    print("="*50)
    print("GRAPHRAG CHATBOT ĐẠI HỌC CẦN THƠ")
    print("="*50)
    print("Hỏi đáp về quy định, thủ tục và chính sách của trường")
    print("Nhập 'quit', 'exit', hoặc 'q' để kết thúc")
    print("Nhập 'debug' để xem thông tin truy xuất")
    print("="*50)

    debug_mode = False

    while True:
        # Get user input; treat a closed input stream or Ctrl-C as "quit"
        try:
            query = input("\nBạn: ").strip()
        except (EOFError, KeyboardInterrupt):
            print(farewell)
            break

        # Nothing typed: ask again instead of spending an LLM call
        if not query:
            continue

        command = query.lower()

        # Check if user wants to exit
        if command in ('quit', 'exit', 'q'):
            print(farewell)
            break

        # Check if user wants to toggle debug mode
        if command == 'debug':
            debug_mode = not debug_mode
            print(f"\nDebug mode {'enabled' if debug_mode else 'disabled'}")
            continue

        # Process query and get response; a failed query must not end the session
        try:
            result = query_graphrag(query)

            print("\nTrợ lý: " + result["response"])

            if debug_mode:
                print("\n--- DEBUG INFO ---")
                print("\nTOP VECTOR RESULTS:")
                for i, res in enumerate(result["vector_results"][:3]):
                    print(f"\n- Result {i+1} (Score: {res['score']:.4f}) -")
                    print(res["text"][:150] + "..." if len(res["text"]) > 150 else res["text"])

                print("\nGRAPH CONTEXT:")
                print(result["graph_context"])
        except Exception as e:
            print(f"\nLỗi xử lý: {e}")

# Uncomment to run the interactive chat
# interactive_graphrag_chat()

## Evaluation and Comparison

We'll compare the answers produced by three retrieval strategies:
1. Vector-only (traditional RAG)
2. Graph-only retrieval
3. Combined GraphRAG approach

In [None]:
# Function to query with vector-only approach
def query_vector_only(query):
    """Answer *query* from the top five vector-search passages only."""
    hits = get_vector_results(query, top_k=5)

    # Passages are numbered from 1 and separated by blank lines
    sections = [f"Đoạn {rank}:\n{hit['text']}" for rank, hit in enumerate(hits, start=1)]
    vector_context = "\n\n".join(sections)

    template = """
    Bạn là trợ lý thông minh của Đại học Cần Thơ. Trả lời câu hỏi dựa trên thông tin sau:
    
    {context}
    
    Câu hỏi: {query}
    
    Trả lời:
    """

    chain = (
        ChatPromptTemplate.from_template(template)
        | ChatOpenAI(temperature=0, model="gpt-4o-mini")
        | StrOutputParser()
    )

    return chain.invoke({
        "query": query,
        "context": vector_context
    })

# Function to query with graph-only approach
def query_graph_only(query):
    """Answer *query* from the knowledge-graph context only."""
    graph_context = get_graph_context(query)

    template = """
    Bạn là trợ lý thông minh của Đại học Cần Thơ. Trả lời câu hỏi dựa trên thông tin sau:
    
    {context}
    
    Câu hỏi: {query}
    
    Trả lời:
    """

    chain = (
        ChatPromptTemplate.from_template(template)
        | ChatOpenAI(temperature=0, model="gpt-4o-mini")
        | StrOutputParser()
    )

    return chain.invoke({
        "query": query,
        "context": graph_context
    })

# Function to evaluate approaches
def compare_approaches(queries):
    """Run every query through the three strategies and collect the answers.

    The answers are written to ``approach_comparison.csv`` and returned as a
    DataFrame with ``query``, ``vector_only``, ``graph_only`` and ``graphrag``
    columns.
    """
    rows = []

    for query in queries:
        print(f"Processing query: {query}")

        # Same call order as before: vector, graph, combined
        vector_answer = query_vector_only(query)
        graph_answer = query_graph_only(query)
        combined_answer = query_graphrag(query)["response"]

        rows.append({
            "query": query,
            "vector_only": vector_answer,
            "graph_only": graph_answer,
            "graphrag": combined_answer
        })

    comparison_df = pd.DataFrame(rows)

    # Persist for side-by-side review
    comparison_df.to_csv("approach_comparison.csv", index=False)
    print("Results saved to approach_comparison.csv")

    return comparison_df

# Example evaluation queries
# Passed to compare_approaches() by the commented-out call below.
eval_queries = [
    "Quy định về kỷ luật sinh viên",
    "Làm thế nào để chuyển ngành học",
    "Điều kiện để được học bổng khuyến khích học tập",
    "Thủ tục xin bảo lưu kết quả học tập",
    "Quy định về chuẩn đầu ra ngoại ngữ"
]

# Uncomment to run comparison
# comparison_results = compare_approaches(eval_queries)
# comparison_results

## Conclusion

This notebook demonstrates a complete GraphRAG implementation that combines:

1. **Vector Retrieval**: Traditional semantic search using embeddings
2. **Knowledge Graph**: Relationship-aware context from Neo4j
3. **LLM Integration**: Structured prompting with context from both sources

The combined approach provides more comprehensive and accurate responses by leveraging both the semantic understanding of vector search and the structured relationships of a knowledge graph.

Next steps could include:
- Fine-tuning the prompts for better responses
- Adding more relationship types to the knowledge graph
- Implementing user feedback mechanisms to improve retrieval
- Deploying the system as a web service