# 04_05: Evaluating your GraphRAG pipeline

In [None]:
import os

# Connection settings for the Neo4j database and the OpenAI API key.
# Each value is taken from the environment when the variable is set, so real
# secrets do not have to be saved inside the notebook; otherwise the original
# placeholder/default below is used and can be edited in place.
URI = os.getenv("NEO4J_URI", "...")
USER = os.getenv("NEO4J_USERNAME", "neo4j")
PWD = os.getenv("NEO4J_PASSWORD", "...")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "...")

In [None]:
from langchain_neo4j import Neo4jGraph
from langchain_core.documents import Document
from langchain_community.graphs.graph_document import GraphDocument
import os

from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
from langchain_neo4j import GraphCypherQAChain
from langchain.prompts import PromptTemplate
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.evaluation.qa.eval_chain import QAEvalChain


In [None]:
# Both graph handles connect to the same database with the same credentials;
# the second one additionally passes enhanced_schema=True.
_connection_kwargs = {"url": URI, "username": USER, "password": PWD}
graph = Neo4jGraph(**_connection_kwargs)
enhanced_graph = Neo4jGraph(**_connection_kwargs, enhanced_schema=True)

In [None]:
# Gold-standard (question, expected answer) pairs used to grade the pipeline.
_QA_PAIRS = (
    (
        "What sports is the International Ski And Snowboard Federation responsible for?",
        "Alpine Skiing, Freestyle Skiing, Snowboarding, Nordic Combined, Ski Jumping, Cross-Country Skiing",
    ),
    (
        "What activity are ski poles not used in?",
        "Ski jumping",
    ),
    (
        "Who do athletes get help from?",
        "Coaches, Peer Mentors, and Sports Psychologists",
    ),
)

# Each example is a dict with a "query" and an "answer" entry, in that order.
examples = [
    {"query": question, "answer": expected}
    for question, expected in _QA_PAIRS
]

In [None]:
# Chat model used both to answer questions over the graph and, later, to grade
# the answers. Temperature is pinned to 0.
llm = ChatOpenAI(
    model="gpt-4o",
    temperature=0,
    openai_api_key=OPENAI_API_KEY,
)

# Question-answering chain over the enhanced-schema graph connection.
# NOTE(review): allow_dangerous_requests=True is an explicit opt-in; the chain
# runs model-generated Cypher against the database, so the credentials used
# should be narrowly scoped -- confirm against the library documentation.
graph_chain = GraphCypherQAChain.from_llm(
    llm=llm,
    graph=enhanced_graph,
    allow_dangerous_requests=True,
    verbose=True,
)

In [None]:
# Ask the graph chain every example question, in order, and keep only the
# whitespace-stripped answer text under the "result" key.
predictions = [
    {"result": graph_chain.invoke({"query": example["query"]})["result"].strip()}
    for example in examples
]

In [None]:
# Grade each prediction against its gold answer using the same LLM.
eval_chain = QAEvalChain.from_llm(llm=llm)

# The key names are spelled out (they are the library defaults) to make the
# coupling with the dict keys used in `examples` and `predictions` visible.
results = eval_chain.evaluate(
    examples,
    predictions,
    question_key="query",
    answer_key="answer",
    prediction_key="result",
)

In [None]:
import re

# The grader returns free-form LLM text, so the verdict may arrive as
# "CORRECT", " correct\n", "GRADE: CORRECT", and so on. Match whole words only
# so that "INCORRECT" is never mistaken for "CORRECT".
_VERDICT_RE = re.compile(r"\b(CORRECT|INCORRECT)\b", re.IGNORECASE)


def _is_correct(grade_text):
    """Return True if the first verdict word in *grade_text* is CORRECT.

    Returns False when the text is empty or contains no verdict word.
    """
    found = _VERDICT_RE.search(grade_text or "")
    return found is not None and found.group(1).upper() == "CORRECT"


# Print a per-question report and count the answers graded as correct.
correct = 0
for example, prediction, res in zip(examples, predictions, results):
    print(f"Query: {example['query']}")
    print(f"Prediction from graph: {prediction['result']}")
    print(f"Gold answer: {example['answer']}")
    print(f"Grade: {res['results']}")
    print("---")
    if _is_correct(res["results"]):
        correct += 1

# Report 0.00 for an empty example set instead of raising ZeroDivisionError.
accuracy = correct / len(examples) if examples else 0.0
print(f"Graph QA Accuracy: {accuracy:.2f}")