# GraphRAG Relevance, Factuality and Synthesis Evaluation

## Step 0: environment set up

In [3]:
from dotenv import load_dotenv
import os
from langchain_neo4j import Neo4jGraph
from libs import create_vector_index, generate_response
import pandas as pd
from conn import connect2Googlesheet
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from rouge_score import rouge_scorer
# Force reload of the .env file
load_dotenv()

True

In [4]:
# Connect to Neo4j database
try:
    graph = Neo4jGraph(
        url=os.getenv("NEO4J_URL"),
        username=os.getenv("NEO4J_USERNAME"),
        password=os.getenv("NEO4J_PASSWORD")
    )
    print("Connected to Neo4j database successfully.")
except ValueError as e:
    print(f"Could not connect to Neo4j database: {e}")

Connected to Neo4j database successfully.


## Step 1: Create vector index

In [11]:
create_vector_index(graph, "entities")

Index 'entities' exists but has incorrect dimensions: None. Recreating...


## Step 2: Load questions from google sheet

In [12]:
spreadsheet = connect2Googlesheet()

# Select the worksheet: relevance
worksheet = spreadsheet.get_worksheet(2)  

# Get all records as a list of dictionaries
data = worksheet.get_all_records()

# Convert to Pandas DataFrame
df_MedQ = pd.DataFrame(data)

In [13]:
df_MedQ.head()

Unnamed: 0,condition,number,docs,Question,Mahmud's Note,status,comments,Unnamed: 8
0,ARDS,1,ACURASYS,Does early administration of neuromuscular blo...,Like,,,
1,ARDS,2,ACURASYS,Do patients with severe ARDS being treated wit...,Replace,fixed,,
2,ARDS,3,ROSE,"In patients with moderate to severe ARDS, does...",Maybe this question: In patients with moderate...,fixed,,
3,ARDS,4,ROSE,Do patients with moderate-to-severe ARDS have ...,Local question (not sure if this is the aim of...,fixed,Wrong concept since PEEP by itself is mandator...,Does the use of neuromuscular blockers in pati...
4,ARDS,5,FACTT,"Among patients with ALI/ARDS, does a conservat...",Local question (not sure if this is the aim of...,fixed,Check if studies defined conservative by CVP <...,


## Step 3: Relevance check for top 30 questions

In [14]:
# Extract the top 20 questions from df_MedQ
top_30_questions = df_MedQ.head(30)

# Initialize a list to store the results
results = []

# Iterate over the top 20 questions
for index, row in top_30_questions.iterrows():
    question_number = index + 1  # Assuming the question number is the index + 1
    question = row['Question']  # Replace 'Question' with the actual column name for questions in df_MedQ
    
    # Generate response for the question
    response, context = generate_response(graph, question)
    
    # Extract relevant documents from the response content
    docs = response.choices[0].message.content  # Adjust this based on the actual response structure
    
    # Append the result to the list
    results.append({'Question number': question_number, 'docs': docs})

# Convert the results to a DataFrame
results_df = pd.DataFrame(results, columns=['Question number', 'docs'])

In [6]:
question = 'Does the use of steriods reduce mortality of sepsis?'
response, context = generate_response(graph, question)
print(response.choices[0].message.content)

"CORTICUS.pdf"


In [37]:
# load the relevant documents
relevant_docs_df = pd.read_csv('relevant_docs.csv')

# Concatenate results_df with relevant_docs_df side by side based on their index
concatenated_df = pd.concat([results_df, relevant_docs_df], axis=1)

# Display the concatenated DataFrame
concatenated_df = concatenated_df[['Question','Docs','docs']].rename(columns={'Docs':'Relevant Docs', 'docs':'Generated Docs'})

# Function to calculate ROUGE score
def calculate_rouge(reference, candidate):
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)
    scores = scorer.score(reference, candidate)
    return scores

# Calculate the ROUGE score for all questions and add to the DataFrame
# Calculate ROUGE scores for all questions and add to the DataFrame
rouge_scores = concatenated_df.apply(
    lambda row: calculate_rouge(str(row['Relevant Docs']), str(row['Generated Docs'])), axis=1
)

# Extract ROUGE-1 and ROUGE-L F1 scores and add them as new columns
concatenated_df['ROUGE-1 F1'] = rouge_scores.apply(lambda score: score['rouge1'].fmeasure)
# Display the concatenated DataFrame with BLEU Score
concatenated_df
# Save the concatenated DataFrame to a CSV file
#concatenated_df.to_csv('concatenated_results.csv', index=False)

Unnamed: 0,Question,Relevant Docs,Generated Docs,ROUGE-1 F1
0,Relevace Q1,"ROSE, ACURASYS","[""ACURASYS""]",0.666667
1,Relevace Q2,"ROSE, ACURASYS","""ACURASYS""",0.666667
2,Relevace Q3,ROSE,"""ARDS"", ""ACURASYS""",0.0
3,Relevace Q4,ROSE,"""ROSE"", ""ACURASYS""",0.666667
4,Relevace Q5,FACTT,"""ARDS"", ""ACURASYS""",0.0
5,Relevace Q6,FACTT,"[""FACTT.pdf""]",0.666667
6,Relevace Q7,ARDSNet,"""The Acute Respiratory Distress Syndrome Network""",0.0
7,Relevace Q8,ARDSNet,"""ARDS""",0.0
8,Relevace Q9,PROSEVA,"""ACURASYS""",0.0
9,Relevace Q10,OSCILLATE,[],0.0
