In [1]:
# Load the python-dotenv IPython extension, then read the variables from a
# local .env file into the process environment.
# NOTE(review): presumably these are the credentials the imported helpers
# need (LLM API key, Neo4j connection) — confirm against ch07_tools / utils.
%load_ext dotenv
%dotenv

In [2]:
import time
from datetime import datetime

import pandas as pd
from datasets import Dataset
from ragas import evaluate
from ragas.metrics import (
    answer_correctness,
    context_recall,
    faithfulness,
)
from tqdm import tqdm

from ch07_tools import get_answer
from utils import neo4j_driver


  from pandas.core import (




In [3]:
# Smoke test: run the agent on a single question and display what it returns
# (the answer text together with the list of context messages it collected).
smoke_question = "Who acted in the most movies?"
get_answer(smoke_question)

('Tom Hanks acted in the most movies, with a total of 12.',
 [{'role': 'assistant',
   'content': 'For the question: \'Which actor has acted in the most movies?\', we have the answer: \'[[{"actor": "Tom Hanks", "movies": 12}]]\''},
  {'role': 'assistant',
   'content': 'For the question: \'Who is the actor with the highest number of movie roles?\', we have the answer: \'[[{"actor": "Tom Hanks", "roles_count": 12}]]\''},
  {'role': 'assistant',
   'content': 'For the question: \'Who holds the record for acting in the most films?\', we have the answer: \'[[{"person": "Tom Hanks", "movies": 12}]]\''}])

In [4]:
# Benchmark set: one natural-language question per row plus the Cypher query
# whose result is used as that question's ground truth. The file is
# semicolon-separated.
test_data = pd.read_csv(
    "../data/benchmark_data.csv",
    sep=";",
)
test_data

Unnamed: 0,question,cypher
0,Hello,"RETURN ""greeting and reminder it can only answ..."
1,What can you do?,"RETURN ""answer questions related to movies and..."
2,What is the weather like in Spain?,"RETURN ""irrelevant question as we can answer q..."
3,How many movies are there?,MATCH (m:Movie) RETURN count(*) AS movies
4,Who acted in the most movies?,MATCH (p:Person)-[:ACTED_IN]->(m:Movie)\nRETUR...
5,Which movies released between 1990 and 2000 ha...,MATCH (m:Movie) WHERE m.released >= 1990 AND m...
6,Who has the longest name among all actors?,MATCH (p:Person)-[:ACTED_IN]->(:Movie) RETURN ...
7,List people born before 1940.,MATCH (p:Person) WHERE p.born < 1940 RETURN p....
8,Who was born in 1965 and has directed a movie?,MATCH (p:Person)-[:DIRECTED]->(m:Movie) WHERE ...
9,Who acted in Top Gun?,MATCH (p:Person)-[:ACTED_IN]->(m:Movie {title:...


In [5]:
# Run every benchmark question through the agent and collect, per row:
#   ground_truths - records returned by the row's reference Cypher query,
#                   each rendered as a string
#   answers       - the agent's answer, or None when get_answer raised
#   contexts      - the 'content' strings of the context messages returned
#                   by get_answer ([] when it raised)
#   latencies     - seconds spent producing the answer (failed calls included)
#   failures      - (question, repr(exception)) for every row that raised
answers = []
ground_truths = []
latencies = []
contexts = []
failures = []

for i, row in tqdm(test_data.iterrows(), total=len(test_data), desc="Processing rows"):
    # The reference query runs outside the try block on purpose: a broken
    # benchmark query should stop the run instead of producing a bogus row.
    ground_truth, _, _ = neo4j_driver.execute_query(row["cypher"])
    ground_truths.append([str(el.data()) for el in ground_truth])

    # perf_counter is monotonic, so the measured latency cannot be skewed by
    # wall-clock adjustments during a slow LLM call.
    start = time.perf_counter()
    try:
        answer, context = get_answer(row["question"])
        context = [el["content"] for el in context]
    except Exception as exc:
        # Best-effort: keep the benchmark going, but record and report the
        # failure so it is not silently scored as a missing answer later on.
        answer, context = None, []
        failures.append((row["question"], repr(exc)))
        tqdm.write(f"get_answer failed for {row['question']!r}: {exc!r}")
    latencies.append(time.perf_counter() - start)
    answers.append(answer)
    contexts.append(context)

Processing rows: 100%|██████████| 17/17 [04:39<00:00, 16.44s/it]


In [6]:
# Attach the collected results to the benchmark frame, one column per list.
# Each ground truth (a list of record strings) is serialised to a single
# string; the other lists are stored as collected.
collected_columns = {
    "ground_truth": list(map(str, ground_truths)),
    "answer": answers,
    "latency": latencies,
    "retrieved_contexts": contexts,
}
for column_name, column_values in collected_columns.items():
    test_data[column_name] = column_values

In [7]:
# Score the collected answers with ragas.
# Missing values (e.g. an answer left as None because the agent raised) are
# replaced with the literal "I don't know" before the frame is converted.
evaluation_frame = test_data.fillna("I don't know")
dataset = Dataset.from_pandas(evaluation_frame)

ragas_metrics = [answer_correctness, context_recall, faithfulness]
result = evaluate(dataset, metrics=ragas_metrics)

# Aggregate score per metric.
print(result)

Evaluating:   0%|          | 0/51 [00:00<?, ?it/s]

{'answer_correctness': 0.7774, 'context_recall': 0.7941, 'faithfulness': 0.9657}


In [8]:
# Copy the per-row score of each metric out of `result.scores` into a column
# of the same name, then display the enriched benchmark frame.
score_columns = ("answer_correctness", "context_recall", "faithfulness")
for metric_name in score_columns:
    per_row_scores = []
    for row_scores in result.scores:
        per_row_scores.append(row_scores[metric_name])
    test_data[metric_name] = per_row_scores
test_data

Unnamed: 0,question,cypher,ground_truth,answer,latency,retrieved_contexts,answer_correctness,context_recall,faithfulness
0,Hello,"RETURN ""greeting and reminder it can only answ...","['{\'""greeting and reminder it can only answer...",I can only answer questions about movies and t...,6.841537,[For the question: 'Which actor has acted in t...,0.571927,0.0,1.0
1,What can you do?,"RETURN ""answer questions related to movies and...","['{\'""answer questions related to movies and t...",I can provide information about movies and the...,7.312087,[For the question: 'Which actor has acted in t...,0.698831,1.0,1.0
2,What is the weather like in Spain?,"RETURN ""irrelevant question as we can answer q...","['{\'""irrelevant question as we can answer que...",I can only answer questions about movies and t...,8.015248,[For the question: 'Which actor has acted in t...,0.208939,1.0,1.0
3,How many movies are there?,MATCH (m:Movie) RETURN count(*) AS movies,"[""{'movies': 38}""]",There are 38 movies in total.,24.523914,[For the question: 'Which actor has acted in t...,0.957427,1.0,1.0
4,Who acted in the most movies?,MATCH (p:Person)-[:ACTED_IN]->(m:Movie)\nRETUR...,"[""{'p.name': 'Tom Hanks', 'movieCount': 12}""]",The actor who acted in the most movies is Tom ...,18.863939,[For the question: 'Which actor has acted in t...,0.699852,1.0,1.0
5,Which movies released between 1990 and 2000 ha...,MATCH (m:Movie) WHERE m.released >= 1990 AND m...,"[""{'m.title': 'The Matrix', 'm.released': 1999...","The answer to the question is: '[[{""m.title"": ...",66.629885,[For the question: 'Which actor has acted in t...,0.717179,1.0,1.0
6,Who has the longest name among all actors?,MATCH (p:Person)-[:ACTED_IN]->(:Movie) RETURN ...,"[""{'name': 'Philip Seymour Hoffman'}""]",The information needed to determine who has th...,20.829585,[For the question: 'Which actor has acted in t...,0.186266,0.0,1.0
7,List people born before 1940.,MATCH (p:Person) WHERE p.born < 1940 RETURN p....,"[""{'p.name': 'Max von Sydow'}"", ""{'p.name': 'G...",Here is a list of people born before 1940:\n\n...,25.236618,[For the question: 'Which actor has acted in t...,0.947285,1.0,1.0
8,Who was born in 1965 and has directed a movie?,MATCH (p:Person)-[:DIRECTED]->(m:Movie) WHERE ...,"[""{'p.name': 'Lana Wachowski'}"", ""{'p.name': '...","Based on the information provided, the person ...",10.073612,[For the question: 'Which actor has acted in t...,0.692389,1.0,0.5
9,Who acted in Top Gun?,MATCH (p:Person)-[:ACTED_IN]->(m:Movie {title:...,"[""{'p.name': 'Val Kilmer'}"", ""{'p.name': 'Meg ...","The actors in the movie ""Top Gun"" are Val Kilm...",8.336806,[For the question: 'Which actor has acted in t...,0.950795,1.0,1.0
