In [1]:
# Enable IPython's autoreload extension in mode 2, which reloads imported
# modules before each cell runs so edits to local packages take effect
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os

import json

import llama_cpp
import torch
import pandas as pd

from dotenv import load_dotenv
from openai import OpenAI

from curverag import utils
from curverag.curverag import CurveRAG, DEFAULT_ENTITY_TYPES, DEFAULT_GLINER_MODEL, DEFAULT_SENTENCE_TRANSFORMER_MODEL
from curverag.graph import KnowledgeGraph
from curverag.atth.kg_dataset import KGDataset
from curverag.atth.models.hyperbolic import AttH
from curverag.eval import evaluation, queries, context, expected_output

# Load variables from a local .env file into the process environment;
# OPENAI_API_KEY is read from the environment further down.
load_dotenv() 

True

# Explore dataset

In [6]:
# Read the three 2WikiMultihopQA splits from disk into `train`, `dev`, `test`.
split_paths = {
    'train': '../datasets/2WikiMultihopQA/train.json',
    'dev': '../datasets/2WikiMultihopQA/dev.json',
    'test': '../datasets/2WikiMultihopQA/test.json',
}

loaded_splits = {}
for split_name, split_path in split_paths.items():
    # Files are opened in binary mode, as json.load accepts bytes input.
    with open(split_path, 'rb') as split_file:
        loaded_splits[split_name] = json.load(split_file)

train = loaded_splits['train']
dev = loaded_splits['dev']
test = loaded_splits['test']

In [7]:
# Fields available on a single training example.
train[0].keys()

dict_keys(['_id', 'type', 'question', 'context', 'supporting_facts', 'evidences', 'answer'])

In [8]:
# Question text of the first training example.
train[0]['question']

'Are director of film Move (1970 Film) and director of film Méditerranée (1963 Film) from the same country?'

In [11]:
# First three context entries of the first example; each entry is a
# [title, list-of-sentences] pair.
train[0]['context'][:3]

[['Stuart Rosenberg',
  ['Stuart Rosenberg (August 11, 1927 – March 15, 2007) was an American film and television director whose motion pictures include "Cool Hand Luke" (1967), "Voyage of the Damned" (1976), "The Amityville Horror" (1979), and "The Pope of Greenwich Village" (1984).',
   'He was noted for his work with actor Paul Newman.']],
 ['Méditerranée (1963 film)',
  ['Méditerranée is a 1963 French experimental film directed by Jean-Daniel Pollet with assistance from Volker Schlöndorff.',
   'It was written by Philippe Sollers and produced by Barbet Schroeder, with music by Antione Duhamel.',
   'The 45 minute film is cited as one of Pollet\'s most influential films, which according to Jonathan Rosenbaum directly influenced Jean-Luc Goddard\'s "Contempt", released later the same year.',
   'Footage for the film was shot around the Mediterranean, including at a Greek temple, a Sicilian garden, the sea, and also features a fisherman, a bullfighter, and a girl on an operating table

In [10]:
# Reference answer of the first training example.
train[0]['answer']

'no'

# Run eval

## Train Model

In [12]:
# Build the OpenAI client from the OPENAI_API_KEY environment variable.
# os.getenv returns None when the variable is not set.
api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)

In [None]:
# Entity type labels handed to CurveRAG through its `entity_types` argument.
# NOTE(review): presumably these steer entity extraction when the graph is
# built — confirm against the CurveRAG implementation.
entity_types = ['people', 'locations', 'entities', 'movies', 'directors']
rag = CurveRAG(
    openai_client=client,
    entity_types=entity_types,
)

In [None]:
# Flatten each example's nested `context` list into one plain-text document:
# take its string repr, drop '[' and single quotes, and turn ']' into '. '.
new_docs = [
    str(example['context']).replace('[', '').replace(']', '. ').replace('\'', '')
    for example in train
]

In [None]:
# Number of examples used for fitting. The query loops in the Test Model
# section reuse `dataset_size`, so the same examples are queried afterwards.
dataset_size = 100
rag.fit(new_docs[:dataset_size], dataset_name='movies')

In [None]:
# Inspect the nodes of the graph held by `rag` after fitting.
rag.graph.nodes

In [None]:
# Inspect the edges of the graph held by `rag` after fitting.
rag.graph.edges

## Test Model

In [None]:
# Ask the question of every fitted example using the 'pp' traversal and
# collect the responses in example order.
responses_pp = [
    rag.query(train[idx]['question'], traversal='pp')
    for idx in range(dataset_size)
]

In [None]:
# Ask the question of every fitted example using the 'hyperbolic' traversal
# and collect the responses in example order.
responses_ = [
    rag.query(train[idx]['question'], traversal='hyperbolic')
    for idx in range(dataset_size)
]

# Evaluate Results

In [3]:
# Show full cell contents in displayed frames instead of truncating long
# strings (the response columns hold multi-sentence text).
pd.set_option('display.max_colwidth', None)

In [4]:
# Load previously saved questions, reference answers and model responses.
# NOTE(review): results.csv is not written anywhere in this notebook;
# presumably it was exported from `responses_` / `responses_pp` in a separate
# step — confirm and add that step so the notebook runs top to bottom.
results_df = pd.read_csv('results.csv')

In [30]:
# Peek at the first two result rows.
results_df.head(2)

Unnamed: 0,question,answer,response_hyperbolic,response_pp
0,Are director of film Move (1970 Film) and director of film Méditerranée (1963 Film) from the same country?,no,"The director of the film *Move* (1970) is Stuart Rosenberg, who is American. The director of the film *Méditerranée* (1963) is Jean-Daniel Pollet, who is French. Therefore, the directors of these two films are from different countries: one is from the United States and the other from France.","The director of the film *Move* (1970) is Stuart Rosenberg, and he is from the United States. The director of *Méditerranée* (1963) is Jean-Daniel Pollet, who is from France. Therefore, the directors of these two films are not from the same country."
1,Do both films The Falcon (Film) and Valentin The Good have the directors from the same country?,no,"The film *The Falcon (1981)* was directed by Vatroslav Mimica, who is from Yugoslavia. In contrast, *Valentin The Good (1942)* was directed by Martin Frič, who is from the Czech Republic. Therefore, the directors of both films do not come from the same country.","Yes, both films ""The Falcon"" (1981) and ""Valentin the Good"" (1942) have directors from the same country, specifically from the Czech Republic. ""Valentin the Good"" is directed by Martin Frič, who is a Czech film director, while ""The Falcon"" was influenced by the works of Martin Frič, indicating a connection to Czech cinema, although the director of ""The Falcon,"" Vatroslav Mimica, is Yugoslavian. Therefore, the directors are not from the same country; ""Valentin the Good"" is Czech, while ""The Falcon"" is Yugoslavian-German."


# Judge answers with an OpenAI GPT model

In [27]:
# Build the OpenAI client used for judging, from the OPENAI_API_KEY
# environment variable (same construction as in the Train Model section).
api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)

In [39]:
def get_prompt(question, answer_1, answer_2):
    """Build the judge prompt asking whether two answers to a question agree.

    Args:
        question: The question both answers respond to.
        answer_1: The first answer (the reference answer in this notebook).
        answer_2: The second answer (a model response in this notebook).

    Returns:
        The prompt string. It instructs the model to reply ``True`` when the
        two answers are essentially the same and ``False`` otherwise. The
        text keeps the leading indentation of the template below.
    """
    prompt = f"""
        You are excellent at determining whether two answers to a question are the same despite their syntax being different.
        You will be given a question and two answers. Reply True if the answer is essentially the same, otherwise reply with False.
        
        Given the question: {question} 
        
        Are these two answers essentially the same:
        - Answer 1: {answer_1}
        - Answer 2: {answer_2}
    """
    return prompt

In [35]:
# Pull the question, reference answer and hyperbolic-traversal response from
# the first results row, for a single-example judge check.
first_row = results_df.iloc[0]
q, a1, a2 = (
    first_row[column]
    for column in ('question', 'answer', 'response_hyperbolic')
)

In [37]:
# Echo the example so the judge's verdict can be checked by eye.
for label, value in (('q', q), ('answer 1', a1), ('answer 2', a2)):
    print(label, value)

q Are director of film Move (1970 Film) and director of film Méditerranée (1963 Film) from the same country?
answer 1 no
answer 2 The director of the film *Move* (1970) is Stuart Rosenberg, who is American. The director of the film *Méditerranée* (1963) is Jean-Daniel Pollet, who is French. Therefore, the directors of these two films are from different countries: one is from the United States and the other from France.


In [44]:
# Single judge call on the example (q, a1, a2) before looping over all rows.
response = client.responses.create(
    model='gpt-4.1',
    input=get_prompt(q, a1, a2)
)

In [45]:
# Text of the judge's reply for the single example.
response.output_text

'True'

In [58]:
# Ask the GPT judge, row by row, whether the hyperbolic-traversal response
# matches the reference answer. One raw reply string is collected per row.
is_the_same = []
for i, row in results_df.iterrows():
    if i % 10 == 0:
        print(i)  # progress marker every tenth row
    q = row['question']
    a1 = row['answer']
    a2 = row['response_hyperbolic']
    try:
        response = client.responses.create(
            model='gpt-4.1',
            input=get_prompt(q, a1, a2)
        )
        is_the_same.append(response.output_text)
    except Exception as exc:
        # Catch `Exception` rather than using a bare `except:` so that
        # KeyboardInterrupt / SystemExit still stop the loop, and report the
        # failure instead of hiding it. The 'Error' placeholder keeps the list
        # aligned with the rows of results_df.
        print(f'row {i}: judge request failed: {exc!r}')
        is_the_same.append('Error')

0
10
20
30
40
50
60
70
80
90


In [71]:
# Ask the GPT judge, row by row, whether the 'pp'-traversal response matches
# the reference answer. One raw reply string is collected per row.
is_the_same_pp = []
for i, row in results_df.iterrows():
    if i % 10 == 0:
        print(i)  # progress marker every tenth row
    q = row['question']
    a1 = row['answer']
    a2 = row['response_pp']
    try:
        response = client.responses.create(
            model='gpt-4.1',
            input=get_prompt(q, a1, a2)
        )
        is_the_same_pp.append(response.output_text)
    except Exception as exc:
        # Catch `Exception` rather than using a bare `except:` so that
        # KeyboardInterrupt / SystemExit still stop the loop, and report the
        # failure instead of hiding it. The 'Error' placeholder keeps the list
        # aligned with the rows of results_df.
        print(f'row {i}: judge request failed: {exc!r}')
        is_the_same_pp.append('Error')

0
10
20
30
40
50
60
70
80
90


In [72]:
# Attach the raw judge replies as new columns (one entry per results row).
results_df['is_same'] = is_the_same
results_df['is_same_pp'] = is_the_same_pp

In [73]:
def clean_is_same(text):
    """Convert a raw judge reply into a boolean verdict.

    Args:
        text: The judge's reply. Expected to be a string, but any value is
            accepted (for example None or NaN from a missing cell).

    Returns:
        True when the reply, ignoring surrounding whitespace and letter case,
        starts with ``true``. False for every other reply, including the
        ``'Error'`` placeholder and non-string values.
    """
    # Non-string values (None, NaN floats) carry no verdict; treat them as
    # "not the same" instead of raising AttributeError on `.lower()`.
    if not isinstance(text, str):
        return False
    # Strip first so a reply such as ' True' or '\nTrue' still counts.
    return text.strip().lower().startswith('true')

In [74]:
# Reduce each raw judge reply to a boolean verdict via clean_is_same.
results_df['is_same_clean'] = results_df['is_same'].apply(clean_is_same)
results_df['is_same_pp_clean'] = results_df['is_same_pp'].apply(clean_is_same)

In [75]:
# Share of rows where the judge accepted the hyperbolic-traversal response.
results_df['is_same_clean'].mean()

np.float64(0.49)

In [76]:
# Share of rows where the judge accepted the 'pp'-traversal response.
results_df['is_same_pp_clean'].mean()

np.float64(0.52)

In [70]:
# First ten rows whose hyperbolic-traversal response the judge rejected.
results_df.loc[~results_df['is_same_clean']].head(10)

Unnamed: 0,question,answer,response_hyperbolic,response_pp,is_same,is_same_clean
2,"Which film whose director is younger, Charge It To Me or Danger: Diabolik?",Danger: Diabolik,"To determine which film's director is younger, we need to look at the directors of ""Charge It To Me"" and ""Danger: Diabolik.""\n\n1. The film ""Danger: Diabolik"" was directed by **Mario Bava**, but the birth date of Mario Bava is not provided in the data. \n2. The director of ""Charge It To Me"" is **Ian Barry**, who has also no birth date mentioned.\n\nSince I don't have specific birth dates for Mario Bava or Ian Barry from the provided data, I cannot definitively answer which director is younger. No relevant information was found to compare their ages.","To compare the directors' ages of ""Charge It To Me"" and ""Danger: Diabolik,"" we first identify that ""Danger: Diabolik"" was directed by **Mario Bava** in 1968. Unfortunately, the information about the director of ""Charge It To Me"" is not present in the provided context. Therefore, I cannot determine which film's director is younger based on the available data.\n\nIn short, no relevant information was found regarding the director of ""Charge It To Me.""","False\n\nThe first answer directly states ""Danger: Diabolik"" as the answer, implying its director is younger. The second answer states that there is not enough information to determine which director is younger. These are not essentially the same.",False
4,What nationality is the director of film Wedding Night In Paradise (1950 Film)?,Hungarian,"The director of the film ""Wedding Night In Paradise"" (1950) is Géza von Bolváry, who is of German nationality. The film is noted as a West German musical comedy and stars Johannes Heesters, Claude Farell, and Gretl Schörg. It is based on a 1942 stage work and was produced in the post-war German film industry context [8].","The director of the film ""Wedding Night in Paradise"" (1950) is Géza von Bolváry, who was of Hungarian nationality. The film is a West German musical comedy and is based on a 1942 stage work[2].",False,False
5,When is the composer of film Sruthilayalu 's birthday?,14 March 1918,"The composer of the film *Sruthilayalu* is K. V. Mahadevan. However, there is no specific information regarding his birthday available in the provided data. If you need further details or specific inquiries about his work, please let me know!","No relevant information was found regarding the birthday of the composer of the film 'Sruthilayalu', K. V. Mahadevan.",False,False
6,Who is Rhescuporis I (Odrysian)'s paternal grandfather?,Raizdos,"Rhescuporis I's paternal grandfather is Cotys II. Cotys III, Rhescuporis I's father, was the son of Cotys II, making him the paternal grandfather of Rhescuporis I. This lineage places Cotys II as the ancestor from whom Rhescuporis I descends within the Odrysian kingdom of Thrace.","Rhescuporis I of the Odrysian kingdom of Thrace's paternal grandfather is Cotys I. This is deduced from the information that Rhescuporis I succeeded his father, Cotys III, and that Rhoemetalces I, a relative noted as a descendant of Cotys I, was mentioned in relation to the Odrysian royal lineage. Therefore, Cotys I is indeed his paternal grandfather.",False,False
9,Where was the director of film The Fascist born?,Rome,"The film *The Fascist* was directed by Luciano Salce. However, the provided data does not include information about where Luciano Salce was born. Therefore, no relevant information was found regarding the birthplace of the director.","The director of the film ""The Fascist,"" Luciano Salce, was born in Italy. However, the specific place of his birth is not provided in the available information. Therefore, no relevant information was found regarding his exact birthplace.",False,False
15,Where was the place of burial of the performer of song There Is So Much World To See?,Graceland,"The song ""There Is So Much World To See"" was performed by Elvis Presley. However, the context provided does not contain information about his place of burial. Therefore, no relevant information was found regarding the burial location of Elvis Presley.","Elvis Presley, the performer of the song ""There Is So Much World to See,"" is buried at Graceland in Memphis, Tennessee. This historic site was also his home and is now a museum dedicated to his life and career.",False,False
16,What is the date of birth of the director of film Wandering Fires?,"November 28, 1919","The director of the film ""Wandering Fires"" is Maurice Campbell. His date of birth is not provided in the available data, so no relevant information regarding his date of birth was found.","The director of the film ""Wandering Fires"" is Maurice Campbell. However, there is no information available regarding his date of birth in the provided data. Thus, no relevant information was found about his birthdate.",False,False
17,When did Fatima Bint Mubarak Al Ketbi's husband die?,2 November 2004,No relevant information was found regarding the death of Fatima Bint Mubarak Al Ketbi's husband.,"Fatima Bint Mubarak Al Ketbi was married to Zayed bin Sultan Al Nahyan, who passed away on November 2, 2004[6].",False,False
22,Does Ted Swinford have the same nationality as Nirmala Sheoran?,no,"Yes, Ted Swinford and Nirmala Sheoran have the same nationality; both are associated with India. Ted Swinford is an American rower, while Nirmala Sheoran is an Indian sprinter specializing in the 400 meters[8][20].","Yes, Ted Swinford and Nirmala Sheoran have the same nationality; both are Indian and American, respectively, as per their descriptions in the provided data. Nirmala Sheoran is identified as an Indian sprinter, while Ted Swinford is described as an American rower and gold medalist. Therefore, they do not share the same nationality.",False,False
23,"Which film was released more recently, Die schöne Lurette or Sabhash?",Sabhash,"No relevant information was found regarding the release dates of the films ""Die schöne Lurette"" and ""Sabhash.""","No relevant information was found regarding the release dates of the films ""Die schöne Lurette"" and ""Sabhash"".",False,False


In [77]:
# Save the results together with the judge columns; index=False leaves the
# row index out of the file.
results_df.to_csv('results_with_eval.csv', index=False)