In [1]:
import pandas as pd

## Load data

In [2]:
# Ground-truth questions file. The scoring helpers further down read the
# 'answer' and 'question_type' columns of ans_df.
ANSWER_FILE = '../test_results/multi_hop/questions.csv'
ans_df = pd.read_csv(ANSWER_FILE)
# Preview the first rows to sanity-check the load.
ans_df.head()

Unnamed: 0,query,answer,question_type,evidence_list
0,Who is the individual associated with the cryp...,Sam Bankman-Fried,inference_query,3
1,Which individual is implicated in both inflati...,Donald Trump,inference_query,2
2,Who is the figure associated with generative A...,Sam Altman,inference_query,2
3,Do the TechCrunch article on software companie...,Yes,comparison_query,2
4,Which online betting platform provides a welco...,Caesars Sportsbook,inference_query,3


In [3]:
# Results file of the RAG run (the preview shows 'Question' and 'RAG Answer' columns).
# NOTE(review): rows are presumably in the same order as ans_df, since scoring
# pairs them by row position/index -- confirm against the script that wrote this file.
RAG_ANSWER_FILE = '../test_results/multi_hop/RAG_result.csv'
rag_df = pd.read_csv(RAG_ANSWER_FILE)
rag_df.head()

Unnamed: 0,Question,RAG Answer
0,Who is the individual associated with the cryp...,Sam Bankman-Fried
1,Which individual is implicated in both inflati...,Donald Trump
2,Who is the figure associated with generative A...,Sam Altman
3,Do the TechCrunch article on software companie...,Yes
4,Which online betting platform provides a welco...,Caesars Sportsbook


In [4]:
# Results file of the no-context run. Note that, per the preview, its answer
# column is also named 'RAG Answer' even though no retrieval context was used.
NO_CONTEXT_FILE = '../test_results/multi_hop/no_context_result.csv'
no_context_df = pd.read_csv(NO_CONTEXT_FILE)
no_context_df.head()

Unnamed: 0,Question,RAG Answer
0,Who is the individual associated with the cryp...,Insufficient information
1,Which individual is implicated in both inflati...,Insufficient information
2,Who is the figure associated with generative A...,Insufficient information
3,Do the TechCrunch article on software companie...,Insufficient information
4,Which online betting platform provides a welco...,Insufficient information


In [5]:
# Results file of the GraphRAG run (answer column is 'Graph RAG answer').
# The preview shows several answers in upper case; the comparison used for
# scoring lower-cases both sides, so casing does not affect correctness.
GRAPH_RAG_FILE = '../test_results/multi_hop/GraphRAG_results.csv'
graphrag_df = pd.read_csv(GRAPH_RAG_FILE)
graphrag_df.head(5)

Unnamed: 0,Question,Graph RAG answer
0,Who is the individual associated with the cryp...,SAM BANKMAN-FRIED
1,Which individual is implicated in both inflati...,DONALD TRUMP
2,Who is the figure associated with generative A...,SAM ALTMAN
3,Do the TechCrunch article on software companie...,Insufficient information
4,Which online betting platform provides a welco...,BetMGM Sportsbook


## Analyze answers

In [13]:
def is_correct(answer1: str, answer2: str) -> bool:
    """Return True when two answers are equal after light normalisation.

    Both values are converted with ``str()``, stripped of leading/trailing
    whitespace and full stops, and lower-cased before being compared, so
    ``"Yes."``, ``" yes "`` and ``"YES.\\n"`` are all treated as the same answer.

    Args:
        answer1: First answer (any object; it is passed through ``str()``).
        answer2: Second answer (any object; it is passed through ``str()``).

    Returns:
        ``True`` if the normalised answers are identical, ``False`` otherwise.
    """
    def _normalize(value) -> str:
        # Strip whitespace and '.' in a single pass: stripping only '.' leaves
        # answers such as "Yes. " or "Yes.\n" unequal to "Yes", which would
        # score a right answer as wrong.
        return str(value).strip(" \t\r\n\f\v.").lower()

    return _normalize(answer1) == _normalize(answer2)


def add_info_to_test_df(test_df):
    """Attach the reference answer and a correctness flag to ``test_df`` in place.

    Two columns are written onto the frame that is passed in:

    * ``answer``      -- the first ``len(test_df)`` entries of ``ans_df['answer']``
    * ``correctness`` -- ``is_correct`` applied to the frame's second column
      (position 1) and the freshly added ``answer`` column

    Nothing is returned; the caller's DataFrame is modified.
    """
    row_count = len(test_df)
    reference_answers = ans_df['answer'].iloc[:row_count]
    test_df['answer'] = reference_answers

    def _row_matches(record):
        # Column at position 1 holds the answer produced by the system under test.
        return is_correct(record.iloc[1], record['answer'])

    test_df['correctness'] = test_df.apply(_row_matches, axis=1)


def cal_correct_rate(test_df, question_type = 'total'):
    """Compute the fraction of correct answers for one question type.

    Args:
        test_df: DataFrame with a boolean ``correctness`` column. Its rows are
            matched by position against the rows of the global ``ans_df``.
        question_type: One of ``'inference_query'``, ``'comparison_query'``,
            ``'temporal_query'``, ``'null_query'`` or ``'total'`` (all rows).

    Returns:
        The correct-answer rate as a float in [0, 1]. If no row of the
        requested type is present (for example an empty or partial result
        file), ``nan`` is returned instead of raising ``ZeroDivisionError``.

    Raises:
        AssertionError: If ``question_type`` is not one of the accepted values.
    """
    question_types = ['inference_query', 'comparison_query', 'temporal_query', 'null_query', 'total']
    assert question_type in question_types, "invalid question types"

    correct_flags = test_df['correctness'].to_numpy(dtype=bool)

    if question_type == 'total':
        selected = correct_flags
    else:
        # Only the first len(test_df) reference rows are relevant; the result
        # file may cover just a prefix of the question set.
        reference_types = ans_df['question_type'].to_numpy()[:len(correct_flags)]
        selected = correct_flags[reference_types == question_type]

    if selected.size == 0:
        # The rate is undefined without any matching question.
        return float('nan')

    return float(selected.sum()) / selected.size

In [7]:
def correct_rates_test_df(test_df):
    """Score ``test_df`` and summarise the correct rate per question type.

    ``add_info_to_test_df`` is called first, so ``test_df`` gains ``answer``
    and ``correctness`` columns as a side effect. The returned frame has one
    row per question type (plus ``'total'``) with the columns
    ``'question type'`` and ``'correct rate'``.
    """
    add_info_to_test_df(test_df)

    correct_df = pd.DataFrame(columns=['question type', 'correct rate'])

    categories = ('inference_query', 'comparison_query', 'temporal_query', 'null_query', 'total')
    for position, category in enumerate(categories):
        rate = cal_correct_rate(test_df, category)
        correct_df.loc[position] = [category, rate]

    return correct_df

In [8]:
# Score the RAG answers per question type.
# Side effect: correct_rates_test_df adds 'answer' and 'correctness' columns to rag_df in place.
rag_correct_df = correct_rates_test_df(rag_df)
# The summary has five rows (four question types + 'total'), so head() shows all of it.
rag_correct_df.head()

Unnamed: 0,question type,correct rate
0,inference_query,0.909091
1,comparison_query,0.542857
2,temporal_query,0.26087
3,null_query,0.888889
4,total,0.63


In [9]:
# Score the no-context answers per question type.
# Side effect: no_context_df gains 'answer' and 'correctness' columns in place.
no_context_correct_df = correct_rates_test_df(no_context_df)
no_context_correct_df

Unnamed: 0,question type,correct rate
0,inference_query,0.090909
1,comparison_query,0.0
2,temporal_query,0.0
3,null_query,1.0
4,total,0.12


In [14]:
# Score the GraphRAG answers per question type.
# Side effect: graphrag_df gains 'answer' and 'correctness' columns in place.
graphrag_correct_df = correct_rates_test_df(graphrag_df)
graphrag_correct_df

Unnamed: 0,question type,correct rate
0,inference_query,0.848485
1,comparison_query,0.314286
2,temporal_query,0.043478
3,null_query,0.888889
4,total,0.48


In [16]:
# Inspect the first 50 scored GraphRAG rows. The 'answer' and 'correctness'
# columns are present only after correct_rates_test_df(graphrag_df) has run.
graphrag_df[:50]

Unnamed: 0,Question,Graph RAG answer,answer,correctness
0,Who is the individual associated with the cryp...,SAM BANKMAN-FRIED,Sam Bankman-Fried,True
1,Which individual is implicated in both inflati...,DONALD TRUMP,Donald Trump,True
2,Who is the figure associated with generative A...,SAM ALTMAN,Sam Altman,True
3,Do the TechCrunch article on software companie...,Insufficient information,Yes,False
4,Which online betting platform provides a welco...,BetMGM Sportsbook,Caesars Sportsbook,False
5,Who is the individual alleged to have built a ...,SAM BANKMAN-FRIED,Sam Bankman-Fried,True
6,Does the TechCrunch article on Twitch's subscr...,Yes,Yes,True
7,Does 'The New York Times' article attribute th...,Insufficient information,Yes,False
8,What is the name of the organization discussed...,OpenAI,OpenAI,True
9,"Which company, as reported by both TechCrunch ...",GOOGLE,Google,True
