# Setup

## Modify These

In [None]:
# Locations of the eval_predictions.json files for the two models being compared.
# These are the only values expected to change between runs of this notebook.

# Model 1: the baseline
model1_results_path = '/content/drive/MyDrive/266/source_of_truth/baseline_models/generic_qa/eval_predictions.json'

# Model 2: the winning model
model2_results_path = '/content/drive/MyDrive/266/experiments/models/4_8__5-3_contin_learn_tdapt_b_m/eval_predictions.json'

## Will Not Need Regular Changes

In [None]:
# Mounting Google Drive to Colab
# Every path used in this notebook lives under /content/drive, so the mount
# has to succeed before any of the JSON files can be read.
from google.colab import drive
# force_remount=False: when Drive is already mounted the call only reports
# that fact (see the recorded message below) instead of remounting.
drive.mount('/content/drive', force_remount=False)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Imports
import pandas as pd
import json

In [None]:
# Ground-truth file that both models' predictions are compared against
ground_truth_path = (
    '/content/drive/MyDrive/266/source_of_truth/data/squad.film.test.squad_format.json'
)

In [None]:
## Making the model predictions DF for model 1

# Reading in the JSON.
# The context manager closes the file handle as soon as parsing finishes
# (a bare open() leaves it open for the life of the kernel), and the explicit
# encoding keeps non-ASCII answer text from depending on the platform default.
with open(model1_results_path, "r", encoding="utf-8") as model1_file:
    model1_raw = json.load(model1_file)

# Turning the JSON into a DF.
# from_dict(orient='index') puts the JSON keys in the index and the values in
# a single column labelled 0; reset_index() then exposes the keys as a column
# named 'index'. Both are renamed so the frame can be merged on 'id'.
# NOTE(review): assumes the file is a flat {question id: predicted answer}
# mapping - confirm against the evaluation script that writes it.
model1_predictions = (
    pd.DataFrame.from_dict(model1_raw, orient='index')
    .reset_index()
    .rename(columns={'index': 'id', 0: 'model1_prediction'})
)

In [None]:
## Making the model predictions DF for model 2

# Reading in the JSON.
# The context manager closes the file handle as soon as parsing finishes
# (a bare open() leaves it open for the life of the kernel), and the explicit
# encoding keeps non-ASCII answer text from depending on the platform default.
with open(model2_results_path, "r", encoding="utf-8") as model2_file:
    model2_raw = json.load(model2_file)

# Turning the JSON into a DF.
# from_dict(orient='index') puts the JSON keys in the index and the values in
# a single column labelled 0; reset_index() then exposes the keys as a column
# named 'index'. Both are renamed so the frame can be merged on 'id'.
# NOTE(review): assumes the file is a flat {question id: predicted answer}
# mapping - confirm against the evaluation script that writes it.
model2_predictions = (
    pd.DataFrame.from_dict(model2_raw, orient='index')
    .reset_index()
    .rename(columns={'index': 'id', 0: 'model2_prediction'})
)

In [None]:
## Making the ground truth DF

# Reading in the JSON.
# The context manager closes the file handle once parsing is done, and the
# explicit encoding keeps non-ASCII context/answer text from depending on the
# platform default.
with open(ground_truth_path, "r", encoding="utf-8") as ground_truth_file:
    data = json.load(ground_truth_file)

# Making the JSON into a DF: one row per example, keeping only the fields the
# comparison needs ('title' and 'answer_start' are deliberately left out).
# NOTE(review): every entry of answers['text'] is joined with a single space,
# so an example with more than one reference answer yields a concatenated
# string that a single prediction is unlikely to match exactly - confirm each
# example carries exactly one reference answer.
ground_truth_columns = ['id', 'context', 'question', 'true_answer']

ground_truth_rows = [
    {
        'id': example['id'],
        'context': example['context'],
        'question': example['question'],
        'true_answer': ' '.join(str(answer) for answer in example['answers']['text']),
    }
    for example in data
]

# Passing columns= fixes the column order and keeps all four columns present
# even when the file contains no examples.
ground_truth = pd.DataFrame(ground_truth_rows, columns=ground_truth_columns)

In [None]:
# Merging the results into one DF: ground truth first, then each model's
# prediction column joined on the shared 'id' key.
combined = (
    ground_truth[['id', 'context', 'question', 'true_answer']]
    .merge(model1_predictions[['id', 'model1_prediction']], on='id')
    .merge(model2_predictions[['id', 'model2_prediction']], on='id')
)

# Noting exact matches: a row is flagged True only when the predicted string
# is identical to the reference answer string.
combined['exact_match_model1'] = combined['model1_prediction'].eq(combined['true_answer'])
combined['exact_match_model2'] = combined['model2_prediction'].eq(combined['true_answer'])

# Analyzing Results

## Exact Match Rates

In [None]:
# How many rows model 1 matched exactly (True) versus missed (False)
combined['exact_match_model1'].value_counts()

exact_match_model1
True     387
False    368
Name: count, dtype: int64

In [None]:
# How many rows model 2 matched exactly (True) versus missed (False)
combined['exact_match_model2'].value_counts()

exact_match_model2
True     409
False    346
Name: count, dtype: int64

## Swap Sets

### Swap-Ins
Model 1 gets it wrong; Model 2 gets it right (judged by exact match).

In [None]:
# Number of swap-ins: rows flagged as a miss for model 1 and a match for model 2
int((~combined['exact_match_model1'] & combined['exact_match_model2']).sum())

36

In [None]:
# The swap-in rows themselves: model 1 miss, model 2 match
combined.loc[~combined['exact_match_model1'] & combined['exact_match_model2']]

Unnamed: 0,id,context,question,true_answer,model1_prediction,model2_prediction,exact_match_model1,exact_match_model2
5,5,"Andy’s dad — let’s call him Dad Davis, since i...",what is andy's last name in toy story,Davis,Dad Davis,Davis,False,True
28,28,"Teen Wolf premiered on June 5, 2011, following...",what channel is teen wolf on,MTV,2011 MTV,MTV,False,True
35,35,$50 Million. M. Night Shyamalan net worth and ...,net worth m night shyamalan,$50 Million,M. Night Shyamalan is an Indian-American direc...,$50 Million,False,True
61,61,Vengeance (comics) Vengeance is a fictional Am...,what ghost rider comics have vengeance in them,Vengeance,Vengeance (comics) Vengeance is a fictional Am...,Vengeance,False,True
64,64,Get ready for another addition to the line of ...,what disney movie is moana in,Moana,"Disney heroines: Moana, an ocean adventurer. M...",Moana,False,True
65,65,Hebrew Meaning: The name Annie is a Hebrew bab...,meaning of name annie,Prayer,In Hebrew the meaning of the name Annie is: Pr...,Prayer,False,True
111,111,1 Percival Graves (played by Colin Farrell) – ...,who plays percival graves,Colin Farrell,A powerful auror,Colin Farrell,False,True
162,162,Matt Bomer. Matthew Staton Matt Bomer (born Oc...,who plays neil caffrey,Matt Bomer,Matt Bomer. Matthew Staton Matt Bomer,Matt Bomer,False,True
163,163,About the Author. Adam Rubin and Daniel Salmie...,who is the author of dragons love tacos,Adam Rubin,Adam Rubin and Daniel Salmieri,Adam Rubin,False,True
174,174,Danny Masterson Net Worth. Danny Masterson Net...,danny masterson net worth,$22 Million,Danny Masterson Net Worth is $22 Million,$22 Million,False,True


### Swap-Outs
Model 1 gets it right; Model 2 gets it wrong (judged by exact match).

In [None]:
# Number of swap-outs: rows flagged as a match for model 1 and a miss for model 2
int((combined['exact_match_model1'] & ~combined['exact_match_model2']).sum())

14

In [None]:
# The swap-out rows themselves: model 1 match, model 2 miss
combined.loc[combined['exact_match_model1'] & ~combined['exact_match_model2']]

Unnamed: 0,id,context,question,true_answer,model1_prediction,model2_prediction,exact_match_model1,exact_match_model2
0,0,The Sea of Monsters received good reviews and ...,how many copies of sea monsters,100000,100000,"over 100,000",True,False
24,24,"Doctor Dolittle has a few close human friends,...",what animal is gub gub,a pig,a pig,pig,True,False
42,42,Merida (Disney) Princess Merida of DunBroch (S...,disney princess brave,Merida,Merida,Merida of DunBroch,True,False
54,54,"The pickups of Season 2, 3 and now 4 all have ...",how many seasons of black sails are there,4,4,3,True,False
245,245,A large part of the success of the character o...,who is the character ducky on ncis based on,David McCallum,David McCallum,Dr. Donald Ducky Mallard,True,False
318,318,Alexandra Daddario. Alexandra Anna Daddario (b...,who is the alexandra daddario,an American actress,an American actress,actress,True,False
431,431,The 1995 theatrical film Gordy was originally ...,what was the pig's name on green acres,Arnold Ziffel,Arnold Ziffel,Gordy,True,False
583,583,ABC. The season 1 finale of How To Get Away Wi...,who did wes kill on how to get away with murder,Rebecca Sutter,Rebecca Sutter,Lila Stangard,True,False
603,603,1 Marvel Knights Daredevil 2099 is Samuel Fisk...,what is the name of daredevil 2099,Samuel Fisk,Samuel Fisk,Daredevil,True,False
642,642,As awesome and satisfying and all-around amazi...,who plays aria in game of thrones,Maisie Williams,Maisie Williams,Arya Stark,True,False


### Out-Outs
Both models get it wrong

In [None]:
# Number of out-outs: rows where neither model's flag is True
int((~(combined['exact_match_model1'] | combined['exact_match_model2'])).sum())

332

In [None]:
# The out-out rows themselves: neither model's flag is True
combined.loc[~(combined['exact_match_model1'] | combined['exact_match_model2'])]

Unnamed: 0,id,context,question,true_answer,model1_prediction,model2_prediction,exact_match_model1,exact_match_model2
1,1,Bob Parr is the first human protagonist in a P...,bob par meaning,Bob Parr is the first human protagonist in a P...,the first Pixar hero to kill people,to kill people,False,False
6,6,What is chumlee from pawn stars real name? The...,what is chumlee's real name,Chumlee’s real name is Austin Lee Russell.,Austin Lee Russell,Austin Lee Russell,False,False
8,8,Blue Panther Jr. Blue Panther Jr. is the ring ...,what is blue panther,Blue Panther Jr. is the ring name of a Mexican...,"a Mexican luchador enmascarado, or masked prof...","a Mexican luchador enmascarado, or masked prof...",False,False
9,9,Alison Schapker. Alison Schapker is an America...,alison schapker,Alison Schapker is an American television writ...,Alison Schapker is an American television writ...,Alison Schapker is an American television writ...,False,False
11,11,Disney XD (TV channel on DISH Network) DISH Ne...,what is disney channel number,DISH Network Channel Number: 174.,174,174,False,False
...,...,...,...,...,...,...,...,...
748,748,Ice Age: Continental Drift. Ice Age: Continent...,ice age continental drift steffie wiki,a 2012 American 3-D computer-animated adventur...,Ice Age: Continental Drift,Ice Age: Continental Drift,False,False
749,749,Cellular (film) Cellular is a 2004 American ac...,what is cellular,a 2004 American action crime thriller film dir...,a 2004 American action crime thriller film,action crime thriller film,False,False
752,752,Barkley is a Muppet dog who appears on Sesame ...,what breed of dog is barkley,a Muppet dog who appears on Sesame Street.,Muppet,Muppet dog,False,False
753,753,"Starring: Joseph Gordon-Levitt, Melissa Leo, S...",cast of snowden,"Joseph Gordon-Levitt, Melissa Leo, Shailene Wo...",Snowden,Snowden,False,False
