In [1]:
import pickle
import pandas as pd
import math
from utils import *

# `load_config` is not imported by name; it is expected to come from the
# star import of `utils` above. The returned object must expose `file_path`,
# which is read when the data paths are set up.
config = load_config()

In [2]:
# Base path taken from the loaded config. The commented-out line is an
# alternative config attribute that is currently disabled.
# PROJECT_PATH = config.project_path
PROJECT_PATH = config.file_path
# Directory that `load_data` reads pickled files from.
# Assumes PROJECT_PATH is a pathlib-style object (it must provide `.joinpath`).
DATA_PATH = PROJECT_PATH.joinpath("data/processed")

def load_data(name):
    """Unpickle and return the object stored in ``DATA_PATH.joinpath(name)``.

    Parameters
    ----------
    name : str
        File name, relative to ``DATA_PATH``, of the pickle file to read.

    Returns
    -------
    object
        Whatever object the pickle file contains.

    Notes
    -----
    ``pickle.load`` can execute arbitrary code while loading; only point this
    at files you trust.
    """
    pickle_path = DATA_PATH.joinpath(name)
    with open(pickle_path, 'rb') as handle:
        return pickle.load(handle)


In [3]:
def print_results(df):
    """Summarise the evaluation metrics of every (model, shots) group.

    Each of ``f1``, ``precision``, ``recall`` and ``mrr`` is reduced to one
    string of the form ``"mean (hi/lo)"`` with three decimals, where
    ``hi`` / ``lo`` are ``mean +/- 1.96 * std / sqrt(n)`` (normal-approximation
    95 % bounds) and ``n`` is the number of non-missing values in the group.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``model``, ``shots``, ``f1``, ``precision``,
        ``recall`` and ``mrr``. Any other column (e.g. ``cv``, ``topN``) is
        ignored. The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``(model, shots)``, sorted in descending order on both
        levels, with one string column per metric.
    """
    metrics = ['f1', 'precision', 'recall', 'mrr']

    def _mean_with_ci(values):
        # Divide by the group's actual observation count rather than assuming
        # five values per group, so the bounds stay correct for any group size.
        n = values.count()
        root_n = math.sqrt(n) if n > 0 else float("nan")
        mean = values.mean()
        half_width = 1.96 * values.std() / root_n
        return "{mean:.3f} ({hi:.3f}/{lo:.3f})".format(
            mean=mean, hi=mean + half_width, lo=mean - half_width
        )

    # Aggregating with a per-column mapping keeps only the listed metrics, so
    # bookkeeping columns do not have to be dropped beforehand.
    summary = df.groupby(['model', 'shots']).agg(
        {metric: _mean_with_ci for metric in metrics}
    )
    return summary.sort_values(by=['model', 'shots'], ascending=False)

In [4]:
def show_results(topn, model_names: list):
    """Load the zero-shot and few-shot evaluations of each model and summarise them.

    Parameters
    ----------
    topn : str
        Prefix of the evaluation file names (the notebook passes values such
        as ``"top3"``).
    model_names : list
        Model identifiers used in the middle of the evaluation file names.

    Returns
    -------
    pandas.DataFrame
        The table produced by ``print_results`` for all loaded records.
    """
    # For every model the zero-shot file is read first, then the few-shot one.
    evaluations = []
    for model_name in model_names:
        evaluations += [
            load_data(f"{topn}_{model_name}_zeroshot_evaluation.pkl"),
            load_data(f"{topn}_{model_name}_fewshot_evaluation.pkl"),
        ]

    # Flatten the per-file record collections into one list, keeping file order.
    records = [record for evaluation in evaluations for record in evaluation]
    return print_results(pd.DataFrame(records))

In [7]:
# top 3
show_results("top3", ["mistral7b_mimic", "biomistral7b_mimic"])

Unnamed: 0_level_0,Unnamed: 1_level_0,f1,precision,recall,mrr
model,shots,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
mistral7b_mimic_finetuned,zeroshot,0.311 (0.326/0.297),0.349 (0.364/0.334),0.281 (0.296/0.266),0.484 (0.511/0.457)
mistral7b_mimic_finetuned,fewshot,0.348 (0.357/0.338),0.323 (0.333/0.312),0.378 (0.390/0.365),0.693 (0.715/0.672)
biomistral7b_avigon_mimic_finetuned,zeroshot,0.287 (0.297/0.277),0.329 (0.339/0.319),0.255 (0.265/0.245),0.549 (0.575/0.523)
biomistral7b_avigon_mimic_finetuned,fewshot,0.283 (0.290/0.275),0.367 (0.374/0.359),0.230 (0.239/0.221),0.551 (0.577/0.525)


In [8]:
# top 5 - summary built from the "top5_*" zero-shot / few-shot evaluation pickles
show_results("top5", ["mistral7b_mimic", "biomistral7b_mimic"])

Unnamed: 0_level_0,Unnamed: 1_level_0,f1,precision,recall,mrr
model,shots,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
mistral7b_mimic_finetuned,zeroshot,0.341 (0.352/0.330),0.360 (0.373/0.347),0.324 (0.334/0.313),0.481 (0.506/0.456)
mistral7b_mimic_finetuned,fewshot,0.377 (0.384/0.370),0.334 (0.341/0.326),0.433 (0.443/0.423),0.672 (0.685/0.658)
biomistral7b_avigon_mimic_finetuned,zeroshot,0.306 (0.321/0.292),0.344 (0.358/0.330),0.277 (0.292/0.262),0.568 (0.594/0.543)
biomistral7b_avigon_mimic_finetuned,fewshot,0.304 (0.309/0.300),0.360 (0.366/0.353),0.264 (0.270/0.258),0.544 (0.567/0.521)


In [9]:
# top 10 - summary built from the "top10_*" zero-shot / few-shot evaluation pickles
show_results("top10", ["mistral7b_mimic", "biomistral7b_mimic"])

Unnamed: 0_level_0,Unnamed: 1_level_0,f1,precision,recall,mrr
model,shots,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
mistral7b_mimic_finetuned,zeroshot,0.398 (0.410/0.387),0.357 (0.370/0.343),0.452 (0.463/0.440),0.535 (0.555/0.515)
mistral7b_mimic_finetuned,fewshot,0.381 (0.390/0.372),0.301 (0.310/0.292),0.520 (0.531/0.509),0.672 (0.690/0.653)
biomistral7b_avigon_mimic_finetuned,zeroshot,0.332 (0.338/0.325),0.324 (0.335/0.313),0.340 (0.347/0.333),0.569 (0.602/0.535)
biomistral7b_avigon_mimic_finetuned,fewshot,0.348 (0.355/0.341),0.338 (0.345/0.331),0.358 (0.367/0.349),0.565 (0.584/0.546)


In [8]:
# top10 - evaluation records of the "*_finetuned" runs, assembled by hand.
#
# NOTE(review): the `biomistral7b_avigon_*` variables are filled from the
# "top10_biomistral7b_finetuned_*" files (no "avigon" in the file name); the
# variable names are kept as they were. Earlier revisions of this cell also
# pointed at "top10_biomistral7b_avigon_*" and "top5_*_modified_*" files -
# swap the file names below to inspect those runs (TODO confirm they still exist).

mistral7b_finetuned_zeroshot = load_data("top10_mistral7b_finetuned_zeroshot_evaluation.pkl")
mistral7b_finetuned_fewshot = load_data("top10_mistral7b_finetuned_fewshot_evaluation.pkl")
biomistral7b_avigon_finetuned_zeroshot = load_data("top10_biomistral7b_finetuned_zeroshot_evaluation.pkl")
biomistral7b_avigon_finetuned_fewshot = load_data("top10_biomistral7b_finetuned_fewshot_evaluation.pkl")

evaluation_results = [
    mistral7b_finetuned_zeroshot,
    mistral7b_finetuned_fewshot,
    biomistral7b_avigon_finetuned_zeroshot,
    biomistral7b_avigon_finetuned_fewshot,
]

# Concatenate the records of every file into one flat list, keeping file order.
results = []
for evaluation in evaluation_results:
    results.extend(evaluation)

myResults = pd.DataFrame(results)

In [9]:
# Summarise the hand-assembled `myResults` frame with `print_results` and
# show the resulting table as this cell's output.
t = print_results(myResults)
t

Unnamed: 0_level_0,Unnamed: 1_level_0,f1,precision,recall,mrr
model,shots,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
mistral7b_finetuned,zeroshot,0.368 (0.392/0.343),0.342 (0.378/0.306),0.403 (0.436/0.370),0.576 (0.591/0.561)
mistral7b_finetuned,fewshot,0.386 (0.399/0.374),0.335 (0.343/0.327),0.458 (0.492/0.424),0.680 (0.701/0.658)
biomistral7b_finetuned,zeroshot,0.333 (0.354/0.311),0.320 (0.333/0.306),0.349 (0.385/0.312),0.589 (0.620/0.557)
biomistral7b_finetuned,fewshot,0.280 (0.318/0.242),0.313 (0.358/0.268),0.257 (0.301/0.213),0.435 (0.499/0.372)


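The cells below are kept only as a fallback in case they are needed again: they recompute each metric's mean, count, standard deviation and 95% confidence bounds step by step. Note that their saved outputs appear to come from an earlier run (the execution counts are out of order and only `mistral7b_mimic_finetuned` is listed), so they do not match the `myResults` table above.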

In [13]:
# Mean, number of values and standard deviation of every metric for each
# (model, shots) group. `math` is already imported in the first cell.
metric_names = ['precision', 'recall', 'f1', 'mrr']
stats = myResults.groupby(['model','shots'])[metric_names].agg(['mean', 'count', 'std'])

# Append the bounds mean +/- 1.96 * std / sqrt(count) as two extra sub-columns
# (`ci95_hi`, `ci95_lo`) under each metric.
for score in metric_names:
    centre = stats[(score, 'mean')]
    margin = 1.96 * stats[(score, 'std')] / stats[(score, 'count')].map(math.sqrt)
    stats.loc[:, (score, 'ci95_hi')] = (centre + margin).tolist()
    stats.loc[:, (score, 'ci95_lo')] = (centre - margin).tolist()

In [10]:
# precision: mean, count, std and the ci95_hi / ci95_lo bounds per (model, shots)
stats['precision']

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,count,std,ci95_hi,ci95_lo
model,shots,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
mistral7b_mimic_finetuned,fewshot,0.1376,5,0.002074,0.139418,0.135782
mistral7b_mimic_finetuned,zeroshot,0.144,5,0.005568,0.14888,0.13912


In [11]:
# recall: mean, count, std and the ci95_hi / ci95_lo bounds per (model, shots)
stats['recall']

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,count,std,ci95_hi,ci95_lo
model,shots,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
mistral7b_mimic_finetuned,fewshot,0.1426,5,0.006693,0.148467,0.136733
mistral7b_mimic_finetuned,zeroshot,0.1424,5,0.004775,0.146585,0.138215


In [12]:
# f1: mean, count, std and the ci95_hi / ci95_lo bounds per (model, shots)
stats['f1']

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,count,std,ci95_hi,ci95_lo
model,shots,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
mistral7b_mimic_finetuned,fewshot,0.1398,5,0.004087,0.143382,0.136218
mistral7b_mimic_finetuned,zeroshot,0.1432,5,0.004712,0.14733,0.13907


In [13]:
# mrr: mean, count, std and the ci95_hi / ci95_lo bounds per (model, shots)
stats['mrr']

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,count,std,ci95_hi,ci95_lo
model,shots,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
mistral7b_mimic_finetuned,fewshot,0.1822,5,0.017754,0.197762,0.166638
mistral7b_mimic_finetuned,zeroshot,0.0918,5,0.011692,0.102048,0.081552
