# Results extraction for paper

In [1]:
import os
import glob
import json
import shutil
import pandas as pd
import numpy as np

## General BLURB results table

In [3]:
def parse_run_dir(run_dir):
    """Split a run directory name into its model, corpus and seed.

    The name is expected to look like ``<model>_<corpus>_seed<N>``. Two
    rewrites are applied before splitting on ``_`` so that the name yields
    exactly three fields:

    * ``ncbi_disease`` becomes ``ncbi-disease``;
    * unless the name contains ``bert-base``, the first remaining underscore
      is turned into a hyphen.

    Parameters
    ----------
    run_dir : str
        Name of the directory that holds one run's ``predict_results.json``.

    Returns
    -------
    dict
        ``{"model": str, "corpus": str, "seed": int}``.

    Raises
    ------
    ValueError
        If the rewritten name does not split into exactly three fields, or
        the seed field is not ``seed<int>``.
    """
    name = run_dir.replace("ncbi_disease", "ncbi-disease")
    if "bert-base" not in name:
        name = name.replace("_", "-", 1)
    model, corpus, seed = name.split("_")
    return {"model": model, "corpus": corpus, "seed": int(seed.replace("seed", ""))}


# Collect one record (run configuration + metrics) per non-debug run under out/.
exp_results = []
for pred_path in glob.glob("out/*/predict_results.json"):
    if "debug" in pred_path:
        continue

    # The context manager closes each results file as soon as it is parsed
    # instead of leaving the handle open until garbage collection.
    with open(pred_path, encoding="utf-8") as results_file:
        metrics_results = json.load(results_file)
    metrics_results = {k.replace("predict_", ""): v for k, v in metrics_results.items()}

    # os.path handles whichever separator glob returned on this platform.
    run_dir = os.path.basename(os.path.dirname(pred_path))
    config = parse_run_dir(run_dir)
    exp_results.append(config | metrics_results)

In [8]:
# One row per run, ordered by model, corpus and seed; the listed bookkeeping
# and secondary-metric columns are removed before display.
df = (
    pd.DataFrame(exp_results)
    .sort_values(by=["model", "corpus", "seed"])
    .drop(
        columns=[
            'loss', 'precision', 'recall',
            'runtime', 'samples_per_second', 'steps_per_second',
            'train_mse', 'test_mse',
        ]
    )
)
df.head(50)

Unnamed: 0,model,corpus,seed,accuracy,f1,pearsonr
2,bert-base-uncased,bioasq-task-b,0,0.754083,,
25,bert-base-uncased,bioasq-task-b,1,0.754083,,
52,bert-base-uncased,bioasq-task-b,2,0.757925,,
62,bert-base-uncased,bioasq-task-b,3,0.556196,,
66,bert-base-uncased,bioasq-task-b,4,0.761768,,
58,bert-base-uncased,biosses,0,,,0.866163
76,bert-base-uncased,biosses,1,,,0.864847
27,bert-base-uncased,biosses,2,,,0.866266
40,bert-base-uncased,biosses,3,,,0.866764
65,bert-base-uncased,biosses,4,,,0.865847


In [19]:
# Maps each metric column name to the corpora whose performance is read from
# that column. Key order is f1, accuracy, pearsonr.
metric_dataset = dict(
    f1=[
        "blurb-bc5chem", "blurb-bc5disease", "blurb-jnlpba",
        "blurb-ncbi-disease", "blurb-bc2gm", "hoc",
    ],
    accuracy=["pubmed-qa", "bioasq-task-b"],
    pearsonr=["biosses"],
)


def get_perf(row, metric_to_corpora=None):
    """Return the value of the metric associated with ``row``'s corpus.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row passed by ``apply(axis=1)``)
        Must provide ``row["corpus"]`` and an entry for the metric name
        under which that corpus is listed.
    metric_to_corpora : dict, optional
        Maps a metric name to the list of corpora read from that metric.
        Defaults to the notebook-level ``metric_dataset``.

    Returns
    -------
    object or None
        ``row[metric]`` for the first metric whose corpus list contains
        ``row["corpus"]``; ``None`` when the corpus is listed nowhere.
    """
    if metric_to_corpora is None:
        metric_to_corpora = metric_dataset
    for metric, corpora in metric_to_corpora.items():
        # Membership must be tested against this metric's own corpus list;
        # the earlier code referenced an undefined name here, which only
        # worked if a stale variable happened to exist in the kernel.
        if row["corpus"] in corpora:
            return row[metric]
    return None

In [22]:
# Collapse the runs of every (model, corpus) pair into a "mean%+/-std" string,
# then keep only the metric that applies to each corpus.
def str_mean_and_std(x):
    """Format ``x`` as ``"<mean>%+/-<std>"``, both scaled by 100 to two decimals.

    Mean and standard deviation come from ``np.mean`` and ``np.std`` called
    with their default arguments.
    """
    mean_pct = np.mean(x) * 100
    std_pct = np.std(x) * 100
    return "{:.2f}%+/-{:.2f}".format(mean_pct, std_pct)


agg_df = (
    df.groupby(["model", "corpus"])
    .agg(str_mean_and_std)
    .reset_index()
    # get_perf picks, per row, the formatted value of the corpus's metric.
    .assign(performance=lambda frame: frame.apply(get_perf, axis=1))
    [["model", "corpus", "performance"]]
)
agg_df

Unnamed: 0,model,corpus,performance
0,bert-base-uncased,bioasq-task-b,71.68%+/-8.04
1,bert-base-uncased,biosses,86.52%+/-0.11
2,bert-base-uncased,blurb-bc2gm,75.32%+/-0.79
3,bert-base-uncased,blurb-bc5chem,87.31%+/-0.57
4,bert-base-uncased,blurb-bc5disease,77.09%+/-1.01
5,bert-base-uncased,blurb-jnlpba,76.77%+/-0.86
6,bert-base-uncased,blurb-ncbi-disease,81.59%+/-1.03
7,bert-base-uncased,hoc,79.22%+/-1.14
8,bert-base-uncased,pubmed-qa,55.40%+/-0.25
9,h-index-mid25%,blurb-bc5chem,89.27%+/-0.28


In [25]:
# Reshape to one row per model and one column per corpus.
pivoted_df = agg_df.pivot(values='performance', index='model', columns='corpus')
# Re-assign the column labels as a plain list of the same labels.
pivoted_df.columns = list(pivoted_df.columns)
pivoted_df

Unnamed: 0_level_0,bioasq-task-b,biosses,blurb-bc2gm,blurb-bc5chem,blurb-bc5disease,blurb-jnlpba,blurb-ncbi-disease,hoc,pubmed-qa
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
bert-base-uncased,71.68%+/-8.04,86.52%+/-0.11,75.32%+/-0.79,87.31%+/-0.57,77.09%+/-1.01,76.77%+/-0.86,81.59%+/-1.03,79.22%+/-1.14,55.40%+/-0.25
h-index-mid25%,,,,89.27%+/-0.28,,,84.93%+/-1.35,,
random-25%,,,,90.03%+/-0.47,,,84.80%+/-0.78,,
sjr-mid25%,,,78.57%+/-0.00,89.72%+/-0.40,81.53%+/-0.00,78.08%+/-0.68,85.31%+/-0.00,,
sjr-top25%,,,,89.70%+/-0.80,,,83.49%+/-0.74,,
