# Results

Update *results.json* from *index.json*

In [6]:
import pickle
from sklearn.metrics import f1_score
import json
import matplotlib.pyplot as plt
import pandas as pd


# Load the experiment index: a mapping from experiment id to that
# experiment's parameter dict. JSON text is UTF-8, so decode it explicitly
# instead of depending on the platform's default encoding.
with open("results/index.json", "r", encoding="utf-8") as f:
    experiments = json.load(f)

# Column accumulators: each list receives exactly one entry per experiment,
# so position i in every list describes the same experiment.

# Experiment identity and hyper-parameters copied from the index.
ids = []
tasks = []
llm = []
emb_types = []
max_lens = []
balanced = []
dropouts = []
freezes = []

# Test-split F1 scores: class 0 as positive, class 1 as positive, macro.
test_zf1s = []
test_of1s = []
test_mf1s = []

# Real-world ("rw") split F1 scores, same three flavours.
rw_zf1s = []
rw_of1s = []
rw_mf1s = []

def _aggregate_by_answer(labels, preds, weights, answer_ids, threshold):
    """Collapse per-row outputs into one label and one prediction per answer.

    Rows are grouped by answer id in a single pass. Every row contributes
    its label and its prediction multiplied by its weight. An answer is kept
    only when its row labels sum to 0 or to the number of rows (i.e. they are
    uniformly 0 or uniformly 1); answers with mixed labels are skipped. For a
    kept answer the label is the first row's label and the prediction is 1
    when the summed weighted predictions exceed ``threshold``, else 0.

    Assumes answer ids are hashable and compare by value (e.g. ints or
    strings) -- TODO confirm against the code that writes the pickles.

    Returns:
        A ``(answer_labels, answer_preds)`` pair of equally long lists.
    """
    grouped = {}
    for label, pred, weight, answer_id in zip(labels, preds, weights, answer_ids):
        row_labels, row_scores = grouped.setdefault(answer_id, ([], []))
        row_labels.append(label)
        row_scores.append(pred * weight)

    answer_labels = []
    answer_preds = []
    for row_labels, row_scores in grouped.values():
        positives = sum(row_labels)
        # Every group holds at least one row, so "sum == len" alone means
        # "all rows labelled 1".
        if positives == 0 or positives == len(row_labels):
            answer_labels.append(row_labels[0])
            answer_preds.append(1 if sum(row_scores) > threshold else 0)
    return answer_labels, answer_preds


def _f1_triplet(labels, preds):
    """Return (F1 with class 0 positive, F1 with class 1 positive, macro F1).

    Each score is rounded to two decimals; undefined scores count as 0.
    """
    zero_f1 = f1_score(labels, preds, average='binary', pos_label=0, zero_division=0)
    one_f1 = f1_score(labels, preds, average='binary', pos_label=1, zero_division=0)
    macro_f1 = f1_score(labels, preds, average='macro', zero_division=0)
    return round(zero_f1, 2), round(one_f1, 2), round(macro_f1, 2)


for exp, params in experiments.items():

    # NOTE: unpickling can execute arbitrary code; only load result files
    # produced by trusted runs.
    with open("results/experiments/" + exp + ".pkl", "rb") as f:
        out_exp = pickle.load(f)

    ids.append(exp)
    tasks.append(params["task"])
    llm.append(params["llm"])
    emb_types.append(params['emb_type'])
    max_lens.append(params["max_len"])
    balanced.append(params["balanced"])
    dropouts.append(params["dropout"])
    freezes.append(params["freeze"])

    if params['task'] in ("Q_CONCETTI", "CONCETTI"):
        # These tasks are scored per answer rather than per row, so the
        # row-level outputs are first aggregated by answer id.
        threshold = params['threshold']

        # Test.
        test_labels, test_preds = _aggregate_by_answer(
            out_exp['test_labels'],
            out_exp['test_preds'],
            out_exp['test_iConcPeso'],
            out_exp['test_iAnswerId'],
            threshold,
        )

        # Real-World.
        rw_labels, rw_preds = _aggregate_by_answer(
            out_exp['rw_labels'],
            out_exp['rw_preds'],
            out_exp['rw_iConcPeso'],
            out_exp['rw_iAnswerId'],
            threshold,
        )
    else:
        # All other tasks are scored directly on the stored labels/predictions.
        test_labels = out_exp['test_labels']
        test_preds = out_exp['test_preds']

        rw_labels = out_exp['rw_labels']
        rw_preds = out_exp['rw_preds']

    test_zf1, test_of1, test_mf1 = _f1_triplet(test_labels, test_preds)
    test_zf1s.append(test_zf1)
    test_of1s.append(test_of1)
    test_mf1s.append(test_mf1)

    rw_zf1, rw_of1, rw_mf1 = _f1_triplet(rw_labels, rw_preds)
    rw_zf1s.append(rw_zf1)
    rw_of1s.append(rw_of1)
    rw_mf1s.append(rw_mf1)

    
# Build the summary table: one row per experiment, hyper-parameters first,
# then test-split ("TE-") and real-world ("RW-") F1 scores.
_column_pairs = [
    ("iId", ids),
    ("task", tasks),
    ("llm", llm),
    ("emb_type", emb_types),
    ("max_len", max_lens),
    ("balanced", balanced),
    ("dropout", dropouts),
    ("freeze", freezes),
    ("TE-0F1", test_zf1s),
    ("TE-1F1", test_of1s),
    ("TE-MF1", test_mf1s),
    ("RW-0F1", rw_zf1s),
    ("RW-1F1", rw_of1s),
    ("RW-MF1", rw_mf1s),
]
df_res = pd.DataFrame(dict(_column_pairs))

# Bare expression so the notebook renders the table as the cell output.
df_res

1
1
1
1
0
1
1
1
1
1
1
1
1


Unnamed: 0,iId,task,llm,emb_type,max_len,balanced,dropout,freeze,TE-0F1,TE-1F1,TE-MF1,RW-0F1,RW-1F1,RW-MF1
0,1690536127622,DOPPIA,MULTI_UNCASED,EMB_MAX,128,1,0.1,1,0.24,0.0,0.12,0.06,0.0,0.03
