In [1]:
%load_ext autoreload
%autoreload 2
import glob 
import json 

evaluation_files = glob.glob("../evaluations/emotion_*.json")

evaluations = []

for file in evaluation_files:
    print(file)
    with open(file) as f:
        evaluation = json.load(f)
        evaluation["file"] = file.split("/")[-1]
        evaluations.append(evaluation)

../evaluations/emotion_bertweet_base.json
../evaluations/emotion_beto.json
../evaluations/emotion_mbert_es.json
../evaluations/emotion_roberta.json
../evaluations/emotion_bert_base.json
../evaluations/emotion_distilbert_es.json


In [2]:
import pandas as pd 

df = pd.DataFrame([
    {**evaluation, **evaluation["metrics"]} for evaluation in evaluations
])

df.drop(labels=["predictions", "labels", "metrics", "file"], inplace=True, axis=1)
df["model"] = df["model"].str.replace("models/", "")
df["model"] = df["model"].str.replace("-emotion-analysis/", "")
df.columns = [col.replace("test_", "").replace("_", " ").capitalize() for col in df.columns]
#df.set_index("Model", inplace=True)
df = df.sort_values(["Lang", "Macro f1"]) 

df

Unnamed: 0,Model,Lang,Loss,Others f1,Others precision,Others recall,Joy f1,Joy precision,Joy recall,Sadness f1,...,Disgust recall,Fear f1,Fear precision,Fear recall,Macro f1,Macro precision,Macro recall,Acc,Runtime,Samples per second
4,bert-base,en,1.448668,0.660011,0.609007,0.720339,0.680121,0.700624,0.660784,0.565445,...,0.371728,0.253968,0.32,0.210526,0.441858,0.486801,0.415784,0.601862,15.6117,116.964
0,bertweet-base,en,1.188743,0.663127,0.64672,0.680387,0.711198,0.712598,0.709804,0.58296,...,0.486911,0.166667,0.4,0.105263,0.45978,0.497312,0.452507,0.618291,15.1347,120.65
3,roberta-base,en,1.182252,0.65,0.654791,0.645278,0.7228,0.698355,0.74902,0.564315,...,0.549738,0.273973,0.285714,0.263158,0.473204,0.502806,0.471303,0.620482,14.9586,122.07
5,distilbert-es,es,1.096623,0.735577,0.731183,0.740024,0.589849,0.585831,0.593923,0.707617,...,0.0,0.564103,0.52381,0.611111,0.477111,0.492448,0.487962,0.652355,8.1527,205.698
2,mbert-es,es,1.138236,0.737153,0.720554,0.754534,0.591281,0.583333,0.599448,0.696682,...,0.0,0.606061,0.666667,0.555556,0.487876,0.514672,0.479579,0.655337,8.7886,190.815
1,beto,es,1.216769,0.762615,0.725191,0.804111,0.639087,0.660767,0.618785,0.756892,...,0.0,0.511628,0.44,0.611111,0.500856,0.50761,0.505274,0.68873,13.9726,120.021


In [4]:
f1_columns = [col for col in df.columns if "f1" in col and "Macro" not in col]
print(df[["Model", "Lang"] + f1_columns + ["Macro f1"]].to_latex(index=False, float_format="{0:.3f}".format))

\begin{tabular}{llrrrrrrrr}
\toprule
        Model & Lang &  Others f1 &  Joy f1 &  Sadness f1 &  Anger f1 &  Surprise f1 &  Disgust f1 &  Fear f1 &  Macro f1 \\
\midrule
    bert-base &   en &      0.660 &   0.680 &       0.565 &     0.299 &        0.233 &       0.401 &    0.254 &     0.442 \\
bertweet-base &   en &      0.663 &   0.711 &       0.583 &     0.312 &        0.305 &       0.477 &    0.167 &     0.460 \\
 roberta-base &   en &      0.650 &   0.723 &       0.564 &     0.329 &        0.258 &       0.515 &    0.274 &     0.473 \\
distilbert-es &   es &      0.736 &   0.590 &       0.708 &     0.520 &        0.222 &       0.000 &    0.564 &     0.477 \\
     mbert-es &   es &      0.737 &   0.591 &       0.697 &     0.513 &        0.271 &       0.000 &    0.606 &     0.488 \\
         beto &   es &      0.763 &   0.639 &       0.757 &     0.558 &        0.278 &       0.000 &    0.512 &     0.501 \\
\bottomrule
\end{tabular}

