In [1]:
%load_ext autoreload
%autoreload 2
import glob 
import json 

evaluation_files = glob.glob("../evaluations/emotion_*.json")

evaluations = []

for file in evaluation_files:
    print(file)
    with open(file) as f:
        evaluation = json.load(f)
        evaluation["file"] = file.split("/")[-1]
        evaluations.append(evaluation)

../evaluations/emotion_bertweet_base.json
../evaluations/emotion_beto.json
../evaluations/emotion_mbert_es.json
../evaluations/emotion_mbert_en.json
../evaluations/emotion_roberta.json
../evaluations/emotion_bert_base.json
../evaluations/emotion_distilbert_en.json
../evaluations/emotion_distilbert_es.json


In [2]:
import pandas as pd 

df = pd.DataFrame([
    {**evaluation, **evaluation["metrics"]} for evaluation in evaluations
])

df.drop(labels=["predictions", "labels", "metrics", "file"], inplace=True, axis=1)
df["model"] = df["model"].str.replace("models/", "")
df["model"] = df["model"].str.replace("-emotion-analysis/", "")
df.columns = [col.replace("test_", "").replace("_", " ").capitalize() for col in df.columns]
#df.set_index("Model", inplace=True)
df = df.sort_values(["Lang", "Macro f1"]) 

df

Unnamed: 0,Model,Lang,Loss,Others f1,Others precision,Others recall,Joy f1,Joy precision,Joy recall,Sadness f1,...,Disgust recall,Fear f1,Fear precision,Fear recall,Macro f1,Macro precision,Macro recall,Acc,Runtime,Samples per second
6,distilbert-en,en,1.378156,0.509407,0.633094,0.42615,0.665484,0.60778,0.735294,0.502165,...,0.413613,0.201835,0.15493,0.289474,0.382939,0.366219,0.418844,0.503286,5.4081,337.642
3,mbert-en,en,1.386493,0.547718,0.63871,0.479419,0.650691,0.613913,0.692157,0.544643,...,0.434555,0.255319,0.214286,0.315789,0.393619,0.379647,0.419032,0.515882,9.2533,197.335
5,bert-base,en,1.27454,0.59398,0.663677,0.53753,0.683521,0.654122,0.715686,0.541485,...,0.434555,0.252632,0.210526,0.315789,0.439238,0.420341,0.468907,0.559146,9.6085,190.041
4,roberta-base,en,1.233089,0.573888,0.704225,0.484262,0.689527,0.653779,0.729412,0.547945,...,0.633508,0.255814,0.229167,0.289474,0.445122,0.431733,0.490829,0.562979,9.5375,191.455
0,bertweet-base,en,1.172043,0.606019,0.696541,0.53632,0.711069,0.681655,0.743137,0.608696,...,0.52356,0.26,0.209677,0.342105,0.475686,0.45633,0.513214,0.584337,9.2916,196.522
7,distilbert-es,es,1.118434,0.678962,0.78022,0.600967,0.60223,0.546067,0.671271,0.716707,...,0.090909,0.461538,0.352941,0.666667,0.463233,0.437966,0.512605,0.600477,7.912,211.957
2,mbert-es,es,1.136714,0.691127,0.770833,0.62636,0.585242,0.542453,0.635359,0.710462,...,0.151515,0.431373,0.333333,0.611111,0.473628,0.449092,0.516691,0.610018,13.7742,121.749
1,beto,es,1.094156,0.74937,0.781866,0.719468,0.671916,0.64,0.707182,0.753695,...,0.121212,0.533333,0.444444,0.666667,0.547506,0.541121,0.568213,0.687537,10.6171,157.953


In [3]:
f1_columns = [col for col in df.columns if "f1" in col and "Macro" not in col]
print(df[["Model", "Lang"] + f1_columns + ["Macro f1"]].to_latex(index=False, float_format="{0:.3f}".format))

\begin{tabular}{llrrrrrrrr}
\toprule
        Model & Lang &  Others f1 &  Joy f1 &  Sadness f1 &  Anger f1 &  Surprise f1 &  Disgust f1 &  Fear f1 &  Macro f1 \\
\midrule
distilbert-en &   en &      0.509 &   0.665 &       0.502 &     0.303 &        0.148 &       0.351 &    0.202 &     0.383 \\
     mbert-en &   en &      0.548 &   0.651 &       0.545 &     0.297 &        0.103 &       0.357 &    0.255 &     0.394 \\
    bert-base &   en &      0.594 &   0.684 &       0.541 &     0.356 &        0.238 &       0.409 &    0.253 &     0.439 \\
 roberta-base &   en &      0.574 &   0.690 &       0.548 &     0.364 &        0.212 &       0.473 &    0.256 &     0.445 \\
bertweet-base &   en &      0.606 &   0.711 &       0.609 &     0.434 &        0.258 &       0.452 &    0.260 &     0.476 \\
distilbert-es &   es &      0.679 &   0.602 &       0.717 &     0.434 &        0.264 &       0.085 &    0.462 &     0.463 \\
     mbert-es &   es &      0.691 &   0.585 &       0.710 &     0.491 &        

In [4]:
f1_columns = [col for col in df.columns if "f1" in col and "Macro" not in col]
print(df[["Model", "Lang"] + f1_columns + ["Macro f1"]].to_markdown(index=False))

| Model         | Lang   |   Others f1 |   Joy f1 |   Sadness f1 |   Anger f1 |   Surprise f1 |   Disgust f1 |   Fear f1 |   Macro f1 |
|:--------------|:-------|------------:|---------:|-------------:|-----------:|--------------:|-------------:|----------:|-----------:|
| distilbert-en | en     |    0.509407 | 0.665484 |     0.502165 |   0.30303  |      0.147541 |     0.351111 |  0.201835 |   0.382939 |
| mbert-en      | en     |    0.547718 | 0.650691 |     0.544643 |   0.29703  |      0.102941 |     0.356989 |  0.255319 |   0.393619 |
| bert-base     | en     |    0.59398  | 0.683521 |     0.541485 |   0.355769 |      0.238411 |     0.408867 |  0.252632 |   0.439238 |
| roberta-base  | en     |    0.573888 | 0.689527 |     0.547945 |   0.363636 |      0.212389 |     0.472656 |  0.255814 |   0.445122 |
| bertweet-base | en     |    0.606019 | 0.711069 |     0.608696 |   0.433862 |      0.257669 |     0.452489 |  0.26     |   0.475686 |
| distilbert-es | es     |    0.678962 | 0.60223