In [1]:
%load_ext autoreload
%autoreload 2
import glob 
import json 

evaluation_files = glob.glob("../evaluations/sentiment_*.json")

evaluations = []

for file in evaluation_files:
    print(file)
    with open(file) as f:
        evaluation = json.load(f)
        evaluation["file"] = file.split("/")[-1]
        evaluations.append(evaluation)

../evaluations/sentiment_beto.json
../evaluations/sentiment_roberta_base.json
../evaluations/sentiment_mbert_en.json
../evaluations/sentiment_bert_base.json
../evaluations/sentiment_distilbert_es.json
../evaluations/sentiment_bertweet_base.json
../evaluations/sentiment_mbert_es.json
../evaluations/sentiment_distilbert_en.json


In [2]:
import pandas as pd 

df = pd.DataFrame([
    {**evaluation, **evaluation["metrics"]} for evaluation in evaluations
])

df.drop(labels=["predictions", "labels", "metrics", "file"], inplace=True, axis=1)
df["model"] = df["model"].str.replace("models/", "")
df["model"] = df["model"].str.replace("-sentiment-analysis/", "")
df.columns = [col.replace("test_", "").replace("_", " ").capitalize() for col in df.columns]
#df.set_index("Model", inplace=True)
df = df.sort_values(["Lang", "Macro f1"]) 

df

Unnamed: 0,Lang,Model,Loss,Neg f1,Neg precision,Neg recall,Neu f1,Neu precision,Neu recall,Pos f1,Pos precision,Pos recall,Macro f1,Macro precision,Macro recall,Acc,Runtime,Samples per second
7,en,distilbert-en,0.763219,0.62282,0.660921,0.588872,0.672543,0.658175,0.687553,0.629931,0.609123,0.652211,0.641765,0.64274,0.642878,0.648811,42.2713,290.599
2,en,mbert-en,0.855557,0.665551,0.597556,0.751007,0.634016,0.697533,0.581102,0.629104,0.632992,0.625263,0.642891,0.642694,0.652457,0.644578,63.8805,192.297
1,en,roberta-base,1.206942,0.702262,0.689731,0.715257,0.682952,0.70305,0.663972,0.665721,0.641908,0.691368,0.683645,0.678229,0.690199,0.685852,62.7816,195.662
3,en,bert-base,0.7199,0.693249,0.72622,0.663142,0.688558,0.703125,0.674583,0.670165,0.60385,0.752842,0.683991,0.677732,0.696856,0.686014,62.1945,197.509
5,en,bertweet-base,1.236025,0.733372,0.680379,0.795317,0.677449,0.730657,0.631464,0.678403,0.660159,0.697684,0.696408,0.690398,0.708155,0.697248,64.9213,189.214
4,es,distilbert-es,0.975005,0.649262,0.689484,0.613474,0.479947,0.42164,0.556968,0.667876,0.717836,0.624417,0.599028,0.609653,0.598286,0.601735,23.688,306.653
6,es,mbert-es,1.359172,0.667889,0.724344,0.619599,0.475637,0.424242,0.5412,0.66942,0.686887,0.652819,0.604315,0.611824,0.604539,0.609169,37.1661,195.447
0,es,beto,1.462894,0.728972,0.772571,0.690031,0.535633,0.483607,0.600203,0.734971,0.76171,0.710047,0.666526,0.672629,0.66676,0.672219,41.9681,173.084


In [3]:
print(df[["Model", "Lang", "Neg f1", "Pos f1", "Neu f1", "Macro f1"]].to_latex(index=False, float_format="{0:.3f}".format))

\begin{tabular}{llrrrr}
\toprule
        Model & Lang &  Neg f1 &  Pos f1 &  Neu f1 &  Macro f1 \\
\midrule
distilbert-en &   en &   0.623 &   0.630 &   0.673 &     0.642 \\
     mbert-en &   en &   0.666 &   0.629 &   0.634 &     0.643 \\
 roberta-base &   en &   0.702 &   0.666 &   0.683 &     0.684 \\
    bert-base &   en &   0.693 &   0.670 &   0.689 &     0.684 \\
bertweet-base &   en &   0.733 &   0.678 &   0.677 &     0.696 \\
distilbert-es &   es &   0.649 &   0.668 &   0.480 &     0.599 \\
     mbert-es &   es &   0.668 &   0.669 &   0.476 &     0.604 \\
         beto &   es &   0.729 &   0.735 &   0.536 &     0.667 \\
\bottomrule
\end{tabular}



In [11]:
print(
    df[["Model", "Lang", "Neg f1", "Pos f1", "Neu f1", "Macro f1"]].to_markdown(index=False)
)

| Model         | Lang   |   Neg f1 |   Pos f1 |   Neu f1 |   Macro f1 |
|:--------------|:-------|---------:|---------:|---------:|-----------:|
| distilbert-en | en     | 0.62282  | 0.629931 | 0.672543 |   0.641765 |
| mbert-en      | en     | 0.665551 | 0.629104 | 0.634016 |   0.642891 |
| roberta-base  | en     | 0.702262 | 0.665721 | 0.682952 |   0.683645 |
| bert-base     | en     | 0.693249 | 0.670165 | 0.688558 |   0.683991 |
| bertweet-base | en     | 0.733372 | 0.678403 | 0.677449 |   0.696408 |
| distilbert-es | es     | 0.649262 | 0.667876 | 0.479947 |   0.599028 |
| mbert-es      | es     | 0.667889 | 0.66942  | 0.475637 |   0.604315 |
| beto          | es     | 0.728972 | 0.734971 | 0.535633 |   0.666526 |
