# Results

In this notebook we show the results of our experiments for both tasks

In [1]:
%load_ext autoreload
%autoreload 2
!ls ../evaluations

emotion			    irony
emotion_bert_base.json	    sentiment
emotion_bertweet_base.json  sentiment_bert_base.json
emotion_beto.json	    sentiment_bertweet_base.json
emotion_distilbert_en.json  sentiment_beto.json
emotion_distilbert_es.json  sentiment_distilbert_en.json
emotion_mbert_en.json	    sentiment_distilbert_es.json
emotion_mbert_es.json	    sentiment_mbert_en.json
emotion_roberta.json	    sentiment_mbert_es.json
es			    sentiment_roberta_base.json
hate_speech


In [10]:

import glob 
import json 

tasks = ["sentiment", "emotion"]

files = [(task, f) for task in tasks for f in glob.glob(f"../evaluations/{task}_*.json")]

evaluations = []

for task, file in files:
    print(file)
    with open(file) as f:
        evaluation = json.load(f)
        evaluation["task"] = task
        evaluation["file"] = file.split("/")[-1]
        evaluations.append(evaluation)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
../evaluations/sentiment_beto.json
../evaluations/sentiment_roberta_base.json
../evaluations/sentiment_mbert_en.json
../evaluations/sentiment_bert_base.json
../evaluations/sentiment_distilbert_es.json
../evaluations/sentiment_bertweet_base.json
../evaluations/sentiment_mbert_es.json
../evaluations/sentiment_distilbert_en.json
../evaluations/emotion_bertweet_base.json
../evaluations/emotion_beto.json
../evaluations/emotion_mbert_es.json
../evaluations/emotion_mbert_en.json
../evaluations/emotion_roberta.json
../evaluations/emotion_bert_base.json
../evaluations/emotion_distilbert_en.json
../evaluations/emotion_distilbert_es.json


In [11]:
import pandas as pd 


dfs = {}

for task in tasks:
    df =  pd.DataFrame([
        {**evaluation, **evaluation["metrics"]} for evaluation in evaluations if evaluation["task"] == task
    ])

    df.drop(labels=["predictions", "labels", "metrics", "file"], inplace=True, axis=1)
    df["model"] = df["model"].str.replace("models/", "")

    df["model"] = df["model"].str.replace(f"-{task}-analysis/", "")
    df.columns = [col.replace("test_", "").replace("_", " ").capitalize() for col in df.columns]
    #df.set_index("Model", inplace=True)
    df = df.sort_values(["Lang", "Macro f1"]) 
    dfs[task] = df

df = dfs["sentiment"].merge(dfs["emotion"], on="Model", suffixes=("", "_emotion"))

In [12]:
df.columns

Index(['Lang', 'Model', 'Task', 'Loss', 'Neg f1', 'Neg precision',
       'Neg recall', 'Neu f1', 'Neu precision', 'Neu recall', 'Pos f1',
       'Pos precision', 'Pos recall', 'Micro f1', 'Macro f1',
       'Macro precision', 'Macro recall', 'Acc', 'Runtime',
       'Samples per second', 'Lang_emotion', 'Task_emotion', 'Loss_emotion',
       'Others f1', 'Others precision', 'Others recall', 'Joy f1',
       'Joy precision', 'Joy recall', 'Sadness f1', 'Sadness precision',
       'Sadness recall', 'Anger f1', 'Anger precision', 'Anger recall',
       'Surprise f1', 'Surprise precision', 'Surprise recall', 'Disgust f1',
       'Disgust precision', 'Disgust recall', 'Fear f1', 'Fear precision',
       'Fear recall', 'Micro f1_emotion', 'Macro f1_emotion',
       'Macro precision_emotion', 'Macro recall_emotion', 'Acc_emotion',
       'Runtime_emotion', 'Samples per second_emotion'],
      dtype='object')

In [20]:
base_columns = ["Model", "Lang"]

df.sort_values(["Lang", "Macro f1"], inplace=True) 
sentiment_columns = [
    #"Pos f1", 
    #"Neu f1", 
    #"Neg f1", 
    "Micro f1",
    "Macro f1"
]
emotion_columns = [
    #"Joy f1",
    #"Others f1",
    #"Sadness f1",
    #"Anger f1",
    #"Disgust f1",
    "Micro f1_emotion",
    "Macro f1_emotion"
]

print(df[base_columns + sentiment_columns + emotion_columns].to_latex(index=False, float_format="{0:.3f}".format, col_space=12))

\begin{tabular}{llrrrr}
\toprule
        Model &         Lang &     Micro f1 &     Macro f1 &  Micro f1\_emotion &  Macro f1\_emotion \\
\midrule
distilbert-en &           en &        0.649 &        0.642 &             0.503 &             0.383 \\
     mbert-en &           en &        0.645 &        0.643 &             0.516 &             0.394 \\
 roberta-base &           en &        0.686 &        0.684 &             0.563 &             0.445 \\
    bert-base &           en &        0.686 &        0.684 &             0.559 &             0.439 \\
bertweet-base &           en &        0.697 &        0.696 &             0.584 &             0.476 \\
distilbert-es &           es &        0.602 &        0.599 &             0.600 &             0.463 \\
     mbert-es &           es &        0.609 &        0.604 &             0.610 &             0.474 \\
         beto &           es &        0.672 &        0.667 &             0.688 &             0.548 \\
\bottomrule
\end{tabular}



In [4]:
f1_columns = [col for col in df.columns if "f1" in col and "Macro" not in col]
print(df[["Model", "Lang"] + f1_columns + ["Macro f1"]].to_markdown(index=False))

| Model         | Lang   |   Others f1 |   Joy f1 |   Sadness f1 |   Anger f1 |   Surprise f1 |   Disgust f1 |   Fear f1 |   Macro f1 |
|:--------------|:-------|------------:|---------:|-------------:|-----------:|--------------:|-------------:|----------:|-----------:|
| distilbert-en | en     |    0.509407 | 0.665484 |     0.502165 |   0.30303  |      0.147541 |     0.351111 |  0.201835 |   0.382939 |
| mbert-en      | en     |    0.547718 | 0.650691 |     0.544643 |   0.29703  |      0.102941 |     0.356989 |  0.255319 |   0.393619 |
| bert-base     | en     |    0.59398  | 0.683521 |     0.541485 |   0.355769 |      0.238411 |     0.408867 |  0.252632 |   0.439238 |
| roberta-base  | en     |    0.573888 | 0.689527 |     0.547945 |   0.363636 |      0.212389 |     0.472656 |  0.255814 |   0.445122 |
| bertweet-base | en     |    0.606019 | 0.711069 |     0.608696 |   0.433862 |      0.257669 |     0.452489 |  0.26     |   0.475686 |
| distilbert-es | es     |    0.678962 | 0.60223