# Results

In this notebook we explore the results of the experiments, which are stored in Weights & Biases (wandb).

In [7]:
# Connect to Weights & Biases through its public API client
import wandb

api = wandb.Api()

# Look up the project object by name
project = api.project("pysentimiento")


# Query the runs whose config has lang == "it" (no task filter is applied),
# then keep only those with no sweep attached
runs = [
    candidate
    for candidate in api.runs("pysentimiento", {"config.lang": "it"})
    if not candidate.sweep
]

In [23]:
# Short display names keyed by the full model identifier stored in each run's config
renames = {
    "Musixmatch/umberto-commoncrawl-cased-v1": "UmBERTo",
    "dbmdz/bert-base-italian-xxl-uncased": "BERT-it",
    "dbmdz/electra-base-italian-xxl-cased-discriminator": "Electra-it",
    "pysentimiento/robertuito-base-uncased": "RoBERTuito",
    "m-polignano-uniba/bert_uncased_L-12_H-768_A-12_italian_alb3rt0": "AlBERTo",
}

# One record per run that reported a test macro F1
data = []

for run in runs:
    # Model and task are read from the run configuration
    record = {
        "model": run.config["model"],
        "task": run.config["task"],
    }
    try:
        record["macro_f1"] = run.summary["test_macro_f1"]
    except KeyError:
        # The summary has no test metric: report the run and leave it out
        print(f"Run {run.name} has no macro f1")
        print(run.summary)
    else:
        data.append(record)

Run true-elevator-360 has no macro f1
{'_wandb': {'runtime': 55}}
Run solar-armadillo-359 has no macro f1
{'_wandb': {'runtime': 61}}
Run deep-puddle-358 has no macro f1
{'_wandb': {'runtime': 13}}
Run cool-pond-357 has no macro f1
{'_wandb': {'runtime': 15}}
Run hearty-bee-356 has no macro f1
{'_wandb': {'runtime': 12}}
Run light-silence-355 has no macro f1
{'_wandb': {'runtime': 33}}
Run silver-feather-354 has no macro f1
{'_wandb': {'runtime': 60}}


In [26]:
import pandas as pd

# One row per collected run; replace the full model identifier with its short display name
df = pd.DataFrame(data).assign(
    model=lambda frame: frame["model"].map(lambda full_name: renames[full_name])
)

# Bucket the runs by (task, model)
grouped = df.groupby(["task", "model"])

# Express mean and standard deviation as percentages rounded to one decimal,
# each rendered as text so they can be joined into "mean +- std"
mean_text = (grouped.mean() * 100).round(1).astype(str)
std_text = (grouped.std() * 100).round(1).astype(str)

# Pivot tasks into columns and drop the leftover outer column level
df_results = (mean_text + " +- " + std_text).unstack("task").droplevel(0, axis=1)

df_results

task,emotion,hate_speech,irony,sentiment
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AlBERTo,72.0 +- 1.3,88.1 +- 0.4,53.7 +- 0.6,57.8 +- 0.7
BERT-it,73.6 +- 4.0,92.4 +- 0.4,62.0 +- 4.4,61.4 +- 0.9
Electra-it,64.7 +- 7.7,87.8 +- 3.0,50.0 +- 6.5,62.3 +- 0.7
RoBERTuito,64.1 +- 3.0,92.6 +- 0.3,55.6 +- 3.6,55.2 +- 2.8
UmBERTo,69.7 +- 4.6,87.3 +- 0.4,60.0 +- 2.2,62.6 +- 1.1


In [27]:
# Render the summary table as Markdown text and print it
markdown_table = df_results.to_markdown()
print(markdown_table)

| model      | emotion     | hate_speech   | irony       | sentiment   |
|:-----------|:------------|:--------------|:------------|:------------|
| AlBERTo    | 72.0 +- 1.3 | 88.1 +- 0.4   | 53.7 +- 0.6 | 57.8 +- 0.7 |
| BERT-it    | 73.6 +- 4.0 | 92.4 +- 0.4   | 62.0 +- 4.4 | 61.4 +- 0.9 |
| Electra-it | 64.7 +- 7.7 | 87.8 +- 3.0   | 50.0 +- 6.5 | 62.3 +- 0.7 |
| RoBERTuito | 64.1 +- 3.0 | 92.6 +- 0.3   | 55.6 +- 3.6 | 55.2 +- 2.8 |
| UmBERTo    | 69.7 +- 4.6 | 87.3 +- 0.4   | 60.0 +- 2.2 | 62.6 +- 1.1 |


In [28]:
# Group the per-run records once by (model, task) and derive both statistics from it
by_model_task = pd.DataFrame(data).groupby(["model", "task"])
mean_df = by_model_task.mean()
std_df = by_model_task.std()

# Place mean and standard deviation side by side, scaled to percentages
df = pd.concat([mean_df * 100, std_df * 100], axis=1)
df.columns = ["mean macro f1", "std macro f1"]

df

Unnamed: 0_level_0,Unnamed: 1_level_0,mean macro f1,std macro f1
model,task,Unnamed: 2_level_1,Unnamed: 3_level_1
Musixmatch/umberto-commoncrawl-cased-v1,emotion,69.724572,4.60015
Musixmatch/umberto-commoncrawl-cased-v1,hate_speech,87.262057,0.363911
Musixmatch/umberto-commoncrawl-cased-v1,irony,60.035865,2.183473
Musixmatch/umberto-commoncrawl-cased-v1,sentiment,62.601565,1.063719
dbmdz/bert-base-italian-xxl-uncased,emotion,73.588963,4.019144
dbmdz/bert-base-italian-xxl-uncased,hate_speech,92.383168,0.409217
dbmdz/bert-base-italian-xxl-uncased,irony,61.974809,4.37835
dbmdz/bert-base-italian-xxl-uncased,sentiment,61.391095,0.863827
dbmdz/electra-base-italian-xxl-cased-discriminator,emotion,64.682508,7.675118
dbmdz/electra-base-italian-xxl-cased-discriminator,hate_speech,87.751349,2.963175


In [29]:
# Move the index levels back into ordinary columns, then write the table without a row index
flat_results = df.reset_index()
flat_results.to_csv("../data/results_it.csv", index=False)

In [30]:
# Read the exported table back and index it by (model, task)
results_csv = pd.read_csv("../data/results_it.csv")
df = results_csv.set_index(["model", "task"])

In [31]:
# Look up a single cell: the (model, task) row and the mean column
lookup_key = ("pysentimiento/robertuito-base-uncased", "emotion")
df.loc[lookup_key, "mean macro f1"]

64.05466397603354