# Public Models Assessment

---

## Imports

In [1]:
import numpy as np
import pandas as pd

from sklearn.metrics import f1_score
from transformers import logging
from transformers import pipeline
from tqdm import tqdm

In [2]:
# Restrict the transformers library's logging to error-level messages only.
logging.set_verbosity_error()

In [3]:
# Presumably the intended cap on input sequence length — TODO confirm.
# NOTE(review): no cell visible in this notebook references this constant, so the
# pipelines below run with their default tokenisation settings.
MAX_SEQUENCE_LENGTH = 200

---

## Data Loading

In [4]:
# Load the cleaned dataset; later cells read its "set", "code", "text" and
# "goemotion" columns.
dataset = pd.read_parquet("../data/clean_data.parquet")

In [5]:
# Read the emotion label names, one per line, keeping the file order: that order
# defines the column layout of every prediction/ground-truth matrix below.
# Blank or whitespace-only lines are skipped so they cannot become an empty label
# that would shift column indices or break the `test_pv[ge_labels]` selection.
with open("../data/emotions.txt", "r", encoding="utf-8") as f:
    ge_labels = [label for label in (line.strip() for line in f) if label]

In [6]:
# Alphabetically sorted emotion labels found anywhere in the dataset.
target_names = sorted(dataset["goemotion"].unique())

# Ground truth for the test split: one row per (code, text) pair, one column per
# emotion, each cell counting how many rows carry that emotion (0 when absent).
test_rows = dataset[dataset["set"] == "test"]
test_pv = test_rows.pivot_table(
    index=["code", "text"],
    columns="goemotion",
    values="set",
    aggfunc="count",
    fill_value=0,
)

---

## BERT Finetuned 

https://huggingface.co/justin871030/bert-base-uncased-goemotions-group-finetuned

https://github.com/justin871030/GoEmotions/tree/main

- Macro F1: 0.532031

## DistilBERT Finetuned 

https://huggingface.co/jungealexander/distilbert-base-uncased-finetuned-go_emotions_20220608_1

- Loss: 0.0857
- F1: 0.5575
- Roc Auc: 0.7242
- Accuracy: 0.4364

In [7]:
# Build the DistilBERT pipeline, asking for a score per label instead of only the
# top one.
# NOTE(review): newer transformers releases deprecate `return_all_scores` in favour
# of `top_k=None` — confirm the output nesting before switching.
dbt_model_id = "jungealexander/distilbert-base-uncased-finetuned-go_emotions_20220608_1"
dbt = pipeline("sentiment-analysis", model=dbt_model_id, return_all_scores=True)



In [8]:
# Run DistilBERT on every test text, one pipeline call per text, keeping the raw
# pipeline output for each sample.
dbt_texts = test_pv.reset_index()["text"].tolist()
dbt_labels = [dbt(sample) for sample in tqdm(dbt_texts)]

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5427/5427 [01:13<00:00, 74.19it/s]


In [9]:
# Turn the DistilBERT scores into a binary (n_samples, n_labels) indicator matrix
# and score it against the test ground truth.
# The matrix width comes from ge_labels because that list also selects and orders
# the ground-truth columns, so both matrices are guaranteed to share one layout.
threshold = 0.2  # a label counts as predicted when its score exceeds this value
y_true = test_pv[ge_labels].values

dbt_pred = np.zeros((len(dbt_labels), len(ge_labels)))
for i, label_list in enumerate(dbt_labels):
    # label_list[0] holds the per-label score dicts for sample i.
    for label in label_list[0]:
        if label["score"] > threshold:
            # NOTE(review): assumes the model's LABEL_<n> ids follow the order of
            # ge_labels — confirm against the model config.
            dbt_pred[i, int(label["label"].replace("LABEL_", ""))] = 1

print(
    "Macro F1:", f1_score(y_true, dbt_pred, average="macro"),
    "\nMicro F1:", f1_score(y_true, dbt_pred, average="micro"),
)

Macro F1: 0.44244223148216955 
Micro F1: 0.5871635043496131


## RoBERTa Base

https://huggingface.co/SamLowe/roberta-base-go_emotions

- Micro F1: 0.585
- ROC AUC: 0.751
- Accuracy: 0.474

In [10]:
# Build the RoBERTa-base pipeline, asking for a score per label instead of only
# the top one.
# NOTE(review): newer transformers releases deprecate `return_all_scores` in favour
# of `top_k=None` — confirm the output nesting before switching.
roberta = pipeline(
    "sentiment-analysis",
    model="SamLowe/roberta-base-go_emotions",
    return_all_scores=True,
)



In [11]:
# Run RoBERTa-base on every test text, one pipeline call per text, keeping the raw
# pipeline output for each sample.
roberta_texts = test_pv.reset_index()["text"].tolist()
roberta_labels = [roberta(sample) for sample in tqdm(roberta_texts)]

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5427/5427 [02:24<00:00, 37.51it/s]


In [12]:
# Turn the RoBERTa-base scores into a binary (n_samples, n_labels) indicator matrix
# and score it against the test ground truth.
# The matrix width comes from ge_labels because columns are looked up in that list
# and it also selects and orders the ground-truth columns.
threshold = 0.2  # a label counts as predicted when its score exceeds this value
y_true = test_pv[ge_labels].values

roberta_pred = np.zeros((len(roberta_labels), len(ge_labels)))
for i, label_list in enumerate(roberta_labels):
    # label_list[0] holds the per-label score dicts for sample i.
    for label in label_list[0]:
        if label["score"] > threshold:
            # The model reports label names, so the column is its position in ge_labels.
            roberta_pred[i, ge_labels.index(label["label"])] = 1

print(
    "Macro F1:", f1_score(y_true, roberta_pred, average="macro"),
    "\nMicro F1:", f1_score(y_true, roberta_pred, average="micro"),
)

Macro F1: 0.4992045852054491 
Micro F1: 0.5950699533644237


## RoBERTa Large

https://huggingface.co/IsaacZhy/roberta-large-goemotions

- Loss: 0.0568
- F1: 0.5868
- Roc Auc: 0.7616
- Accuracy: 0.4821

In [13]:
# Build the RoBERTa-large pipeline, asking for a score per label instead of only
# the top one.
# NOTE(review): newer transformers releases deprecate `return_all_scores` in favour
# of `top_k=None` — confirm the output nesting before switching.
roberta_large = pipeline(
    "sentiment-analysis",
    model="IsaacZhy/roberta-large-goemotions",
    return_all_scores=True,
)



In [14]:
# Run RoBERTa-large on every test text, one pipeline call per text, keeping the raw
# pipeline output for each sample.
robertal_texts = test_pv.reset_index()["text"].tolist()
robertal_labels = [roberta_large(sample) for sample in tqdm(robertal_texts)]

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5427/5427 [08:27<00:00, 10.69it/s]


In [15]:
# Turn the RoBERTa-large scores into a binary (n_samples, n_labels) indicator matrix
# and score it against the test ground truth.
# The matrix width comes from ge_labels because columns are looked up in that list
# and it also selects and orders the ground-truth columns.
threshold = 0.2  # a label counts as predicted when its score exceeds this value
y_true = test_pv[ge_labels].values

robertal_pred = np.zeros((len(robertal_labels), len(ge_labels)))
for i, label_list in enumerate(robertal_labels):
    # label_list[0] holds the per-label score dicts for sample i.
    for label in label_list[0]:
        if label["score"] > threshold:
            # The model reports label names, so the column is its position in ge_labels.
            robertal_pred[i, ge_labels.index(label["label"])] = 1

print(
    "Macro F1:", f1_score(y_true, robertal_pred, average="macro"),
    "\nMicro F1:", f1_score(y_true, robertal_pred, average="micro"),
)

Macro F1: 0.5343813883997255 
Micro F1: 0.5910508474576271


## RoBERTa BNE

https://huggingface.co/mrm8488/roberta-large-bne-finetuned-go_emotions-es

- Loss: 3.2457
- Accuracy: 0.5668
- F1: 0.5572

In [16]:
# Build the RoBERTa-BNE pipeline, asking for a score per label instead of only the
# top one.
# NOTE(review): newer transformers releases deprecate `return_all_scores` in favour
# of `top_k=None` — confirm the output nesting before switching.
roberta_bne_model_id = "mrm8488/roberta-large-bne-finetuned-go_emotions-es"
roberta_bne = pipeline("sentiment-analysis", model=roberta_bne_model_id, return_all_scores=True)



In [17]:
# Run RoBERTa-BNE on every test text, one pipeline call per text, keeping the raw
# pipeline output for each sample.
roberta_bne_texts = test_pv.reset_index()["text"].tolist()
roberta_bne_labels = [roberta_bne(sample) for sample in tqdm(roberta_bne_texts)]

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5427/5427 [09:16<00:00,  9.76it/s]


In [18]:
# Turn the RoBERTa-BNE scores into a binary (n_samples, n_labels) indicator matrix
# and score it against the test ground truth.
# The matrix width comes from ge_labels because that list also selects and orders
# the ground-truth columns, so both matrices are guaranteed to share one layout.
threshold = 0.2  # a label counts as predicted when its score exceeds this value
y_true = test_pv[ge_labels].values

roberta_bne_pred = np.zeros((len(roberta_bne_labels), len(ge_labels)))
for i, label_list in enumerate(roberta_bne_labels):
    # label_list[0] holds the per-label score dicts for sample i.
    for label in label_list[0]:
        if label["score"] > threshold:
            # NOTE(review): assumes the model's LABEL_<n> ids follow the order of
            # ge_labels — confirm against the model config.
            roberta_bne_pred[i, int(label["label"].replace("LABEL_", ""))] = 1

print(
    "Macro F1:", f1_score(y_true, roberta_bne_pred, average="macro"),
    "\nMicro F1:", f1_score(y_true, roberta_bne_pred, average="micro"),
)

Macro F1: 0.08742868052796397 
Micro F1: 0.3482438701126574


## EmoRoBERTa 

https://huggingface.co/arpanghoshal/EmoRoBERTa

- Macro F1: 0.493

In [19]:
# Build the EmoRoBERTa pipeline, asking for a score per label instead of only the
# top one.
# NOTE(review): newer transformers releases deprecate `return_all_scores` in favour
# of `top_k=None` — confirm the output nesting before switching.
emoroberta = pipeline(
    "sentiment-analysis",
    model="arpanghoshal/EmoRoBERTa",
    return_all_scores=True,
)



In [20]:
# Run EmoRoBERTa on every test text, one pipeline call per text, keeping the raw
# pipeline output for each sample.
emoroberta_texts = test_pv.reset_index()["text"].tolist()
emoroberta_labels = [emoroberta(sample) for sample in tqdm(emoroberta_texts)]

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5427/5427 [32:34<00:00,  2.78it/s]


In [27]:
# Turn the EmoRoBERTa scores into a binary (n_samples, n_labels) indicator matrix
# and score it against the test ground truth.
# The matrix width comes from ge_labels because columns are looked up in that list
# and it also selects and orders the ground-truth columns.
threshold = 0.2  # a label counts as predicted when its score exceeds this value
y_true = test_pv[ge_labels].values

emoroberta_pred = np.zeros((len(emoroberta_labels), len(ge_labels)))
for i, label_list in enumerate(emoroberta_labels):
    # label_list[0] holds the per-label score dicts for sample i.
    for label in label_list[0]:
        if label["score"] > threshold:
            # The model reports label names, so the column is its position in ge_labels.
            emoroberta_pred[i, ge_labels.index(label["label"])] = 1

print(
    "Macro F1:", f1_score(y_true, emoroberta_pred, average="macro"),
    "\nMicro F1:", f1_score(y_true, emoroberta_pred, average="micro"),
)

Macro F1: 0.4353215043422026 
Micro F1: 0.5119217444467087


## EmoGPT 

https://huggingface.co/tingtone/go_emo_gpt

- Loss: 0.0964
- F1: 0.6010
- Roc Auc: 0.7659
- Accuracy: 0.4996

In [22]:
# Build the EmoGPT pipeline, asking for a score per label instead of only the top
# one.
# NOTE(review): newer transformers releases deprecate `return_all_scores` in favour
# of `top_k=None` — confirm the output nesting before switching.
emogpt = pipeline(
    "sentiment-analysis",
    model="tingtone/go_emo_gpt",
    return_all_scores=True,
)



In [23]:
# Run EmoGPT on every test text, one pipeline call per text, keeping the raw
# pipeline output for each sample.
emogpt_texts = test_pv.reset_index()["text"].tolist()
emogpt_labels = [emogpt(sample) for sample in tqdm(emogpt_texts)]

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5427/5427 [02:55<00:00, 30.94it/s]


In [24]:
# Turn the EmoGPT scores into a binary (n_samples, n_labels) indicator matrix and
# score it against the test ground truth.
# The matrix width comes from ge_labels because columns are looked up in that list
# and it also selects and orders the ground-truth columns.
threshold = 0.2  # a label counts as predicted when its score exceeds this value
y_true = test_pv[ge_labels].values

emogpt_pred = np.zeros((len(emogpt_labels), len(ge_labels)))
for i, label_list in enumerate(emogpt_labels):
    # label_list[0] holds the per-label score dicts for sample i.
    for label in label_list[0]:
        if label["score"] > threshold:
            # The model reports label names, so the column is its position in ge_labels.
            emogpt_pred[i, ge_labels.index(label["label"])] = 1

print(
    "Macro F1:", f1_score(y_true, emogpt_pred, average="macro"),
    "\nMicro F1:", f1_score(y_true, emogpt_pred, average="micro"),
)

Macro F1: 0.5018997608753327 
Micro F1: 0.6058181818181818


---