In [8]:
from torch.utils.data import DataLoader
from model import TransformersModel

from preprocess import ProcessGoEmotions, TokenizeDataset

In [9]:
PATH1 = "./data/full_dataset/goemotions_1.csv"
PATH2 = "./data/full_dataset/goemotions_2.csv"
PATH3 = "./data/full_dataset/goemotions_3.csv"

# LABEL = "emotions" # all 28 labels
LABEL = "emotion_category" # positive negative ambiguous and neutral

process = ProcessGoEmotions(label_choice=LABEL)
train_dataset, test_dataset = process.get_datasets(paths=[PATH1], test_size = 0.2)

Index(['text', 'id', 'author', 'subreddit', 'link_id', 'parent_id',
       'created_utc', 'rater_id', 'example_very_unclear', 'admiration',
       'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion',
       'curiosity', 'desire', 'disappointment', 'disapproval', 'disgust',
       'embarrassment', 'excitement', 'fear', 'gratitude', 'grief', 'joy',
       'love', 'nervousness', 'optimism', 'pride', 'realization', 'relief',
       'remorse', 'sadness', 'surprise', 'neutral', 'emotions',
       'emotion_category'],
      dtype='object')


In [7]:
import pandas as pd

df = pd.read_csv(PATH1)
df.columns

Index(['text', 'id', 'author', 'subreddit', 'link_id', 'parent_id',
       'created_utc', 'rater_id', 'example_very_unclear', 'admiration',
       'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion',
       'curiosity', 'desire', 'disappointment', 'disapproval', 'disgust',
       'embarrassment', 'excitement', 'fear', 'gratitude', 'grief', 'joy',
       'love', 'nervousness', 'optimism', 'pride', 'realization', 'relief',
       'remorse', 'sadness', 'surprise', 'neutral'],
      dtype='object')

In [3]:
tonekizer = TokenizeDataset(train_dataset)
tokenized_train = tonekizer.tokenize_process(tokenizer_name="bert-base-cased")

tonekizer = TokenizeDataset(test_dataset)
tokenized_test = tonekizer.tokenize_process(tokenizer_name="bert-base-cased")

                                                                    

In [4]:
small_train_dataset = tokenized_train.shuffle(seed=42).select(range(200))
small_eval_dataset = tokenized_test.shuffle(seed=42).select(range(200))

In [5]:
train_dataloader = DataLoader(small_train_dataset, shuffle=True, batch_size=8)
eval_dataloader = DataLoader(small_eval_dataset, batch_size=8)

In [6]:
num_epochs = 3

transformers_model = TransformersModel(
    optimizer_name="AdamW",
    num_epochs=num_epochs,
    train_dataloader=train_dataloader,
    eval_dataloader=eval_dataloader,
    num_labels=len(tokenized_train['labels'].unique()),
    model_name="bert-base-cased"
)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

In [7]:
transformers_model.train()

100%|██████████| 75/75 [08:10<00:00,  6.54s/it, loss=tensor(0.5492, grad_fn=<NllLossBackward0>)]


In [10]:
from sklearn.metrics import classification_report
import numpy as np


def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    report = classification_report(labels, predictions, digits=4, output_dict=True)
    return {
        "accuracy": report["accuracy"],
        "class_0_f1": report["0"]["f1-score"],
        "class_1_f1": report["1"]["f1-score"],
        "class_2_f1": report["2"]["f1-score"],
        "class_3_f1": report["3"]["f1-score"],
        "class_4_f1": report["4"]["f1-score"],
        "class_5_f1": report["5"]["f1-score"],
        "class_6_f1": report["6"]["f1-score"],
        "class_7_f1": report["7"]["f1-score"],
    }

In [28]:
import torch
import evaluate

device = transformers_model.device
model = transformers_model.model

def model_evaluate(model, device, metric = "accuracy"):
    metric = evaluate.load(metric)
    model.eval()
    for batch in eval_dataloader:
        batch = {k: v.to(device) for k, v in batch.items()}
        with torch.no_grad():
            outputs = model(**batch)

        logits = outputs.logits
        predictions = torch.argmax(logits, dim=-1)
        metric.add_batch(predictions=predictions, references=batch["labels"])

    return metric.compute()

def model_evaluate_custom(model, device):
    all_predictions, all_references = [], []
    model.eval()
    for batch in eval_dataloader:
        batch = {k: v.to(device) for k, v in batch.items()}
        with torch.no_grad():
            outputs = model(**batch)

        logits = outputs.logits
        predictions = torch.argmax(logits, dim=-1)
        all_predictions += list(predictions.numpy())
        all_references += list(batch["labels"].numpy())
    return all_predictions, all_references

predictions, references = model_evaluate_custom(model, device)

In [29]:
len(references)

200

In [38]:
[str(i) for i in tokenized_train['labels'].unique().numpy()]

['0', '1', '2', '3']

In [43]:
mapping = {0: "neutral", 1: "ambiguous", 2: "negative", 3: "positive"}
report = classification_report(references, predictions, target_names=[f"{mapping[i]}" for i in tokenized_train['labels'].unique().numpy()])

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [44]:
print(report)

              precision    recall  f1-score   support

     neutral       0.00      0.00      0.00        19
   ambiguous       0.27      0.32      0.29        57
    negative       0.38      0.28      0.32        50
    positive       0.52      0.68      0.58        74

    accuracy                           0.41       200
   macro avg       0.29      0.32      0.30       200
weighted avg       0.36      0.41      0.38       200



In [13]:
result

{'accuracy': 0.41}

----

In [None]:
dfvg

In [None]:
result = model.evaluate(metric="accuracy")

In [None]:
result

{'accuracy': 0.35}