In [None]:
!pip install -q transformers datasets evaluate wandb

In [None]:
from datasets import load_dataset

from evaluate import evaluator

from huggingface_hub import notebook_login

from sklearn.metrics import accuracy_score, f1_score

import torch

from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import Trainer, TrainingArguments

import wandb

In [None]:
# Set the WANDB_PROJECT environment variable for this kernel session.
# (Keep the comment on its own line: text after the value would become part of it.)
%env WANDB_PROJECT=emotion_classifier

In [None]:
# Authenticate with Weights & Biases; training below reports to "wandb".
wandb.login()

In [None]:
# Authenticate with the Hugging Face Hub; needed for the push_to_hub calls later on.
notebook_login()

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

## Tokenizer

In [None]:
# Load the tokenizer for the same "roberta-base" checkpoint the model is loaded from.
tokenizer = AutoTokenizer.from_pretrained("roberta-base")

In [None]:
def tokenization(sample):
    """Tokenize the ``"text"`` field of ``sample`` with the module-level tokenizer.

    Args:
        sample: A mapping with a ``"text"`` entry (a single example or a
            batch of examples, as handed over by ``Dataset.map``).

    Returns:
        The tokenizer's output for that text, with padding and truncation
        both enabled.
    """
    texts = sample["text"]
    encoded = tokenizer(texts, truncation=True, padding=True)
    return encoded

## Dataset

In [None]:
# Load the "emotion" dataset; the "train", "validation" and "test" splits are used below.
dataset = load_dataset("emotion")

In [None]:
# Tokenize every split. With batched=True and batch_size=None the datasets docs
# say each split is passed to `tokenization` as one batch, so padding=True pads
# to a single length per split.
dataset = dataset.map(tokenization, batched=True, batch_size=None)

In [None]:
# Switch the dataset (in place) to "torch" format for the three listed columns,
# which are the ones handed to the Trainer.
dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])

In [None]:
# Class index -> emotion name. The position in the list is the class index.
id2label = dict(enumerate([
    "sadness",
    "joy",
    "love",
    "anger",
    "fear",
    "surprise",
]))

# Emotion name -> class index: the exact inverse of ``id2label``.
label2id = {name: index for index, name in id2label.items()}

## Model

In [None]:
# Load "roberta-base" with a 6-way sequence-classification head and attach the
# label mappings to it.
model = AutoModelForSequenceClassification.from_pretrained(
    "roberta-base",
    num_labels=6,
    id2label=id2label,
    label2id=label2id,
)
# Place the model on the selected device.
model = model.to(device)

## Training

In [None]:
def compute_metrics(pred):
    """Compute accuracy and weighted F1 for one evaluation pass.

    Args:
        pred: Object exposing ``label_ids`` (reference labels) and
            ``predictions`` (per-class scores; the arg-max over the last
            axis is taken as the predicted class).

    Returns:
        dict with keys ``"accuracy"`` and ``"f1"``.
    """
    y_true = pred.label_ids
    y_pred = pred.predictions.argmax(-1)
    return {
        "accuracy": accuracy_score(y_true, y_pred),
        "f1": f1_score(y_true, y_pred, average="weighted"),
    }

In [None]:
# Same batch size for training and evaluation.
batch_size = 64

# NOTE(review): newer transformers releases rename `evaluation_strategy` to
# `eval_strategy` — confirm against the installed version.
training_args = TrainingArguments(
    output_dir="results",
    # Optimisation settings.
    num_train_epochs=8,
    learning_rate=2e-5,
    weight_decay=0.01,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    # Evaluate and checkpoint once per epoch; the best model is selected on
    # the "f1" value returned by compute_metrics.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    # Logging / progress display.
    report_to="wandb",
    disable_tqdm=False,
)

In [None]:
# Fine-tune on the "train" split and evaluate on the "validation" split.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["validation"],
    compute_metrics=compute_metrics,
)
trainer.train()

# Close the Weights & Biases run once training has returned.
wandb.finish()

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,0.245056,0.916,0.916588
2,0.442700,0.168471,0.9295,0.930128
3,0.442700,0.13662,0.936,0.936377
4,0.124000,0.131442,0.934,0.934007


In [None]:
# Upload the fine-tuned model and its tokenizer to the same Hub repository,
# "roberta-emotion".
model.push_to_hub("roberta-emotion")
tokenizer.push_to_hub("roberta-emotion")

## Evaluation

In [None]:
# Build an `evaluate` evaluator for the "text-classification" task.
task_evaluator = evaluator("text-classification")

In [None]:
# Score the in-memory model on the "test" split of the "emotion" dataset
# ("split" configuration), reporting accuracy.
# label_mapping converts the model's emotion-name outputs back to class ids.
# The bootstrap strategy uses 10 resamples with a fixed random_state.
results = task_evaluator.compute(
    model_or_pipeline=model,
    tokenizer=tokenizer,
    data="emotion",
    subset="split",
    split="test",
    metric="accuracy",
    label_mapping=label2id,
    strategy="bootstrap",
    n_resamples=10,
    random_state=0
)

In [None]:
# Display the evaluation results as the cell output.
results