In [None]:
# Fetch the project repository; the custom `roberta_emotion` package used below lives inside it.
!git clone https://github.com/ma2za/emotion-classification.git

In [None]:
# Move the `roberta_emotion` package next to the notebook so it can be imported directly.
# NOTE(review): not idempotent - re-running after the first move will not behave the same way.
!mv emotion-classification/emotion_classification/src/roberta_emotion roberta_emotion

In [None]:
# Install the third-party dependencies into the environment of the running kernel.
# `%pip` targets the kernel's interpreter, whereas `!pip` runs whichever `pip`
# happens to be first on the shell PATH.
# TODO: pin exact versions so the notebook stays reproducible.
%pip install -q transformers datasets evaluate wandb "ray[tune]"

In [1]:
import os
import random
from functools import partial

import numpy as np
import torch
import wandb
from datasets import load_dataset
from evaluate import evaluator
from huggingface_hub import notebook_login
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
from sklearn.metrics import accuracy_score, f1_score
from torch.optim import AdamW
from torch.utils.data import DataLoader
from tqdm.notebook import tqdm
from transformers import AutoTokenizer
from transformers.data.data_collator import default_data_collator
from transformers.optimization import get_linear_schedule_with_warmup
from datasets import concatenate_datasets
import itertools
import pandas as pd
from datasets import Dataset

from datasets import ClassLabel, Features, Value

In [2]:
from roberta_emotion.modeling_roberta_emotion import RobertaEmotion
from roberta_emotion.configuration_roberta_emotion import RobertaEmotionConfig

In [3]:
# Seed every random number generator used by the notebook so runs are repeatable.
SEED = 0
for seed_fn in (torch.manual_seed, random.seed, np.random.seed):
    seed_fn(SEED)

In [4]:
# Sets RAY_PICKLE_VERBOSE_DEBUG for this kernel process.
# NOTE(review): presumably meant to make Ray report pickling problems in detail; `ray` was
# already imported in the imports cell, so confirm the variable is still picked up this late.
%env RAY_PICKLE_VERBOSE_DEBUG=1

env: RAY_PICKLE_VERBOSE_DEBUG=1


In [5]:
# Export the Weights & Biases project name; `train` also passes the same project to `wandb.init`.
%env WANDB_PROJECT=emotion_classifier

env: WANDB_PROJECT=emotion_classifier


In [None]:
# Log in to Weights & Biases so that the training runs below can be recorded.
wandb.login()

In [None]:
# Log in to the Hugging Face Hub; required by the `push_to_hub` calls later in the notebook.
notebook_login()

In [6]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

## Tokenizer

In [7]:
# Load the pretrained tokenizer that belongs to the `roberta-base` checkpoint.
tokenizer = AutoTokenizer.from_pretrained("roberta-base")

In [8]:
def tokenization(sample):
    """Tokenize the ``"text"`` field of ``sample`` with the module-level tokenizer.

    Padding and truncation are both enabled, so every sequence produced by
    one call comes back with the same length.

    Args:
        sample: Mapping with a ``"text"`` entry (a string or a batch of strings).

    Returns:
        Whatever the tokenizer returns for that text.
    """
    texts = sample["text"]
    encoded = tokenizer(texts, truncation=True, padding=True)
    return encoded

## Dataset

In [9]:
# Download/load the "emotion" dataset; its "train" and "validation" splits are used below.
dataset = load_dataset("emotion")



  0%|          | 0/3 [00:00<?, ?it/s]

In [10]:
# Download/load the DailyDialog corpus, an optional extra source of emotion-labelled text.
daily_dialog = load_dataset("daily_dialog")



  0%|          | 0/3 [00:00<?, ?it/s]

In [11]:
# Pool the three DailyDialog splits into one dataset; it is only ever used as extra training data.
daily_dialog = concatenate_datasets(
    [daily_dialog[split_name] for split_name in ("train", "validation", "test")]
)

In [12]:
def merge_daily_dialog(examples):
    """Turn one DailyDialog record into a list of emotion-labelled utterances.

    ``examples["dialog"]`` and ``examples["emotion"]`` are walked in parallel.
    Utterances whose emotion id has no counterpart in the mapping below are
    dropped; the others are emitted with the corresponding label name.

    Args:
        examples: Mapping with parallel ``"dialog"`` and ``"emotion"`` sequences.

    Returns:
        dict: ``{"chunks": [{"text": ..., "label": ...}, ...]}``.
    """
    # DailyDialog emotion id -> label name shared with the "emotion" dataset.
    emotion_names = {1: "anger", 3: "fear", 4: "joy", 5: "sadness", 6: "surprise"}
    kept_ids = list(emotion_names)

    chunks = []
    for utterance, emotion_id in zip(examples["dialog"], examples["emotion"]):
        if emotion_id not in kept_ids:
            continue
        chunks.append({"text": utterance, "label": emotion_names[emotion_id]})
    return {"chunks": chunks}

In [13]:
# Apply `merge_daily_dialog` to every record and drop all original columns,
# leaving only the "chunks" column returned by the function.
temp = daily_dialog.map(merge_daily_dialog, remove_columns=daily_dialog.column_names)

Map:   0%|          | 0/13118 [00:00<?, ? examples/s]

In [14]:
# Schema for the rebuilt DailyDialog table: a class label drawn from the six
# emotion names (in this index order) plus the raw text.
features = Features(
    {
        "label": ClassLabel(
            names=["sadness", "joy", "love", "anger", "fear", "surprise"],
            id=None,
        ),
        "text": Value(dtype="string", id=None),
    }
)

In [15]:
# Flatten the per-record "chunks" lists into one table of utterances and
# build a dataset from it using the schema declared in `features`.
utterance_rows = list(itertools.chain.from_iterable(temp["chunks"]))
daily_dialog = Dataset.from_pandas(pd.DataFrame(utterance_rows), features=features)

In [16]:
# Corpora used for training; this list is also stored in `train_config`.
DATASETS = ["emotion"]
# To add the DailyDialog utterances to the training split, use instead:
#   DATASETS = ["emotion", "daily_dialog"]

In [17]:
# Append the DailyDialog utterances to the training split only when that corpus
# is explicitly requested. Checking membership (rather than the list length)
# keeps an empty or differently-populated DATASETS from triggering the merge.
if "daily_dialog" in DATASETS:
    dataset["train"] = concatenate_datasets([dataset["train"], daily_dialog])

In [18]:
# Tokenize every split. With batched=True and batch_size=None the mapped function
# should receive a whole split in one call, so `padding=True` pads each split to
# its own longest sequence - TODO confirm against the `datasets` documentation.
dataset = dataset.map(tokenization, batched=True, batch_size=None)

Map:   0%|          | 0/16000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

In [19]:
# Return PyTorch tensors and expose only `input_ids` and `label` when indexing.
# NOTE(review): `attention_mask` is not exposed and the training/evaluation loops
# pass only `input_ids`, so the model never receives a padding mask - confirm the
# model handles padded positions on its own.
dataset.set_format("torch", columns=["input_ids", "label"])

In [20]:
# Class index -> emotion name, listed in class-index order.
id2label = dict(enumerate(["sadness", "joy", "love", "anger", "fear", "surprise"]))

# Reverse lookup: emotion name -> class index.
label2id = {emotion: index for index, emotion in id2label.items()}

In [21]:
# `Dataset.remove_columns` returns a new dataset rather than editing in place,
# so its result has to be kept; otherwise the raw "text" column is never dropped.
train_dataset = dataset["train"].remove_columns(["text"])
valid_dataset = dataset["validation"].remove_columns(["text"])

# Show the validation split as the cell output.
valid_dataset

Dataset({
    features: ['label', 'input_ids', 'attention_mask'],
    num_rows: 2000
})

## Model

In [22]:
# Register the custom config class for the Auto* API.
# Presumably this lets `push_to_hub` upload the custom code alongside the weights - TODO confirm.
RobertaEmotionConfig.register_for_auto_class()

In [23]:
# Register the custom model class under the "AutoModel" auto class.
RobertaEmotion.register_for_auto_class("AutoModel")

In [24]:
# Model configuration: six emotion classes with their name/index lookups and
# a hidden size of 768.
emotion_config = RobertaEmotionConfig(
    id2label=id2label,
    label2id=label2id,
    hidden_size=768,
    num_labels=6,
)

## Training

In [25]:
def compute_metrics(preds, labels):
    """Score predicted class ids against the reference labels.

    Args:
        preds: Predicted class ids.
        labels: Ground-truth class ids, aligned with ``preds``.

    Returns:
        tuple: ``(accuracy, weighted_f1)``.
    """
    accuracy = accuracy_score(y_true=labels, y_pred=preds)
    weighted_f1 = f1_score(y_true=labels, y_pred=preds, average="weighted")
    return accuracy, weighted_f1

In [26]:
def evaluation(model, dataloader):
    """Evaluate ``model`` on every batch of ``dataloader``.

    Predictions and labels are gathered over the whole loader and scored
    once, so the weighted F1 is the dataset-level value rather than an
    average of per-batch F1 scores (the two differ whenever the class mix
    varies between batches). The forward passes run under
    ``torch.no_grad()`` because no gradients are needed here.

    Args:
        model: Callable accepting ``input_ids`` and ``labels`` keyword
            arguments and returning an object with ``logits`` and ``loss``
            attributes; must also provide ``eval()``.
        dataloader: Sized iterable of batches holding ``"input_ids"`` and
            ``"labels"`` tensors.

    Returns:
        tuple: ``(accuracy, weighted_f1, mean_loss)`` where ``mean_loss`` is a
        Python float averaged over samples.

    Raises:
        ValueError: If the dataloader yields no samples.
    """
    model.eval()
    all_preds = []
    all_labels = []
    total_loss = 0.0
    total_samples = 0

    with torch.no_grad():
        for batch in tqdm(dataloader, total=len(dataloader)):
            input_ids = batch["input_ids"].to(device)
            labels = batch["labels"].to(device)
            outputs = model(input_ids=input_ids, labels=labels)

            batch_size = len(labels)
            all_preds.append(outputs.logits.argmax(-1).cpu())
            all_labels.append(labels.cpu())
            # Assumes `outputs.loss` is the mean over the batch, so weighting by
            # the batch size gives a per-sample average at the end.
            total_loss += outputs.loss.item() * batch_size
            total_samples += batch_size

    if total_samples == 0:
        raise ValueError("evaluation() received a dataloader without any samples")

    acc, f1 = compute_metrics(torch.cat(all_preds), torch.cat(all_labels))
    return acc, f1, total_loss / total_samples

In [27]:
def train(model, checkpoint_dir, optimizer, lr_scheduler, train_loader,
          valid_loader, tune_flag=False, config=None):
    """Fine-tune ``model``, log validation metrics to W&B and checkpoint it.

    The backbone is frozen for the first three epochs and unfrozen
    afterwards. After every epoch the model is evaluated on ``valid_loader``.
    Under Ray Tune the model and optimizer state are checkpointed and
    reported every epoch; otherwise the weights are saved whenever the
    validation F1 improves.

    Args:
        model: Module exposing a ``backbone`` sub-module (assumed to be a
            ``torch.nn.Module``); calling it with ``input_ids`` and ``labels``
            must return an object with a ``loss`` attribute.
        checkpoint_dir: Directory that receives ``pytorch_model.bin`` when
            ``tune_flag`` is false. ``None`` means the current directory.
        optimizer: Optimizer stepped once per batch.
        lr_scheduler: Scheduler stepped once per batch.
        train_loader: Sized iterable of training batches with ``"input_ids"``
            and ``"labels"``.
        valid_loader: Loader passed to ``evaluation`` after each epoch.
        tune_flag: When true, checkpoint and report through Ray Tune.
        config: Optional dict logged to W&B. Its ``"epochs"`` entry, when
            present, sets the number of epochs (default 8) so the loop can
            match the length the LR schedule was built for.
    """
    # Avoid a shared mutable default argument.
    config = {} if config is None else config
    if checkpoint_dir is None:
        checkpoint_dir = "."
    num_epochs = int(config.get("epochs", 8))

    wandb.init(project="emotion_classifier", config=config)
    try:
        # Start below any reachable score so the first epoch always yields a checkpoint.
        best_f1 = float("-inf")
        # `requires_grad_` flips the flag on every parameter of the sub-module;
        # assigning a plain `requires_grad` attribute on a module freezes nothing.
        model.backbone.requires_grad_(False)
        for epoch in range(num_epochs):
            model.train()
            if epoch > 2:
                model.backbone.requires_grad_(True)
            for batch in tqdm(train_loader, total=len(train_loader)):
                model.zero_grad()
                input_ids = batch["input_ids"].to(device)
                labels = batch["labels"].to(device)
                outputs = model(input_ids=input_ids, labels=labels)
                outputs.loss.backward()

                optimizer.step()
                lr_scheduler.step()

            valid_acc, valid_f1, valid_loss = evaluation(model, valid_loader)
            # Accept either a 0-dim tensor or a plain number for the loss.
            valid_loss = float(valid_loss)
            wandb.log({"eval/loss": valid_loss, "eval/f1": valid_f1, "eval/accuracy": valid_acc})

            if tune_flag:
                # Separate name so the `checkpoint_dir` argument is not shadowed.
                with tune.checkpoint_dir(epoch) as tune_checkpoint_dir:
                    path = os.path.join(tune_checkpoint_dir, "checkpoint")
                    torch.save((model.state_dict(), optimizer.state_dict()), path)

                tune.report(loss=valid_loss, accuracy=valid_acc)
            elif best_f1 < valid_f1:
                best_f1 = valid_f1
                os.makedirs(checkpoint_dir, exist_ok=True)
                path = os.path.join(checkpoint_dir, "pytorch_model.bin")
                torch.save(model.state_dict(), path)
    finally:
        # Close the W&B run even when training is interrupted by an error.
        wandb.finish()

In [28]:
def train_roberta(config, checkpoint_dir=None):
    """Build the data loaders, model, optimizer and LR schedule, then train.

    Args:
        config: Dict with ``"batch_size"``, ``"epochs"`` and ``"lr"``. An
            optional truthy ``"tune_flag"`` entry makes the training loop
            checkpoint and report through Ray Tune instead of writing
            ``pytorch_model.bin``.
        checkpoint_dir: Directory forwarded to ``train`` for the best-model
            checkpoint.

    Returns:
        The trained ``RobertaEmotion`` instance, holding the weights of the
        last epoch.
    """
    batch_size = int(config["batch_size"])

    def _make_loader(split, shuffle):
        # One place for the loader settings shared by both splits.
        return DataLoader(
            split,
            batch_size=batch_size,
            shuffle=shuffle,
            collate_fn=default_data_collator,
            drop_last=False,
            num_workers=0,
            # Pinned memory only helps host-to-GPU copies.
            pin_memory=torch.cuda.is_available(),
        )

    # Shuffle the training data each epoch; validation order does not matter.
    train_loader = _make_loader(train_dataset, shuffle=True)
    valid_loader = _make_loader(valid_dataset, shuffle=False)

    num_training_steps = len(train_loader) * int(config["epochs"])

    model = RobertaEmotion(emotion_config).to(device)
    optimizer = AdamW(model.parameters(), lr=config["lr"], betas=(0.9, 0.999), eps=1e-08)
    # Linear warm-up over the first third of the steps, then linear decay.
    lr_scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=num_training_steps // 3,
        num_training_steps=num_training_steps,
    )

    # `config` must be passed by keyword: positionally it would land in the
    # `tune_flag` slot of `train` and silently switch on the Ray Tune branch.
    train(
        model,
        checkpoint_dir,
        optimizer,
        lr_scheduler,
        train_loader,
        valid_loader,
        tune_flag=bool(config.get("tune_flag", False)),
        config=config,
    )
    return model

In [29]:
def tuning():
    """Search over batch sizes with Ray Tune and return the best model.

    Trials run ``train_roberta`` under an ASHA scheduler that minimizes the
    validation loss. The checkpoint of the trial with the lowest final loss
    is loaded into a fresh ``RobertaEmotion`` instance.

    Returns:
        The ``RobertaEmotion`` model restored from the best trial.

    Raises:
        RuntimeError: If no trial reported a loss.
    """
    config = {
        "batch_size": tune.choice([32, 64, 128]),
        # `train_roberta` also reads these two keys; they are held constant here.
        "lr": 5e-05,
        "epochs": 10,
        # Marks the run as a Ray Tune trial for training code that inspects the config.
        "tune_flag": True,
    }
    scheduler = ASHAScheduler(
        metric="loss",
        mode="min",
        max_t=10,
        grace_period=1,
        reduction_factor=2)
    reporter = CLIReporter(
        metric_columns=["loss", "accuracy", "training_iteration"])
    result = tune.run(
        # Passed directly: `train_roberta` takes no `data_dir` argument.
        train_roberta,
        # Only request a GPU when one exists, otherwise trials would never be scheduled.
        resources_per_trial={"cpu": 1, "gpu": 1 if torch.cuda.is_available() else 0},
        config=config,
        num_samples=10,
        scheduler=scheduler,
        progress_reporter=reporter)

    best_trial = result.get_best_trial("loss", "min", "last")
    if best_trial is None:
        raise RuntimeError("Ray Tune finished without any trial reporting a loss")

    best_trained_model = RobertaEmotion(emotion_config).to(device)

    best_checkpoint_dir = best_trial.checkpoint.value
    # The checkpoint holds a (model_state, optimizer_state) pair; only the model part is needed.
    model_state, _optimizer_state = torch.load(
        os.path.join(best_checkpoint_dir, "checkpoint"), map_location=device)
    best_trained_model.load_state_dict(model_state)
    return best_trained_model

In [30]:
# Hyper-parameters for the single training run below; "datasets" records
# which corpora were selected in DATASETS.
train_config = dict(
    batch_size=64,
    epochs=10,
    lr=5e-05,
    datasets=DATASETS,
)

In [None]:
# Train once with `train_config`, using the current directory as the checkpoint directory.
model = train_roberta(train_config, checkpoint_dir=".")

Downloading pytorch_model.bin:   0%|          | 0.00/501M [00:00<?, ?B/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
[34m[1mwandb[0m: Currently logged in as: [33mmeraxes[0m. Use [1m`wandb login --relogin`[0m to force relogin


  0%|          | 0/250 [00:00<?, ?it/s]

In [None]:
# Restore the weights from `./pytorch_model.bin`, the file `train` writes when the
# validation F1 improves; `model` itself still holds the weights of the last epoch.
model_state = torch.load(os.path.join(".", "pytorch_model.bin"))
model.load_state_dict(model_state)

In [None]:
# Publish the model and its tokenizer to the "roberta-emotion" repository on the Hugging Face Hub.
model.push_to_hub("roberta-emotion")
tokenizer.push_to_hub("roberta-emotion")

## Evaluation

In [None]:
# Create an `evaluate` evaluator for the text-classification task.
task_evaluator = evaluator("text-classification")

In [None]:
# Score the model on the "test" split of the "emotion" dataset ("split" configuration).
# Accuracy is computed with the bootstrap strategy (10 resamples, fixed random state);
# `label_mapping` supplies the label-name -> class-index lookup.
results = task_evaluator.compute(
    model_or_pipeline=model,
    tokenizer=tokenizer,
    data="emotion",
    subset="split",
    split="test",
    metric="accuracy",
    label_mapping=label2id,
    strategy="bootstrap",
    n_resamples=10,
    random_state=0
)

# Display the computed metrics as the cell output.
results