In [1]:
!git clone https://github.com/ma2za/emotion-classification.git

Cloning into 'emotion-classification'...
remote: Enumerating objects: 64, done.[K
remote: Counting objects: 100% (64/64), done.[K
remote: Compressing objects: 100% (35/35), done.[K
remote: Total 64 (delta 21), reused 40 (delta 12), pack-reused 0[K
Unpacking objects: 100% (64/64), 89.92 KiB | 1.70 MiB/s, done.


In [2]:
!mv emotion-classification/emotion_classification/src/roberta_emotion roberta_emotion

In [3]:
!pip install -q transformers datasets evaluate wandb "ray[tune]"

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m45.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m474.6/474.6 kB[0m [31m21.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.4/81.4 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m29.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.6/58.6 MB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.5/224.5 kB[0m [31m9.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m31.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m212.5/212.5 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━

In [4]:
import torch
import wandb
from datasets import load_dataset
from evaluate import evaluator
from huggingface_hub import notebook_login
from sklearn.metrics import accuracy_score, f1_score
from torch.optim import AdamW
from torch.utils.data import DataLoader
from tqdm.notebook import tqdm
from transformers import AutoTokenizer
from transformers.data.data_collator import default_data_collator
from transformers.optimization import get_linear_schedule_with_warmup

import numpy as np
import os
import random
from functools import partial

In [5]:
from roberta_emotion.modeling_roberta_emotion import RobertaEmotion
from roberta_emotion.configuration_roberta_emotion import RobertaEmotionConfig

In [6]:
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler

In [7]:
torch.manual_seed(0)
random.seed(0)
np.random.seed(0)

In [8]:
%env RAY_PICKLE_VERBOSE_DEBUG=1

env: RAY_PICKLE_VERBOSE_DEBUG=1


In [9]:
%env WANDB_PROJECT=emotion_classifier

env: WANDB_PROJECT=emotion_classifier


In [10]:
wandb.login()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [11]:
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [12]:
device = "cuda" if torch.cuda.is_available() else "cpu"

## Tokenizer

In [13]:
tokenizer = AutoTokenizer.from_pretrained("roberta-base")

Downloading (…)lve/main/config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [14]:
def tokenization(sample):
    return tokenizer(sample["text"], padding=True, truncation=True)

## Dataset

In [15]:
dataset = load_dataset("emotion")

Downloading builder script:   0%|          | 0.00/3.97k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/3.28k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/8.78k [00:00<?, ?B/s]



Downloading and preparing dataset emotion/split to /root/.cache/huggingface/datasets/emotion/split/1.0.0/cca5efe2dfeb58c1d098e0f9eeb200e9927d889b5a03c67097275dfb5fe463bd...


Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/592k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/74.0k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/74.9k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/16000 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/2000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/2000 [00:00<?, ? examples/s]

Dataset emotion downloaded and prepared to /root/.cache/huggingface/datasets/emotion/split/1.0.0/cca5efe2dfeb58c1d098e0f9eeb200e9927d889b5a03c67097275dfb5fe463bd. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

In [16]:
dataset = dataset.map(tokenization, batched=True, batch_size=None)

Map:   0%|          | 0/16000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

In [17]:
dataset.set_format("torch", columns=["input_ids", "label"])

In [18]:
id2label =  {
    0: "sadness",
    1: "joy",
    2: "love",
    3: "anger",
    4: "fear",
    5: "surprise"
  }

label2id = {
    "sadness": 0,
    "joy": 1,
    "love": 2,
    "anger": 3,
    "fear": 4,
    "surprise": 5
  }

In [19]:
train_dataset = dataset["train"]
train_dataset.remove_columns(["text"])

valid_dataset = dataset["validation"]
valid_dataset.remove_columns(["text"])

Dataset({
    features: ['label', 'input_ids', 'attention_mask'],
    num_rows: 2000
})

## Model

In [20]:
RobertaEmotionConfig.register_for_auto_class()

In [21]:
RobertaEmotion.register_for_auto_class("AutoModel")

In [22]:
emotion_config = RobertaEmotionConfig(id2label = id2label, 
                              label2id = label2id, 
                              hidden_size = 768,
                              num_labels = 6)

## Training

In [24]:
def compute_metrics(preds, labels):
    f1 = f1_score(labels, preds, average="weighted")
    acc = accuracy_score(labels, preds)
    return acc , f1

In [25]:
def evaluation(model, dataloader):
    model.eval()
    total_samples = 0
    total_loss = 0
    total_acc = 0
    total_f1 = 0
    for step, batch in tqdm(enumerate(dataloader), total=len(dataloader)):
        input_ids = batch["input_ids"].to(device)
        labels = batch["labels"].to(device)
        outputs = model(input_ids=input_ids, labels=labels)
        acc, f1 = compute_metrics(outputs.logits.argmax(-1).detach().cpu(), labels.detach().cpu())
        total_acc += acc*len(labels)
        total_f1 += f1*len(labels)
        total_samples += len(labels)
        total_loss += outputs.loss.detach().cpu()*len(labels)
    return total_acc/total_samples, total_f1/total_samples, total_loss/total_samples

In [32]:
def train(model, checkpoint_dir, optimizer, lr_scheduler, train_loader, valid_loader, tune_flag=False):
    wandb.init(project="emotion_classifier")
    best_f1 = 0
    model.backbone.requires_grad = False
    for epoch in range(8):
        model.train()
        if epoch > 2:
            model.backbone.requires_grad = True
        for step, batch in tqdm(enumerate(train_loader), total=len(train_loader)):
            model.zero_grad()
            input_ids = batch["input_ids"].to(device)
            labels = batch["labels"].to(device)
            outputs = model(input_ids=input_ids, labels=labels)
            outputs.loss.backward()

            optimizer.step()
            lr_scheduler.step()
        valid_acc, valid_f1, valid_loss = evaluation(model, valid_loader)
        wandb.log({"eval/loss": valid_loss, "eval/f1": valid_f1, "eval/accuracy": valid_acc})

        if tune_flag:

            with tune.checkpoint_dir(epoch) as checkpoint_dir:
                path = os.path.join(checkpoint_dir, "checkpoint")
                torch.save((model.state_dict(), optimizer.state_dict()), path)

            tune.report(loss=valid_loss, accuracy=valid_acc)
        else:
            if best_f1 < valid_f1:
                best_f1 = valid_f1
                path = os.path.join(checkpoint_dir, "pytorch_model.bin")
                torch.save(model.state_dict(), path)

    wandb.finish()

In [33]:
def train_roberta(config, checkpoint_dir=None):
    model = RobertaEmotion(emotion_config).to(device)
    optimizer = AdamW(model.parameters(),lr= 5e-05, betas= (0.9, 0.999), eps= 1e-08)
    lr_scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=500, num_training_steps=2000)

    train_loader = DataLoader(
                train_dataset,
                batch_size=int(config["batch_size"]),
                collate_fn=default_data_collator,
                drop_last=False,
                num_workers=0,
                pin_memory=True
                )

    valid_loader = DataLoader(
                valid_dataset,
                batch_size=int(config["batch_size"]),
                collate_fn=default_data_collator,
                drop_last=False,
                num_workers=0,
                pin_memory=True
            )

    train(model, checkpoint_dir, optimizer, lr_scheduler, train_loader, valid_loader)
    return model

In [34]:
def tuning():
    data_dir = os.path.abspath("./data")
    config = {
        "batch_size": tune.choice([32, 64, 128])
    }
    scheduler = ASHAScheduler(
        metric="loss",
        mode="min",
        max_t=10,
        grace_period=1,
        reduction_factor=2)
    reporter = CLIReporter(
        # ``parameter_columns=["l1", "l2", "lr", "batch_size"]``,
        metric_columns=["loss", "accuracy", "training_iteration"])
    result = tune.run(
        partial(train_roberta, data_dir=data_dir),
        resources_per_trial={"cpu": 1, "gpu": 1},
        config=config,
        num_samples=10,
        scheduler=scheduler,
        progress_reporter=reporter)

    best_trial = result.get_best_trial("loss", "min", "last")

    best_trained_model = RobertaEmotion(emotion_config).to(device)

    best_checkpoint_dir = best_trial.checkpoint.value
    model_state, optimizer_state = torch.load(os.path.join(
        best_checkpoint_dir, "checkpoint"))
    best_trained_model.load_state_dict(model_state)

In [None]:
model = train_roberta({"batch_size": 64}, checkpoint_dir=".")

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/accuracy,▁▇▆▇██
eval/f1,▁▇▆███
eval/loss,█▃▂▂▂▁

0,1
eval/accuracy,0.936
eval/f1,0.93664
eval/loss,0.12032


  0%|          | 0/250 [00:00<?, ?it/s]

In [None]:
model_state = torch.load(os.path.join(".", "pytorch_model.bin"))
model.load_state_dict(model_state)

In [None]:
model.push_to_hub("roberta-emotion")
tokenizer.push_to_hub("roberta-emotion")

## Evaluation

In [None]:
task_evaluator = evaluator("text-classification")

In [None]:
results = task_evaluator.compute(
    model_or_pipeline=model,
    tokenizer=tokenizer,
    data="emotion",
    subset="split",
    split="test",
    metric="f1",
    label_mapping=label2id,
    strategy="bootstrap",
    n_resamples=10,
    random_state=0
)

results