## Train BERT model

In this notebook we fine-tune a BERT-style model (`roberta-base`) for emotion detection on the English dataset.

In [5]:
%load_ext autoreload
%autoreload 2
import os
import pandas as pd
from pysentimiento.preprocessing import preprocess_tweet
from pysentimiento.emotion import load_datasets


# Load the three English ("en") emotion-detection splits.
train_dataset, dev_dataset, test_dataset = load_datasets(lang="en")

# Smoke-test switch: set to an integer (e.g. 1500) to keep only the first
# `limit` rows of each split; leave as None to train on everything.
limit = None

if limit:
    print("\n\n", f"Limiting to {limit} instances")
    # Clamp per split: the splits have different sizes, and asking
    # `select` for indices beyond a split's length fails.
    train_dataset = train_dataset.select(range(min(limit, len(train_dataset))))
    dev_dataset = dev_dataset.select(range(min(limit, len(dev_dataset))))
    test_dataset = test_dataset.select(range(min(limit, len(test_dataset))))


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
%load_ext autoreload
%autoreload 2
import os
import torch
from pysentimiento.tass import load_model
from pysentimiento.emotion.datasets import id2label, label2id

# Default to GPU index 1, but keep any CUDA_VISIBLE_DEVICES value that was
# already exported for this process instead of overwriting it.
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "1")

base_model = "roberta-base"

# Build the classifier and its tokenizer with the emotion label mappings.
model, tokenizer = load_model(base_model, id2label=id2label, label2id=label2id)

# Fall back to CPU when no CUDA device is visible.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)

# Trailing semicolon keeps the returned module repr out of the cell output.
model.train();

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Loading model roberta-base


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.bias', 'roberta.pooler.dense.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.bias', 'roberta.pooler.dense.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifie

In [7]:
def tokenize(batch):
    """Run the notebook-level ``tokenizer`` over the ``text`` field of ``batch``.

    Inputs are truncated and padded with the ``max_length`` strategy; the
    tokenizer's output is returned unchanged.
    """
    texts = batch['text']
    encoded = tokenizer(texts, truncation=True, padding='max_length')
    return encoded

# Batch sizes, reused for the tokenising `map` calls in this cell.
batch_size = 32
eval_batch_size = 16

# The training split is tokenised in `batch_size` chunks ...
train_dataset = train_dataset.map(tokenize, batched=True, batch_size=batch_size)

# ... while dev and test (in that order) share the evaluation batch size.
dev_dataset, test_dataset = (
    split.map(tokenize, batched=True, batch_size=eval_batch_size)
    for split in (dev_dataset, test_dataset)
)

HBox(children=(FloatProgress(value=0.0, max=129.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=86.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=115.0), HTML(value='')))




In [11]:
# Sanity check: did tokenisation add a `token_type_ids` column to the training split?
"token_type_ids" in train_dataset.features

False

In [12]:
def format_dataset(dataset):
    """Add a ``labels`` column and expose the model inputs in ``torch`` format.

    Args:
        dataset: object providing ``map``, ``features`` and ``set_format``,
            with a ``label`` column alongside ``input_ids`` and
            ``attention_mask``.

    Returns:
        The mapped dataset, with its format set to ``torch`` over
        ``input_ids``, ``attention_mask``, ``labels`` and, when the column
        exists, ``token_type_ids``.
    """
    # Batched map: the callback only copies the `label` column, so hand it a
    # whole batch at a time instead of calling it once per row.
    dataset = dataset.map(
        lambda examples: {'labels': examples['label']},
        batched=True,
    )
    columns = ['input_ids', 'attention_mask', 'labels']
    # Keep segment ids only when the dataset actually has that column.
    if 'token_type_ids' in dataset.features:
        columns.append('token_type_ids')
    dataset.set_format(type='torch', columns=columns)
    print(columns)
    return dataset

# Apply the same formatting to train, dev and test, in that order.
train_dataset, dev_dataset, test_dataset = map(
    format_dataset,
    (train_dataset, dev_dataset, test_dataset),
)

HBox(children=(FloatProgress(value=0.0, max=4107.0), HTML(value='')))


['input_ids', 'attention_mask', 'labels']


HBox(children=(FloatProgress(value=0.0, max=1370.0), HTML(value='')))


['input_ids', 'attention_mask', 'labels']


HBox(children=(FloatProgress(value=0.0, max=1826.0), HTML(value='')))


['input_ids', 'attention_mask', 'labels']


In [13]:
from transformers import TrainingArguments, Trainer
from pysentimiento.metrics import compute_metrics
epochs = 3

# The final, partial batch of each epoch is still a step, so round up per
# epoch (ceil division) rather than flooring over the whole run.
# Assumes one device and no gradient accumulation — TODO confirm if that changes.
steps_per_epoch = -(-len(train_dataset) // batch_size)
total_steps = epochs * steps_per_epoch
# Warm the learning rate up over the first tenth of the training steps.
warmup_steps = total_steps // 10

training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=eval_batch_size,
    warmup_steps=warmup_steps,
    # Evaluate on the dev set at the end of every epoch. No `do_eval` flag is
    # passed: it is implied by any evaluation strategy other than "no".
    evaluation_strategy="epoch",
    weight_decay=0.01,
    logging_dir='./logs',
    # Reload the checkpoint with the best dev macro-F1 once training ends.
    load_best_model_at_end=True,
    metric_for_best_model="macro_f1",
)

trainer = Trainer(
    model=model,
    args=training_args,
    # Bind the label names so the metrics are reported per emotion.
    compute_metrics=lambda x: compute_metrics(x, id2label=id2label),
    train_dataset=train_dataset,
    eval_dataset=dev_dataset,
)

trainer.train()

Epoch,Training Loss,Validation Loss,Others F1,Others Precision,Others Recall,Joy F1,Joy Precision,Joy Recall,Sadness F1,Sadness Precision,Sadness Recall,Anger F1,Anger Precision,Anger Recall,Surprise F1,Surprise Precision,Surprise Recall,Disgust F1,Disgust Precision,Disgust Recall,Fear F1,Fear Precision,Fear Recall,Macro F1,Macro Precision,Macro Recall,Acc
1,No log,1.103243,0.602168,0.623489,0.582258,0.690952,0.626866,0.769634,0.589147,0.745098,0.487179,0.0,0.0,0.0,0.0,0.0,0.0,0.42029,0.322222,0.604167,0.0,0.0,0.0,0.328937,0.331096,0.349034,0.569343
2,No log,0.991414,0.661775,0.606999,0.727419,0.703185,0.684864,0.722513,0.666667,0.710145,0.628205,0.191489,0.45,0.121622,0.192308,0.625,0.113636,0.405904,0.433071,0.381944,0.0,0.0,0.0,0.403047,0.50144,0.385049,0.616788
3,No log,1.052326,0.654631,0.637615,0.672581,0.708075,0.673759,0.746073,0.662162,0.7,0.628205,0.280374,0.454545,0.202703,0.238806,0.347826,0.181818,0.440129,0.412121,0.472222,0.066667,0.5,0.035714,0.435835,0.532267,0.419902,0.615328


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


TrainOutput(global_step=387, training_loss=0.9977444592064357, metrics={'train_runtime': 190.5238, 'train_samples_per_second': 2.031, 'total_flos': 0, 'epoch': 3.0})

In [5]:
from pysentimiento import compute_metrics

# NOTE(review): this cell looks like a leftover from an earlier session. Its
# execution count (In [5]) is lower than that of the training cell above
# (In [13]), and its saved output reports a different set of metric columns.
# Run top-to-bottom it rebinds `trainer` and calls train() again on the same
# `model` object — confirm whether the cell should be removed.
trainer = Trainer(
    args=training_args,
    model=model,
    eval_dataset=dev_dataset,
    train_dataset=train_dataset,
    # Passed bare here (no id2label wrapper); this is the `compute_metrics`
    # bound by the import at the top of this cell.
    compute_metrics=compute_metrics,
)

trainer.train()


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.781631,0.653295,0.641696,0.642772,0.650757


TrainOutput(global_step=151, training_loss=0.8435698250271627)

In [14]:
# Score the test split with the current `trainer`; the returned metrics dict
# is displayed as the cell output.
trainer.evaluate(eval_dataset=test_dataset)

{'eval_loss': 1.021466612815857,
 'eval_others_f1': 0.6768328445747801,
 'eval_others_precision': 0.6564277588168373,
 'eval_others_recall': 0.698547215496368,
 'eval_joy_f1': 0.7212178877259753,
 'eval_joy_precision': 0.7005545286506469,
 'eval_joy_recall': 0.7431372549019608,
 'eval_sadness_f1': 0.6009389671361502,
 'eval_sadness_precision': 0.5871559633027523,
 'eval_sadness_recall': 0.6153846153846154,
 'eval_anger_f1': 0.34042553191489366,
 'eval_anger_precision': 0.5581395348837209,
 'eval_anger_recall': 0.24489795918367346,
 'eval_surprise_f1': 0.33333333333333337,
 'eval_surprise_precision': 0.43243243243243246,
 'eval_surprise_recall': 0.2711864406779661,
 'eval_disgust_f1': 0.5247524752475248,
 'eval_disgust_precision': 0.49765258215962443,
 'eval_disgust_recall': 0.5549738219895288,
 'eval_fear_f1': 0.14285714285714285,
 'eval_fear_precision': 0.75,
 'eval_fear_recall': 0.07894736842105263,
 'eval_macro_f1': 0.47719404101371765,
 'eval_macro_precision': 0.5974803566932678,
 