## Train BERT model

In this notebook we fine-tune a Spanish BERT model (`dccuchile/bert-base-spanish-wwm-cased`) for emotion detection.

In [9]:
%load_ext autoreload
%autoreload 2
import os
import pandas as pd
from datasets import Dataset, Value, ClassLabel, Features
from pysentimiento.preprocessing import preprocess_tweet
from pysentimiento.emotion import load_datasets


# The first two values returned by load_datasets() are bound as the train and dev
# splits; any further return values are collected in `args`.
train_dataset, dev_dataset, *args = load_datasets()

# Bind the first extra split under an explicit name so that the evaluation cell
# (`trainer.evaluate(test_dataset)`) does not rely on a variable left over from
# another kernel session.
# NOTE(review): assumes the third value returned by load_datasets() is the test
# split -- confirm against pysentimiento.emotion.load_datasets.
if args:
    test_dataset = args[0]


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [10]:
%load_ext autoreload
%autoreload 2
import os
from pysentimiento.tass import load_model
from pysentimiento.emotion.datasets import id2label, label2id

# Expose only GPU index 1 to CUDA for this kernel.
# NOTE(review): this is set before the model is loaded below; presumably it must
# happen before CUDA is first initialised to take effect -- confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

# Identifier of the pretrained checkpoint that is fine-tuned in this notebook.
base_model = "dccuchile/bert-base-spanish-wwm-cased"

# Load the sequence-classification model and its tokenizer, passing the emotion
# label mappings imported from pysentimiento.emotion.datasets.
model, tokenizer = load_model(base_model, 
    id2label=id2label, 
    label2id=label2id
)




The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Loading model dccuchile/bert-base-spanish-wwm-cased


Some weights of the model checkpoint at dccuchile/bert-base-spanish-wwm-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dccuchi

In [11]:
def tokenize(batch):
    """
    Tokenize the `text` field of a batch with the notebook-level `tokenizer`.

    Truncation is enabled and padding uses the 'max_length' strategy.
    Returns whatever the tokenizer call returns for the batch.
    """
    texts = batch['text']
    encoded = tokenizer(texts, truncation=True, padding='max_length')
    return encoded

# Batch sizes; they are used for the tokenization passes below and are also
# read by the training configuration cell.
batch_size, eval_batch_size = 32, 16

# Tokenize both splits in batches (one call to `tokenize` per batch).
train_dataset = train_dataset.map(tokenize, batch_size=batch_size, batched=True)
dev_dataset = dev_dataset.map(tokenize, batch_size=eval_batch_size, batched=True)


HBox(children=(FloatProgress(value=0.0, max=184.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))




In [12]:
def format_dataset(dataset):
    """
    Prepare a tokenized dataset for training/evaluation.

    Copies the `label` column into a `labels` column and switches the dataset
    output format to torch, restricted to the model input columns.

    Arguments:
    ---------
    dataset:
        Tokenized dataset exposing `map`, `set_format` and `column_names`.
        It must contain `label`, `input_ids` and `attention_mask`.

    Returns:
    --------
    The mapped dataset with its format set to torch over
    `input_ids`, `token_type_ids` (when present), `attention_mask`, `labels`.
    """
    dataset = dataset.map(lambda examples: {'labels': examples['label']})

    model_columns = ['input_ids', 'token_type_ids', 'attention_mask', 'labels']
    # Not every tokenizer emits segment ids; request the column only when the
    # dataset has it. The remaining columns stay mandatory, so a dataset that
    # was never tokenized still fails loudly in `set_format`.
    optional_columns = {'token_type_ids'}
    available = set(dataset.column_names)
    columns = [
        name for name in model_columns
        if name not in optional_columns or name in available
    ]

    dataset.set_format(type='torch', columns=columns)
    return dataset

# Add the `labels` column and switch both splits to torch-formatted output
# (train first, then dev).
train_dataset, dev_dataset = (
    format_dataset(split) for split in (train_dataset, dev_dataset)
)

HBox(children=(FloatProgress(value=0.0, max=5886.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=857.0), HTML(value='')))




In [15]:
import torch
from sklearn.metrics import precision_recall_fscore_support, accuracy_score


def compute_metrics(pred):
    """
    Compute per-class and macro-averaged classification metrics for Trainer.

    Arguments:
    ---------
    pred:
        Object exposing `label_ids` (gold class ids) and `predictions`
        (per-class scores; the arg-max over the last axis is used as the
        predicted class id).

    Returns:
    --------
    dict mapping metric names to plain Python floats:
        - "<category>_f1": one-vs-rest F1 for every entry of `id2label`
        - "macro_f1", "macro_precision", "macro_recall": unweighted means of
          the one-vs-rest scores over the entries of `id2label`
        - "f1": macro-averaged F1 as computed by scikit-learn
        - "acc": accuracy
    """
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)

    def _mean(values):
        # Return a plain float rather than a 0-dim tensor so the metric can be
        # logged and serialised like the other entries.
        return float(sum(values) / len(values)) if values else float("nan")

    ret = {}
    f1s, precs, recalls = [], [], []

    for i, cat in enumerate(id2label):
        # One-vs-rest view of the problem for category `i`
        cat_labels, cat_preds = labels == i, preds == i
        # zero_division=0 keeps the score at 0 when a class is never predicted
        # (or never present) without emitting a warning on every evaluation.
        precision, recall, f1, _ = precision_recall_fscore_support(
            cat_labels, cat_preds, average='binary', zero_division=0
        )

        f1s.append(f1)
        precs.append(precision)
        recalls.append(recall)

        ret[cat.lower() + "_f1"] = float(f1)

    ret["macro_f1"] = _mean(f1s)
    ret["macro_precision"] = _mean(precs)
    ret["macro_recall"] = _mean(recalls)

    _, _, f1, _ = precision_recall_fscore_support(
        labels, preds, average='macro', zero_division=0
    )
    ret["f1"] = float(f1)
    ret["acc"] = float(accuracy_score(labels, preds))

    return ret
    


In [17]:
from transformers import TrainingArguments, Trainer
epochs = 10

# Number of optimisation steps: a partially filled last batch still counts as
# one step, so round the per-epoch count up instead of flooring over examples.
# NOTE(review): assumes a single device and no gradient accumulation -- confirm.
steps_per_epoch = (len(train_dataset) + batch_size - 1) // batch_size
total_steps = epochs * steps_per_epoch
# Warm the learning rate up over the first 10% of the steps.
warmup_steps = total_steps // 10

training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=eval_batch_size,
    warmup_steps=warmup_steps,
    # Evaluate on the dev split at the end of every epoch; `do_eval` is kept
    # consistent with that instead of contradicting it.
    evaluation_strategy="epoch",
    do_eval=True,
    weight_decay=0.01,
    logging_dir='./logs',
    # Reload the best checkpoint after training, selected by `eval_f1`.
    load_best_model_at_end=True,
    metric_for_best_model="eval_f1",
)

trainer = Trainer(
    model=model,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=train_dataset,
    eval_dataset=dev_dataset,
)

trainer.train()


Epoch,Training Loss,Validation Loss,Others F1,Joy F1,Sadness F1,Anger F1,Surprise F1,Disgust F1,Fear F1,Macro F1,Macro Precision,Macro Recall,F1,Acc
1,No log,1.009661,0.747948,0.644501,0.78392,0.625698,0.269231,0.0,0.777778,0.549868,0.57972,0.534252,0.549868,0.691949
2,No log,1.266071,0.745721,0.668281,0.771429,0.674033,0.175439,0.0,0.631579,0.523783,0.524773,0.528689,0.523783,0.695449
3,0.241800,1.279564,0.730159,0.640669,0.785047,0.657005,0.285714,0.0,0.571429,0.524289,0.509538,0.544956,0.524289,0.679113
4,0.241800,1.712996,0.74424,0.626781,0.795918,0.604651,0.307692,0.0,0.631579,0.530123,0.540208,0.522808,0.530123,0.677946
5,0.241800,1.891702,0.730083,0.640625,0.769953,0.6125,0.222222,0.117647,0.736842,0.547125,0.557803,0.539774,0.547125,0.673279
6,0.081900,2.074566,0.74407,0.661578,0.77512,0.693467,0.25,0.0,0.5,0.517748,0.499124,0.545518,0.517748,0.690782
7,0.081900,2.178063,0.734839,0.663239,0.76555,0.613636,0.305085,0.0,0.5,0.511764,0.514932,0.512257,0.511764,0.683781
8,0.081900,2.243828,0.757192,0.633721,0.792079,0.635417,0.28125,0.0,0.526316,0.517996,0.524978,0.515093,0.517996,0.691949
9,0.016900,2.292483,0.735577,0.643243,0.759259,0.634409,0.30303,0.0,0.5,0.510788,0.504568,0.518582,0.510788,0.677946
10,0.016900,2.293652,0.742317,0.650273,0.776119,0.641304,0.277778,0.0,0.47619,0.50914,0.503609,0.515749,0.50914,0.682614


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


TrainOutput(global_step=1840, training_loss=0.09296004240927488, metrics={'train_runtime': 822.315, 'train_samples_per_second': 2.238, 'total_flos': 0, 'epoch': 10.0})

In [5]:
# NOTE(review): this import rebinds `compute_metrics`, shadowing the function of
# the same name defined earlier in this notebook. The execution count of this
# cell is also lower than that of the cells above it, so it may be a leftover
# from a different session -- confirm whether it should be kept.
from pysentimiento import compute_metrics

# Build a new Trainer around the same `model` object and training arguments,
# now using the imported metrics function.
trainer = Trainer(
    model=model,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=train_dataset,
    eval_dataset=dev_dataset,
)

# Starts another training run on `model`.
trainer.train()


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.781631,0.653295,0.641696,0.642772,0.650757


TrainOutput(global_step=151, training_loss=0.8435698250271627)

In [7]:
# Evaluate on `test_dataset`; the returned metrics dict is shown as the cell output.
# NOTE(review): `test_dataset` has to be tokenized and formatted the same way as
# `dev_dataset` (tokenize + format_dataset) before this call -- confirm that an
# earlier cell defines and prepares it.
trainer.evaluate(test_dataset)

{'eval_loss': 0.7347931861877441,
 'eval_accuracy': 0.686398678414097,
 'eval_f1': 0.6673146659667489,
 'eval_precision': 0.6676487102833413,
 'eval_recall': 0.6695717138336971,
 'epoch': 1.0}

In [None]:
# Save the model and the tokenizer into the same directory.
# NOTE(review): the directory name says "sentiment-analysis" while this notebook
# trains an emotion-detection model -- confirm the target path so that an
# existing model is not overwritten by mistake.
path = "../models/beto-sentiment-analysis"
model.save_pretrained(path)
tokenizer.save_pretrained(path)