## Train BERT model

In this notebook we will train a BERT model for Sentiment Analysis in English

In [1]:
%load_ext autoreload
%autoreload 2
import os
import pandas as pd
from datasets import Dataset, Value, ClassLabel, Features
from pysentimiento.preprocessing import preprocess_tweet
from pysentimiento.semeval import load_datasets


train_dataset, dev_dataset, test_dataset = load_datasets(
    preprocessing_args={"user_token": "@USER", "url_token": "HTTPURL"}
)


39716 9929 20632


In [2]:
%load_ext autoreload
%autoreload 2
import os
from pysentimiento.tass import load_model
from pysentimiento.emotion.datasets import id2label, label2id

os.environ["CUDA_VISIBLE_DEVICES"] = "1"

base_model = "vinai/bertweet-base"

model, tokenizer = load_model(base_model, 
    id2label=id2label, 
    label2id=label2id
)




The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Loading model vinai/bertweet-base


Some weights of the model checkpoint at vinai/bertweet-base were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.bias', 'roberta.pooler.dense.bias', 'roberta.pooler.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.decoder.bias', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: 

In [3]:
def tokenize(batch):
    return tokenizer(batch['text'], padding='max_length', truncation=True)

batch_size = 32

eval_batch_size = 16

train_dataset = train_dataset.map(tokenize, batched=True, batch_size=batch_size)
dev_dataset = dev_dataset.map(tokenize, batched=True, batch_size=eval_batch_size)
test_dataset = test_dataset.map(tokenize, batched=True, batch_size=eval_batch_size)


HBox(children=(FloatProgress(value=0.0, max=1242.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=621.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1290.0), HTML(value='')))




In [4]:
def format_dataset(dataset):
    dataset = dataset.map(lambda examples: {'labels': examples['label']})
    dataset.set_format(type='torch', columns=['input_ids', 'token_type_ids', 'attention_mask', 'labels'])
    return dataset

train_dataset = format_dataset(train_dataset)
dev_dataset = format_dataset(dev_dataset)
test_dataset = format_dataset(test_dataset)

HBox(children=(FloatProgress(value=0.0, max=39716.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=9929.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=20632.0), HTML(value='')))




In [5]:
train_dataset = train_dataset.select(range(2000))
dev_dataset = dev_dataset.select(range(1000))

In [7]:
from transformers import TrainingArguments, Trainer
from pysentimiento.metrics import compute_metrics
from pysentimiento.semeval.datasets import id2label

epochs = 3

total_steps = (epochs * len(train_dataset)) // batch_size
warmup_steps = total_steps // 10
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=eval_batch_size,
    warmup_steps=warmup_steps,
    evaluation_strategy="epoch",
    do_eval=False,
    weight_decay=0.01,
    logging_dir='./logs',
    load_best_model_at_end=True,
    metric_for_best_model="macro_f1",
)

trainer = Trainer(
    model=model,
    args=training_args,
    compute_metrics=lambda x: compute_metrics(x, id2label=id2label),
    train_dataset=train_dataset,
    eval_dataset=dev_dataset,
)

trainer.train()


Epoch,Training Loss,Validation Loss,Neg F1,Neg Precision,Neg Recall,Neu F1,Neu Precision,Neu Recall,Pos F1,Pos Precision,Pos Recall,Macro F1,Macro Precision,Macro Recall,Acc
1,No log,0.825749,0.081395,1.0,0.042424,0.681954,0.596859,0.795349,0.753939,0.740476,0.767901,0.505763,0.779112,0.535225,0.66
2,No log,0.738455,0.620253,0.649007,0.593939,0.649874,0.708791,0.6,0.759551,0.696907,0.834568,0.676559,0.684902,0.676169,0.694
3,No log,0.738773,0.639053,0.624277,0.654545,0.658625,0.684211,0.634884,0.753902,0.733645,0.775309,0.68386,0.680711,0.688246,0.695


TrainOutput(global_step=189, training_loss=0.8189254034133184, metrics={'train_runtime': 120.4721, 'train_samples_per_second': 1.569, 'total_flos': 0, 'epoch': 3.0})

In [None]:
trainer.evaluate(test_dataset)

{'eval_loss': 0.7367324233055115,
 'eval_neg_f1': 0.6306330837304288,
 'eval_neg_precision': 0.5629557608167234,
 'eval_neg_recall': 0.7168059424326834,
 'eval_neu_f1': 0.6978179882916445,
 'eval_neu_precision': 0.7760416666666666,
 'eval_neu_recall': 0.6339199381164184,
 'eval_pos_f1': 0.7457201401282305,
 'eval_pos_precision': 0.6990086741016109,
 'eval_pos_recall': 0.7991216886244511,
 'eval_macro_f1': 0.691390335559845,
 'eval_macro_precision': 0.679335355758667,
 'eval_macro_recall': 0.7166158556938171,
 'eval_acc': 0.7034218689414502,
 'eval_runtime': 110.237,
 'eval_samples_per_second': 187.16,
 'epoch': 3.0}