<a href="https://colab.research.google.com/github/mkacki98/emotion_recogniser/blob/main/emotion_recogniser.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
!pip install datasets

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


This notebook fine-tunes a pre-trained BERT model for emotion classification (transfer learning), using the Hugging Face `transformers` and `datasets` libraries together with PyTorch.



In [None]:
import seaborn as sns
import numpy as np
import pandas as pd

from sklearn import metrics
from datasets import load_dataset

import transformers
from transformers import BertTokenizer, BertModel

import torch
from torch import cuda
from torch.utils.data import Dataset, DataLoader

In [None]:
# Run on the GPU when CUDA reports one as available, otherwise on the CPU.
if cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [None]:
# Run the `nvidia-smi` command-line tool in a shell to inspect the GPU of this runtime.
! nvidia-smi

# Get the data loaders for fine-tuning



In [None]:
# Load the "go_emotions" corpus through the Hugging Face `datasets` library.
# Later cells index the result by split name ('train', 'test', 'validation').
dataset_emotions = load_dataset(path="go_emotions")




  0%|          | 0/3 [00:00<?, ?it/s]

In [None]:

def pick_label(data):
  """Collapse a (possibly) multi-label example to a single label.

  Reads ``data['labels']`` and returns its first entry under the ``'label'``
  key; any further labels are ignored.
  """
  first_label = data['labels'][0]
  return dict(label=first_label)

def one_hot_encode(data, num_classes=28):
  """One-hot encode the integer class id stored in ``data['label']``.

  Args:
    data: mapping with an integer ``'label'`` entry.
    num_classes: length of the one-hot vector. Defaults to 28, the output
      width of the classifier head defined in this notebook
      (``Linear(768, 28)``).

  Returns:
    dict with a single ``'label'`` key holding a 1-D integer tensor of
    length ``num_classes`` that is 1 at the class id and 0 elsewhere.
  """
  class_id = torch.tensor(data['label'])
  return {'label': torch.nn.functional.one_hot(class_id, num_classes)}

# Reduce every example to a single label, one-hot encode that label, then
# drop the original multi-label 'labels' column.
dataset_emotions = (
    dataset_emotions
    .map(pick_label)
    .map(one_hot_encode)
    .remove_columns(['labels'])
)




In [None]:
class CustomDataset(Dataset):
    """Torch dataset that tokenizes one split of a text-classification corpus.

    Every item is a dict of tensors: ``'ids'``, ``'mask'`` and
    ``'token_type_ids'`` (each padded or truncated to exactly ``max_len``
    entries, so items can be stacked into batches) plus ``'target'``.

    Args:
        dataset: mapping from split name to a split object; the split must
            expose its columns via ``split['text']`` and ``split['label']``
            and support ``len()``.
        tokenizer: callable tokenizer returning ``'input_ids'``,
            ``'attention_mask'`` and ``'token_type_ids'`` for a string.
        max_len: fixed sequence length of every encoded example.
        data_type: name of the split to read (e.g. ``'train'``).
    """

    def __init__(self, dataset, tokenizer, max_len, data_type):
        self.tokenizer = tokenizer
        self.data = dataset[data_type]
        self.text = self.data['text']
        self.targets = self.data['label']
        self.max_len = max_len

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        # Collapse any run of whitespace into a single space.
        text = " ".join(str(self.text[index]).split())

        # padding='max_length' replaces the deprecated pad_to_max_length=True.
        # truncation=True is required as well: without it a text longer than
        # max_len yields an over-long encoding and batching fails because the
        # per-item tensors no longer share one shape.
        inputs = self.tokenizer(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            return_token_type_ids=True,
        )

        return {
            'ids': torch.tensor(inputs['input_ids'], dtype=torch.long),
            'mask': torch.tensor(inputs['attention_mask'], dtype=torch.long),
            'token_type_ids': torch.tensor(inputs['token_type_ids'], dtype=torch.long),
            'target': torch.tensor(self.targets[index], dtype=torch.float),
        }

In [None]:
# Token length every example is padded to by CustomDataset.
MAX_LEN = 200

# Batch sizes for the three data loaders.
TRAIN_BATCH_SIZE, TEST_BATCH_SIZE, VALID_BATCH_SIZE = 64, 64, 32

# Tokenizer for the same 'bert-base-uncased' checkpoint the model loads.
bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [None]:
# Wrap each split of the corpus in a CustomDataset sharing one tokenizer and
# one maximum sequence length.
training_data, testing_data, validation_data = (
    CustomDataset(
        dataset=dataset_emotions,
        tokenizer=bert_tokenizer,
        max_len=MAX_LEN,
        data_type=split_name,
    )
    for split_name in ('train', 'test', 'validation')
)

In [None]:
# Only the training loader shuffles: a fresh sample order per epoch helps
# optimisation, whereas the metrics computed on the test and validation
# splits do not depend on order, so those loaders keep a fixed, reproducible
# iteration order.
train_params = {'batch_size': TRAIN_BATCH_SIZE,
                'shuffle': True,
                'num_workers': 2
                }

test_params = {'batch_size': TEST_BATCH_SIZE,
                'shuffle': False,
                'num_workers': 2
                }

val_params = {'batch_size': VALID_BATCH_SIZE,
                'shuffle': False,
                'num_workers': 2
                }

training_loader = DataLoader(training_data, **train_params)
testing_loader = DataLoader(testing_data, **test_params)
validation_loader = DataLoader(validation_data, **val_params)

# Define a model

In [None]:
class BERTClass(torch.nn.Module):
    """'bert-base-uncased' encoder followed by dropout and a linear head.

    ``forward`` returns the raw output of the 768 -> 28 linear layer; no
    activation is applied here.
    """

    def __init__(self):
        super().__init__()
        # return_dict=False makes the encoder return a plain tuple, which
        # forward() unpacks positionally.
        self.l1 = transformers.BertModel.from_pretrained('bert-base-uncased', return_dict=False)
        self.l2 = torch.nn.Dropout(0.3)
        self.l3 = torch.nn.Linear(768, 28)

    def forward(self, ids, mask, token_type_ids):
        encoder_outputs = self.l1(ids, attention_mask=mask, token_type_ids=token_type_ids)
        # Only the second element of the encoder's two-item output is used
        # (presumably BERT's pooled output — confirm against the
        # transformers documentation).
        _, second_output = encoder_outputs
        return self.l3(self.l2(second_output))

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BERTClass(
  (l1): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    

# Train

In [None]:
def loss_fn(outputs, targets):
    """Binary cross-entropy (with logits) between model outputs and targets."""
    criterion = torch.nn.BCEWithLogitsLoss()
    return criterion(outputs, targets)

def train_epoch(epoch, optimizer):
    """Run one pass over ``training_loader`` and return the mean batch loss.

    Relies on the notebook-level ``model``, ``training_loader``, ``device``
    and ``loss_fn``.

    Args:
        epoch: index of the current epoch. Not used by the body; kept so
            existing callers keep working.
        optimizer: torch optimizer; ``zero_grad`` and ``step`` are called
            once per batch.

    Returns:
        float: the average of the per-batch losses over the whole epoch, or
        0.0 when the loader yields no batches.
    """
    model.train()

    running_loss = 0.
    n_batches = 0

    for data in training_loader:

        # Get features
        ids = data['ids'].to(device, dtype = torch.long)
        mask = data['mask'].to(device, dtype = torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)

        # Get labels
        targets = data['target'].to(device, dtype = torch.float)

        # Clear gradients left over from the previous batch
        optimizer.zero_grad()

        # Get predictions, then the loss and its gradients
        outputs = model(ids, mask, token_type_ids)
        loss = loss_fn(outputs, targets)
        loss.backward()

        # Adjust weights
        optimizer.step()

        running_loss += loss.item()
        n_batches += 1

    # Average over every batch actually seen. The earlier version only set
    # its return value on each 1000th batch and never reset the running sum,
    # so an epoch shorter than 1000 batches always reported 0.0.
    return running_loss / n_batches if n_batches else 0.0
    

def plot_training_curve(losses):
  """Draw the per-epoch training loss as a line chart.

  Args:
    losses: sequence of loss values, one per epoch, in epoch order.

  Returns:
    The Axes object returned by ``sns.lineplot``, so callers can tweak the
    figure further. The original body called ``fig.show()`` on a name that
    was never defined, which raised NameError; in a notebook the figure is
    rendered automatically once the cell finishes.
  """
  epochs = list(range(len(losses)))
  ax = sns.lineplot(x=epochs, y=list(losses))
  ax.set(xlabel='Epoch', ylabel='Training loss', title='Training curve')
  return ax

def train(EPOCHS_N, LEARNING_RATE, EPSILON=1e-08, WEIGHT_DECAY=0):
  """Fine-tune the notebook-level ``model`` with Adam and plot the loss curve.

  Args:
    EPOCHS_N: number of passes over the training data.
    LEARNING_RATE: Adam learning rate.
    EPSILON: Adam ``eps`` term. Defaults to 1e-08 (torch's own default) so
      callers that pass only the first two arguments no longer fail with
      TypeError.
    WEIGHT_DECAY: Adam ``weight_decay``. Defaults to 0 (torch's own default).
  """
  optimizer = torch.optim.Adam(
      params=model.parameters(),
      lr=LEARNING_RATE,
      eps=EPSILON,
      weight_decay=WEIGHT_DECAY,
  )

  # One loss value per epoch, as reported by train_epoch.
  losses = [train_epoch(epoch, optimizer) for epoch in range(EPOCHS_N)]

  plot_training_curve(losses)

Epoch: 0, Loss:  0.17028836905956268
Epoch: 0, Loss:  0.07216055691242218


In [None]:
model = BERTClass()
model.to(device)

# EPSILON and WEIGHT_DECAY are spelled out (using torch.optim.Adam's default
# values) so the call matches train()'s four-parameter signature.
# NOTE(review): a learning rate of 0.5 is several orders of magnitude above
# the 1e-05 used in the next run — presumably a deliberate experiment; confirm.
train(EPOCHS_N = 10, LEARNING_RATE = 0.5, EPSILON = 1e-08, WEIGHT_DECAY = 0)

In [None]:
# Start from a freshly initialised classifier and move it to the target device.
model = BERTClass().to(device)

# Three epochs of fine-tuning with a small learning rate.
train(
    EPOCHS_N=3,
    LEARNING_RATE=1e-05,
    EPSILON=1e-08,
    WEIGHT_DECAY=0,
)

In [None]:
model = BERTClass()
model.to(device)

# EPSILON and WEIGHT_DECAY are spelled out (using torch.optim.Adam's default
# values) so the call matches train()'s four-parameter signature.
train(EPOCHS_N = 3, LEARNING_RATE = 1e-05, EPSILON = 1e-08, WEIGHT_DECAY = 0)

# Validation


In [None]:
def evaluate(epoch, data_loader):
    """Run the notebook-level ``model`` over ``data_loader`` without gradients.

    Switches the model to eval mode, feeds every batch through it and
    collects the sigmoid of the model outputs alongside the batch targets.

    Args:
        epoch: not used by the body.
        data_loader: iterable of batches, each a dict with 'ids', 'mask',
            'token_type_ids' and 'target' tensors.

    Returns:
        ``(outputs, targets)``: two lists built by extending with each
        batch's values converted to nested Python lists.
    """
    model.eval()
    collected_outputs, collected_targets = [], []

    with torch.no_grad():
        for batch in data_loader:
            ids = batch['ids'].to(device, dtype=torch.long)
            mask = batch['mask'].to(device, dtype=torch.long)
            token_type_ids = batch['token_type_ids'].to(device, dtype=torch.long)
            targets = batch['target'].to(device, dtype=torch.float)

            logits = model(ids, mask, token_type_ids)
            probabilities = torch.sigmoid(logits)

            collected_targets += targets.cpu().detach().numpy().tolist()
            collected_outputs += probabilities.cpu().detach().numpy().tolist()

    return collected_outputs, collected_targets

In [None]:
# Evaluate once on the validation split. The earlier `for epoch in
# range(EPOCHS)` wrapper referenced a name that is never defined in this
# notebook (NameError on a clean run) and would only have repeated the same
# evaluation, since the model does not change between passes.
# evaluate() does not use its first argument; 0 is passed as a placeholder.
outputs, targets = evaluate(0, validation_loader)

# Turn the sigmoid outputs into hard 0/1 predictions at a 0.5 threshold.
outputs = np.array(outputs) >= 0.5

accuracy = metrics.accuracy_score(targets, outputs)

f1_score_micro = metrics.f1_score(targets, outputs, average='micro')
f1_score_macro = metrics.f1_score(targets, outputs, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.3246729316381058
F1 Score (Micro) = 0.4500063848806027
F1 Score (Macro) = 0.1875923477420955


# Testing

In [None]:
# Evaluate once on the test split. The earlier `for epoch in range(EPOCHS)`
# wrapper referenced a name that is never defined in this notebook
# (NameError on a clean run) and would only have repeated the same
# evaluation, since the model does not change between passes.
# evaluate() does not use its first argument; 0 is passed as a placeholder.
outputs, targets = evaluate(0, testing_loader)

# Turn the sigmoid outputs into hard 0/1 predictions at a 0.5 threshold.
outputs = np.array(outputs) >= 0.5

accuracy = metrics.accuracy_score(targets, outputs)

f1_score_micro = metrics.f1_score(targets, outputs, average='micro')
f1_score_macro = metrics.f1_score(targets, outputs, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

# Summary

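On the validation split, the fine-tuned model reached an accuracy of about 0.32, a micro-averaged F1 of about 0.45 and a macro-averaged F1 of about 0.19 (see the output in the Validation section). **TODO:** add the test-split results and a short interpretation of these numbers.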
