<a href="https://colab.research.google.com/github/kaaath-i/emotion-classification-bert-models/blob/main/Final_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **Final Project**


**Task**: Sentiment Analysis - Emotion Classification

*Dataset* : https://huggingface.co/datasets/mteb/emotion

*Model 1* : https://huggingface.co/distilbert/distilbert-base-cased

*Model 2* : https://huggingface.co/FacebookAI/roberta-base

In [None]:
!pip install transformers
!pip install datasets
!pip install evaluate
!pip install bertviz transformers
!pip install accelerate --upgrade

In [None]:
# Mount Google Drive at /content/drive; the training checkpoints and the
# TensorBoard embedding logs below are written under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

### Tokenizer

In [None]:
from transformers import AutoTokenizer

# Load one tokenizer per backbone (DistilBERT first, RoBERTa second) and print
# each tokenizer's configuration right after it has been loaded.
loaded_tokenizers = []
for checkpoint_name in ("distilbert/distilbert-base-cased", "FacebookAI/roberta-base"):
    current_tokenizer = AutoTokenizer.from_pretrained(checkpoint_name)
    print(current_tokenizer)
    loaded_tokenizers.append(current_tokenizer)

# tokenizer1 -> distilbert-base-cased, tokenizer2 -> roberta-base
tokenizer1, tokenizer2 = loaded_tokenizers

### Models

In [None]:
from transformers import AutoModelForSequenceClassification

# Build a sequence-classification model with a 6-way output on top of each
# backbone: model1 -> distilbert-base-cased, model2 -> roberta-base.
# Both names are re-assigned later, right after set_seed(224), in the
# corresponding training cells.
model1, model2 = (
    AutoModelForSequenceClassification.from_pretrained(checkpoint_name, num_labels=6)
    for checkpoint_name in ('distilbert/distilbert-base-cased', 'FacebookAI/roberta-base')
)

### Dataset

In [None]:
from datasets import load_dataset, DatasetDict

# Download the emotion dataset from the Hugging Face Hub; the 'train',
# 'validation' and 'test' splits are sub-sampled further down.
ds = load_dataset("mteb/emotion")

def truncate(example):
    """Return a copy of ``example`` with whitespace in its text normalised.

    Despite the name, nothing is cut off: the text is split on any run of
    whitespace and re-joined with single spaces, which also strips leading and
    trailing whitespace. Only the 'text', 'label' and 'label_text' fields are
    carried over into the returned dict.
    """
    normalized_text = " ".join(example['text'].split())
    return dict(
        text=normalized_text,
        label=example['label'],
        label_text=example['label_text'],
    )

# Carve a small, reproducible subset out of every split: shuffle with a fixed
# seed, keep the first N rows, then normalise whitespace in each text.
# Maps the name used in this notebook -> (source split, number of rows kept).
subset_plan = {
    'train': ('train', 800),
    'val': ('validation', 100),
    'test': ('test', 100),
}

small_ds = DatasetDict({
    split_name: ds[source_split].shuffle(seed=24).select(range(n_rows)).map(truncate)
    for split_name, (source_split, n_rows) in subset_plan.items()
})

In [None]:
# Display the sub-sampled DatasetDict (train / val / test).
small_ds

In [None]:
# Peek at the first ten rows of the training subset.
print(small_ds["train"][:10])

### Training #1: distilbert-base-cased

From above:

```
tokenizer1 = AutoTokenizer.from_pretrained("distilbert/distilbert-base-cased")

model1 = AutoModelForSequenceClassification.from_pretrained('distilbert/distilbert-base-cased', num_labels=6)
```



In [None]:
from transformers import DataCollatorWithPadding

def tokenize_function1(examples):
    """Tokenise a batch of examples with the DistilBERT tokenizer.

    Args:
        examples: a batch from ``Dataset.map(..., batched=True)``; only its
            "text" column is read.

    Returns:
        The tokenizer output for the batch, truncated to the model's maximum
        length.

    Padding is deliberately not applied here. The ``DataCollatorWithPadding``
    created alongside this function pads every batch to its own longest
    sequence at collation time, so padding inside ``map`` only stored extra pad
    tokens that the attention mask hides from the model anyway.
    """
    return tokenizer1(examples["text"], truncation=True)

# Tokenise every split of small_ds with tokenizer1; the mapped function is
# called with batches of 10 examples.
small_tokenized_ds = small_ds.map(tokenize_function1, batched=True, batch_size=10)
# Collator that pads each batch with tokenizer1's padding settings.
# NOTE(review): `small_tokenized_ds` and `data_collator` are re-assigned in the
# RoBERTa section further down, so this cell must be re-run before re-running
# any DistilBERT cell that reads them.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer1)

In [None]:
import random
import numpy as np
import torch

def set_seed(seed=224):
    """Seed Python's ``random``, NumPy and PyTorch with the same value.

    Args:
        seed: integer seed shared by all generators (defaults to 224).

    The CUDA generators are seeded as well, but only when a CUDA device is
    available.
    """
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)

In [None]:
import numpy as np
import evaluate
from transformers import TrainingArguments, Trainer
from transformers import AutoModelForSequenceClassification

# Seed every RNG first, then (re)build the model so that any randomly
# initialised weights (presumably the new classification head) are the same on
# each run. This replaces the `model1` instance created in the "Models" section.
set_seed(224)
model1 = AutoModelForSequenceClassification.from_pretrained('distilbert/distilbert-base-cased', num_labels=6)

# Accuracy metric loaded through the `evaluate` library; used by compute_metrics.
accuracy = evaluate.load("accuracy")

# Fine-tuning configuration for DistilBERT: 3 epochs, batches of 10, with an
# evaluation and a checkpoint at the end of every epoch.
arguments1 = TrainingArguments(
    output_dir="/content/drive/MyDrive/Colab Notebooks/distilbert_checkpoints",
    per_device_train_batch_size=10,
    per_device_eval_batch_size=10,
    logging_steps=5,
    num_train_epochs=3,
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=5e-5,
    weight_decay=0.01,
    load_best_model_at_end=True,
    # Rank checkpoints by validation accuracy (the metric compute_metrics
    # returns). Without this the Trainer ranks by validation loss, which can
    # pick a different epoch than the best-accuracy checkpoint analysed in the
    # evaluation cells, so the reported test score and the per-class analysis
    # would describe different weights.
    metric_for_best_model="accuracy",
    greater_is_better=True,
    report_to='none',
    seed=224
)

def compute_metrics(eval_pred):
    """Convert the Trainer's evaluation output into an accuracy score.

    Args:
        eval_pred: a pair ``(logits, labels)`` as handed over by the Trainer.

    Returns:
        Whatever ``accuracy.compute`` returns for the arg-max class of each
        logit row compared against the reference labels.
    """
    raw_logits, gold_labels = eval_pred
    predicted_ids = np.argmax(raw_logits, axis=-1)
    return accuracy.compute(predictions=predicted_ids, references=gold_labels)

# Wire model, hyper-parameters, data and metric together.
# Training uses the 'train' subset; the per-epoch evaluation uses 'val'.
trainer1 = Trainer(
    model=model1,
    args=arguments1,
    train_dataset=small_tokenized_ds['train'],
    eval_dataset=small_tokenized_ds['val'],
    processing_class=tokenizer1,
    data_collator=data_collator,
    compute_metrics=compute_metrics
)

In [None]:
# Fine-tune DistilBERT. With save_strategy="epoch" a checkpoint is written to
# arguments1.output_dir at the end of every epoch.
trainer1.train()

Epoch,Training Loss,Validation Loss,Accuracy
1,0.9983,0.868674,0.7
2,0.4069,0.457512,0.86
3,0.3553,0.422741,0.82


TrainOutput(global_step=240, training_loss=0.7374678904811541, metrics={'train_runtime': 34.732, 'train_samples_per_second': 69.101, 'train_steps_per_second': 6.91, 'total_flos': 38345837680800.0, 'train_loss': 0.7374678904811541, 'epoch': 3.0})

#### Evaluation

In [None]:
# Score the model currently held by trainer1 on the held-out test subset.
# Because load_best_model_at_end=True, this is the checkpoint the Trainer
# ranked best, not necessarily the last epoch.
result1 = trainer1.evaluate(small_tokenized_ds['test'])
print(result1)

{'eval_loss': 0.40671998262405396, 'eval_accuracy': 0.89, 'eval_runtime': 0.23, 'eval_samples_per_second': 434.721, 'eval_steps_per_second': 43.472, 'epoch': 3.0}


In [None]:
from sklearn.metrics import accuracy_score

# Analyse the checkpoint the Trainer itself recorded as best, so the per-class
# numbers describe the same weights that `trainer1.evaluate` scored. The literal
# path is kept only as a fallback for the case where no best checkpoint was
# recorded on the trainer.
best_checkpoint1 = (
    trainer1.state.best_model_checkpoint
    or "/content/drive/MyDrive/Colab Notebooks/distilbert_checkpoints/checkpoint-160"
)
fine_tuned_model1 = AutoModelForSequenceClassification.from_pretrained(best_checkpoint1)
fine_tuned_model1.eval()  # inference only: make sure dropout is disabled

model_inputs = tokenizer1(
    list(small_tokenized_ds['test']['text']),
    padding=True,
    truncation=True,
    return_tensors='pt'
)

# Prediction needs no gradients; skipping them avoids building an autograd
# graph for the whole test subset.
with torch.no_grad():
    outputs = fine_tuned_model1(**model_inputs)
predictions = torch.argmax(outputs.logits, dim=-1).numpy()
true_labels = small_tokenized_ds['test']['label']
true_array = np.array(true_labels)  # converted once instead of once per class

# Index i of this list is the emotion encoded by label id i.
label_names = ['sadness', 'joy', 'love', 'anger', 'fear', 'surprise']

print("DistilBERT - Per-Class Accuracy:")
print("-"*40)

# "Accuracy" here is computed only over the examples whose TRUE label is the
# given class, i.e. the share of that class that was predicted correctly.
for i, emotion in enumerate(label_names):
    mask = true_array == i
    n_samples = int(mask.sum())

    if n_samples > 0:
        class_acc = accuracy_score(true_array[mask], predictions[mask])
        print(f"{emotion:10s}: {class_acc:.2%} ({n_samples} samples)")
    else:
        print(f"{emotion:10s}: No samples in test set")

DistilBERT - Per-Class Accuracy:
----------------------------------------
sadness   : 97.06% (34 samples)
joy       : 91.67% (36 samples)
love      : 90.91% (11 samples)
anger     : 66.67% (12 samples)
fear      : 100.00% (6 samples)
surprise  : 0.00% (1 samples)


In [None]:
import pandas as pd

# Error analysis for DistilBERT: predict the whole test subset in one batch and
# list every example whose predicted emotion differs from the true one.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

fine_tuned_model1.to(device)
fine_tuned_model1.eval()

test_split = small_ds['test']
texts = list(test_split['text'])
true_labels = list(test_split['label'])

inputs = tokenizer1(texts, return_tensors='pt', padding=True, truncation=True).to(device)

with torch.no_grad():
    outputs = fine_tuned_model1(**inputs)
    predictions = torch.argmax(outputs.logits, dim=-1).cpu().numpy()

# Never shorten cell contents when the frame is printed.
pd.set_option('display.max_colwidth', None)

# One record per test example; texts longer than 100 characters are cut and
# marked with an ellipsis.
rows = []
for text, true, pred in zip(texts, true_labels, predictions):
    rows.append({
        'Text': text if len(text) <= 100 else text[:100] + "...",
        'True_Label': label_names[true],
        'Predicted_Label': label_names[pred],
        'Correct': '✓' if true == pred else '✗',
    })
results_df = pd.DataFrame(rows, columns=['Text', 'True_Label', 'Predicted_Label', 'Correct'])

errors_df = results_df.loc[results_df['Correct'] == '✗']
print("\nERRORS ONLY:")
print("-"*110)
print(errors_df)


ERRORS ONLY:
--------------------------------------------------------------------------------------------------------------
                                                                                                       Text  \
29                                                                         i feel so cold a href http irish   
36                                           im able to refine my poses and concepts without feeling rushed   
41  i used to be able to hang around talk with the cashier when i was putting away my money now i feel r...   
53                                       i feel very strongly about supporting charities that help children   
56                        i was somewhat coerced into this blog review so i feel a bit rushed and flustered   
70                                 i feel no shame whatsoever in longing for iron man at my local cineworld   
72  i feel like i know who most of them are by now and am starting to develop my likes and dislike

#### Visualization

In [None]:
from torch.utils.tensorboard import SummaryWriter
import re
import torch
import tensorflow as tf
import tensorboard as tb
import os

In [None]:
# Export the [CLS] representation of every test sentence to TensorBoard's
# embedding projector, once per (checkpoint, layer) combination.

# Checkpoints to visualise, keyed by the epoch after which each was saved
# (the run above took 240 steps for 3 epochs, i.e. 80 steps per epoch).
checkpoints = {
    1: "/content/drive/MyDrive/Colab Notebooks/distilbert_checkpoints/checkpoint-80",
    2: "/content/drive/MyDrive/Colab Notebooks/distilbert_checkpoints/checkpoint-160"
}

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
base_path = "/content/drive/MyDrive/Colab Notebooks/results_vis_distilbert"

# Indices into the model's `hidden_states` tuple. Per the transformers docs,
# entry 0 is the embedding output and entry k the output of layer k, so for the
# 6-layer DistilBERT these are: embeddings, middle layer, last layer.
important_layers = [0, 3, 6]

# Everything below is identical for all checkpoints and layers, so it is
# computed once: the test texts/labels, the projector metadata and the inputs.
test_split = small_tokenized_ds['test']
test_texts = list(test_split['text'])
test_labels = list(test_split['label'])
metadata = [[text, str(label)] for text, label in zip(test_texts, test_labels)]

model_inputs = tokenizer1(test_texts, padding=True, truncation=True, return_tensors='pt').to(device)

# The [CLS] token (id 101 for this vocabulary) is expected to be the first
# token of every sequence; verify that instead of scanning for it token by
# token, so vectors and metadata can never get out of step.
cls_token_id = tokenizer1.cls_token_id
assert bool((model_inputs['input_ids'][:, 0] == cls_token_id).all()), \
    "expected [CLS] at position 0 of every tokenised sentence"

for epoch, checkpoint_path in checkpoints.items():
    fine_tuned_model1 = AutoModelForSequenceClassification.from_pretrained(checkpoint_path)
    fine_tuned_model1.to(device)
    fine_tuned_model1.eval()

    # Inference only: without no_grad every hidden state would keep its
    # autograd graph alive on the device.
    with torch.no_grad():
        outputs = fine_tuned_model1(**model_inputs, output_hidden_states=True)

    for layer in important_layers:
        log_dir = f"{base_path}/epoch_{epoch}/layer_{layer}"
        os.makedirs(log_dir, exist_ok=True)

        # (num_examples, hidden_size): the position-0 vector of each sentence.
        cls_vectors = outputs['hidden_states'][layer][:, 0, :]

        with SummaryWriter(log_dir) as writer:
            writer.add_embedding(cls_vectors.cpu(), metadata=metadata, metadata_header=['Text', 'Emotion'])

print("Done!")

Done!


### Training #2: roberta-base

From above:


```
tokenizer2 = AutoTokenizer.from_pretrained("FacebookAI/roberta-base")

model2 = AutoModelForSequenceClassification.from_pretrained('FacebookAI/roberta-base', num_labels=6)
```



In [None]:
def tokenize_function2(examples):
    """Tokenise a batch of examples with the RoBERTa tokenizer.

    Args:
        examples: a batch from ``Dataset.map(..., batched=True)``; only its
            "text" column is read.

    Returns:
        The tokenizer output for the batch, truncated to the model's maximum
        length.

    Padding is deliberately not applied here. The ``DataCollatorWithPadding``
    created alongside this function pads every batch to its own longest
    sequence at collation time, so padding inside ``map`` only stored extra pad
    tokens that the attention mask hides from the model anyway.
    """
    return tokenizer2(examples["text"], truncation=True)

# Re-tokenise every split of small_ds, this time with tokenizer2.
# NOTE(review): this overwrites the DistilBERT versions of `small_tokenized_ds`
# and `data_collator`; from here on both names refer to RoBERTa inputs.
small_tokenized_ds = small_ds.map(tokenize_function2, batched=True, batch_size=10)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer2)

In [None]:
import numpy as np
import evaluate
from transformers import TrainingArguments, Trainer
from transformers import AutoModelForSequenceClassification

# Seed every RNG first, then (re)build the model so that its newly initialised
# classifier weights are the same on each run. This replaces the `model2`
# instance created in the "Models" section.
set_seed(224)
model2 = AutoModelForSequenceClassification.from_pretrained('FacebookAI/roberta-base', num_labels=6)

# Accuracy metric loaded through the `evaluate` library; used by compute_metrics.
accuracy = evaluate.load("accuracy")

# Fine-tuning configuration for RoBERTa: 5 epochs, batches of 10, with an
# evaluation and a checkpoint at the end of every epoch.
arguments2 = TrainingArguments(
    output_dir="/content/drive/MyDrive/Colab Notebooks/roberta_checkpoints",
    per_device_train_batch_size=10,
    per_device_eval_batch_size=10,
    logging_steps=5,
    num_train_epochs=5,
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=3e-5,
    weight_decay=0.01,
    load_best_model_at_end=True,
    # Rank checkpoints by validation accuracy (the metric compute_metrics
    # returns). Without this the Trainer ranks by validation loss, which can
    # pick a different epoch than the best-accuracy checkpoint analysed in the
    # evaluation cells, so the reported test score and the per-class analysis
    # would describe different weights.
    metric_for_best_model="accuracy",
    greater_is_better=True,
    report_to='none',
    seed=224
)

def compute_metrics(eval_pred):
    """Score one evaluation pass by accuracy.

    Args:
        eval_pred: a pair ``(logits, labels)`` as handed over by the Trainer.

    Returns:
        The result of ``accuracy.compute`` for the highest-scoring class of
        each logit row, measured against the reference labels.
    """
    class_scores, reference_ids = eval_pred
    best_class_ids = np.argmax(class_scores, axis=-1)
    return accuracy.compute(predictions=best_class_ids, references=reference_ids)

# Wire model, hyper-parameters, data and metric together.
# Training uses the 'train' subset; the per-epoch evaluation uses 'val'.
trainer2 = Trainer(
    model=model2,
    args=arguments2,
    train_dataset=small_tokenized_ds['train'],
    eval_dataset=small_tokenized_ds['val'],
    processing_class=tokenizer2,
    data_collator=data_collator,
    compute_metrics=compute_metrics
)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Fine-tune RoBERTa. With save_strategy="epoch" a checkpoint is written to
# arguments2.output_dir at the end of every epoch.
trainer2.train()

Epoch,Training Loss,Validation Loss,Accuracy
1,1.0662,1.04438,0.64
2,0.8196,0.666348,0.77
3,0.6218,0.536554,0.83
4,0.1508,0.559649,0.84
5,0.3161,0.618552,0.82


TrainOutput(global_step=400, training_loss=0.6573207901790739, metrics={'train_runtime': 133.4971, 'train_samples_per_second': 29.963, 'train_steps_per_second': 2.996, 'total_flos': 122458955266440.0, 'train_loss': 0.6573207901790739, 'epoch': 5.0})

#### Evaluation

In [None]:
# Score the model currently held by trainer2 on the held-out test subset.
# Because load_best_model_at_end=True, this is the checkpoint the Trainer
# ranked best, not necessarily the last epoch.
result2 = trainer2.evaluate(small_tokenized_ds['test'])
print(result2)

{'eval_loss': 0.43955665826797485, 'eval_accuracy': 0.86, 'eval_runtime': 0.5226, 'eval_samples_per_second': 191.335, 'eval_steps_per_second': 19.134, 'epoch': 5.0}


In [None]:
from sklearn.metrics import accuracy_score

# Analyse the checkpoint the Trainer itself recorded as best, so the per-class
# numbers describe the same weights that `trainer2.evaluate` scored. The literal
# path is kept only as a fallback for the case where no best checkpoint was
# recorded on the trainer.
best_checkpoint2 = (
    trainer2.state.best_model_checkpoint
    or "/content/drive/MyDrive/Colab Notebooks/roberta_checkpoints/checkpoint-320"
)
fine_tuned_model2 = AutoModelForSequenceClassification.from_pretrained(best_checkpoint2)
fine_tuned_model2.eval()  # inference only: make sure dropout is disabled

model_inputs = tokenizer2(
    list(small_tokenized_ds['test']['text']),
    padding=True,
    truncation=True,
    return_tensors='pt'
)

# Prediction needs no gradients; skipping them avoids building an autograd
# graph for the whole test subset.
with torch.no_grad():
    outputs = fine_tuned_model2(**model_inputs)
predictions = torch.argmax(outputs.logits, dim=-1).numpy()
true_labels = small_tokenized_ds['test']['label']
true_array = np.array(true_labels)  # converted once instead of once per class

# Index i of this list is the emotion encoded by label id i.
label_names = ['sadness', 'joy', 'love', 'anger', 'fear', 'surprise']

print("roBERTa - Per-Class Accuracy:")
print("-"*40)

# "Accuracy" here is computed only over the examples whose TRUE label is the
# given class, i.e. the share of that class that was predicted correctly.
for i, emotion in enumerate(label_names):
    mask = true_array == i
    n_samples = int(mask.sum())

    if n_samples > 0:
        class_acc = accuracy_score(true_array[mask], predictions[mask])
        print(f"{emotion:10s}: {class_acc:.2%} ({n_samples} samples)")
    else:
        print(f"{emotion:10s}: No samples in test set")

roBERTa - Per-Class Accuracy:
----------------------------------------
sadness   : 97.06% (34 samples)
joy       : 94.44% (36 samples)
love      : 54.55% (11 samples)
anger     : 66.67% (12 samples)
fear      : 83.33% (6 samples)
surprise  : 0.00% (1 samples)


In [None]:
import pandas as pd

# Error analysis for RoBERTa: predict the whole test subset in one batch and
# list every example whose predicted emotion differs from the true one.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

fine_tuned_model2.to(device)
fine_tuned_model2.eval()

test_split = small_ds['test']
texts = list(test_split['text'])
true_labels = list(test_split['label'])

inputs = tokenizer2(texts, return_tensors='pt', padding=True, truncation=True).to(device)

with torch.no_grad():
    outputs = fine_tuned_model2(**inputs)
    predictions = torch.argmax(outputs.logits, dim=-1).cpu().numpy()

# Never shorten cell contents when the frame is printed.
pd.set_option('display.max_colwidth', None)

# One record per test example; texts longer than 100 characters are cut and
# marked with an ellipsis.
rows = []
for text, true, pred in zip(texts, true_labels, predictions):
    rows.append({
        'Text': text if len(text) <= 100 else text[:100] + "...",
        'True_Label': label_names[true],
        'Predicted_Label': label_names[pred],
        'Correct': '✓' if true == pred else '✗',
    })
results_df = pd.DataFrame(rows, columns=['Text', 'True_Label', 'Predicted_Label', 'Correct'])

errors_df = results_df.loc[results_df['Correct'] == '✗']
print("\nERRORS ONLY:")
print("-"*110)
print(errors_df)


ERRORS ONLY:
--------------------------------------------------------------------------------------------------------------
                                                                                                       Text  \
11                                                                     i listen to it i feel all rebellious   
12                               i feel a gentle tap and find flower child watching me her expression grave   
25          im feeling angry at someone i do something thoughtful for her and my feelings toward her soften   
29                                                                         i feel so cold a href http irish   
49  i feel like the people that i myself love want and need don t talk to me and don t connect with me a...   
68  i was so impressed with the show especially for hs and i was moved by these talented kids but then a...   
70                                 i feel no shame whatsoever in longing for iron man at my local 

#### Visualization

In [None]:
# Export the <s> (sentence-start) representation of every test sentence to
# TensorBoard's embedding projector, once per (checkpoint, layer) combination.

# Checkpoints to visualise, keyed by the epoch after which each was saved
# (the run above took 400 steps for 5 epochs, i.e. 80 steps per epoch).
checkpoints2 = {
    1: "/content/drive/MyDrive/Colab Notebooks/roberta_checkpoints/checkpoint-80",
    2: "/content/drive/MyDrive/Colab Notebooks/roberta_checkpoints/checkpoint-160",
    4: "/content/drive/MyDrive/Colab Notebooks/roberta_checkpoints/checkpoint-320"
}

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
base_path = "/content/drive/MyDrive/Colab Notebooks/results_vis_roberta"

# Indices into the model's `hidden_states` tuple (entry 0 is the embedding
# output, entry k the output of layer k, per the transformers docs).
# NOTE(review): roberta-base presumably yields 13 entries (embeddings + 12
# layers), which would make index 11 the penultimate layer and 12 the last one.
# Confirm that 11 is intended.
important_layers = [0, 11]

# Everything below is identical for all checkpoints and layers, so it is
# computed once: the test texts/labels, the projector metadata and the inputs.
test_split = small_tokenized_ds['test']
test_texts = list(test_split['text'])
test_labels = list(test_split['label'])
metadata = [[text, str(label)] for text, label in zip(test_texts, test_labels)]

model_inputs = tokenizer2(test_texts, padding=True, truncation=True, return_tensors='pt').to(device)

# The <s> token (id 0 for this vocabulary) is expected to be the first token of
# every sequence; verify that instead of scanning for it token by token, so
# vectors and metadata can never get out of step.
cls_token_id = tokenizer2.cls_token_id
assert bool((model_inputs['input_ids'][:, 0] == cls_token_id).all()), \
    "expected <s> at position 0 of every tokenised sentence"

for epoch, checkpoint_path in checkpoints2.items():
    fine_tuned_model2 = AutoModelForSequenceClassification.from_pretrained(checkpoint_path)
    fine_tuned_model2.to(device)
    fine_tuned_model2.eval()

    # Inference only: without no_grad every hidden state would keep its
    # autograd graph alive on the device.
    with torch.no_grad():
        outputs = fine_tuned_model2(**model_inputs, output_hidden_states=True)

    for layer in important_layers:
        log_dir = f"{base_path}/epoch_{epoch}/layer_{layer}"
        os.makedirs(log_dir, exist_ok=True)

        # (num_examples, hidden_size): the position-0 vector of each sentence.
        cls_vectors = outputs['hidden_states'][layer][:, 0, :]

        with SummaryWriter(log_dir) as writer:
            writer.add_embedding(cls_vectors.cpu(), metadata=metadata, metadata_header=['Text', 'Emotion'])

print("Done!")

Done!
