# Phase 1

## load the dataset (emotion)

In [1]:
from datasets import load_dataset
raw_datasets = load_dataset("emotion")

In [2]:
raw_datasets

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 16000
    })
    validation: Dataset({
        features: ['text', 'label'],
        num_rows: 2000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 2000
    })
})

In [3]:
raw_train_dataset = raw_datasets["train"]
raw_train_dataset[0]

{'text': 'i didnt feel humiliated', 'label': 0}

In [4]:
raw_train_dataset.features

{'text': Value(dtype='string', id=None),
 'label': ClassLabel(names=['sadness', 'joy', 'love', 'anger', 'fear', 'surprise'], id=None)}

## preprossing

In [5]:
# ...existing code...
from collections import Counter

# Get label names
label_names = raw_datasets['train'].features['label'].names

# Count label occurrences
labels = [example['label'] for example in raw_train_dataset]
label_counts = Counter(labels)

# Print class distribution with label names
for label_id, count in label_counts.items():
    print(f"{label_names[label_id]} ({label_id}): {count}")
# ...existing code...

sadness (0): 4666
anger (3): 2159
love (2): 1304
surprise (5): 572
fear (4): 1937
joy (1): 5362


In [6]:
# Get all unique labels in the training set
unique_labels = set(labels)
print("Unique labels in the training set:", unique_labels)


Unique labels in the training set: {0, 1, 2, 3, 4, 5}


In [7]:
# statistics
print(f"Number of training samples: {len(raw_datasets['train'])}")
print(f"Number of validation samples: {len(raw_datasets['validation'])}")
print(f"Number of test samples: {len(raw_datasets['test'])}")
print(f"Number of classes: {len(label_names)}")
print(f"Classes: {label_names}")
print(f"Sample text: {raw_datasets['train'][0]['text']}")


Number of training samples: 16000
Number of validation samples: 2000
Number of test samples: 2000
Number of classes: 6
Classes: ['sadness', 'joy', 'love', 'anger', 'fear', 'surprise']
Sample text: i didnt feel humiliated


In [8]:
# text lengths statistics
text_lengths = [len(example['text'].split()) for example in raw_train_dataset]
import numpy as np
print(f"Average text length (in words): {np.mean(text_lengths):.2f}")
print(f"Median text length (in words): {np.median(text_lengths):.2f}")
print(f"Standard deviation of text length (in words): {np.std(text_lengths):.2f}")
# max text length
print(f"Max text length (in words): {np.max(text_lengths)}")
# min text length
print(f"Min text length (in words): {np.min(text_lengths)}")

Average text length (in words): 19.17
Median text length (in words): 17.00
Standard deviation of text length (in words): 10.99
Max text length (in words): 66
Min text length (in words): 2


# Phase 2

In [12]:


# Phase 2: Model Fine-Tuning & Evaluation

## 1. Import required libraries
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
import numpy as np
import evaluate
from sklearn.metrics import classification_report, confusion_matrix

In [57]:

## 2. Load the tokenizer and model
model_checkpoint = "distilbert-base-uncased"  # or "prajjwal1/bert-tiny" for smaller model
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=6)


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [58]:
 #3 dynamic padding
from transformers import DataCollatorWithPadding
def tokenize_function(example):
    return tokenizer(example["text"], truncation=True)
tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
tokenized_datasets["train"][0]

Map:   0%|          | 0/16000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

{'text': 'i didnt feel humiliated',
 'label': 0,
 'input_ids': [101, 1045, 2134, 2102, 2514, 26608, 102],
 'attention_mask': [1, 1, 1, 1, 1, 1, 1]}

In [59]:
#4 Remove the 'text' column from all splits
tokenized_datasets = tokenized_datasets.remove_columns(["text"])
tokenized_datasets.set_format("torch", columns=["input_ids", "attention_mask", "label"])
tokenized_datasets["train"][0]

{'label': tensor(0),
 'input_ids': tensor([  101,  1045,  2134,  2102,  2514, 26608,   102]),
 'attention_mask': tensor([1, 1, 1, 1, 1, 1, 1])}

In [60]:
#5 define training arguments
# import TrainingArguments
from transformers import TrainingArguments
training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=2,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model="macro_f1",
    # increase logging steps if needed

)

In [61]:
# define metrics function
from sklearn.metrics import f1_score, accuracy_score, classification_report

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    acc = accuracy_score(labels, predictions)
    macro_f1 = f1_score(labels, predictions, average="macro")
    report = classification_report(labels, predictions, output_dict=True)
    return {
        "accuracy": acc,
        "macro_f1": macro_f1,
        "f1_sadness": report["0"]["f1-score"],
        "f1_joy": report["1"]["f1-score"],
        "f1_love": report["2"]["f1-score"],
        "f1_anger": report["3"]["f1-score"],
        "f1_fear": report["4"]["f1-score"],
        "f1_surprise": report["5"]["f1-score"],
    }

In [None]:
# 7 intialize trainer

from transformers import EarlyStoppingCallback

# Add callbacks to the Trainer
callbacks = [EarlyStoppingCallback(early_stopping_patience=)]

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    callbacks=callbacks,
    compute_metrics=compute_metrics,
)

In [63]:
# 8 train the model
trainer.train()     



Epoch,Training Loss,Validation Loss,Accuracy,Macro F1,F1 Sadness,F1 Joy,F1 Love,F1 Anger,F1 Fear,F1 Surprise
1,0.24,0.215586,0.929,0.902907,0.962298,0.948884,0.87027,0.922787,0.871671,0.84153
2,0.1499,0.171035,0.9425,0.920567,0.965642,0.962224,0.893855,0.936248,0.892019,0.873418




TrainOutput(global_step=4000, training_loss=0.28264632987976074, metrics={'train_runtime': 6457.6216, 'train_samples_per_second': 4.955, 'train_steps_per_second': 0.619, 'total_flos': 342206328271392.0, 'train_loss': 0.28264632987976074, 'epoch': 2.0})

In [64]:
# evalute the model
trainer.evaluate(tokenized_datasets["validation"])
trainer.evaluate(tokenized_datasets["test"])





{'eval_loss': 0.2075832188129425,
 'eval_accuracy': 0.9285,
 'eval_macro_f1': 0.8887397158369915,
 'eval_f1_sadness': 0.9611063094209161,
 'eval_f1_joy': 0.9525862068965517,
 'eval_f1_love': 0.8411214953271028,
 'eval_f1_anger': 0.9183303085299456,
 'eval_f1_fear': 0.8923766816143498,
 'eval_f1_surprise': 0.7669172932330827,
 'eval_runtime': 88.974,
 'eval_samples_per_second': 22.478,
 'eval_steps_per_second': 2.81,
 'epoch': 2.0}

In [65]:
# detailed metrics & confusion matrix
preds = trainer.predict(tokenized_datasets["test"])
y_true = preds.label_ids
y_pred = np.argmax(preds.predictions, axis=1)
print(classification_report(y_true, y_pred, target_names=label_names))
print(confusion_matrix(y_true, y_pred))



              precision    recall  f1-score   support

     sadness       0.97      0.96      0.96       581
         joy       0.95      0.95      0.95       695
        love       0.83      0.85      0.84       159
       anger       0.92      0.92      0.92       275
        fear       0.90      0.89      0.89       224
    surprise       0.76      0.77      0.77        66

    accuracy                           0.93      2000
   macro avg       0.89      0.89      0.89      2000
weighted avg       0.93      0.93      0.93      2000

[[556   6   0  12   7   0]
 [  1 663  27   2   0   2]
 [  0  22 135   2   0   0]
 [ 12   3   0 253   7   0]
 [  4   0   0   7 199  14]
 [  3   3   0   0   9  51]]


In [66]:
# save the model and tokenizer
model.save_pretrained("./sentiment_model")
tokenizer.save_pretrained("./sentiment_model")

('./sentiment_model\\tokenizer_config.json',
 './sentiment_model\\special_tokens_map.json',
 './sentiment_model\\vocab.txt',
 './sentiment_model\\added_tokens.json',
 './sentiment_model\\tokenizer.json')