## Emotion classifier trainer

* Dataset: GoEmotions, loaded with load_dataset("go_emotions") from the Hugging Face Datasets library and then preprocessed.
* Task: Single-label emotion classification problem (subset of emotions), where each text input is mapped to one primary emotion label.
* Model: BERT (bert-base-uncased), fine-tuned using Hugging Face’s Trainer API and BertForSequenceClassification.
* Goal: To detect student emotional tone (e.g., sadness, grief, fear, nervousness) in open-ended queries or messages.
* The trained model is saved to models/emotionClassifier.

In [1]:
# Emotions the classifier is trained on, in a custom order: high severity first.
# A label's position in this list becomes its class index (0-6).
target_emotions = [
    "sadness",
    "grief",
    "fear",
    "remorse",
    "disappointment",
    "nervousness",
    "embarrassment",
]

In [2]:
# Load dataset
from datasets import load_dataset

# Fetch the GoEmotions corpus through the Hugging Face Datasets loader
goemo_dataset = load_dataset("go_emotions")

# Full GoEmotions label vocabulary: list position -> emotion name, plus the inverse lookup
id2label = goemo_dataset['train'].features['labels'].feature.names
label2id = dict(zip(id2label, range(len(id2label))))

# Compact class ids (0..N-1) for the target emotions only, following the order of target_emotions
target_label2id = dict(zip(target_emotions, range(len(target_emotions))))
target_id2label = {class_idx: name for name, class_idx in target_label2id.items()}
# Original GoEmotions ids of the target emotions, used to pick matching rows
target_label_ids = set(label2id[name] for name in target_emotions)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import pandas as pd

# Pull the training split into pandas for row-wise filtering
train_split = goemo_dataset["train"]
df = train_split.to_pandas()

# Rows are kept only if at least one of their labels is a target emotion
def get_first_target_label(labels):
    """Return the name of the first label id in `labels` that is a target emotion.

    `labels` is scanned in its own order; the GoEmotions name (looked up in
    `id2label`) of the first id present in `target_label_ids` is returned.
    Returns None when no id matches.

    NOTE(review): on multi-label rows the winner is decided by the row's own
    label order, not by the severity order of `target_emotions` — confirm
    this is the intended tie-break.
    """
    matching_names = (
        id2label[label_id] for label_id in labels if label_id in target_label_ids
    )
    return next(matching_names, None)

# Tag each row with its primary target emotion, drop rows without one,
# then map the emotion string to its class index 0-6 and keep only text + label
df = (
    df.assign(target_label=df['labels'].apply(get_first_target_label))
    .dropna(subset=['target_label'])
    .reset_index(drop=True)
    .assign(label=lambda frame: frame['target_label'].map(target_label2id))
    .loc[:, ['text', 'label']]
)

df.head()

Unnamed: 0,text,label
0,To make her feel threatened,2
1,"Shit, I guess I accidentally bought a Pay-Per-...",6
2,"I never thought it was at the same moment, but...",4
3,i got a bump and a bald spot. i feel dumb <3,6
4,I miss them being alive,1


In [4]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the rows for validation, stratified on the class label
train_df, val_df = train_test_split(
    df,
    test_size=0.1,
    random_state=42,
    stratify=df['label'],
)

In [5]:
from transformers import AutoTokenizer
from datasets import Dataset

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

def tokenize(example):
    """Tokenize the "text" field, padding/truncating every sequence to 128 tokens."""
    encode_options = {"padding": "max_length", "truncation": True, "max_length": 128}
    return tokenizer(example["text"], **encode_options)

def _to_torch_dataset(frame):
    """Convert a pandas frame to a tokenized Hugging Face Dataset in PyTorch format."""
    dataset = Dataset.from_pandas(frame).map(tokenize, batched=True)
    # Expose only the model inputs and the label, as torch tensors
    dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
    return dataset

# Same pipeline for both splits: pandas -> Dataset -> tokenized -> torch format
train_dataset = _to_torch_dataset(train_df)
val_dataset = _to_torch_dataset(val_df)


Map: 100%|██████████| 3555/3555 [00:00<00:00, 10259.11 examples/s]
Map: 100%|██████████| 395/395 [00:00<00:00, 9546.02 examples/s]


In [6]:
from transformers import BertForSequenceClassification, TrainingArguments, Trainer, EarlyStoppingCallback
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import torch

# Model setup
# Pass the target-emotion mappings into the config so the saved checkpoint
# reports emotion names (e.g. "sadness") instead of generic LABEL_0..LABEL_6.
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=len(target_emotions),
    id2label=target_id2label,
    label2id=target_label2id,
)

# Metrics
def compute_metrics(pred):
    """Compute accuracy and weighted F1 / precision / recall for one evaluation pass.

    Args:
        pred: A (logits, labels) pair. Logits are assumed to be laid out as
            (n_samples, n_classes), since the class is taken with argmax on axis 1.

    Returns:
        dict with the keys "accuracy", "f1", "precision" and "recall".
    """
    logits, labels = pred
    preds = logits.argmax(axis=1)
    # zero_division=0 yields the same score sklearn falls back to when a class
    # is never predicted, but without an UndefinedMetricWarning on every eval.
    weighted = {"average": "weighted", "zero_division": 0}
    return {
        "accuracy": accuracy_score(labels, preds),
        "f1": f1_score(labels, preds, **weighted),
        "precision": precision_score(labels, preds, **weighted),
        "recall": recall_score(labels, preds, **weighted),
    }

# Training arguments
training_args = TrainingArguments(
    # Where checkpoints and logs are written
    output_dir="./results",
    logging_dir="./logs",
    logging_steps=100,
    # Evaluate and checkpoint once per epoch; keep the checkpoint with the best eval F1
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="eval_f1",
    greater_is_better=True,
    # Optimisation hyper-parameters
    num_train_epochs=10,
    learning_rate=2e-5,
    weight_decay=0.01,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=64,
)

# Trainer
# Early stopping with a patience of 2 evaluations on the tracked metric
early_stopping = EarlyStoppingCallback(early_stopping_patience=2)

trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    callbacks=[early_stopping],
)

W0805 10:12:04.129910 1452 Lib\site-packages\torch\distributed\elastic\multiprocessing\redirects.py:29] NOTE: Redirects are currently not supported in Windows or MacOs.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
# Fine-tune the model; the early-stopping callback can end the run before all 10 configured epochs
trainer.train()

  4%|▍         | 100/2230 [16:56<7:55:32, 13.40s/it]

{'loss': 1.5741, 'grad_norm': 4.606809616088867, 'learning_rate': 1.9103139013452916e-05, 'epoch': 0.45}


  9%|▉         | 200/2230 [32:43<4:29:28,  7.96s/it]

{'loss': 1.2477, 'grad_norm': 5.874758720397949, 'learning_rate': 1.820627802690583e-05, 'epoch': 0.9}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
                                                    
 10%|█         | 223/2230 [36:54<3:49:02,  6.85s/it]

{'eval_loss': 0.978505551815033, 'eval_accuracy': 0.660759493670886, 'eval_f1': 0.6408395905175219, 'eval_precision': 0.6346773599668077, 'eval_recall': 0.660759493670886, 'eval_runtime': 56.4848, 'eval_samples_per_second': 6.993, 'eval_steps_per_second': 0.124, 'epoch': 1.0}


 13%|█▎        | 300/2230 [48:27<4:50:19,  9.03s/it] 

{'loss': 0.8746, 'grad_norm': 9.37828254699707, 'learning_rate': 1.7309417040358745e-05, 'epoch': 1.35}


 18%|█▊        | 400/2230 [1:01:24<4:27:05,  8.76s/it]

{'loss': 0.767, 'grad_norm': 8.703765869140625, 'learning_rate': 1.641255605381166e-05, 'epoch': 1.79}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
                                                      
 20%|██        | 446/2230 [1:09:01<3:25:30,  6.91s/it]

{'eval_loss': 0.8383501768112183, 'eval_accuracy': 0.7113924050632912, 'eval_f1': 0.7034793684184795, 'eval_precision': 0.7004218020032112, 'eval_recall': 0.7113924050632912, 'eval_runtime': 56.3653, 'eval_samples_per_second': 7.008, 'eval_steps_per_second': 0.124, 'epoch': 2.0}


 22%|██▏       | 500/2230 [1:16:17<4:02:39,  8.42s/it] 

{'loss': 0.7078, 'grad_norm': 6.31528902053833, 'learning_rate': 1.5515695067264575e-05, 'epoch': 2.24}


 27%|██▋       | 600/2230 [1:30:06<4:03:12,  8.95s/it]

{'loss': 0.5921, 'grad_norm': 17.561147689819336, 'learning_rate': 1.461883408071749e-05, 'epoch': 2.69}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
                                                      
 30%|███       | 669/2230 [1:39:23<2:22:56,  5.49s/it]

{'eval_loss': 0.9166675209999084, 'eval_accuracy': 0.6860759493670886, 'eval_f1': 0.6806512402521872, 'eval_precision': 0.6919857874995372, 'eval_recall': 0.6860759493670886, 'eval_runtime': 44.5548, 'eval_samples_per_second': 8.865, 'eval_steps_per_second': 0.157, 'epoch': 3.0}


 31%|███▏      | 700/2230 [1:43:34<2:59:10,  7.03s/it]

{'loss': 0.5159, 'grad_norm': 10.167298316955566, 'learning_rate': 1.3721973094170404e-05, 'epoch': 3.14}


 36%|███▌      | 800/2230 [1:57:39<3:32:02,  8.90s/it]

{'loss': 0.3986, 'grad_norm': 8.780031204223633, 'learning_rate': 1.2825112107623318e-05, 'epoch': 3.59}


                                                      
 40%|████      | 892/2230 [2:11:43<2:04:33,  5.59s/it]

{'eval_loss': 0.9763070940971375, 'eval_accuracy': 0.6835443037974683, 'eval_f1': 0.6833893862514774, 'eval_precision': 0.6918313760812048, 'eval_recall': 0.6835443037974683, 'eval_runtime': 45.1653, 'eval_samples_per_second': 8.746, 'eval_steps_per_second': 0.155, 'epoch': 4.0}


 40%|████      | 892/2230 [2:11:57<3:17:55,  8.88s/it]

{'train_runtime': 7917.0235, 'train_samples_per_second': 4.49, 'train_steps_per_second': 0.282, 'train_loss': 0.7890615463256836, 'epoch': 4.0}





TrainOutput(global_step=892, training_loss=0.7890615463256836, metrics={'train_runtime': 7917.0235, 'train_samples_per_second': 4.49, 'train_steps_per_second': 0.282, 'total_flos': 935401792896000.0, 'train_loss': 0.7890615463256836, 'epoch': 4.0})

In [8]:
# Write the fine-tuned model and its tokenizer into the same directory
save_dir = "../../models/emotionClassifier"
trainer.save_model(save_dir)
tokenizer.save_pretrained(save_dir)

('../../models/emotionClassifier\\tokenizer_config.json',
 '../../models/emotionClassifier\\special_tokens_map.json',
 '../../models/emotionClassifier\\vocab.txt',
 '../../models/emotionClassifier\\added_tokens.json',
 '../../models/emotionClassifier\\tokenizer.json')