In [60]:
# 2. Imports
import torch
import numpy as np
from datasets import load_dataset, Array2D
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer
)
import evaluate

import evaluate


# 3. Device check (MPS = Apple GPU)
if torch.backends.mps.is_available():
    device = torch.device("mps")
    print("Using Apple Silicon GPU (MPS)")
else:
    device = torch.device("cpu")
    print("⚠️ MPS not available, using CPU")

# 3. Load dataset
dataset = load_dataset("go_emotions")

# 4. Load tokenizer and model
model_name = "mental/mental-bert-base-uncased"   # MentalBERT base
tokenizer = AutoTokenizer.from_pretrained(model_name)

num_labels = 28   # 27 emotions + Neutral

model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=num_labels,
    problem_type="multi_label_classification"
).to(device)   # move model to GPU if available



Using Apple Silicon GPU (MPS)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at mental/mental-bert-base-uncased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [61]:
# 6. Tokenization
def tokenize(batch):
    return tokenizer(batch["text"], truncation=True, padding="max_length", max_length=128)

encoded = dataset.map(tokenize, batched=True)

def one_hot_labels(batch):
    multi_hot = []
    for labels in batch["labels"]:
        label_vec = np.zeros(num_labels, dtype=np.float32)  # This is correct
        for l in labels:
            label_vec[l] = 1.0
        multi_hot.append(label_vec)
    batch["labels"] = multi_hot
    return batch

# Re-run the mapping
encoded = dataset.map(tokenize, batched=True)
encoded = encoded.map(one_hot_labels, batched=True)

# IMPORTANT: Explicitly specify dtype when setting format
encoded.set_format(
    type="torch", 
    columns=["input_ids", "attention_mask", "labels"],
    output_all_columns=False
)

# Cast labels to float32 explicitly
def cast_labels_to_float(example):
    example["labels"] = example["labels"].float()
    return example

encoded = encoded.map(cast_labels_to_float)

In [59]:
print(encoded["train"][0]["labels"].shape, encoded["train"][0]["labels"].dtype)


torch.Size([28]) torch.int64


In [62]:
# 7. Metrics
f1_metric = evaluate.load("f1")
accuracy = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    probs = 1 / (1 + np.exp(-logits))
    preds = (probs > 0.5).astype(int)
    labels = labels.astype(int)
    
    return {
        "f1": f1_metric.compute(predictions=preds.flatten(), 
                                references=labels.flatten(), 
                                average="micro")["f1"],
    }


In [63]:
# 3. Device check (MPS = Apple GPU)
if torch.backends.mps.is_available():
    device = torch.device("mps")
    print("Using Apple Silicon GPU (MPS)")
else:
    device = torch.device("cpu")
    print("⚠️ MPS not available, using CPU")

Using Apple Silicon GPU (MPS)


In [66]:
# Add this RIGHT AFTER your imports (at the top with other imports)
from dataclasses import dataclass
from typing import Any, Dict, List

@dataclass
class MultiLabelCollator:
    tokenizer: AutoTokenizer
    
    def __call__(self, features: List[Dict[str, Any]]) -> Dict[str, torch.Tensor]:
        # Extract labels - they're already tensors, so stack them
        labels = torch.stack([f["labels"] for f in features]).float()
        
        # Prepare batch for input_ids and attention_mask
        batch = {
            "input_ids": torch.stack([f["input_ids"] for f in features]),
            "attention_mask": torch.stack([f["attention_mask"] for f in features]),
        }
        
        batch["labels"] = labels
        return batch

In [67]:
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    output_dir="./mentalbert-goemotions",
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,  # Reduce to 8 if you get memory errors on MPS
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=50,
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    greater_is_better=True,
    save_total_limit=2,  # Only keep 2 best checkpoints
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=encoded["train"],
    eval_dataset=encoded["validation"],
    tokenizer=tokenizer,
    data_collator=MultiLabelCollator(tokenizer=tokenizer),  # ADD THIS LINE
    compute_metrics=compute_metrics,
)

trainer.train()

  trainer = Trainer(


Epoch,Training Loss,Validation Loss,F1
1,0.0901,0.088997,0.969571
2,0.0815,0.083399,0.970308
3,0.0726,0.083867,0.96992




TrainOutput(global_step=8142, training_loss=0.09069550095949147, metrics={'train_runtime': 44933.023, 'train_samples_per_second': 2.898, 'train_steps_per_second': 0.181, 'total_flos': 8568237917583360.0, 'train_loss': 0.09069550095949147, 'epoch': 3.0})

In [None]:
# Save the final model
trainer.save_model("./mentalbert-goemotions-final")
tokenizer.save_pretrained("./mentalbert-goemotions-final")

In [5]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

# Load the saved model and tokenizer
model_path = "./mentalbert-goemotions-final"

tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)

# Create the pipeline
emotion_classifier = pipeline(
    "text-classification",
    model=model,
    tokenizer=tokenizer,
    device=-1,  # CPU
    top_k=None  # Return all emotion scores
)


Device set to use cpu


In [7]:


# Define emotion label mapping
emotion_labels = [
    'admiration', 'amusement', 'anger', 'annoyance', 'approval', 
    'caring', 'confusion', 'curiosity', 'desire', 'disappointment',
    'disapproval', 'disgust', 'embarrassment', 'excitement', 'fear',
    'gratitude', 'grief', 'joy', 'love', 'nervousness',
    'optimism', 'pride', 'realization', 'relief', 'remorse',
    'sadness', 'surprise', 'neutral'
]

# Now test with clearer examples
test_texts = [
    "I'm so angry and frustrated about losing my job",
    "I feel hopeful and excited about my new opportunity",
    "I'm terrified about being evicted next week",
    "I'm so stressed about rent and bills, can't sleep anymore"
    'Okay so I have had toxic family in the sense of my dad and lots of other relatives but I fixed my dead and don‚Äôt give a fuck bout those relatives btw I‚Äôm Indian Punjabi specifically. So I had this cousin who seemed like a good guy until I found out he did drugs and did stuff like stealing from some big companies by means of fraud. So he wanted to go to India but the visa was closed down so he came to us and then we welcomed him and I saw he was much worse I was a clean respectable person who didn‚Äôt look at woman as eye candy like him scrolling on IG to keep seeing them and chasing girls I could insult him more but cause I have immense respect for his dad I don‚Äôt so yeah and like he wasted a lot of money on him to eat outside even though we don‚Äôt eat that much and got my dad drinking and as drunk fuck broke my dads nose then he as the audacity to call me a fat ass and think it‚Äôs fine to joke when it isn‚Äôt ( we are in India right now) and starts causing fights with me and when I accuse him of something every one believes him but not me sorry bout the gramma I‚Äôm tired and angry any ways my dad can‚Äôt work and my mom has to pull all the weight so I was like let‚Äôs do a side hustle to help her but my twisted of a fuck cousin stops me from finishing it and wrestles me down letting me go then tries to explain why it‚Äôs so so bad even though I know it‚Äôs a legit course than when I want be left alone he starts to play dad and explain to me when he‚Äôs no good so I left again what should I do for my goals also handle him and cause he makes fun of me (fyi why parents like him is cause mom left him with mine when he was 10 for 2 years.'
]

for text in test_texts:
    result = emotion_classifier(text)
    top_3 = sorted(result[0], key=lambda x: x['score'], reverse=True)[:3]
    print(f"\nText: {text}")
    for r in top_3:
        emotion = emotion_labels[int(r['label'].split('_')[1])]
        print(f"  {emotion}: {r['score']:.3f}")


Text: I'm so angry and frustrated about losing my job
  anger: 0.639
  annoyance: 0.396
  disgust: 0.067

Text: I feel hopeful and excited about my new opportunity
  excitement: 0.529
  optimism: 0.192
  joy: 0.111

Text: I'm terrified about being evicted next week
  fear: 0.733
  neutral: 0.057
  nervousness: 0.055

Text: I'm so stressed about rent and bills, can't sleep anymoreOkay so I have had toxic family in the sense of my dad and lots of other relatives but I fixed my dead and don‚Äôt give a fuck bout those relatives btw I‚Äôm Indian Punjabi specifically. So I had this cousin who seemed like a good guy until I found out he did drugs and did stuff like stealing from some big companies by means of fraud. So he wanted to go to India but the visa was closed down so he came to us and then we welcomed him and I saw he was much worse I was a clean respectable person who didn‚Äôt look at woman as eye candy like him scrolling on IG to keep seeing them and chasing girls I could insult hi