<a href="https://colab.research.google.com/github/kamranr123/kamranr123.github.io/blob/master/fine_tune_multilingual_bert_on_emotions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Install dependencies
!pip install -q transformers datasets evaluate accelerate

In [None]:
!wget -P /content https://github.com/nazaninsbr/Persian-Emotion-Detection/raw/refs/heads/main/dataset.csv

In [1]:
import os
import pandas as pd
from datasets import Dataset, DatasetDict
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
import torch
import evaluate

# Disable W&B
os.environ["WANDB_DISABLED"] = "true"


In [95]:
# Load CSV
df = pd.read_csv("/content/dataset.csv")

# Emotion label columns
label_cols = ["Anger", "Fear", "Happiness", "Hatred", "Sadness", "Wonder"]

# Convert vote counts -> binary labels (≥3 = 1)
# df[label_cols] = df[label_cols].map(lambda x: 1 if x >= 2 else 0)
df[label_cols] = df[label_cols].map(lambda x: x * 0.2)

# Add Neutral label
df["Neutral"] = df[label_cols].apply(lambda row: 1 if row.sum() <= 0.1 else 0, axis=1)
# Add Neutral label (1 if all others are 0)
# df["Neutral"] = df[label_cols].apply(lambda row: 1 if row.sum() == 0 else 0, axis=1)

# Final label list
final_labels = label_cols + ["Neutral"]
df[final_labels] = df[final_labels].apply(lambda row: row / row.sum(), axis=1)
# df[final_labels] = df[final_labels].clip(0.0, 1.0)
num_labels = len(final_labels)

# Pack labels into list and convert to float
df["labels"] = df[final_labels].apply(lambda row: [float(x) for x in row], axis=1)

# Keep only text + labels
df = df[["text", "labels"]]

# Train/validation split
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)

# Create Hugging Face datasets
train_ds = Dataset.from_pandas(train_df.reset_index(drop=True))
val_ds   = Dataset.from_pandas(val_df.reset_index(drop=True))

dataset = DatasetDict({"train": train_ds, "validation": val_ds})

# Optional: set torch format
# dataset.set_format(type="torch", columns=["text", "labels"])

# # for test
small_train_ds = dataset["train"].select(range(min(256, len(dataset["train"]))))
small_val_ds   = dataset["validation"].select(range(min(256, len(dataset["validation"]))))

# Create a small dataset dict
small_dataset = DatasetDict({
    "train": small_train_ds,
    "validation": small_val_ds
})

print(f"Number of labels: {num_labels}")
print(f"Classes: {final_labels}")


Number of labels: 7
Classes: ['Anger', 'Fear', 'Happiness', 'Hatred', 'Sadness', 'Wonder', 'Neutral']


In [None]:
for i in range(10):
  print(dataset["train"][i]['labels'])

In [96]:
model_name = "google-bert/bert-base-multilingual-cased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize(batch):
    return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=256)

dataset = dataset.map(tokenize, batched=True)
dataset = dataset.remove_columns(["text"])

small_dataset = small_dataset.map(tokenize, batched=True)
small_dataset = small_dataset.remove_columns(["text"])

Map:   0%|          | 0/24000 [00:00<?, ? examples/s]

Map:   0%|          | 0/6000 [00:00<?, ? examples/s]

Map:   0%|          | 0/256 [00:00<?, ? examples/s]

Map:   0%|          | 0/256 [00:00<?, ? examples/s]

In [82]:
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=num_labels,
    problem_type="multi_label_classification"
)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [93]:
metric_f1 = evaluate.load("f1")
metric_acc = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = (torch.sigmoid(torch.tensor(logits)) > 0.5).int().numpy()
    labels = labels.astype(int)
    f1 = metric_f1.compute(predictions=preds, references=labels, average="macro")["f1"]
    acc = metric_acc.compute(predictions=preds, references=labels)["accuracy"]
    return {"accuracy": acc, "f1_macro": f1}


Downloading builder script: 0.00B [00:00, ?B/s]

Downloading builder script: 0.00B [00:00, ?B/s]

In [84]:
import shutil
shutil.rmtree('/content/bert-persian-emotions')

In [85]:
from transformers import TrainingArguments, DataCollatorWithPadding

args = TrainingArguments(
    output_dir="/content/bert-persian-emotions",
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=1e-5,
    per_device_train_batch_size=8,
    gradient_accumulation_steps=2,  # effective batch = 16
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model="f1_macro",
    fp16=True,                     # mixed precision
    report_to="none",                # disables W&B
    optim="adamw_torch_fused"           # faster optimizer if CUDA 11.7+
)


In [98]:
from transformers import Trainer
import torch.nn as nn

class WeightedTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        # Extract labels
        labels = inputs.pop("labels").float()  # shape: [batch_size, num_labels]

        # Forward pass
        outputs = model(**inputs)
        logits = outputs.logits

        # Weighted BCE loss
        # loss_fct = nn.BCEWithLogitsLoss(pos_weight=pos_weights.to(logits.device))
        loss_fct = nn.BCEWithLogitsLoss()
        loss = loss_fct(logits, labels)

        return (loss, outputs) if return_outputs else loss


trainer = WeightedTrainer(
    model=model,
    args=args,
    train_dataset=small_dataset["train"],
    eval_dataset=small_dataset["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics  # your F1/multi-label metrics
)


trainer.train()

  trainer = WeightedTrainer(


Epoch,Training Loss,Validation Loss


ValueError: Predictions and/or references don't match the expected format.
Expected format: {'predictions': Value('int32'), 'references': Value('int32')},
Input predictions: [[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]],
Input references: [[0 0 0 ... 0 0 0]
 [0 1 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]

In [None]:
texts = [
    "من امروز خیلی خوشحالم",      # Happy
    "احساس می‌کنم ناراحت و خسته‌ام", # Sad
    "از تاریکی می‌ترسم",           # Fear
    "قدم زدن زیر بارون شاید بهترین مسکن درد هاست..." # Neutral
]

inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True, max_length=128)
outputs = model(**inputs)
probs = torch.sigmoid(outputs.logits).detach().numpy()

for text, p in zip(texts, probs):
    labels_pred = [final_labels[i] for i, v in enumerate(p) if v > 0.5]
    print(text, "->", labels_pred)


In [92]:
import numpy as np
from sklearn.metrics import f1_score, classification_report

# collect true labels and preds on validation set
preds_logits = trainer.predict(dataset["validation"]).predictions  # raw logits
probs = 1 / (1 + np.exp(-preds_logits))
preds = (probs > 0.5).astype(int)

# binarize references (soft labels → 0/1)
refs = np.stack([ex["labels"] for ex in dataset["validation"]])
refs_bin = (refs >= 0.5).astype(int)

# overall per-label counts
pos_counts = refs_bin.sum(axis=0)
neg_counts = refs_bin.shape[0] - pos_counts
print("pos counts per label:", pos_counts)
print("neg counts per label:", neg_counts)

# per-label f1
for i, name in enumerate(final_labels):
    print(name, "F1:", f1_score(refs_bin[:, i], preds[:, i], zero_division=0))

# full classification report
print(classification_report(refs_bin, preds, zero_division=0))


Epoch,Training Loss,Validation Loss,F1 Macro
1,0.3905,0.391475,0.0


pos counts per label: [543 422 432 490 681 440  76]
neg counts per label: [5457 5578 5568 5510 5319 5560 5924]
Anger F1: 0.0
Fear F1: 0.0
Happiness F1: 0.0
Hatred F1: 0.0
Sadness F1: 0.0
Wonder F1: 0.0
Neutral F1: 0.0
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       543
           1       0.00      0.00      0.00       422
           2       0.00      0.00      0.00       432
           3       0.00      0.00      0.00       490
           4       0.00      0.00      0.00       681
           5       0.00      0.00      0.00       440
           6       0.00      0.00      0.00        76

   micro avg       0.00      0.00      0.00      3084
   macro avg       0.00      0.00      0.00      3084
weighted avg       0.00      0.00      0.00      3084
 samples avg       0.00      0.00      0.00      3084



In [None]:
from sklearn.metrics import f1_score
# macro F1:
macro = f1_score(refs, preds, average="macro", zero_division=0)
micro = f1_score(refs, preds, average="micro", zero_division=0)
print("macro, micro:", macro, micro)


In [97]:
import numpy as np

# Convert Hugging Face dataset into numpy array of labels
all_labels = np.stack(dataset["train"]["labels"])  # shape: (num_samples, num_labels)

# Count positives and negatives per label
pos_counts = all_labels.sum(axis=0)
neg_counts = all_labels.shape[0] - pos_counts

print("pos counts per label:", pos_counts)
print("neg counts per label:", neg_counts)

# If you want mapping to label names:
for name, pos, neg in zip(final_labels, pos_counts, neg_counts):
    print(f"{name:10s} | pos: {int(pos):5d} | neg: {int(neg):5d}")


pos counts per label: [4435.79776857 3377.90284005 3411.00137803 4083.14986549 4608.76150663
 3826.38664122  257.        ]
neg counts per label: [19564.20223143 20622.09715995 20588.99862197 19916.85013451
 19391.23849337 20173.61335878 23743.        ]
Anger      | pos:  4435 | neg: 19564
Fear       | pos:  3377 | neg: 20622
Happiness  | pos:  3411 | neg: 20588
Hatred     | pos:  4083 | neg: 19916
Sadness    | pos:  4608 | neg: 19391
Wonder     | pos:  3826 | neg: 20173
Neutral    | pos:   257 | neg: 23743
