## English only

In [4]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertModel
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import f1_score, precision_score, recall_score
from sklearn.metrics import classification_report


# Hyperparameters and Config
MAX_LEN = 128
BATCH_SIZE = 16
EPOCHS = 3
LEARNING_RATE = 2e-5
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
INCLUDE_DISGUST = False  # Set to False for English-only

# Emotion labels
emotions = ['anger', 'fear', 'joy', 'sadness', 'surprise']
if INCLUDE_DISGUST:
    emotions.append('disgust')

# Dataset Class
class EmotionDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_len):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = str(self.texts[idx])
        label = self.labels[idx]
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        return {
            'text': text,
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.FloatTensor(label)
        }

# BERT-based Classifier
class EmotionClassifier(nn.Module):
    def __init__(self, n_classes):
        super(EmotionClassifier, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-multilingual-cased')
        self.drop = nn.Dropout(p=0.3)
        self.out = nn.Linear(self.bert.config.hidden_size, n_classes)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask
        )
        pooled_output = outputs.pooler_output
        output = self.drop(pooled_output)
        return self.out(output)

def train_epoch(model, data_loader, loss_fn, optimizer, device):
    model = model.train()
    losses = []
    for batch in data_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        loss = loss_fn(outputs, labels)
        losses.append(loss.item())
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    return np.mean(losses)

def eval_model(model, data_loader, device):
    model = model.eval()
    predictions = []
    real_labels = []
    with torch.no_grad():
        for batch in data_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            preds = torch.sigmoid(outputs)
            predictions.extend(preds.cpu().numpy())
            real_labels.extend(labels.cpu().numpy())
    return np.array(predictions), np.array(real_labels)

# Data Loader
def load_data(file_path, emotions):
    df = pd.read_csv(file_path)
    texts = df['text'].tolist()
    labels = df[emotions].values
    return texts, labels

def main():
    tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')
    train_texts, train_labels = load_data('train_eng.csv', emotions)
    val_texts, val_labels = load_data('dev_eng.csv', emotions)

    train_dataset = EmotionDataset(train_texts, train_labels, tokenizer, MAX_LEN)
    val_dataset = EmotionDataset(val_texts, val_labels, tokenizer, MAX_LEN)

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

    model = EmotionClassifier(n_classes=len(emotions)).to(DEVICE)

    loss_fn = nn.BCEWithLogitsLoss()
    optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE)

    for epoch in range(EPOCHS):
        train_loss = train_epoch(model, train_loader, loss_fn, optimizer, DEVICE)
        print(f"Epoch {epoch+1}/{EPOCHS}, Loss: {train_loss:.4f}")

    preds, targets = eval_model(model, val_loader, DEVICE)
    bin_preds = (preds >= 0.5).astype(int)

    print("F1-macro:", f1_score(targets, bin_preds, average='macro'))
    print("Precision-macro:", precision_score(targets, bin_preds, average='macro'))
    print("Recall-macro:", recall_score(targets, bin_preds, average='macro'))

    print("\nDetailed Classification Report:")
    print(classification_report(targets, bin_preds, target_names=emotions, zero_division=0))

    torch.save(model.state_dict(), "emotion_model.pt")

if __name__ == "__main__":
    main()


Epoch 1/3, Loss: 0.5524
Epoch 2/3, Loss: 0.4681
Epoch 3/3, Loss: 0.3698
F1-macro: 0.6423326225861329
Precision-macro: 0.7417090384091819
Recall-macro: 0.6058960573476703

Detailed Classification Report:
              precision    recall  f1-score   support

       anger       0.86      0.38      0.52        16
        fear       0.68      0.79      0.73        63
         joy       0.76      0.42      0.54        31
     sadness       0.71      0.83      0.76        35
    surprise       0.70      0.61      0.66        31

   micro avg       0.70      0.66      0.68       176
   macro avg       0.74      0.61      0.64       176
weighted avg       0.72      0.66      0.67       176
 samples avg       0.61      0.58      0.58       176



## Addition of

- Focal Loss (for class imbalance)

- Weighted Sampling (to oversample underrepresented emotions)

- Per-Class Threshold Tuning (for best F1 per class)

In [15]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from transformers import BertTokenizer, BertModel
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import f1_score, precision_score, recall_score, classification_report, precision_recall_curve

MAX_LEN = 128
BATCH_SIZE = 16
EPOCHS = 3
LEARNING_RATE = 2e-5
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
INCLUDE_DISGUST = False

emotions = ['anger', 'fear', 'joy', 'sadness', 'surprise']
if INCLUDE_DISGUST:
    emotions.append('disgust')

class EmotionDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_len):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = str(self.texts[idx])
        label = self.labels[idx]
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        return {
            'text': text,
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.FloatTensor(label)
        }

class EmotionClassifier(nn.Module):
    def __init__(self, n_classes):
        super(EmotionClassifier, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-multilingual-cased')
        self.drop = nn.Dropout(p=0.3)
        self.out = nn.Linear(self.bert.config.hidden_size, n_classes)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs.pooler_output
        output = self.drop(pooled_output)
        return self.out(output)

class FocalLoss(nn.Module):
    def __init__(self, alpha=1, gamma=2):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.bce = nn.BCEWithLogitsLoss(reduction='none')

    def forward(self, inputs, targets):
        BCE_loss = self.bce(inputs, targets)
        pt = torch.exp(-BCE_loss)
        focal_loss = self.alpha * ((1 - pt) ** self.gamma) * BCE_loss
        return focal_loss.mean()

def train_epoch(model, data_loader, loss_fn, optimizer, device):
    model = model.train()
    losses = []
    for batch in data_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        loss = loss_fn(outputs, labels)
        losses.append(loss.item())
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    return np.mean(losses)

def eval_model(model, data_loader, device):
    model = model.eval()
    predictions = []
    real_labels = []
    with torch.no_grad():
        for batch in data_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            preds = torch.sigmoid(outputs)
            predictions.extend(preds.cpu().numpy())
            real_labels.extend(labels.cpu().numpy())
    return np.array(predictions), np.array(real_labels)

def get_weighted_sampler(labels):
    label_counts = np.sum(labels, axis=0)
    class_weights = 1. / (label_counts + 1e-6)
    sample_weights = []
    for label in labels:
        weight = np.sum(class_weights[label == 1])
        sample_weights.append(weight)
    return WeightedRandomSampler(sample_weights, len(sample_weights), replacement=True)

def get_optimal_thresholds(preds, targets):
    thresholds = []
    for i in range(preds.shape[1]):
        precision, recall, thresh = precision_recall_curve(targets[:, i], preds[:, i])
        f1 = 2 * precision * recall / (precision + recall + 1e-8)
        best_thresh = thresh[np.argmax(f1)] if len(thresh) > 0 else 0.5
        thresholds.append(best_thresh)
    return np.array(thresholds)

def load_data(file_path, emotions):
    df = pd.read_csv(file_path)
    texts = df['text'].tolist()
    labels = df[emotions].values
    return texts, labels

def main():
    tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')

    train_texts, train_labels = load_data('train_eng.csv', emotions)
    val_texts, val_labels = load_data('dev_eng.csv', emotions)

    train_dataset = EmotionDataset(train_texts, train_labels, tokenizer, MAX_LEN)
    val_dataset = EmotionDataset(val_texts, val_labels, tokenizer, MAX_LEN)

    sampler = get_weighted_sampler(train_labels)
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)

    model = EmotionClassifier(n_classes=len(emotions)).to(DEVICE)

    loss_fn = FocalLoss(gamma=2)
    optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE)

    for epoch in range(EPOCHS):
        train_loss = train_epoch(model, train_loader, loss_fn, optimizer, DEVICE)
        print(f"Epoch {epoch+1}/{EPOCHS}, Loss: {train_loss:.4f}")

    preds, targets = eval_model(model, val_loader, DEVICE)

    thresholds = get_optimal_thresholds(preds, targets)
    bin_preds = (preds >= thresholds).astype(int)

    print("F1-macro:", f1_score(targets, bin_preds, average='macro'))
    print("Precision-macro:", precision_score(targets, bin_preds, average='macro'))
    print("Recall-macro:", recall_score(targets, bin_preds, average='macro'))

    print("\nDetailed Classification Report:")
    print(classification_report(targets, bin_preds, target_names=emotions, zero_division=0))

    torch.save(model.state_dict(), "emotion_model.pt")

if __name__ == "__main__":
    main()


Epoch 1/3, Loss: 0.1468
Epoch 2/3, Loss: 0.1029
Epoch 3/3, Loss: 0.0751
F1-macro: 0.6908847992665802
Precision-macro: 0.7016922406746644
Recall-macro: 0.7135176651305684

Detailed Classification Report:
              precision    recall  f1-score   support

       anger       0.80      0.50      0.62        16
        fear       0.65      0.95      0.77        63
         joy       0.71      0.65      0.68        31
     sadness       0.64      0.86      0.73        35
    surprise       0.70      0.61      0.66        31

   micro avg       0.67      0.78      0.72       176
   macro avg       0.70      0.71      0.69       176
weighted avg       0.68      0.78      0.71       176
 samples avg       0.66      0.70      0.66       176



In [16]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from transformers import BertTokenizer, BertModel
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import f1_score, precision_score, recall_score, classification_report, precision_recall_curve

MAX_LEN = 128
BATCH_SIZE = 16
EPOCHS = 10
LEARNING_RATE = 2e-5
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
INCLUDE_DISGUST = False

emotions = ['anger', 'fear', 'joy', 'sadness', 'surprise']
if INCLUDE_DISGUST:
    emotions.append('disgust')

class EmotionDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_len):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = str(self.texts[idx])
        label = self.labels[idx]
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        return {
            'text': text,
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.FloatTensor(label)
        }

class EmotionClassifier(nn.Module):
    def __init__(self, n_classes):
        super(EmotionClassifier, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-multilingual-cased')
        self.drop = nn.Dropout(p=0.3)
        self.out = nn.Linear(self.bert.config.hidden_size, n_classes)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs.pooler_output
        output = self.drop(pooled_output)
        return self.out(output)

class FocalLoss(nn.Module):
    def __init__(self, alpha=1, gamma=2):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.bce = nn.BCEWithLogitsLoss(reduction='none')

    def forward(self, inputs, targets):
        BCE_loss = self.bce(inputs, targets)
        pt = torch.exp(-BCE_loss)
        focal_loss = self.alpha * ((1 - pt) ** self.gamma) * BCE_loss
        return focal_loss.mean()

def train_epoch(model, data_loader, loss_fn, optimizer, device):
    model = model.train()
    losses = []
    for batch in data_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        loss = loss_fn(outputs, labels)
        losses.append(loss.item())
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    return np.mean(losses)

def eval_model(model, data_loader, device):
    model = model.eval()
    predictions = []
    real_labels = []
    with torch.no_grad():
        for batch in data_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            preds = torch.sigmoid(outputs)
            predictions.extend(preds.cpu().numpy())
            real_labels.extend(labels.cpu().numpy())
    return np.array(predictions), np.array(real_labels)

def get_weighted_sampler(labels):
    label_counts = np.sum(labels, axis=0)
    class_weights = 1. / (label_counts + 1e-6)
    sample_weights = []
    for label in labels:
        weight = np.sum(class_weights[label == 1])
        sample_weights.append(weight)
    return WeightedRandomSampler(sample_weights, len(sample_weights), replacement=True)

def get_optimal_thresholds(preds, targets):
    thresholds = []
    for i in range(preds.shape[1]):
        precision, recall, thresh = precision_recall_curve(targets[:, i], preds[:, i])
        f1 = 2 * precision * recall / (precision + recall + 1e-8)
        best_thresh = thresh[np.argmax(f1)] if len(thresh) > 0 else 0.5
        thresholds.append(best_thresh)
    return np.array(thresholds)

def load_data(file_path, emotions):
    df = pd.read_csv(file_path)
    texts = df['text'].tolist()
    labels = df[emotions].values
    return texts, labels

def main():
    tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')

    train_texts, train_labels = load_data('train_eng.csv', emotions)
    val_texts, val_labels = load_data('dev_eng.csv', emotions)

    train_dataset = EmotionDataset(train_texts, train_labels, tokenizer, MAX_LEN)
    val_dataset = EmotionDataset(val_texts, val_labels, tokenizer, MAX_LEN)

    sampler = get_weighted_sampler(train_labels)
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)

    model = EmotionClassifier(n_classes=len(emotions)).to(DEVICE)

    loss_fn = FocalLoss(gamma=2)
    optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE)

    for epoch in range(EPOCHS):
        train_loss = train_epoch(model, train_loader, loss_fn, optimizer, DEVICE)
        print(f"Epoch {epoch+1}/{EPOCHS}, Loss: {train_loss:.4f}")

    preds, targets = eval_model(model, val_loader, DEVICE)

    thresholds = get_optimal_thresholds(preds, targets)
    bin_preds = (preds >= thresholds).astype(int)

    print("F1-macro:", f1_score(targets, bin_preds, average='macro'))
    print("Precision-macro:", precision_score(targets, bin_preds, average='macro'))
    print("Recall-macro:", recall_score(targets, bin_preds, average='macro'))

    print("\nDetailed Classification Report:")
    print(classification_report(targets, bin_preds, target_names=emotions, zero_division=0))

    torch.save(model.state_dict(), "emotion_model.pt")

if __name__ == "__main__":
    main()


Epoch 1/10, Loss: 0.1505
Epoch 2/10, Loss: 0.1094
Epoch 3/10, Loss: 0.0752
Epoch 4/10, Loss: 0.0548
Epoch 5/10, Loss: 0.0405
Epoch 6/10, Loss: 0.0278
Epoch 7/10, Loss: 0.0218
Epoch 8/10, Loss: 0.0199
Epoch 9/10, Loss: 0.0148
Epoch 10/10, Loss: 0.0108
F1-macro: 0.6766185007280898
Precision-macro: 0.6518483576838736
Recall-macro: 0.7288376856118791

Detailed Classification Report:
              precision    recall  f1-score   support

       anger       0.73      0.50      0.59        16
        fear       0.67      0.89      0.77        63
         joy       0.52      0.77      0.62        31
     sadness       0.77      0.77      0.77        35
    surprise       0.56      0.71      0.63        31

   micro avg       0.64      0.78      0.70       176
   macro avg       0.65      0.73      0.68       176
weighted avg       0.65      0.78      0.70       176
 samples avg       0.64      0.72      0.65       176



## additions

- Early stopping based on macro F1
- Learning rate scheduling
- Dynamic thresholding
- Per-class loss monitoring
- Hard negative mining

## Conceptual Explanation: Hard Negative Mining & Dynamic Thresholding

### Hard Negative Mining (via Focal Loss)

- Hard negative mining is a technique used to focus learning on difficult examples specifically, the ones the model is currently misclassifying or struggling with.

In this implementation, we achieve hard negative mining using **Focal Loss**. Here's how it works:

- Standard loss functions (like Binary Cross Entropy) treat all samples equally.
- Focal Loss, however, **down-weights well-classified examples** and focuses more on hard, misclassified ones.
- This is done using the term:

  $$
  \text{Focal Loss} = \alpha \cdot (1 - p_t)^\gamma \cdot \text{BCE}
  $$

  where:
  - $ p_t $ is the model’s predicted probability for the true class,
  - $ \gamma $ is a focusing parameter (typically ≥ 2),
  - $ \alpha $ is a weighting factor.

*** In Summary: easy samples are ignored, and the model spends more effort on learning from difficult \(hard negative\) examples ***.



### Dynamic Thresholding

- In multi-label classification, each label (emotion, in this case) is predicted independently using a sigmoid output. The common practice is to use a fixed threshold (like 0.5) to decide whether a label is active.

- However, that approach is not highly recommended, especially when class imbalance or varying decision boundaries exist.

To address this, Dynamic Thresholding does the following:

1. For each class, it computes the Precision-Recall curve using validation predictions.
2. It then calculates the F1 score at different thresholds.
3. Finally, it selects the optimal threshold that gives the highest F1 for each class.

This leads to:
- Better precision-recall balance per class,
- Improved macro-F1 score, which is the main metric used for early stopping and learning rate scheduling.

*** In Summary: instead of using a one-size-fits-all threshold, we adaptively choose the best one per class, based on validation performance. ***


In [22]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from transformers import BertTokenizer, BertModel
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import f1_score, precision_score, recall_score, classification_report, precision_recall_curve

MAX_LEN = 128
BATCH_SIZE = 16
EPOCHS = 10
LEARNING_RATE = 2e-5
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
INCLUDE_DISGUST = False
EARLY_STOPPING_PATIENCE = 3

emotions = ['anger', 'fear', 'joy', 'sadness', 'surprise']
if INCLUDE_DISGUST:
    emotions.append('disgust')

class EmotionDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_len):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = str(self.texts[idx])
        label = self.labels[idx]
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        return {
            'text': text,
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.FloatTensor(label)
        }

class EmotionClassifier(nn.Module):
    def __init__(self, n_classes):
        super(EmotionClassifier, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-multilingual-cased')
        self.drop = nn.Dropout(p=0.3)
        self.out = nn.Linear(self.bert.config.hidden_size, n_classes)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs.pooler_output
        output = self.drop(pooled_output)
        return self.out(output)

class FocalLoss(nn.Module):
    def __init__(self, alpha=1, gamma=2):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.bce = nn.BCEWithLogitsLoss(reduction='none')

    def forward(self, inputs, targets):
        BCE_loss = self.bce(inputs, targets)
        pt = torch.exp(-BCE_loss)
        focal_loss = self.alpha * ((1 - pt) ** self.gamma) * BCE_loss
        return focal_loss.mean(dim=0), focal_loss.mean()

def train_epoch(model, data_loader, loss_fn, optimizer, device):
    model = model.train()
    total_loss = 0
    per_class_loss = torch.zeros(len(emotions)).to(device)
    for batch in data_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        loss_per_class, loss = loss_fn(outputs, labels)
        per_class_loss += loss_per_class.detach()
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        total_loss += loss.item()
    return total_loss / len(data_loader), (per_class_loss / len(data_loader)).cpu().numpy()

def eval_model(model, data_loader, device):
    model = model.eval()
    predictions = []
    real_labels = []
    with torch.no_grad():
        for batch in data_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            preds = torch.sigmoid(outputs)
            predictions.extend(preds.cpu().numpy())
            real_labels.extend(labels.cpu().numpy())
    return np.array(predictions), np.array(real_labels)

def get_weighted_sampler(labels):
    label_counts = np.sum(labels, axis=0)
    class_weights = 1. / (label_counts + 1e-6)
    sample_weights = []
    for label in labels:
        weight = np.sum(class_weights[label == 1])
        sample_weights.append(weight)
    return WeightedRandomSampler(sample_weights, len(sample_weights), replacement=True)

def get_optimal_thresholds(preds, targets):
    thresholds = []
    for i in range(preds.shape[1]):
        precision, recall, thresh = precision_recall_curve(targets[:, i], preds[:, i])
        f1 = 2 * precision * recall / (precision + recall + 1e-8)
        best_thresh = thresh[np.argmax(f1)] if len(thresh) > 0 else 0.5
        thresholds.append(best_thresh)
    return np.array(thresholds)

def load_data(file_path, emotions):
    df = pd.read_csv(file_path)
    texts = df['text'].tolist()
    labels = df[emotions].values
    return texts, labels

def main():
    tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')
    train_texts, train_labels = load_data('train_eng.csv', emotions)
    val_texts, val_labels = load_data('dev_eng.csv', emotions)

    train_dataset = EmotionDataset(train_texts, train_labels, tokenizer, MAX_LEN)
    val_dataset = EmotionDataset(val_texts, val_labels, tokenizer, MAX_LEN)

    sampler = get_weighted_sampler(train_labels)
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)

    model = EmotionClassifier(n_classes=len(emotions)).to(DEVICE)
    loss_fn = FocalLoss(gamma=2)
    optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=1, verbose=True)

    best_macro_f1 = 0
    patience = 0

    for epoch in range(EPOCHS):
        train_loss, class_losses = train_epoch(model, train_loader, loss_fn, optimizer, DEVICE)
        print(f"Epoch {epoch+1}/{EPOCHS}, Loss: {train_loss:.4f}, Per-class Loss: {class_losses}")

        preds, targets = eval_model(model, val_loader, DEVICE)
        thresholds = get_optimal_thresholds(preds, targets)
        bin_preds = (preds >= thresholds).astype(int)

        macro_f1 = f1_score(targets, bin_preds, average='macro')
        scheduler.step(macro_f1)

        print(f"F1-macro: {macro_f1}")
        print("Precision-macro:", precision_score(targets, bin_preds, average='macro'))
        print("Recall-macro:", recall_score(targets, bin_preds, average='macro'))
        print("Detailed Classification Report:")
        print(classification_report(targets, bin_preds, target_names=emotions, zero_division=0))

        if macro_f1 > best_macro_f1:
            best_macro_f1 = macro_f1
            patience = 0
            torch.save(model.state_dict(), "best_emotion_model.pt")
        else:
            patience += 1
            if patience >= EARLY_STOPPING_PATIENCE:
                print("Early stopping triggered.")
                break

if __name__ == "__main__":
    main()


Epoch 1/10, Loss: 0.1481, Per-class Loss: [0.1430554  0.1489459  0.13340986 0.16556878 0.14931847]
F1-macro: 0.6473179773179774
Precision-macro: 0.6386451425188872
Recall-macro: 0.6809114183307731
Detailed Classification Report:
              precision    recall  f1-score   support

       anger       0.73      0.50      0.59        16
        fear       0.66      0.89      0.76        63
         joy       0.51      0.65      0.57        31
     sadness       0.73      0.63      0.68        35
    surprise       0.56      0.74      0.64        31

   micro avg       0.63      0.73      0.68       176
   macro avg       0.64      0.68      0.65       176
weighted avg       0.64      0.73      0.67       176
 samples avg       0.63      0.68      0.63       176

Epoch 2/10, Loss: 0.1021, Per-class Loss: [0.08529981 0.10950178 0.0896517  0.12064811 0.10524616]
F1-macro: 0.6877745103097216
Precision-macro: 0.6510110197121767
Recall-macro: 0.7560432667690733
Detailed Classification Report:

# More additions

In [23]:
# Switched to BCEWithLogitsLoss with class-wise alpha weighting. So FocalLoss now supports class-wise alpha based on inverse class frequency.
# Replaced ReduceLROnPlateau with CosineAnnealingLR.
# Gradient clipping.
# Thresholds are printed.
# DataLoader shuffling when the sampler is not being used.

import pandas as pd
import numpy as np
import torch
import random
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from transformers import BertTokenizer, BertModel
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import f1_score, precision_score, recall_score, classification_report, precision_recall_curve

def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
set_seed()

MAX_LEN = 128
BATCH_SIZE = 16
EPOCHS = 10
LEARNING_RATE = 2e-5
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
INCLUDE_DISGUST = False
EARLY_STOPPING_PATIENCE = 3
GRAD_CLIP = 1.0

emotions = ['anger', 'fear', 'joy', 'sadness', 'surprise']
if INCLUDE_DISGUST:
    emotions.append('disgust')

class EmotionDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_len):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        return {
            'input_ids': encoding['input_ids'].squeeze(0),
            'attention_mask': encoding['attention_mask'].squeeze(0),
            'labels': torch.FloatTensor(label)
        }

class EmotionClassifier(nn.Module):
    def __init__(self, n_classes):
        super(EmotionClassifier, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-multilingual-cased')
        self.drop = nn.Dropout(p=0.3)
        self.out = nn.Linear(self.bert.config.hidden_size, n_classes)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs.pooler_output
        return self.out(self.drop(pooled_output))

class FocalLoss(nn.Module):
    def __init__(self, alpha=None, gamma=2):
        super(FocalLoss, self).__init__()
        self.alpha = torch.tensor(alpha).float() if alpha is not None else None
        self.gamma = gamma
        self.bce = nn.BCEWithLogitsLoss(reduction='none')

    def forward(self, inputs, targets):
        BCE_loss = self.bce(inputs, targets)
        pt = torch.exp(-BCE_loss)
        focal_loss = ((1 - pt) ** self.gamma) * BCE_loss
        if self.alpha is not None:
            focal_loss = self.alpha.to(inputs.device) * focal_loss
        return focal_loss.mean(dim=0), focal_loss.mean()

def train_epoch(model, data_loader, loss_fn, optimizer, device):
    model.train()
    total_loss = 0
    per_class_loss = torch.zeros(len(emotions)).to(device)
    for batch in data_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        loss_per_class, loss = loss_fn(outputs, labels)
        loss.backward()

        nn.utils.clip_grad_norm_(model.parameters(), GRAD_CLIP)

        optimizer.step()
        optimizer.zero_grad()
        per_class_loss += loss_per_class.detach()
        total_loss += loss.item()
    return total_loss / len(data_loader), (per_class_loss / len(data_loader)).cpu().numpy()

def eval_model(model, data_loader, device):
    model.eval()
    predictions = []
    real_labels = []
    with torch.no_grad():
        for batch in data_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            preds = torch.sigmoid(outputs)
            predictions.extend(preds.cpu().numpy())
            real_labels.extend(labels.cpu().numpy())
    return np.array(predictions), np.array(real_labels)

def get_weighted_sampler(labels):
    label_counts = np.sum(labels, axis=0)
    class_weights = 1. / (label_counts + 1e-6)
    sample_weights = np.dot(labels, class_weights)
    return WeightedRandomSampler(sample_weights, len(sample_weights), replacement=True), class_weights

def get_optimal_thresholds(preds, targets):
    thresholds = []
    for i in range(preds.shape[1]):
        precision, recall, thresh = precision_recall_curve(targets[:, i], preds[:, i])
        f1 = 2 * precision * recall / (precision + recall + 1e-8)
        best_thresh = thresh[np.argmax(f1)] if len(thresh) > 0 else 0.5
        thresholds.append(best_thresh)
    return np.array(thresholds)

def load_data(file_path, emotions):
    df = pd.read_csv(file_path)
    texts = df['text'].tolist()
    labels = df[emotions].values
    return texts, labels

def main():
    tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')
    train_texts, train_labels = load_data('train_eng.csv', emotions)
    val_texts, val_labels = load_data('dev_eng.csv', emotions)

    sampler, alpha = get_weighted_sampler(train_labels)
    train_dataset = EmotionDataset(train_texts, train_labels, tokenizer, MAX_LEN)
    val_dataset = EmotionDataset(val_texts, val_labels, tokenizer, MAX_LEN)

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler, num_workers=2, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2, pin_memory=True)

    model = EmotionClassifier(n_classes=len(emotions)).to(DEVICE)
    loss_fn = FocalLoss(alpha=alpha, gamma=2)
    optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

    best_macro_f1 = 0
    patience = 0

    for epoch in range(EPOCHS):
        train_loss, class_losses = train_epoch(model, train_loader, loss_fn, optimizer, DEVICE)
        print(f"Epoch {epoch+1}/{EPOCHS}, Loss: {train_loss:.4f}, Per-class Loss: {class_losses}")

        preds, targets = eval_model(model, val_loader, DEVICE)
        thresholds = get_optimal_thresholds(preds, targets)
        print("Thresholds:", thresholds)
        bin_preds = (preds >= thresholds).astype(int)

        macro_f1 = f1_score(targets, bin_preds, average='macro')
        scheduler.step()

        print(f"F1-macro: {macro_f1}")
        print("Precision-macro:", precision_score(targets, bin_preds, average='macro'))
        print("Recall-macro:", recall_score(targets, bin_preds, average='macro'))
        print("Classification Report:")
        print(classification_report(targets, bin_preds, target_names=emotions, zero_division=0))

        if macro_f1 > best_macro_f1:
            best_macro_f1 = macro_f1
            patience = 0
            torch.save(model.state_dict(), "best_emotion_model.pt")
        else:
            patience += 1
            if patience >= EARLY_STOPPING_PATIENCE:
                print("Early stopping triggered.")
                break

if __name__ == "__main__":
    main()


Epoch 1/10, Loss: 0.0002, Per-class Loss: [4.2069220e-04 9.8013625e-05 2.0450109e-04 1.8887612e-04 1.9408338e-04]
Thresholds: [0.44262043 0.4875288  0.32372165 0.53325963 0.5517741 ]
F1-macro: 0.6533623643887403
Precision-macro: 0.609249788152051
Recall-macro: 0.7654377880184332
Classification Report:
              precision    recall  f1-score   support

       anger       0.67      0.50      0.57        16
        fear       0.57      1.00      0.73        63
         joy       0.43      0.94      0.59        31
     sadness       0.81      0.71      0.76        35
    surprise       0.57      0.68      0.62        31

   micro avg       0.57      0.83      0.67       176
   macro avg       0.61      0.77      0.65       176
weighted avg       0.60      0.83      0.68       176
 samples avg       0.58      0.76      0.63       176

Epoch 2/10, Loss: 0.0001, Per-class Loss: [2.0852283e-04 8.3534542e-05 1.4172826e-04 1.4558583e-04 1.4082473e-04]
Thresholds: [0.46822497 0.5083664  0.483

# For hindi and english on multi-lingual

In [27]:
# ==========================
# XLM-R BASED EMOTION CLASSIFIER
# ==========================

import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from transformers import XLMRobertaTokenizer, XLMRobertaModel
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import f1_score, precision_score, recall_score, classification_report, precision_recall_curve

MAX_LEN = 128
BATCH_SIZE = 16
EPOCHS = 10
LEARNING_RATE = 2e-5
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
EARLY_STOPPING_PATIENCE = 3

# Define emotions
emotions_eng = ['anger', 'fear', 'joy', 'sadness', 'surprise']
emotions_hin = ['anger', 'fear', 'joy', 'sadness', 'surprise', 'disgust']  # Hindi includes 'disgust'

# Whether or not to include disgust
INCLUDE_DISGUST = True
emotions = emotions_eng + ['disgust'] if INCLUDE_DISGUST else emotions_eng

# Dataset class
class EmotionDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_len):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = str(self.texts[idx])
        label = self.labels[idx]
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        return {
            'text': text,
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.FloatTensor(label)
        }

# XLM-R model class
class EmotionClassifier(nn.Module):
    def __init__(self, n_classes):
        super(EmotionClassifier, self).__init__()
        self.xlm_roberta = XLMRobertaModel.from_pretrained('xlm-roberta-base')
        self.drop = nn.Dropout(p=0.3)
        self.out = nn.Linear(self.xlm_roberta.config.hidden_size, n_classes)

    def forward(self, input_ids, attention_mask):
        outputs = self.xlm_roberta(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs.pooler_output
        output = self.drop(pooled_output)
        return self.out(output)

# Focal Loss
class FocalLoss(nn.Module):
    def __init__(self, alpha=1, gamma=2):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.bce = nn.BCEWithLogitsLoss(reduction='none')

    def forward(self, inputs, targets):
        BCE_loss = self.bce(inputs, targets)
        pt = torch.exp(-BCE_loss)
        focal_loss = self.alpha * ((1 - pt) ** self.gamma) * BCE_loss
        return focal_loss.mean(dim=0), focal_loss.mean()

# Training epoch
def train_epoch(model, data_loader, loss_fn, optimizer, device):
    model = model.train()
    total_loss = 0
    per_class_loss = torch.zeros(len(emotions)).to(device)
    for batch in data_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        loss_per_class, loss = loss_fn(outputs, labels)
        per_class_loss += loss_per_class.detach()
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        total_loss += loss.item()
    return total_loss / len(data_loader), (per_class_loss / len(data_loader)).cpu().numpy()

# Evaluation
def eval_model(model, data_loader, device):
    model = model.eval()
    predictions = []
    real_labels = []
    with torch.no_grad():
        for batch in data_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            preds = torch.sigmoid(outputs)
            predictions.extend(preds.cpu().numpy())
            real_labels.extend(labels.cpu().numpy())
    return np.array(predictions), np.array(real_labels)

# Getting weighted sampler
def get_weighted_sampler(labels):
    label_counts = np.sum(labels, axis=0)
    class_weights = 1. / (label_counts + 1e-6)
    sample_weights = []
    for label in labels:
        weight = np.sum(class_weights[label == 1])
        sample_weights.append(weight)
    return WeightedRandomSampler(sample_weights, len(sample_weights), replacement=True)

# Get optimal thresholds
def get_optimal_thresholds(preds, targets):
    thresholds = []
    for i in range(preds.shape[1]):
        precision, recall, thresh = precision_recall_curve(targets[:, i], preds[:, i])
        f1 = 2 * precision * recall / (precision + recall + 1e-8)
        best_thresh = thresh[np.argmax(f1)] if len(thresh) > 0 else 0.5
        thresholds.append(best_thresh)
    return np.array(thresholds)

def load_data(file_path, emotions, language):
    df = pd.read_csv(file_path)
    texts = df['text'].tolist()
    # Check if 'disgust' is in the dataframe and add if missing
    if 'disgust' not in df.columns:
        df['disgust'] = 0
    labels = df[emotions].values
    return texts, labels

# Main function
def main():
    tokenizer = XLMRobertaTokenizer.from_pretrained('xlm-roberta-base')

    # Define emotions list for both languages
    emotions_eng = ['anger', 'fear', 'joy', 'sadness', 'surprise', 'disgust']  # Make sure "disgust" is here for consistency
    emotions_hin = ['anger', 'fear', 'joy', 'sadness', 'surprise', 'disgust']

    # Load both English and Hindi data
    train_texts_eng, train_labels_eng = load_data('train_eng.csv', emotions_eng, 'eng')
    train_texts_hin, train_labels_hin = load_data('train_hin.csv', emotions_hin, 'hin')
    val_texts_eng, val_labels_eng = load_data('dev_eng.csv', emotions_eng, 'eng')
    val_texts_hin, val_labels_hin = load_data('dev_hin.csv', emotions_hin, 'hin')

    # Ensure both train and validation labels have the same number of emotion columns
    # Add zeros for the 'disgust' column in the English data if it's missing
    if train_labels_eng.shape[1] < len(emotions_eng):
        train_labels_eng = np.hstack([train_labels_eng, np.zeros((train_labels_eng.shape[0], 1))])
    if val_labels_eng.shape[1] < len(emotions_eng):
        val_labels_eng = np.hstack([val_labels_eng, np.zeros((val_labels_eng.shape[0], 1))])

    # Concatenate English and Hindi data
    train_texts = train_texts_eng + train_texts_hin
    train_labels = np.concatenate([train_labels_eng, train_labels_hin], axis=0)
    val_texts = val_texts_eng + val_texts_hin
    val_labels = np.concatenate([val_labels_eng, val_labels_hin], axis=0)

    train_dataset = EmotionDataset(train_texts, train_labels, tokenizer, MAX_LEN)
    val_dataset = EmotionDataset(val_texts, val_labels, tokenizer, MAX_LEN)

    sampler = get_weighted_sampler(train_labels)
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)

    model = EmotionClassifier(n_classes=len(emotions_eng)).to(DEVICE)
    loss_fn = FocalLoss(gamma=2)
    optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=1, verbose=True)

    best_macro_f1 = 0
    patience = 0

    for epoch in range(EPOCHS):
        train_loss, class_losses = train_epoch(model, train_loader, loss_fn, optimizer, DEVICE)
        print(f"Epoch {epoch+1}/{EPOCHS}, Loss: {train_loss:.4f}, Per-class Loss: {class_losses}")

        preds, targets = eval_model(model, val_loader, DEVICE)
        thresholds = get_optimal_thresholds(preds, targets)
        bin_preds = (preds >= thresholds).astype(int)

        macro_f1 = f1_score(targets, bin_preds, average='macro')
        scheduler.step(macro_f1)

        print(f"F1-macro: {macro_f1}")
        print("Precision-macro:", precision_score(targets, bin_preds, average='macro'))
        print("Recall-macro:", recall_score(targets, bin_preds, average='macro'))
        print("Detailed Classification Report:")
        print(classification_report(targets, bin_preds, target_names=emotions_eng, zero_division=0))

        if macro_f1 > best_macro_f1:
            best_macro_f1 = macro_f1
            patience = 0
            torch.save(model.state_dict(), "best_emotion_model.pt")
        else:
            patience += 1
            if patience >= EARLY_STOPPING_PATIENCE:
                print("Early stopping triggered.")
                break

if __name__ == "__main__":
    main()


Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

Epoch 1/10, Loss: 0.1177, Per-class Loss: [0.14062075 0.11637207 0.11197977 0.1459718  0.11996376 0.07106446]
F1-macro: 0.6947841833785517
Precision-macro: 0.6711391197726488
Recall-macro: 0.7278013653013654
Detailed Classification Report:
              precision    recall  f1-score   support

       anger       0.69      0.62      0.66        32
        fear       0.72      0.82      0.76        77
         joy       0.49      0.64      0.56        42
     sadness       0.60      0.73      0.66        52
    surprise       0.64      0.75      0.69        40
     disgust       0.89      0.80      0.84        10

   micro avg       0.64      0.74      0.68       253
   macro avg       0.67      0.73      0.69       253
weighted avg       0.65      0.74      0.69       253
 samples avg       0.56      0.60      0.56       253

Epoch 2/10, Loss: 0.0709, Per-class Loss: [0.08657888 0.08257655 0.05786232 0.10094671 0.07465521 0.02289892]
F1-macro: 0.7055262353100412
Precision-macro: 0.70259

# For only english advanced model

In [28]:
# ==========================
# XLM-R BASED EMOTION CLASSIFIER
# ==========================

import pandas as pd
import numpy as np
import torch
import random
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from transformers import XLMRobertaTokenizer, XLMRobertaModel
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import f1_score, precision_score, recall_score, classification_report, precision_recall_curve

# Set random seeds
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
set_seed()

# Constants
MAX_LEN = 128
BATCH_SIZE = 16
EPOCHS = 10
LEARNING_RATE = 2e-5
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
INCLUDE_DISGUST = False
EARLY_STOPPING_PATIENCE = 3
GRAD_CLIP = 1.0
MODEL_NAME = 'xlm-roberta-base'

emotions = ['anger', 'fear', 'joy', 'sadness', 'surprise']
if INCLUDE_DISGUST:
    emotions.append('disgust')

# Dataset
class EmotionDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_len):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        return {
            'input_ids': encoding['input_ids'].squeeze(0),
            'attention_mask': encoding['attention_mask'].squeeze(0),
            'labels': torch.FloatTensor(label)
        }

# Model
class EmotionClassifier(nn.Module):
    def __init__(self, n_classes):
        super(EmotionClassifier, self).__init__()
        self.transformer = XLMRobertaModel.from_pretrained(MODEL_NAME)
        self.drop = nn.Dropout(p=0.3)
        self.out = nn.Linear(self.transformer.config.hidden_size, n_classes)

    def forward(self, input_ids, attention_mask):
        outputs = self.transformer(input_ids=input_ids, attention_mask=attention_mask)
        cls_output = outputs.last_hidden_state[:, 0, :]  # [CLS] token output
        return self.out(self.drop(cls_output))

# Focal Loss
class FocalLoss(nn.Module):
    def __init__(self, alpha=None, gamma=2):
        super(FocalLoss, self).__init__()
        self.alpha = torch.tensor(alpha).float() if alpha is not None else None
        self.gamma = gamma
        self.bce = nn.BCEWithLogitsLoss(reduction='none')

    def forward(self, inputs, targets):
        BCE_loss = self.bce(inputs, targets)
        pt = torch.exp(-BCE_loss)
        focal_loss = ((1 - pt) ** self.gamma) * BCE_loss
        if self.alpha is not None:
            focal_loss = self.alpha.to(inputs.device) * focal_loss
        return focal_loss.mean(dim=0), focal_loss.mean()

# Train function
def train_epoch(model, data_loader, loss_fn, optimizer, device):
    model.train()
    total_loss = 0
    per_class_loss = torch.zeros(len(emotions)).to(device)
    for batch in data_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        loss_per_class, loss = loss_fn(outputs, labels)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), GRAD_CLIP)

        optimizer.step()
        optimizer.zero_grad()
        per_class_loss += loss_per_class.detach()
        total_loss += loss.item()
    return total_loss / len(data_loader), (per_class_loss / len(data_loader)).cpu().numpy()

# Eval function
def eval_model(model, data_loader, device):
    model.eval()
    predictions = []
    real_labels = []
    with torch.no_grad():
        for batch in data_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            preds = torch.sigmoid(outputs)
            predictions.extend(preds.cpu().numpy())
            real_labels.extend(labels.cpu().numpy())
    return np.array(predictions), np.array(real_labels)

# Sampler
def get_weighted_sampler(labels):
    label_counts = np.sum(labels, axis=0)
    class_weights = 1. / (label_counts + 1e-6)
    sample_weights = np.dot(labels, class_weights)
    return WeightedRandomSampler(sample_weights, len(sample_weights), replacement=True), class_weights

# Threshold optimizer
def get_optimal_thresholds(preds, targets):
    thresholds = []
    for i in range(preds.shape[1]):
        precision, recall, thresh = precision_recall_curve(targets[:, i], preds[:, i])
        f1 = 2 * precision * recall / (precision + recall + 1e-8)
        best_thresh = thresh[np.argmax(f1)] if len(thresh) > 0 else 0.5
        thresholds.append(best_thresh)
    return np.array(thresholds)

# Data loader
def load_data(file_path, emotions):
    df = pd.read_csv(file_path)
    texts = df['text'].tolist()
    labels = df[emotions].values
    return texts, labels

# Main loop
def main():
    tokenizer = XLMRobertaTokenizer.from_pretrained(MODEL_NAME)
    train_texts, train_labels = load_data('train_eng.csv', emotions)
    val_texts, val_labels = load_data('dev_eng.csv', emotions)

    sampler, alpha = get_weighted_sampler(train_labels)
    train_dataset = EmotionDataset(train_texts, train_labels, tokenizer, MAX_LEN)
    val_dataset = EmotionDataset(val_texts, val_labels, tokenizer, MAX_LEN)

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler, num_workers=2, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2, pin_memory=True)

    model = EmotionClassifier(n_classes=len(emotions)).to(DEVICE)
    loss_fn = FocalLoss(alpha=alpha, gamma=2)
    optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

    best_macro_f1 = 0
    patience = 0

    for epoch in range(EPOCHS):
        train_loss, class_losses = train_epoch(model, train_loader, loss_fn, optimizer, DEVICE)
        print(f"Epoch {epoch+1}/{EPOCHS}, Loss: {train_loss:.4f}, Per-class Loss: {class_losses}")

        preds, targets = eval_model(model, val_loader, DEVICE)
        thresholds = get_optimal_thresholds(preds, targets)
        print("Thresholds:", thresholds)
        bin_preds = (preds >= thresholds).astype(int)

        macro_f1 = f1_score(targets, bin_preds, average='macro')
        scheduler.step()

        print(f"F1-macro: {macro_f1}")
        print("Precision-macro:", precision_score(targets, bin_preds, average='macro'))
        print("Recall-macro:", recall_score(targets, bin_preds, average='macro'))
        print("Classification Report:")
        print(classification_report(targets, bin_preds, target_names=emotions, zero_division=0))

        if macro_f1 > best_macro_f1:
            best_macro_f1 = macro_f1
            patience = 0
            torch.save(model.state_dict(), "best_emotion_model.pt")
        else:
            patience += 1
            if patience >= EARLY_STOPPING_PATIENCE:
                print("Early stopping triggered.")
                break

if __name__ == "__main__":
    main()


Epoch 1/10, Loss: 0.0003, Per-class Loss: [0.00046677 0.00010267 0.0002369  0.00022577 0.00022014]
Thresholds: [0.52154803 0.48535854 0.3900765  0.5373386  0.48674816]
F1-macro: 0.633762202248146
Precision-macro: 0.6099200318934384
Recall-macro: 0.7332821300563237
Classification Report:
              precision    recall  f1-score   support

       anger       0.89      0.50      0.64        16
        fear       0.59      0.97      0.73        63
         joy       0.62      0.65      0.63        31
     sadness       0.54      0.71      0.62        35
    surprise       0.40      0.84      0.54        31

   micro avg       0.55      0.80      0.65       176
   macro avg       0.61      0.73      0.63       176
weighted avg       0.58      0.80      0.65       176
 samples avg       0.56      0.72      0.60       176

Epoch 2/10, Loss: 0.0002, Per-class Loss: [2.8881832e-04 8.9925001e-05 1.7144826e-04 1.7157492e-04 1.7703226e-04]
Thresholds: [0.41819826 0.47349513 0.39110902 0.4769282