In [2]:
import os
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from tqdm import tqdm
from transformers import BertTokenizer, BertForSequenceClassification, get_scheduler
from torch.optim import AdamW
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from google.colab import drive
drive.mount('/content/drive', force_remount=True)
#Load preprocessed dataset
df = pd.read_parquet("/content/drive/MyDrive/Project/augmented_dataset.parquet")
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)


Mounted at /content/drive
Using device: cuda


In [3]:
import ast

#label parsing
def parse_labels(label):
    if isinstance(label, str):
        try:
            # Try to parse string like '[5]', or '5' or '0,24'
            if label.startswith("["):
                return ast.literal_eval(label)
            elif ',' in label:
                return [int(i.strip()) for i in label.split(",")]
            else:
                return [int(label.strip())]
        except:
            return np.nan
    elif isinstance(label, (list, np.ndarray)):
        return label
    else:
        return np.nan

df["parsed_labels"] = df["labels"].apply(parse_labels)

#Extracting single-label samples
def extract_single_label(x):
    return x[0] if isinstance(x, list) and len(x) == 1 else np.nan

df["single_label"] = df["parsed_labels"].apply(extract_single_label)

#Dropping invalid rows and casting into integer type
df = df.dropna(subset=["single_label"])
df["single_label"] = df["single_label"].astype(int)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["single_label"] = df["single_label"].astype(int)


In [4]:
from sklearn.model_selection import train_test_split
train_df, val_df = train_test_split(
    df, test_size=0.1, random_state=42, stratify=df["single_label"]
)

#Tokenizer
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

def tokenize_data(df):
    encodings = tokenizer(
        df["cleaned_text"].tolist(),
        padding=True,
        truncation=True,
        max_length=512,
        return_tensors="pt"
    )
    labels = torch.tensor(df["single_label"].tolist(), dtype=torch.long)
    return encodings, labels

train_enc, train_labels = tokenize_data(train_df)
val_enc, val_labels = tokenize_data(val_df)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [5]:
#Dataloaders
batch_size = 64
train_dataset = TensorDataset(train_enc["input_ids"], train_enc["attention_mask"], train_labels)
val_dataset = TensorDataset(val_enc["input_ids"], val_enc["attention_mask"], val_labels)
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    pin_memory=True
)

val_loader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
    pin_memory=True
)

#Model
num_labels = 28
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=num_labels)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [6]:
#Focal Loss
class FocalLoss(nn.Module):
    def __init__(self, alpha=1, gamma=2, reduction='mean'):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, logits, targets):
        ce_loss = nn.CrossEntropyLoss(reduction='none')(logits, targets)
        pt = torch.exp(-ce_loss)
        focal_loss = self.alpha * (1 - pt) ** self.gamma * ce_loss
        return focal_loss.mean() if self.reduction == 'mean' else focal_loss.sum()

loss_fn = FocalLoss()

#Optimizer and scheduler
optimizer = AdamW(model.parameters(), lr=2e-5)
epochs = 5
total_steps = len(train_loader) * epochs
lr_scheduler = get_scheduler(
    name="linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=total_steps,
)


In [7]:
import os
import torch
from sklearn.metrics import accuracy_score, f1_score, classification_report
from tqdm import tqdm
import numpy as np

scaler = torch.cuda.amp.GradScaler()
best_val_loss = float("inf")

checkpoint_dir = "/content/drive/MyDrive/Project/models/singlelabel/checkpoints"
best_model_dir = "/content/drive/MyDrive/Project/models/singlelabel"
best_model_path = os.path.join(best_model_dir, "single_label_emotion_detection_final.pt")
os.makedirs(checkpoint_dir, exist_ok=True)

torch.cuda.empty_cache()  # clearing GPU before training

for epoch in range(1, epochs + 1):
    model.train()
    total_train_loss = 0
    all_preds = []
    all_targets = []

    loop = tqdm(train_loader, desc=f"Epoch {epoch}/{epochs}", leave=False, mininterval=2.0)
    for batch in loop:
        input_ids, attention_mask, labels = [b.to(device) for b in batch]
        optimizer.zero_grad()

        try:
            with torch.cuda.amp.autocast():
                outputs = model(input_ids=input_ids, attention_mask=attention_mask)
                logits = outputs.logits
                loss = loss_fn(logits, labels)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            lr_scheduler.step()

            total_train_loss += loss.item()

            preds = torch.argmax(logits, dim=1).detach().cpu().numpy()
            all_preds.extend(preds)
            all_targets.extend(labels.detach().cpu().numpy())

            loop.set_postfix(train_loss=loss.item())

        except RuntimeError as e:
            if "out of memory" in str(e):
                print("OOM error caught. Skipping batch.")
                torch.cuda.empty_cache()
                continue
            else:
                raise e

    avg_train_loss = total_train_loss / len(train_loader)
    train_acc = accuracy_score(all_targets, all_preds)
    f1_macro = f1_score(all_targets, all_preds, average='macro')
    f1_micro = f1_score(all_targets, all_preds, average='micro')
    f1_weighted = f1_score(all_targets, all_preds, average='weighted')

    print(f"\nEpoch {epoch} Training Summary:")
    print(f"Train Loss: {avg_train_loss:.4f} | Acc: {train_acc:.4f} | F1 (macro/micro/weighted): {f1_macro:.4f} / {f1_micro:.4f} / {f1_weighted:.4f}")

    #Saving checkpoint for each epoch
    model_path = os.path.join(checkpoint_dir, f"model_epoch_{epoch}.pt")
    torch.save(model.state_dict(), model_path)

    #Validation
    model.eval()
    val_preds = []
    val_targets = []
    val_loss_total = 0

    with torch.no_grad():
        for batch in val_loader:
            input_ids, attention_mask, labels = [b.to(device) for b in batch]

            try:
                with torch.cuda.amp.autocast():
                    outputs = model(input_ids=input_ids, attention_mask=attention_mask)
                    logits = outputs.logits
                    loss = loss_fn(logits, labels)

                val_loss_total += loss.item()
                preds = torch.argmax(logits, dim=1).detach().cpu().numpy()
                val_preds.extend(preds)
                val_targets.extend(labels.detach().cpu().numpy())

            except RuntimeError as e:
                if "out of memory" in str(e):
                    print("OOM during validation. Skipping batch.")
                    torch.cuda.empty_cache()
                    continue
                else:
                    raise e

    val_acc = accuracy_score(val_targets, val_preds)
    val_f1_macro = f1_score(val_targets, val_preds, average='macro')
    val_f1_micro = f1_score(val_targets, val_preds, average='micro')
    val_f1_weighted = f1_score(val_targets, val_preds, average='weighted')
    avg_val_loss = val_loss_total / len(val_loader)

    print(f"\nValidation Results for Epoch {epoch}:")
    print(f"Val Loss: {avg_val_loss:.4f} | Acc: {val_acc:.4f} | F1 (macro/micro/weighted): {val_f1_macro:.4f} / {val_f1_micro:.4f} / {val_f1_weighted:.4f}")
    print(classification_report(val_targets, val_preds, digits=4))

    #Saving best model (based on validation loss)
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        torch.save(model.state_dict(), best_model_path)
        print(f"Best model saved to {best_model_path}")


  scaler = torch.cuda.amp.GradScaler()
  with torch.cuda.amp.autocast():



Epoch 1 Training Summary:
Train Loss: 1.5940 | Acc: 0.4948 | F1 (macro/micro/weighted): 0.3373 / 0.4948 / 0.4266


  with torch.cuda.amp.autocast():



Validation Results for Epoch 1:
Val Loss: 1.1111 | Acc: 0.5878 | F1 (macro/micro/weighted): 0.5014 / 0.5878 / 0.5481
              precision    recall  f1-score   support

           0     0.5721    0.7401    0.6453       327
           1     0.6164    0.8911    0.7287       202
           2     0.5957    0.4553    0.5161       123
           3     0.3279    0.1105    0.1653       181
           4     0.4426    0.2308    0.3034       234
           5     0.5161    0.3019    0.3810       106
           6     0.4353    0.3033    0.3575       122
           7     0.9500    0.1118    0.2000       170
           8     0.7538    0.5385    0.6282        91
           9     0.5714    0.0385    0.0721       104
          10     0.5217    0.0667    0.1182       180
          11     0.4623    0.4851    0.4734       101
          12     0.8061    0.7189    0.7600       185
          13     0.4810    0.4043    0.4393        94
          14     0.6731    0.7216    0.6965        97
          15     

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Best model saved to /content/drive/MyDrive/Project/models/singlelabel/single_label_emotion_detection_final.pt



  with torch.cuda.amp.autocast():



Epoch 2 Training Summary:
Train Loss: 1.0159 | Acc: 0.6160 | F1 (macro/micro/weighted): 0.5445 / 0.6160 / 0.5776


  with torch.cuda.amp.autocast():



Validation Results for Epoch 2:
Val Loss: 0.9796 | Acc: 0.6118 | F1 (macro/micro/weighted): 0.5666 / 0.6118 / 0.5869
              precision    recall  f1-score   support

           0     0.5972    0.7798    0.6764       327
           1     0.6286    0.8713    0.7303       202
           2     0.5429    0.4634    0.5000       123
           3     0.2797    0.2210    0.2469       181
           4     0.5179    0.2479    0.3353       234
           5     0.5294    0.2547    0.3439       106
           6     0.4884    0.3443    0.4038       122
           7     0.8889    0.1412    0.2437       170
           8     0.7703    0.6264    0.6909        91
           9     0.4419    0.1827    0.2585       104
          10     0.4107    0.1278    0.1949       180
          11     0.4924    0.6436    0.5579       101
          12     0.8693    0.8270    0.8476       185
          13     0.5932    0.3723    0.4575        94
          14     0.7196    0.7938    0.7549        97
          15     


  with torch.cuda.amp.autocast():



Epoch 3 Training Summary:
Train Loss: 0.8366 | Acc: 0.6555 | F1 (macro/micro/weighted): 0.6160 / 0.6555 / 0.6290


  with torch.cuda.amp.autocast():



Validation Results for Epoch 3:
Val Loss: 0.9447 | Acc: 0.6155 | F1 (macro/micro/weighted): 0.5914 / 0.6155 / 0.5985
              precision    recall  f1-score   support

           0     0.6467    0.7278    0.6849       327
           1     0.6413    0.8762    0.7406       202
           2     0.5118    0.5285    0.5200       123
           3     0.2781    0.2597    0.2686       181
           4     0.4818    0.2265    0.3081       234
           5     0.5323    0.3113    0.3929       106
           6     0.5316    0.3443    0.4179       122
           7     0.5918    0.1706    0.2648       170
           8     0.7500    0.6593    0.7018        91
           9     0.2963    0.2308    0.2595       104
          10     0.3629    0.2500    0.2961       180
          11     0.4718    0.6634    0.5514       101
          12     0.8384    0.8973    0.8668       185
          13     0.5970    0.4255    0.4969        94
          14     0.7297    0.8351    0.7788        97
          15     


  with torch.cuda.amp.autocast():



Epoch 4 Training Summary:
Train Loss: 0.7202 | Acc: 0.6894 | F1 (macro/micro/weighted): 0.6646 / 0.6894 / 0.6680


  with torch.cuda.amp.autocast():



Validation Results for Epoch 4:
Val Loss: 0.9197 | Acc: 0.6207 | F1 (macro/micro/weighted): 0.6041 / 0.6207 / 0.6042
              precision    recall  f1-score   support

           0     0.5980    0.7462    0.6639       327
           1     0.6705    0.8564    0.7522       202
           2     0.5122    0.5122    0.5122       123
           3     0.2597    0.2210    0.2388       181
           4     0.4258    0.2821    0.3393       234
           5     0.4706    0.3774    0.4188       106
           6     0.5538    0.2951    0.3850       122
           7     0.5769    0.1765    0.2703       170
           8     0.7356    0.7033    0.7191        91
           9     0.3385    0.2115    0.2604       104
          10     0.3704    0.2222    0.2778       180
          11     0.5631    0.5743    0.5686       101
          12     0.8711    0.9135    0.8918       185
          13     0.6047    0.5532    0.5778        94
          14     0.7800    0.8041    0.7919        97
          15     


  with torch.cuda.amp.autocast():



Epoch 5 Training Summary:
Train Loss: 0.6451 | Acc: 0.7116 | F1 (macro/micro/weighted): 0.6962 / 0.7116 / 0.6931


  with torch.cuda.amp.autocast():



Validation Results for Epoch 5:
Val Loss: 0.9271 | Acc: 0.6194 | F1 (macro/micro/weighted): 0.6046 / 0.6194 / 0.6045
              precision    recall  f1-score   support

           0     0.6432    0.7278    0.6829       327
           1     0.6641    0.8614    0.7500       202
           2     0.5124    0.5041    0.5082       123
           3     0.2457    0.2376    0.2416       181
           4     0.4200    0.2692    0.3281       234
           5     0.4516    0.3962    0.4221       106
           6     0.5250    0.3443    0.4158       122
           7     0.5472    0.1706    0.2601       170
           8     0.7191    0.7033    0.7111        91
           9     0.3333    0.2500    0.2857       104
          10     0.3704    0.2222    0.2778       180
          11     0.5566    0.5842    0.5700       101
          12     0.8680    0.9243    0.8953       185
          13     0.6250    0.5319    0.5747        94
          14     0.7411    0.8557    0.7943        97
          15     

In [None]:
from sklearn.metrics import accuracy_score, f1_score, classification_report
import torch
from transformers import BertForSequenceClassification

#Loading the best saved model
model_path = "/content/drive/MyDrive/Project/models/singlelabel/single_label_emotion_detection_final.pt"
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=28)
model.load_state_dict(torch.load(model_path, map_location=device))
model.to(device)
model.eval()

#Collecting predictions and labels
all_preds = []
all_labels = []

with torch.no_grad():
    for batch in val_loader:
        input_ids, attention_mask, labels = [b.to(device) for b in batch]
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        preds = torch.argmax(logits, dim=1)

        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

#Metrics
acc = accuracy_score(all_labels, all_preds)
f1_macro = f1_score(all_labels, all_preds, average="macro")
f1_micro = f1_score(all_labels, all_preds, average="micro")
f1_weighted = f1_score(all_labels, all_preds, average="weighted")

print(f"\n📊 Final Evaluation on Validation Set:")
print(f"Accuracy: {acc:.4f}")
print(f"F1 (macro/micro/weighted): {f1_macro:.4f} / {f1_micro:.4f} / {f1_weighted:.4f}")

# Classification report
print("\nDetailed Classification Report:")
print(classification_report(all_labels, all_preds))


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



📊 Final Evaluation on Validation Set:
Accuracy: 0.6211
F1 (macro/micro/weighted): 0.6047 / 0.6211 / 0.6046

Detailed Classification Report:
              precision    recall  f1-score   support

           0       0.60      0.75      0.66       327
           1       0.67      0.86      0.75       202
           2       0.51      0.51      0.51       123
           3       0.26      0.22      0.24       181
           4       0.43      0.28      0.34       234
           5       0.47      0.38      0.42       106
           6       0.55      0.30      0.39       122
           7       0.58      0.18      0.27       170
           8       0.74      0.70      0.72        91
           9       0.34      0.21      0.26       104
          10       0.37      0.22      0.28       180
          11       0.56      0.57      0.57       101
          12       0.87      0.91      0.89       185
          13       0.60      0.55      0.58        94
          14       0.78      0.80      0.79     

In [17]:
from google.colab import files
files.download("/content/drive/MyDrive/Project/models/singlelabel/single_label_emotion_detection_final.pt")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
def predict_emotion_from_text(text, model, tokenizer):
    #Tokenizing the text
    encoding = tokenizer(
        text,
        return_tensors='pt',
        truncation=True,
        padding='max_length',
        max_length=128
    )

    input_ids = encoding['input_ids']
    attention_mask = encoding['attention_mask']

    #Prediction
    pred_idx, probs = predict_single_label(input_ids, attention_mask, model)

    #Emotions for predicted labels
    emotion_labels = [
        "admiration", "amusement", "anger", "annoyance", "approval", "caring", "confusion",
        "curiosity", "desire", "disappointment", "disapproval", "disgust", "embarrassment",
        "excitement", "fear", "gratitude", "grief", "joy", "love", "nervousness", "optimism",
        "pride", "realization", "relief", "remorse", "sadness", "surprise", "neutral"
    ]

    predicted_label = emotion_labels[pred_idx.item()]
    predicted_confidence = probs[0][pred_idx.item()].item()

    return predicted_label, predicted_confidence


In [None]:
text = "I just got the job I've always dreamed of!"
emotion, confidence = predict_emotion_from_text(text, model, tokenizer)

print(f"Predicted Emotion: {emotion}") #(Confidence: {confidence:.2f})")


Predicted Emotion: pride
