## 1. Setup & Import Libraries

In [1]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import transformers
from transformers import AutoTokenizer, AutoModel, get_linear_schedule_with_warmup
from sklearn.model_selection import train_test_split
from sklearn.metrics import hamming_loss, accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import warnings
import json
warnings.filterwarnings('ignore')

# Seed NumPy and PyTorch so that random draws (weight initialisation of the
# classification head, dropout masks, DataLoader shuffling) are repeatable
# after a kernel restart. 42 matches the random_state used for the data split.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Set device: use the first CUDA GPU when one is available, otherwise the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

if torch.cuda.is_available():
    # Report the GPU model and its total memory in GiB
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"VRAM: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")
else:
    print("CPU mode - training will be slower")

Using device: cuda
GPU: NVIDIA GeForce RTX 5070 Ti Laptop GPU
VRAM: 11.94 GB


## 2. Load and Prepare Data

In [4]:
# Read the preprocessed corpus from disk
df = pd.read_csv('dataset_preprocessed.csv')

print("Dataset Info:")
print(f"Shape: {df.shape}")
print(f"First few texts:\n{df['text'].head()}\n")

# Names of the label columns, in the order used for the label matrix below
emotion_cols = [
    'admiration', 'amusement', 'anger', 'annoyance',
    'approval', 'caring', 'confusion', 'curiosity',
    'desire', 'disappointment', 'disapproval', 'disgust',
    'embarrassment', 'excitement', 'fear', 'gratitude',
    'grief', 'joy', 'love', 'nervousness',
    'optimism', 'pride', 'realization', 'relief',
    'remorse', 'sadness', 'surprise', 'neutral',
]

print(f"Number of emotion labels: {len(emotion_cols)}")
print(f"Emotions: {emotion_cols}\n")

# Features are the raw text column; labels are one column per emotion
X = df['text'].values
y = df[emotion_cols].values

print(f"Features shape: {X.shape}")
print(f"Labels shape: {y.shape}")
print(f"Sample text: {X[0][:100]}...")
print(f"Sample labels: {y[0]}")

# Translate the first label row back into emotion names
first_sample_emotions = [name for name, flag in zip(emotion_cols, y[0]) if flag == 1]
print(f"Emotions for first sample: {first_sample_emotions}")

Dataset Info:
Shape: (69865, 29)
First few texts:
0                                            game hurt
1    sexuality ‚Äô grouping category make different o...
2                              right dont care fuck em
3                                      man love reddit
4                             name nowhere near falcon
Name: text, dtype: object

Number of emotion labels: 28
Emotions: ['admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion', 'curiosity', 'desire', 'disappointment', 'disapproval', 'disgust', 'embarrassment', 'excitement', 'fear', 'gratitude', 'grief', 'joy', 'love', 'nervousness', 'optimism', 'pride', 'realization', 'relief', 'remorse', 'sadness', 'surprise', 'neutral']

Features shape: (69865,)
Labels shape: (69865, 28)
Sample text: game hurt...
Sample labels: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0]
Emotions for first sample: ['sadness']


## 3. Tokenize and Create DataLoaders

In [5]:
# Checkpoint name shared by the tokenizer here and the encoder loaded later
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

print(f"Using model: {model_name}\nTokenizer vocabulary size: {len(tokenizer)}")

# Tokenize texts
def tokenize_texts(texts, max_length=256, batch_size=32):
    """Tokenize texts with the module-level BERT ``tokenizer``.

    Texts are handed to the tokenizer in chunks of ``batch_size`` (the
    tokenizer's ``__call__`` replaces the deprecated per-text ``encode_plus``).

    Args:
        texts: Sequence of strings (list, NumPy array, ...).
        max_length: Every sequence is truncated or padded to exactly this
            many token ids.
        batch_size: Number of texts passed to the tokenizer per call.

    Returns:
        Tuple ``(input_ids, attention_masks)`` of int64 NumPy arrays, each of
        shape ``(len(texts), max_length)``, with rows in input order. Empty
        input yields arrays of shape ``(0, max_length)``.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    # The tokenizer's batch API expects a plain list, not a NumPy array
    texts = list(texts)
    input_ids = []
    attention_masks = []

    print(f"Tokenizing {len(texts)} texts...")
    for start in tqdm(range(0, len(texts), batch_size)):
        encoding = tokenizer(
            texts[start:start + batch_size],
            add_special_tokens=True,
            max_length=max_length,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors=None
        )
        input_ids.extend(encoding['input_ids'])
        attention_masks.extend(encoding['attention_mask'])

    # reshape is a no-op for non-empty input (every row already has
    # max_length entries); it keeps the result 2-D when `texts` is empty.
    return (
        np.array(input_ids, dtype=np.int64).reshape(-1, max_length),
        np.array(attention_masks, dtype=np.int64).reshape(-1, max_length),
    )

# Encode the whole corpus once; rows of both arrays follow the order of X
input_ids, attention_masks = tokenize_texts(X, max_length=256)

print(f"\nInput IDs shape: {input_ids.shape}")
print(f"Attention masks shape: {attention_masks.shape}")

# Preview the leading 20 positions of the first encoded example
preview = slice(0, 20)
print(f"Sample input IDs (first 20 tokens): {input_ids[0][preview]}")
print(f"Sample attention mask (first 20 tokens): {attention_masks[0][preview]}")

Using model: bert-base-uncased
Tokenizer vocabulary size: 30522
Tokenizing 69865 texts...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 69865/69865 [00:03<00:00, 17933.63it/s]


Input IDs shape: (69865, 256)
Attention masks shape: (69865, 256)
Sample input IDs (first 20 tokens): [ 101 2208 3480  102    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0]
Sample attention mask (first 20 tokens): [1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]





In [6]:
# First cut: hold out 20% of the examples; the remainder is the training set
(X_train_ids, X_temp_ids,
 X_train_mask, X_temp_mask,
 y_train, y_temp) = train_test_split(
    input_ids, attention_masks, y, test_size=0.2, random_state=42
)

# Second cut: divide the held-out part evenly into validation and test sets
(X_val_ids, X_test_ids,
 X_val_mask, X_test_mask,
 y_val, y_test) = train_test_split(
    X_temp_ids, X_temp_mask, y_temp, test_size=0.5, random_state=42
)

print(f"Train set size: {len(X_train_ids)}")
print(f"Validation set size: {len(X_val_ids)}")
print(f"Test set size: {len(X_test_ids)}")

# Convert to tensors: token ids and masks as int64, labels as float32
X_train_ids, X_val_ids, X_test_ids = (
    torch.tensor(arr, dtype=torch.long)
    for arr in (X_train_ids, X_val_ids, X_test_ids)
)
X_train_mask, X_val_mask, X_test_mask = (
    torch.tensor(arr, dtype=torch.long)
    for arr in (X_train_mask, X_val_mask, X_test_mask)
)
y_train, y_val, y_test = (
    torch.tensor(arr, dtype=torch.float32)
    for arr in (y_train, y_val, y_test)
)

# Create DataLoaders; only the training loader shuffles
batch_size = 32

train_dataset = TensorDataset(X_train_ids, X_train_mask, y_train)
val_dataset = TensorDataset(X_val_ids, X_val_mask, y_val)
test_dataset = TensorDataset(X_test_ids, X_test_mask, y_test)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

print("\nDataLoaders created:")
print(f"Train batches: {len(train_loader)}")
print(f"Validation batches: {len(val_loader)}")
print(f"Test batches: {len(test_loader)}")

Train set size: 55892
Validation set size: 6986
Test set size: 6987

DataLoaders created:
Train batches: 1747
Validation batches: 219
Test batches: 219


## 4. Define BERT Fine-Tuning Model

In [7]:
class BertMultiLabelClassifier(nn.Module):
    """
    Multi-label emotion classifier: a pretrained encoder followed by a
    two-layer feed-forward head that emits one sigmoid score per label.
    """

    def __init__(self, model_name, num_labels, dropout_rate=0.3):
        super().__init__()

        # Pretrained encoder; its config provides the hidden width for the head
        self.bert = AutoModel.from_pretrained(model_name)
        self.config = self.bert.config

        # One dropout module, applied before each linear layer in forward()
        self.dropout = nn.Dropout(dropout_rate)

        # Head: hidden_size -> 256 -> num_labels
        self.fc1 = nn.Linear(self.config.hidden_size, 256)
        self.fc2 = nn.Linear(256, num_labels)

        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

        # Binary cross-entropy on probabilities (forward() already applies sigmoid)
        self.loss_fn = nn.BCELoss()

    def forward(self, input_ids, attention_mask):
        """Return per-label sigmoid scores for a batch of token ids and masks."""
        encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)

        # Representation at sequence position 0 (the [CLS] token)
        cls_vector = encoded.last_hidden_state[:, 0, :]

        hidden = self.relu(self.fc1(self.dropout(cls_vector)))
        scores = self.fc2(self.dropout(hidden))

        # Despite the "logits" naming used by callers, the returned values
        # have already been passed through the sigmoid.
        return self.sigmoid(scores)

    def compute_loss(self, logits, labels):
        """Apply the BCE loss to forward() outputs (`logits`) and `labels`."""
        return self.loss_fn(logits, labels)

# Build the classifier with one output per emotion and move it to the device
num_labels = len(emotion_cols)
model = BertMultiLabelClassifier(model_name, num_labels, dropout_rate=0.3).to(device)

print("Model initialized:")
print(f"Number of labels: {num_labels}")
print("Model architecture:")
print(model)

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Model initialized:
Number of labels: 28
Model architecture:
BertMultiLabelClassifier(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=Tr

## 5. Training Functions

In [8]:
def train_epoch(model, train_loader, optimizer, scheduler, device, max_grad_norm=1.0):
    """Train for one epoch.

    Args:
        model: Module exposing ``model(input_ids, attention_mask)`` and
            ``model.compute_loss(outputs, labels)``.
        train_loader: Iterable yielding ``(input_ids, attention_mask, labels)``
            batches.
        optimizer: Optimizer stepped once per batch.
        scheduler: Learning-rate scheduler stepped once per batch, or ``None``
            to train without a schedule.
        device: Device the batch tensors are moved to.
        max_grad_norm: Gradient-norm clipping threshold.

    Returns:
        Mean of the per-batch loss values over the epoch.

    Raises:
        ValueError: If ``train_loader`` yields no batches.
    """
    model.train()
    total_loss = 0.0
    num_batches = 0

    for input_ids, attention_mask, labels in tqdm(train_loader):
        # Move to device
        input_ids = input_ids.to(device)
        attention_mask = attention_mask.to(device)
        labels = labels.to(device)

        # Forward pass
        optimizer.zero_grad()
        logits = model(input_ids, attention_mask)
        loss = model.compute_loss(logits, labels)

        # Backward pass, with gradient clipping before the parameter update
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=max_grad_norm)
        optimizer.step()
        if scheduler is not None:
            scheduler.step()

        total_loss += loss.item()
        num_batches += 1

    # Counting batches in the loop avoids a ZeroDivisionError on an empty loader
    if num_batches == 0:
        raise ValueError("train_loader produced no batches; nothing to train on")

    return total_loss / num_batches

def validate(model, val_loader, device, threshold=0.5):
    """Evaluate the model on a loader without updating weights.

    Returns a 6-tuple: mean batch loss, Hamming loss, micro-F1, macro-F1,
    the stacked thresholded predictions and the stacked labels.
    """
    model.eval()
    running_loss = 0
    pred_batches, label_batches = [], []

    with torch.no_grad():
        for batch in tqdm(val_loader):
            # Move the three batch tensors to the target device
            input_ids, attention_mask, labels = (t.to(device) for t in batch)

            # Forward pass and loss
            scores = model(input_ids, attention_mask)
            running_loss += model.compute_loss(scores, labels).item()

            # A label is predicted when its score exceeds the threshold
            pred_batches.append((scores > threshold).cpu().numpy())
            label_batches.append(labels.cpu().numpy())

    avg_loss = running_loss / len(val_loader)
    all_preds = np.vstack(pred_batches)
    all_labels = np.vstack(label_batches)

    # Calculate metrics
    hamming = hamming_loss(all_labels, all_preds)
    f1_by_average = {
        avg: f1_score(all_labels, all_preds, average=avg, zero_division=0)
        for avg in ('micro', 'macro')
    }

    return (
        avg_loss,
        hamming,
        f1_by_average['micro'],
        f1_by_average['macro'],
        all_preds,
        all_labels,
    )

print("Training functions defined!")

Training functions defined!


## 6. Train the Model

In [10]:
# Training configuration
num_epochs = 3  # Reduced from 5 to 3 epochs
learning_rate = 2e-5
weight_decay = 0.01

# Train with batches of 16 instead of the 32 used by the evaluation loaders.
# Smaller batches lower the memory needed per step; they do not reduce the
# amount of work per epoch (the number of steps per epoch doubles).
batch_size_training = 16  # Reduced from 32

# Recreate train loader with smaller batch size
train_dataset = TensorDataset(X_train_ids, X_train_mask, y_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size_training, shuffle=True)

# Optimizer and scheduler: linear warmup over the first 10% of all steps
optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
total_steps = len(train_loader) * num_epochs
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=int(total_steps * 0.1),
    num_training_steps=total_steps
)

# Training history (one entry per completed epoch)
history = {
    'train_loss': [],
    'val_loss': [],
    'val_hamming': [],
    'val_f1_micro': [],
    'val_f1_macro': []
}

checkpoint_path = 'best_emotion_classifier.pt'
# Start below any reachable F1 so the first completed epoch always writes a
# checkpoint. Starting at 0 with a strict ">" would save nothing when macro-F1
# stays at 0.0, and the load below would then fail or pick up a stale file
# left by an earlier run.
best_val_f1 = float('-inf')
checkpoint_saved = False
patience = 2  # Reduced from 3 to 2 (stop earlier if no improvement)
patience_counter = 0

# Training loop
print(f"Starting training (optimized: {num_epochs} epochs, batch_size={batch_size_training})...\n")
for epoch in range(num_epochs):
    print(f"Epoch {epoch + 1}/{num_epochs}")
    print("-" * 50)

    # Train
    train_loss = train_epoch(model, train_loader, optimizer, scheduler, device)
    history['train_loss'].append(train_loss)
    print(f"Train Loss: {train_loss:.4f}")

    # Validate
    val_loss, hamming, f1_micro, f1_macro, _, _ = validate(model, val_loader, device)
    history['val_loss'].append(val_loss)
    history['val_hamming'].append(hamming)
    history['val_f1_micro'].append(f1_micro)
    history['val_f1_macro'].append(f1_macro)

    print(f"Val Loss: {val_loss:.4f}")
    print(f"Hamming Loss: {hamming:.4f}")
    print(f"F1 Score (Micro): {f1_micro:.4f}")
    print(f"F1 Score (Macro): {f1_macro:.4f}")

    # Early stopping on validation macro-F1
    if f1_macro > best_val_f1:
        best_val_f1 = f1_macro
        patience_counter = 0
        # Save best model
        torch.save(model.state_dict(), checkpoint_path)
        checkpoint_saved = True
        print("✓ Model saved!")
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print(f"\nEarly stopping triggered after {epoch + 1} epochs")
            break

    print()

print("Training completed!")

# Load best model (only when this run actually wrote a checkpoint);
# map_location keeps the load working if the device differs from save time.
if checkpoint_saved:
    model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    print("Best model loaded!")
else:
    print("No checkpoint was saved in this run; keeping the current weights.")

Starting training (optimized: 3 epochs, batch_size=16)...

Epoch 1/3
--------------------------------------------------


  0%|          | 0/3494 [01:05<?, ?it/s]



KeyboardInterrupt: 

## 7. Plot Training History

In [None]:
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# One entry per panel: (axis, [(history key, legend label), ...], y-label, title)
panels = [
    (axes[0, 0],
     [('train_loss', 'Train Loss'), ('val_loss', 'Val Loss')],
     'Loss',
     'Training and Validation Loss'),
    (axes[0, 1],
     [('val_hamming', 'Hamming Loss')],
     'Hamming Loss',
     'Validation Hamming Loss'),
    (axes[1, 0],
     [('val_f1_micro', 'F1 Micro')],
     'F1 Score',
     'Validation F1 Score (Micro)'),
    (axes[1, 1],
     [('val_f1_macro', 'F1 Macro')],
     'F1 Score',
     'Validation F1 Score (Macro)'),
]

for ax, series, y_label, title in panels:
    # Draw every curve assigned to this panel, then decorate the axis
    for key, label in series:
        ax.plot(history[key], label=label)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.set_title(title)
    ax.legend()
    ax.grid(True)

plt.tight_layout()
plt.savefig('training_history.png', dpi=300, bbox_inches='tight')
plt.show()

print("Training history plot saved as 'training_history.png'")

## 8. Evaluate on Test Set

In [None]:
print("Evaluating on test set...\n")
test_results = validate(model, test_loader, device)
(test_loss, test_hamming, test_f1_micro,
 test_f1_macro, test_preds, test_labels) = test_results

banner = "=" * 60
print(banner)
print("TEST SET EVALUATION")
print(banner)

# Aggregate metrics, all printed with four decimals
for metric_name, metric_value in (
    ("Test Loss", test_loss),
    ("Hamming Loss", test_hamming),
    ("F1 Score (Micro)", test_f1_micro),
    ("F1 Score (Macro)", test_f1_macro),
):
    print(f"{metric_name}: {metric_value:.4f}")
print()

# Per-emotion metrics
print("Per-Emotion F1 Scores:")
print("-" * 60)
report = classification_report(
    test_labels,
    test_preds,
    target_names=emotion_cols,
    zero_division=0,
)
print(report)

## 9. Inference - Predict Emotions

In [None]:
def predict_emotions(text, model, tokenizer, device, emotion_cols, threshold=0.5, max_length=256):
    """
    Predict emotions for a given text.

    Args:
        text: The input string to classify.
        model: Callable ``model(input_ids, attention_mask)`` returning one
            score per emotion for each row; it is switched to eval mode.
        tokenizer: Callable tokenizer returning ``'input_ids'`` and
            ``'attention_mask'`` tensors.
        device: Device the encoded tensors are moved to.
        emotion_cols: Emotion names, in the order of the model outputs.
        threshold: An emotion is reported when its score is strictly greater
            than this value.
        max_length: Length the input is truncated/padded to.

    Returns:
        Tuple ``(predicted_emotions, emotion_probs)``: a list of
        ``(emotion, score)`` pairs above the threshold sorted by descending
        score, and a dict mapping every emotion to its score. All scores are
        plain Python floats.
    """
    model.eval()

    # Tokenize (calling the tokenizer replaces the deprecated encode_plus)
    encoding = tokenizer(
        text,
        add_special_tokens=True,
        max_length=max_length,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_tensors='pt'
    )

    input_ids = encoding['input_ids'].to(device)
    attention_mask = encoding['attention_mask'].to(device)

    # Predict
    with torch.no_grad():
        logits = model(input_ids, attention_mask)

    # Scores for the single input row, converted to Python floats so the
    # list and the dict below carry the same value type
    probs = logits[0].cpu().tolist()

    emotion_probs = {}
    predicted_emotions = []
    for emotion, prob in zip(emotion_cols, probs):
        emotion_probs[emotion] = prob
        if prob > threshold:
            predicted_emotions.append((emotion, prob))

    # Sort by probability, highest first
    predicted_emotions.sort(key=lambda pair: pair[1], reverse=True)

    return predicted_emotions, emotion_probs

# Run the predictor on a few hand-written sentences
separator = "=" * 80
print(separator)
print("EMOTION PREDICTION ON SAMPLE TEXTS")
print(separator)

sample_texts = [
    "I'm so happy and excited about this amazing news!",
    "This is absolutely terrible and I hate it.",
    "I don't understand what's happening here.",
    "Thank you so much for your help and support!",
    "I'm really worried and anxious about the situation."
]

for number, text in enumerate(sample_texts, start=1):
    print(f"\n{number}. Text: {text}")
    predicted, probs = predict_emotions(text, model, tokenizer, device, emotion_cols, threshold=0.5)

    # Emotions whose score cleared the threshold (may be none)
    print("   Predicted emotions (threshold=0.5):")
    if not predicted:
        print("   - No emotions detected with threshold=0.5")
    for emotion, prob in predicted:
        print(f"   - {emotion:15s}: {prob:.4f}")

    # The five highest-scoring emotions regardless of the threshold
    print("\n   Top 5 emotions by probability:")
    ranked = sorted(probs.items(), key=lambda item: item[1], reverse=True)
    sorted_probs = ranked[:5]
    for emotion, prob in sorted_probs:
        print(f"   - {emotion:15s}: {prob:.4f}")

## 10. Performance Analysis

In [None]:
# Per-emotion precision / recall / F1 / support from 2x2 confusion matrices
emotion_metrics = {}
for col, emotion in enumerate(emotion_cols):
    cm = confusion_matrix(test_labels[:, col], test_preds[:, col], labels=[0, 1])
    tn, fp, fn, tp = cm.ravel()

    predicted_positive = tp + fp
    actual_positive = tp + fn

    # Each ratio falls back to 0 when its denominator is empty
    precision = tp / predicted_positive if predicted_positive > 0 else 0
    recall = tp / actual_positive if actual_positive > 0 else 0
    pr_sum = precision + recall
    f1 = 2 * (precision * recall) / pr_sum if pr_sum > 0 else 0

    emotion_metrics[emotion] = dict(
        precision=precision,
        recall=recall,
        f1=f1,
        support=actual_positive,
    )

# One row per emotion, best F1 first
metrics_df = pd.DataFrame(emotion_metrics).T.sort_values('f1', ascending=False)

print("\nPer-Emotion Performance Summary:")
print(metrics_df.to_string())

# Visualization: F1 ranking on the left, support ranking on the right
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
ax_f1, ax_support = axes

# Top emotions by F1 score
top_emotions = metrics_df.head(10)
f1_positions = range(len(top_emotions))
ax_f1.barh(f1_positions, top_emotions['f1'])
ax_f1.set_yticks(f1_positions)
ax_f1.set_yticklabels(top_emotions.index)
ax_f1.set_xlabel('F1 Score')
ax_f1.set_title('Top 10 Emotions by F1 Score')
ax_f1.invert_yaxis()

# Emotion support distribution
support_counts = metrics_df['support'].astype(int)
emotion_support = support_counts.sort_values(ascending=False).head(10)
support_positions = range(len(emotion_support))
ax_support.bar(support_positions, emotion_support.values)
ax_support.set_xticks(support_positions)
ax_support.set_xticklabels(emotion_support.index, rotation=45, ha='right')
ax_support.set_ylabel('Number of Samples')
ax_support.set_title('Top 10 Emotions by Support')

plt.tight_layout()
plt.savefig('emotion_performance.png', dpi=300, bbox_inches='tight')
plt.show()

print("\nEmotion performance plot saved as 'emotion_performance.png'")

## 11. Save Model and Configuration

In [None]:
# Save the fine-tuned model weights (the state dict currently held by `model`)
print("Saving model...")
torch.save(model.state_dict(), 'emotion_classifier_finetuned.pt')

# Save the settings needed to rebuild the model and tokenize inputs
config = {
    'model_name': model_name,
    'num_labels': num_labels,
    'emotion_cols': emotion_cols,
    'max_length': 256,
    'dropout_rate': 0.3
}

with open('model_config.json', 'w') as f:
    json.dump(config, f, indent=4)

# Save metrics
metrics_summary = {
    'test_loss': float(test_loss),
    'test_hamming_loss': float(test_hamming),
    'test_f1_micro': float(test_f1_micro),
    'test_f1_macro': float(test_f1_macro),
    'training_config': {
        'num_epochs': num_epochs,
        # Record the batch size the training loader actually used
        # (`batch_size` is the value of the validation/test loaders).
        'batch_size': batch_size_training,
        'learning_rate': learning_rate,
        'weight_decay': weight_decay
    }
}

with open('model_metrics.json', 'w') as f:
    json.dump(metrics_summary, f, indent=4)

print("✓ Model saved as 'emotion_classifier_finetuned.pt'")
print("✓ Configuration saved as 'model_config.json'")
print("✓ Metrics saved as 'model_metrics.json'")

## 12. Summary and Results

In [None]:
# Final report: dataset, model, training settings, test metrics, saved files
print("\n" + "=" * 80)
print("EMOTION CLASSIFICATION MODEL - FINAL SUMMARY")
print("=" * 80)

print("\n📊 DATASET INFORMATION:")
print(f"  • Total samples: {len(df):,}")
print(f"  • Number of emotions: {len(emotion_cols)}")
print(f"  • Train/Val/Test split: {len(X_train_ids)}/{len(X_val_ids)}/{len(X_test_ids)}")
print(f"  • Multi-label: Yes (average {y.sum(axis=1).mean():.2f} emotions per text)")

print("\n🤖 MODEL INFORMATION:")
print("  • Architecture: BERT Fine-Tuning (transformer-based)")
print(f"  • Base model: {model_name}")
print("  • Max sequence length: 256 tokens")
print(f"  • Classification layers: 768 → 256 → {num_labels} (BCELoss)")

print("\n⚙️ TRAINING CONFIGURATION:")
print(f"  • Epochs: {num_epochs}")
# Report the batch size of the training loader, not the evaluation loaders
print(f"  • Batch size: {batch_size_training}")
print(f"  • Learning rate: {learning_rate}")
print("  • Optimizer: AdamW with linear warmup scheduler")
print(f"  • Early stopping: Yes (patience={patience})")

print("\n📈 TEST SET PERFORMANCE:")
print(f"  • Loss: {test_loss:.4f}")
print(f"  • Hamming Loss: {test_hamming:.4f}")
print(f"  • F1 Score (Micro): {test_f1_micro:.4f}")
print(f"  • F1 Score (Macro): {test_f1_macro:.4f}")

print("\n🎯 TOP 5 BEST PERFORMING EMOTIONS:")
# metrics_df is sorted by F1 (descending), so its head is the top five
for i, (emotion, metrics) in enumerate(metrics_df.head(5).iterrows(), 1):
    print(f"  {i}. {emotion:15s} - F1: {metrics['f1']:.4f}, Precision: {metrics['precision']:.4f}, Recall: {metrics['recall']:.4f}")

print("\n💾 SAVED FILES:")
print("  • best_emotion_classifier.pt - Best model checkpoint")
print("  • emotion_classifier_finetuned.pt - Final fine-tuned model")
print("  • model_config.json - Model configuration")
print("  • model_metrics.json - Test metrics")
print("  • training_history.png - Training history plots")
print("  • emotion_performance.png - Emotion performance analysis")

print("\n" + "=" * 80)