In [1]:
# Cell 1: Setup and Imports for RoBERTa Pipeline
import os
import json
import time
import random
import numpy as np
import pandas as pd
from typing import Dict, List, Tuple, Optional
from collections import Counter
import warnings
warnings.filterwarnings('ignore')

# Deep Learning
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
from torch.optim.lr_scheduler import LinearLR

# Transformers
from transformers import (
    AutoTokenizer, 
    AutoModel,
    get_linear_schedule_with_warmup
)

# ML utilities
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report
import joblib

# Hyperparameter tuning
import optuna

# Dataset loading
from datasets import load_dataset

# Set random seeds for reproducibility
def set_random_seed(seed=42):
    """Seed every random number generator this notebook uses.

    The same value is handed, in order, to Python's ``random`` module,
    NumPy's global generator, ``torch.manual_seed`` and
    ``torch.cuda.manual_seed_all``.

    Args:
        seed: Seed value passed unchanged to each library. Defaults to 42.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

set_random_seed(42)

# Select the compute device: the GPU when CUDA is usable, otherwise the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}")

# On CUDA, also report the GPU model and its total memory (bytes / 1024**3).
if device.type == 'cuda':
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")

print("All imports completed and GPU configured")

Using device: cuda
GPU: NVIDIA GeForce RTX 4060
GPU Memory: 8.0 GB
All imports completed and GPU configured


In [2]:
# Cell 2: Configuration Classes for distilroBERTa
from dataclasses import dataclass

@dataclass
class TrainingConfig:
    """Hyperparameters and output settings for one training run.

    The trainer classes defined later in this notebook read these fields when
    building the tokenizer, model, data loaders, optimizer and learning-rate
    schedule, and when choosing where to save checkpoints.
    """
    # Model id passed to AutoTokenizer.from_pretrained / AutoModel.from_pretrained.
    model_name: str = "distilroberta-base" 
    # Learning rate given to AdamW.
    learning_rate: float = 2e-5
    # Batch size used for both the training and validation DataLoader.
    batch_size: int = 16
    # Number of passes over the training loader; also sizes the LR schedule.
    num_epochs: int = 3
    # Token length every example is padded/truncated to by the datasets.
    max_length: int = 128
    # Fraction of the total optimizer steps spent in learning-rate warmup.
    warmup_ratio: float = 0.1
    # Weight decay given to AdamW.
    weight_decay: float = 0.01
    # Max norm used for gradient clipping before each optimizer step.
    max_grad_norm: float = 1.0
    # Forwarded to AutoModel.from_pretrained as hidden_dropout_prob.
    hidden_dropout_prob: float = 0.1
    # Forwarded to AutoModel.from_pretrained as attention_probs_dropout_prob.
    attention_dropout_prob: float = 0.1
    # Dropout probability applied before the classification head(s).
    classifier_dropout: float = 0.1
    # Multi-task loss is alpha * sentiment_loss + (1 - alpha) * emotion_loss.
    alpha: float = 0.5  # For multitask loss weighting
    # Run label; in the visible code it is only echoed in the training log line.
    task_type: str = "sentiment"  # "sentiment", "emotion", or "multitask"
    # Parent directory for checkpoints (save_model writes into "<output_dir>/model_best").
    output_dir: str = "./roberta_model"

class distilroBERTaModelConfig:
    """Label vocabularies for the sentiment and emotion classification heads.

    Attributes:
        sentiment_classes: Names of the three sentiment classes.
        emotion_classes: Names of the six emotion classes.
        sentiment_num_classes: ``len(sentiment_classes)``.
        emotion_num_classes: ``len(emotion_classes)``.

    NOTE(review): the lists are presumably ordered by integer class id;
    confirm against the label encoding used by the data-processing functions.
    """

    def __init__(self):
        sentiment_names = ['Negative', 'Neutral', 'Positive']
        emotion_names = ['Anger', 'Fear', 'Joy', 'No Emotion', 'Sadness', 'Surprise']

        self.sentiment_classes = sentiment_names
        self.emotion_classes = emotion_names
        # Head sizes are derived from the name lists so they cannot drift apart.
        self.sentiment_num_classes = len(sentiment_names)
        self.emotion_num_classes = len(emotion_names)

# Shared module-level instance; the multi-task trainer reads its
# sentiment_num_classes / emotion_num_classes when building the model.
roberta_model_config = distilroBERTaModelConfig()
print("Configuration classes defined")

Configuration classes defined


In [3]:
# Cell 3: Dataset Classes for distilroBERTa
class distilroBERTaSingleTaskDataset(Dataset):
    """Map-style dataset pairing each text with a single integer class label.

    Subclasses ``torch.utils.data.Dataset`` so it matches the multi-task
    dataset in this notebook. Texts are tokenized lazily, one example per
    ``__getitem__`` call, and padded/truncated to ``max_length`` tokens.
    """

    def __init__(
        self,
        texts: List[str],
        labels: List[int],
        tokenizer,
        max_length: int = 128
    ):
        """Store the examples and the tokenizer used to encode them.

        Args:
            texts: Input texts; each item is converted with ``str()`` on access.
            labels: Integer class id for each text, same length as ``texts``.
            tokenizer: Callable accepting the Hugging Face tokenizer keyword
                arguments used in ``__getitem__`` and returning a mapping with
                ``'input_ids'`` and ``'attention_mask'`` tensors.
            max_length: Sequence length every example is padded/truncated to.

        Raises:
            AssertionError: If ``texts`` and ``labels`` differ in length.
        """
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

        assert len(texts) == len(labels), "Texts and labels must have same length"

    def __len__(self):
        """Return the number of examples."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenize example ``idx`` and return it as a dict of tensors.

        Returns:
            Dict with ``'input_ids'`` and ``'attention_mask'`` (flattened to
            1-D), ``'labels'`` (0-d ``torch.long`` tensor) and the raw
            ``'text'`` string.
        """
        text = str(self.texts[idx])
        label = self.labels[idx]

        encoding = self.tokenizer(
            text,
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt'
        )

        return {
            # The tokenizer output carries a leading batch axis; flatten drops it.
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.long),
            'text': text
        }

class distilroBERTaMultiTaskDataset(Dataset):
    """Map-style dataset yielding one text with a sentiment and an emotion label.

    Each example is tokenized on access and padded/truncated to
    ``max_length`` tokens.
    """

    def __init__(
        self,
        texts: List[str],
        sentiment_labels: List[int],
        emotion_labels: List[int],
        tokenizer,
        max_length: int = 128
    ):
        """Store the parallel example lists and the tokenizer.

        Args:
            texts: Input texts; each item is converted with ``str()`` on access.
            sentiment_labels: Sentiment class id per text.
            emotion_labels: Emotion class id per text.
            tokenizer: Callable returning a mapping with ``'input_ids'`` and
                ``'attention_mask'`` tensors.
            max_length: Sequence length every example is padded/truncated to.

        Raises:
            AssertionError: If the three lists do not share one length.
        """
        assert len(texts) == len(sentiment_labels) == len(emotion_labels), \
            "All inputs must have same length"

        self.texts = texts
        self.sentiment_labels = sentiment_labels
        self.emotion_labels = emotion_labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of examples."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Return example ``idx`` as a dict of model inputs, both labels and text."""
        sample_text = str(self.texts[idx])
        sentiment_id, emotion_id = self.sentiment_labels[idx], self.emotion_labels[idx]

        encoded = self.tokenizer(
            sample_text,
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt'
        )

        # Flatten removes the batch axis the tokenizer adds to its tensors.
        sample = {
            field: torch.flatten(encoded[field])
            for field in ('input_ids', 'attention_mask')
        }
        sample['sentiment_labels'] = torch.tensor(sentiment_id, dtype=torch.long)
        sample['emotion_labels'] = torch.tensor(emotion_id, dtype=torch.long)
        sample['text'] = sample_text
        return sample

print("distilroBERTa Dataset classes defined")

distilroBERTa Dataset classes defined


In [None]:
# Cell 4: Model Architectures
class distilroBERTaSingleTaskTransformer(nn.Module):
    """Pretrained encoder followed by dropout and one linear classification head.

    The representation of the first token of each sequence is used as the
    sequence embedding (called ``[CLS]`` in the original comments;
    NOTE(review): RoBERTa-style tokenizers name this token ``<s>`` — confirm).
    """

    def __init__(
        self,
        model_name: str = "distilroberta-base",
        num_classes: int = 3,
        hidden_dropout_prob: float = 0.1,
        attention_dropout_prob: float = 0.1,
        classifier_dropout: float = 0.1
    ):
        """Load the encoder and create the classification head.

        Args:
            model_name: Model id or path given to ``AutoModel.from_pretrained``.
            num_classes: Number of output logits.
            hidden_dropout_prob: Forwarded as ``hidden_dropout_prob``.
            attention_dropout_prob: Forwarded as ``attention_probs_dropout_prob``.
            classifier_dropout: Dropout probability applied before the head.
        """
        super().__init__()
        self.num_classes = num_classes

        # Dropout settings are passed as keyword overrides when loading the encoder.
        encoder_overrides = {
            'hidden_dropout_prob': hidden_dropout_prob,
            'attention_probs_dropout_prob': attention_dropout_prob,
        }
        self.roberta = AutoModel.from_pretrained(model_name, **encoder_overrides)

        encoder_width = self.roberta.config.hidden_size
        self.dropout = nn.Dropout(classifier_dropout)
        self.classifier = nn.Linear(encoder_width, num_classes)

    def forward(self, input_ids, attention_mask):
        """Encode the batch and classify each sequence from its first token.

        Returns:
            Dict with a single key ``'logits'`` holding the head's output.
        """
        encoder_out = self.roberta(input_ids=input_ids, attention_mask=attention_mask)
        # Index 0 along the second axis selects the first token of every sequence.
        first_token = encoder_out.last_hidden_state[:, 0]
        return {'logits': self.classifier(self.dropout(first_token))}

class distilroBERTaMultiTaskTransformer(nn.Module):
    """Shared pretrained encoder with separate sentiment and emotion heads.

    Both heads read the same dropout-regularised first-token representation
    (called ``[CLS]`` in the original comments; NOTE(review): RoBERTa-style
    tokenizers name this token ``<s>`` — confirm).
    """

    def __init__(
        self,
        model_name: str = "distilroberta-base",
        sentiment_num_classes: int = 3,
        emotion_num_classes: int = 6,
        hidden_dropout_prob: float = 0.1,
        attention_dropout_prob: float = 0.1,
        classifier_dropout: float = 0.1
    ):
        """Load the shared encoder and create one linear head per task.

        Args:
            model_name: Model id or path given to ``AutoModel.from_pretrained``.
            sentiment_num_classes: Output size of the sentiment head.
            emotion_num_classes: Output size of the emotion head.
            hidden_dropout_prob: Forwarded as ``hidden_dropout_prob``.
            attention_dropout_prob: Forwarded as ``attention_probs_dropout_prob``.
            classifier_dropout: Dropout probability applied before both heads.
        """
        super().__init__()
        self.sentiment_num_classes = sentiment_num_classes
        self.emotion_num_classes = emotion_num_classes

        # Dropout settings are passed as keyword overrides when loading the encoder.
        encoder_overrides = {
            'hidden_dropout_prob': hidden_dropout_prob,
            'attention_probs_dropout_prob': attention_dropout_prob,
        }
        self.roberta = AutoModel.from_pretrained(model_name, **encoder_overrides)

        encoder_width = self.roberta.config.hidden_size
        self.dropout = nn.Dropout(classifier_dropout)
        # Heads are created sentiment-first, then emotion.
        self.sentiment_classifier = nn.Linear(encoder_width, sentiment_num_classes)
        self.emotion_classifier = nn.Linear(encoder_width, emotion_num_classes)

    def forward(self, input_ids, attention_mask):
        """Encode the batch once and score it with both heads.

        Returns:
            Dict with ``'sentiment_logits'`` and ``'emotion_logits'``.
        """
        encoder_out = self.roberta(input_ids=input_ids, attention_mask=attention_mask)
        # One dropout pass; both heads consume the same first-token features.
        shared_features = self.dropout(encoder_out.last_hidden_state[:, 0])

        task_logits = {}
        task_logits['sentiment_logits'] = self.sentiment_classifier(shared_features)
        task_logits['emotion_logits'] = self.emotion_classifier(shared_features)
        return task_logits

print("distilroBERTa Model architectures defined")

distilroBERTa Model architectures defined


In [5]:
# Cell 5: Data Loading and Processing Functions for RoBERTa
def aggressive_memory_cleanup():
    """Run the garbage collector, then release cached CUDA memory.

    The collector runs first so that objects kept alive only by reference
    cycles are destroyed before the CUDA cache is emptied; emptying the cache
    first would leave the memory they free sitting in the cache. On machines
    without CUDA only the garbage collection happens.
    """
    import gc

    gc.collect()
    if torch.cuda.is_available():
        # Wait for queued GPU work before asking the allocator to release blocks.
        torch.cuda.synchronize()
        torch.cuda.empty_cache()
    print("🧹 Memory cleaned!")

def load_and_process_datasets_roberta():
    """Load SST-2 and GoEmotions and turn them into sentiment/emotion splits.

    Each dataset is fetched with ``load_dataset``; a failure is reported on
    stdout and then re-raised unchanged.

    Returns:
        Tuple ``(sentiment_data, emotion_data)`` as produced by
        ``process_sentiment_data_roberta`` and ``process_emotion_data_roberta``.
    """
    print("📥 Loading external datasets for RoBERTa...")

    # SST-2 supplies the sentiment examples.
    try:
        sst2 = load_dataset("sst2")
        print(f"✅ SST-2 dataset loaded: {len(sst2['train'])} train samples")
    except Exception as load_error:
        print(f"❌ Error loading SST-2: {load_error}")
        raise

    # GoEmotions ("simplified" configuration) supplies the emotion examples.
    try:
        go_emotions = load_dataset("go_emotions", "simplified")
        print(f"✅ GoEmotions dataset loaded: {len(go_emotions['train'])} train samples")
    except Exception as load_error:
        print(f"❌ Error loading GoEmotions: {load_error}")
        raise

    # Sentiment is processed before emotion, matching the order of the log output.
    return process_sentiment_data_roberta(sst2), process_emotion_data_roberta(go_emotions)

def process_sentiment_data_roberta(sst2_dataset, max_samples=10000):
    """Build three-class sentiment splits from the SST-2 training set.

    SST-2 has two labels, so a "Neutral" class is synthesised: label 0 stays
    Negative (0), while each label-1 example is relabelled Neutral (1) with
    probability 0.15 and Positive (2) otherwise. The draws use NumPy's global
    generator, so results are repeatable only if it was seeded beforehand.
    The relabelled examples are then split 70/15/15 into train/val/test with
    stratification and ``random_state=42``.

    Args:
        sst2_dataset: Mapping whose ``'train'`` entry exposes ``'sentence'``
            and ``'label'`` columns (presumably the Hugging Face SST-2
            ``DatasetDict`` — confirm against the caller).
        max_samples: Maximum number of leading training rows to use.

    Returns:
        Dict with ``'train'``, ``'val'`` and ``'test'`` entries, each a dict
        holding parallel ``'texts'`` and ``'labels'`` lists.

    Raises:
        ValueError: If no row with label 0 or 1 is available.
    """
    print("🔄 Processing sentiment data for RoBERTa...")

    # Extract texts and labels
    train_texts = sst2_dataset['train']['sentence'][:max_samples]
    train_labels = sst2_dataset['train']['label'][:max_samples]

    expanded_texts = []
    expanded_labels = []
    for text, label in zip(train_texts, train_labels):
        if label == 0:  # Negative
            expanded_labels.append(0)
        elif label == 1:  # Positive, occasionally re-labelled as Neutral
            expanded_labels.append(1 if np.random.random() < 0.15 else 2)
        else:
            # Rows with any other label are dropped.
            continue
        expanded_texts.append(text)

    if not expanded_labels:
        raise ValueError("No SST-2 examples with label 0 or 1 were found")

    # Ensure the Neutral class exists at all.
    if 1 not in expanded_labels:
        # Sampling without replacement cannot draw more items than exist, so
        # cap the forced-neutral count at the number of available examples.
        forced_count = min(100, len(expanded_labels))
        neutral_indices = np.random.choice(len(expanded_labels), size=forced_count, replace=False)
        for idx in neutral_indices:
            expanded_labels[idx] = 1

    # 70% train, then the remaining 30% is halved into val and test.
    train_texts, temp_texts, train_labels, temp_labels = train_test_split(
        expanded_texts, expanded_labels, test_size=0.3, random_state=42, stratify=expanded_labels
    )

    val_texts, test_texts, val_labels, test_labels = train_test_split(
        temp_texts, temp_labels, test_size=0.5, random_state=42, stratify=temp_labels
    )

    sentiment_data = {
        'train': {'texts': train_texts, 'labels': train_labels},
        'val': {'texts': val_texts, 'labels': val_labels},
        'test': {'texts': test_texts, 'labels': test_labels}
    }

    print(f"✅ RoBERTa Sentiment data processed:")
    print(f"  Train: {len(train_texts)} samples")
    print(f"  Val: {len(val_texts)} samples")
    print(f"  Test: {len(test_texts)} samples")

    return sentiment_data

def process_emotion_data_roberta(emotion_dataset, max_samples=10000):
    """Build six-class emotion splits from the GoEmotions training set.

    Only training examples whose label (the first label, for multi-label
    rows) is in 0-5 are kept. Those are truncated to ``max_samples``, reduced
    to a single label each, and split 70/15/15 into train/val/test with
    stratification and ``random_state=42``. Only the ``'train'`` split of the
    input is read.

    NOTE(review): label ids 0-5 are used as-is. Confirm that these ids in the
    loaded dataset correspond to the six names in
    ``distilroBERTaModelConfig.emotion_classes``; nothing here remaps them.

    Args:
        emotion_dataset: Mapping whose ``'train'`` entry supports
            ``.filter(fn)`` and exposes ``'text'`` and ``'labels'`` columns
            (presumably the Hugging Face GoEmotions ``DatasetDict`` — confirm).
        max_samples: Maximum number of filtered training rows to use.

    Returns:
        Dict with ``'train'``, ``'val'`` and ``'test'`` entries, each a dict
        holding parallel ``'texts'`` and ``'labels'`` lists.
    """
    print("🔄 Processing emotion data for RoBERTa...")

    def filter_emotions(example):
        """Return True when the example's (first) label id is one of 0-5."""
        labels = example['labels']
        if isinstance(labels, list):
            # bool() keeps the predicate's result a real boolean for empty lists.
            return bool(labels) and labels[0] in range(6)
        return labels in range(6)

    filtered_train = emotion_dataset['train'].filter(filter_emotions)

    # Extract texts and labels
    train_texts = filtered_train['text'][:max_samples]
    train_labels_raw = filtered_train['labels'][:max_samples]

    # Multi-label rows are reduced to their first label.
    train_labels = []
    for label in train_labels_raw:
        if isinstance(label, list):
            train_labels.append(label[0] if label else 0)
        else:
            train_labels.append(label)

    # 70% train, then the remaining 30% is halved into val and test.
    train_texts, temp_texts, train_labels, temp_labels = train_test_split(
        train_texts, train_labels, test_size=0.3, random_state=42, stratify=train_labels
    )

    val_texts, test_texts, val_labels, test_labels = train_test_split(
        temp_texts, temp_labels, test_size=0.5, random_state=42, stratify=temp_labels
    )

    emotion_data = {
        'train': {'texts': train_texts, 'labels': train_labels},
        'val': {'texts': val_texts, 'labels': val_labels},
        'test': {'texts': test_texts, 'labels': test_labels}
    }

    print(f"✅ RoBERTa Emotion data processed:")
    print(f"  Train: {len(train_texts)} samples")
    print(f"  Val: {len(val_texts)} samples")
    print(f"  Test: {len(test_texts)} samples")

    return emotion_data

def create_multitask_data_roberta(sentiment_data, emotion_data):
    """Zip the sentiment and emotion splits into one multi-task dataset.

    For each of ``'train'``, ``'val'`` and ``'test'`` both inputs are cut to
    the shorter of the two. Texts and sentiment labels come from
    ``sentiment_data``; emotion labels come from ``emotion_data``.

    NOTE(review): the emotion texts are not used, so each emotion label is
    attached to a sentiment text purely by position. Unless the two inputs
    describe the same texts in the same order, the emotion labels do not
    describe the texts they are paired with — confirm this is intended.

    Args:
        sentiment_data: Dict of splits, each with ``'texts'`` and ``'labels'``.
        emotion_data: Dict of splits, each with ``'texts'`` and ``'labels'``.

    Returns:
        Dict of splits, each with ``'texts'``, ``'sentiment_labels'`` and
        ``'emotion_labels'`` lists of equal length.
    """
    print("🔄 Creating multi-task dataset for RoBERTa...")

    multitask_data = {}
    for split_name in ('train', 'val', 'test'):
        sentiment_split = sentiment_data[split_name]
        emotion_split = emotion_data[split_name]

        # Truncate both sources to the shorter one so the lists stay parallel.
        keep = min(len(sentiment_split['texts']), len(emotion_split['texts']))
        multitask_data[split_name] = {
            'texts': sentiment_split['texts'][:keep],
            'sentiment_labels': sentiment_split['labels'][:keep],
            'emotion_labels': emotion_split['labels'][:keep],
        }

    print(f"✅ RoBERTa Multi-task data created:")
    print(f"  Train: {len(multitask_data['train']['texts'])} samples")
    print(f"  Val: {len(multitask_data['val']['texts'])} samples")
    print(f"  Test: {len(multitask_data['test']['texts'])} samples")

    return multitask_data

print("✅ RoBERTa data processing functions defined!")

✅ RoBERTa data processing functions defined!


In [6]:
# Cell 6: distilroBERTa Training Classes
class distilroBERTaSingleTaskTrainer:
    """Fine-tunes and validates a single-head distilroBERTa classifier.

    Construction loads the tokenizer and model named by ``config.model_name``
    and moves the model to the notebook-level ``device``. ``train`` builds the
    data loaders, optimizer and schedule, runs the epochs, records per-epoch
    metrics in ``training_history`` and checkpoints the epoch with the highest
    validation macro-F1.
    """

    def __init__(self, config: "TrainingConfig", num_classes: int):
        """Load tokenizer and model, and set up the loss and metric history.

        Args:
            config: Hyperparameters for this run.
            num_classes: Output size of the classification head.
        """
        self.config = config
        self.num_classes = num_classes
        self.device = device

        self.tokenizer = AutoTokenizer.from_pretrained(config.model_name)
        if self.tokenizer.pad_token is None:
            # The datasets pad to max_length, which needs a pad token.
            self.tokenizer.pad_token = self.tokenizer.eos_token

        self.model = distilroBERTaSingleTaskTransformer(
            model_name=config.model_name,
            num_classes=num_classes,
            hidden_dropout_prob=config.hidden_dropout_prob,
            attention_dropout_prob=config.attention_dropout_prob,
            classifier_dropout=config.classifier_dropout
        ).to(self.device)

        self.loss_fn = nn.CrossEntropyLoss()

        # One list per metric; train() appends one value per epoch.
        self.training_history = {
            'train_loss': [],
            'train_accuracy': [],
            'val_loss': [],
            'val_accuracy': [],
            'val_f1_macro': []
        }

    def _build_loader(self, split: Dict, shuffle: bool):
        """Wrap one ``{'texts', 'labels'}`` split in a dataset and DataLoader."""
        dataset = distilroBERTaSingleTaskDataset(
            texts=split['texts'],
            labels=split['labels'],
            tokenizer=self.tokenizer,
            max_length=self.config.max_length
        )
        return DataLoader(
            dataset,
            batch_size=self.config.batch_size,
            shuffle=shuffle,
            num_workers=2,
            # Page-locked host memory is only useful when batches go to a GPU.
            pin_memory=self.device.type == 'cuda'
        )

    def create_data_loaders(self, data_splits: Dict):
        """Create the train/val loaders plus the optimizer and LR schedule.

        Sets ``self.train_loader``, ``self.val_loader``, ``self.optimizer``
        and ``self.scheduler``. The schedule spans
        ``len(train_loader) * num_epochs`` steps, of which the first
        ``warmup_ratio`` fraction is warmup.

        Args:
            data_splits: Dict with ``'train'`` and ``'val'`` entries, each
                holding ``'texts'`` and ``'labels'``.
        """
        self.train_loader = self._build_loader(data_splits['train'], shuffle=True)
        self.val_loader = self._build_loader(data_splits['val'], shuffle=False)

        total_steps = len(self.train_loader) * self.config.num_epochs

        self.optimizer = AdamW(
            self.model.parameters(),
            lr=self.config.learning_rate,
            weight_decay=self.config.weight_decay
        )

        self.scheduler = get_linear_schedule_with_warmup(
            self.optimizer,
            num_warmup_steps=int(total_steps * self.config.warmup_ratio),
            num_training_steps=total_steps
        )

    def train_epoch(self):
        """Run one pass over the training loader, updating the model.

        Returns:
            Tuple ``(avg_loss, accuracy)``: mean per-batch loss and the
            fraction of training examples predicted correctly.
        """
        self.model.train()

        total_loss = 0.0
        correct_predictions = 0
        total_predictions = 0

        for batch in self.train_loader:
            input_ids = batch['input_ids'].to(self.device)
            attention_mask = batch['attention_mask'].to(self.device)
            labels = batch['labels'].to(self.device)

            self.optimizer.zero_grad()
            outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
            loss = self.loss_fn(outputs['logits'], labels)

            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.config.max_grad_norm)
            self.optimizer.step()
            # The schedule is defined in optimizer steps, so advance it per batch.
            self.scheduler.step()

            total_loss += loss.item()
            predictions = torch.argmax(outputs['logits'], dim=-1)
            correct_predictions += (predictions == labels).sum().item()
            total_predictions += labels.size(0)

        avg_loss = total_loss / len(self.train_loader)
        accuracy = correct_predictions / total_predictions

        return avg_loss, accuracy

    def evaluate(self):
        """Score the model on the validation loader without updating it.

        Returns:
            Tuple ``(avg_loss, accuracy, f1_macro)``: mean per-batch loss,
            accuracy, and macro-averaged F1 (``zero_division=0``).
        """
        self.model.eval()

        total_loss = 0.0
        all_predictions = []
        all_labels = []

        with torch.no_grad():
            for batch in self.val_loader:
                input_ids = batch['input_ids'].to(self.device)
                attention_mask = batch['attention_mask'].to(self.device)
                labels = batch['labels'].to(self.device)

                outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
                loss = self.loss_fn(outputs['logits'], labels)

                total_loss += loss.item()
                predictions = torch.argmax(outputs['logits'], dim=-1)

                all_predictions.extend(predictions.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        avg_loss = total_loss / len(self.val_loader)
        accuracy = accuracy_score(all_labels, all_predictions)
        f1_macro = f1_score(all_labels, all_predictions, average='macro', zero_division=0)

        return avg_loss, accuracy, f1_macro

    def train(self, data_splits: Dict):
        """Train for ``config.num_epochs`` epochs and keep the best checkpoint.

        After every epoch the validation metrics are appended to
        ``training_history``; whenever validation macro-F1 improves, the model
        is saved through ``save_model(is_best=True)``. The first epoch always
        produces a checkpoint, even when its F1 is 0.0.

        Args:
            data_splits: Dict with ``'train'`` and ``'val'`` splits.

        Returns:
            The ``training_history`` dict.
        """
        print(f"Starting distilroBERTa single-task training ({self.config.task_type})...")

        self.create_data_loaders(data_splits)

        # Start below any reachable score so that epoch 1 is always saved.
        best_f1 = float('-inf')

        for epoch in range(self.config.num_epochs):
            print(f"\n📍 Epoch {epoch + 1}/{self.config.num_epochs}")

            train_loss, train_accuracy = self.train_epoch()
            val_loss, val_accuracy, val_f1_macro = self.evaluate()

            self.training_history['train_loss'].append(train_loss)
            self.training_history['train_accuracy'].append(train_accuracy)
            self.training_history['val_loss'].append(val_loss)
            self.training_history['val_accuracy'].append(val_accuracy)
            self.training_history['val_f1_macro'].append(val_f1_macro)

            print(f"  Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.4f}")
            print(f"  Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.4f}, Val F1: {val_f1_macro:.4f}")

            if val_f1_macro > best_f1:
                best_f1 = val_f1_macro
                self.save_model(is_best=True)

        # With zero epochs nothing was scored; report 0.0 in that case.
        reported_f1 = 0.0 if best_f1 == float('-inf') else best_f1
        print(f"\ndistilroBERTa training completed! Best F1: {reported_f1:.4f}")
        return self.training_history

    def save_model(self, is_best=False):
        """Write the encoder, tokenizer and classifier head to disk.

        Files go to ``<output_dir>/model_best`` when ``is_best`` is true and
        ``<output_dir>/model`` otherwise. The encoder and tokenizer use
        ``save_pretrained``; the head's weights, ``num_classes`` and the config
        object are stored in ``custom_components.pt``.

        Args:
            is_best: Selects the ``_best`` directory suffix and the log line.
        """
        suffix = "_best" if is_best else ""
        model_dir = os.path.join(self.config.output_dir, f"model{suffix}")

        os.makedirs(model_dir, exist_ok=True)

        self.model.roberta.save_pretrained(model_dir)
        self.tokenizer.save_pretrained(model_dir)

        # NOTE(review): 'config' is saved as the config object itself, so
        # loading this file needs that class importable and full (not
        # weights-only) unpickling — confirm the loader accounts for this.
        torch.save({
            'classifier_state_dict': self.model.classifier.state_dict(),
            'num_classes': self.num_classes,
            'config': self.config
        }, os.path.join(model_dir, 'custom_components.pt'))

        if is_best:
            print(f"Best distilroBERTa model saved to {model_dir}")

class distilroBERTaMultiTaskTrainer:
    """Fine-tunes and validates the two-head (sentiment + emotion) model.

    Construction loads the tokenizer and the multi-task model, sized from the
    notebook-level ``roberta_model_config``, onto the notebook-level
    ``device``. ``train`` runs the epochs, records per-epoch metrics in
    ``training_history`` and checkpoints the epoch with the highest mean of
    the two validation macro-F1 scores.
    """

    def __init__(self, config: "TrainingConfig"):
        """Load tokenizer and model, and set up the loss and metric history.

        Args:
            config: Hyperparameters for this run; ``config.alpha`` weights the
                sentiment loss against the emotion loss.
        """
        self.config = config
        self.device = device

        self.tokenizer = AutoTokenizer.from_pretrained(config.model_name)
        if self.tokenizer.pad_token is None:
            # The datasets pad to max_length, which needs a pad token.
            self.tokenizer.pad_token = self.tokenizer.eos_token

        self.model = distilroBERTaMultiTaskTransformer(
            model_name=config.model_name,
            sentiment_num_classes=roberta_model_config.sentiment_num_classes,
            emotion_num_classes=roberta_model_config.emotion_num_classes,
            hidden_dropout_prob=config.hidden_dropout_prob,
            attention_dropout_prob=config.attention_dropout_prob,
            classifier_dropout=config.classifier_dropout
        ).to(self.device)

        # The same cross-entropy criterion is applied to both heads.
        self.loss_fn = nn.CrossEntropyLoss()

        # One list per metric; train() appends one value per epoch.
        self.training_history = {
            'train_loss': [],
            'train_sentiment_accuracy': [],
            'train_emotion_accuracy': [],
            'val_loss': [],
            'val_sentiment_accuracy': [],
            'val_emotion_accuracy': [],
            'val_sentiment_f1_macro': [],
            'val_emotion_f1_macro': []
        }

    def _build_loader(self, split: Dict, shuffle: bool):
        """Wrap one multi-task split in a dataset and DataLoader."""
        dataset = distilroBERTaMultiTaskDataset(
            texts=split['texts'],
            sentiment_labels=split['sentiment_labels'],
            emotion_labels=split['emotion_labels'],
            tokenizer=self.tokenizer,
            max_length=self.config.max_length
        )
        return DataLoader(
            dataset,
            batch_size=self.config.batch_size,
            shuffle=shuffle,
            num_workers=2,
            # Page-locked host memory is only useful when batches go to a GPU.
            pin_memory=self.device.type == 'cuda'
        )

    def _combined_loss(self, outputs, sentiment_labels, emotion_labels):
        """Return ``alpha * sentiment_loss + (1 - alpha) * emotion_loss``."""
        sentiment_loss = self.loss_fn(outputs['sentiment_logits'], sentiment_labels)
        emotion_loss = self.loss_fn(outputs['emotion_logits'], emotion_labels)
        return self.config.alpha * sentiment_loss + (1 - self.config.alpha) * emotion_loss

    def create_data_loaders(self, data_splits: Dict):
        """Create the train/val loaders plus the optimizer and LR schedule.

        Sets ``self.train_loader``, ``self.val_loader``, ``self.optimizer``
        and ``self.scheduler``. The schedule spans
        ``len(train_loader) * num_epochs`` steps, of which the first
        ``warmup_ratio`` fraction is warmup.

        Args:
            data_splits: Dict with ``'train'`` and ``'val'`` entries, each
                holding ``'texts'``, ``'sentiment_labels'`` and
                ``'emotion_labels'``.
        """
        self.train_loader = self._build_loader(data_splits['train'], shuffle=True)
        self.val_loader = self._build_loader(data_splits['val'], shuffle=False)

        total_steps = len(self.train_loader) * self.config.num_epochs

        self.optimizer = AdamW(
            self.model.parameters(),
            lr=self.config.learning_rate,
            weight_decay=self.config.weight_decay
        )

        self.scheduler = get_linear_schedule_with_warmup(
            self.optimizer,
            num_warmup_steps=int(total_steps * self.config.warmup_ratio),
            num_training_steps=total_steps
        )

    def train_epoch(self):
        """Run one pass over the training loader, updating the model.

        Returns:
            Tuple ``(avg_loss, sentiment_accuracy, emotion_accuracy)``: mean
            per-batch combined loss and the per-task fraction of training
            examples predicted correctly.
        """
        self.model.train()

        total_loss = 0.0
        sentiment_correct = 0
        emotion_correct = 0
        total_predictions = 0

        for batch in self.train_loader:
            input_ids = batch['input_ids'].to(self.device)
            attention_mask = batch['attention_mask'].to(self.device)
            sentiment_labels = batch['sentiment_labels'].to(self.device)
            emotion_labels = batch['emotion_labels'].to(self.device)

            self.optimizer.zero_grad()
            outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
            loss = self._combined_loss(outputs, sentiment_labels, emotion_labels)

            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.config.max_grad_norm)
            self.optimizer.step()
            # The schedule is defined in optimizer steps, so advance it per batch.
            self.scheduler.step()

            total_loss += loss.item()

            sentiment_preds = torch.argmax(outputs['sentiment_logits'], dim=-1)
            emotion_preds = torch.argmax(outputs['emotion_logits'], dim=-1)

            sentiment_correct += (sentiment_preds == sentiment_labels).sum().item()
            emotion_correct += (emotion_preds == emotion_labels).sum().item()
            total_predictions += sentiment_labels.size(0)

        avg_loss = total_loss / len(self.train_loader)
        sentiment_accuracy = sentiment_correct / total_predictions
        emotion_accuracy = emotion_correct / total_predictions

        return avg_loss, sentiment_accuracy, emotion_accuracy

    def evaluate(self):
        """Score both heads on the validation loader without updating the model.

        Returns:
            Tuple ``(avg_loss, sentiment_accuracy, emotion_accuracy,
            sentiment_f1_macro, emotion_f1_macro)``; F1 values are
            macro-averaged with ``zero_division=0``.
        """
        self.model.eval()

        total_loss = 0.0
        sentiment_predictions = []
        emotion_predictions = []
        sentiment_labels = []
        emotion_labels = []

        with torch.no_grad():
            for batch in self.val_loader:
                input_ids = batch['input_ids'].to(self.device)
                attention_mask = batch['attention_mask'].to(self.device)
                sentiment_true = batch['sentiment_labels'].to(self.device)
                emotion_true = batch['emotion_labels'].to(self.device)

                outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
                loss = self._combined_loss(outputs, sentiment_true, emotion_true)

                total_loss += loss.item()

                sentiment_preds = torch.argmax(outputs['sentiment_logits'], dim=-1)
                emotion_preds = torch.argmax(outputs['emotion_logits'], dim=-1)

                sentiment_predictions.extend(sentiment_preds.cpu().numpy())
                emotion_predictions.extend(emotion_preds.cpu().numpy())
                sentiment_labels.extend(sentiment_true.cpu().numpy())
                emotion_labels.extend(emotion_true.cpu().numpy())

        avg_loss = total_loss / len(self.val_loader)

        sentiment_accuracy = accuracy_score(sentiment_labels, sentiment_predictions)
        emotion_accuracy = accuracy_score(emotion_labels, emotion_predictions)
        sentiment_f1_macro = f1_score(sentiment_labels, sentiment_predictions, average='macro', zero_division=0)
        emotion_f1_macro = f1_score(emotion_labels, emotion_predictions, average='macro', zero_division=0)

        return avg_loss, sentiment_accuracy, emotion_accuracy, sentiment_f1_macro, emotion_f1_macro

    def train(self, data_splits: Dict):
        """Train for ``config.num_epochs`` epochs and keep the best checkpoint.

        After every epoch the validation metrics are appended to
        ``training_history``; whenever the mean of the two validation macro-F1
        scores improves, the model is saved through
        ``save_model(is_best=True)``. The first epoch always produces a
        checkpoint, even when its combined F1 is 0.0.

        Args:
            data_splits: Dict with ``'train'`` and ``'val'`` splits.

        Returns:
            The ``training_history`` dict.
        """
        print(f"🚀 Starting distilroBERTa multi-task training...")

        self.create_data_loaders(data_splits)

        # Start below any reachable score so that epoch 1 is always saved.
        best_combined_f1 = float('-inf')

        for epoch in range(self.config.num_epochs):
            print(f"\n📍 Epoch {epoch + 1}/{self.config.num_epochs}")

            train_loss, train_sent_acc, train_emo_acc = self.train_epoch()
            val_loss, val_sent_acc, val_emo_acc, val_sent_f1, val_emo_f1 = self.evaluate()

            self.training_history['train_loss'].append(train_loss)
            self.training_history['train_sentiment_accuracy'].append(train_sent_acc)
            self.training_history['train_emotion_accuracy'].append(train_emo_acc)
            self.training_history['val_loss'].append(val_loss)
            self.training_history['val_sentiment_accuracy'].append(val_sent_acc)
            self.training_history['val_emotion_accuracy'].append(val_emo_acc)
            self.training_history['val_sentiment_f1_macro'].append(val_sent_f1)
            self.training_history['val_emotion_f1_macro'].append(val_emo_f1)

            print(f"  Train Loss: {train_loss:.4f}")
            print(f"  Train Sentiment Acc: {train_sent_acc:.4f}, Train Emotion Acc: {train_emo_acc:.4f}")
            print(f"  Val Loss: {val_loss:.4f}")
            print(f"  Val Sentiment Acc: {val_sent_acc:.4f}, F1: {val_sent_f1:.4f}")
            print(f"  Val Emotion Acc: {val_emo_acc:.4f}, F1: {val_emo_f1:.4f}")

            # Model selection uses the unweighted mean of the two macro-F1 scores.
            combined_f1 = (val_sent_f1 + val_emo_f1) / 2
            if combined_f1 > best_combined_f1:
                best_combined_f1 = combined_f1
                self.save_model(is_best=True)

        # With zero epochs nothing was scored; report 0.0 in that case.
        reported_f1 = 0.0 if best_combined_f1 == float('-inf') else best_combined_f1
        print(f"\ndistilroBERTa training completed! Best Combined F1: {reported_f1:.4f}")
        return self.training_history

    def save_model(self, is_best=False):
        """Write the encoder, tokenizer and both classifier heads to disk.

        Files go to ``<output_dir>/model_best`` when ``is_best`` is true and
        ``<output_dir>/model`` otherwise. The encoder and tokenizer use
        ``save_pretrained``; both heads' weights, their class counts and the
        config object are stored in ``custom_components.pt``.

        Args:
            is_best: Selects the ``_best`` directory suffix and the log line.
        """
        suffix = "_best" if is_best else ""
        model_dir = os.path.join(self.config.output_dir, f"model{suffix}")

        os.makedirs(model_dir, exist_ok=True)

        self.model.roberta.save_pretrained(model_dir)
        self.tokenizer.save_pretrained(model_dir)

        # NOTE(review): 'config' is saved as the config object itself, so
        # loading this file needs that class importable and full (not
        # weights-only) unpickling — confirm the loader accounts for this.
        torch.save({
            'sentiment_classifier_state_dict': self.model.sentiment_classifier.state_dict(),
            'emotion_classifier_state_dict': self.model.emotion_classifier.state_dict(),
            'sentiment_num_classes': self.model.sentiment_num_classes,
            'emotion_num_classes': self.model.emotion_num_classes,
            'config': self.config
        }, os.path.join(model_dir, 'custom_components.pt'))

        if is_best:
            print(f"Best distilroBERTa model saved to {model_dir}")

print("distilroBERTa Training classes defined!")

distilroBERTa Training classes defined!


In [None]:
# Cell 7: Evaluation Functions for distilroBERTa
def evaluate_distilroBERTa_model(model_path: str, model_type: str, test_data: Dict, model_name: str = "distilroberta-base"):
    """Evaluate a saved distilroBERTa checkpoint on a test split.

    Args:
        model_path: Directory holding the saved encoder/tokenizer files and
            ``custom_components.pt``.
        model_type: ``"multitask"`` selects the two-head model; any other value
            is evaluated as a single-task model.
        test_data: For multitask, a dict with ``texts``, ``sentiment_labels`` and
            ``emotion_labels``; otherwise a dict with ``texts`` and ``labels``.
        model_name: Base model name handed to the model constructor.

    Returns:
        A dict of metrics. Multitask: per-task accuracy and macro F1 plus their
        averages (``combined_accuracy``, ``combined_f1_macro``). Single-task:
        ``accuracy`` and ``f1_macro``.
    """
    print(f"🔍 Evaluating distilroBERTa {model_type} model...")

    is_multitask = model_type == "multitask"
    eval_max_length = 128
    eval_batch_size = 32

    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Load custom components.
    # map_location lets a checkpoint written on GPU be read on a CPU-only machine.
    # weights_only=False is required because the checkpoint may carry a pickled
    # config object next to the tensors; only load checkpoints you produced
    # yourself, since unpickling can execute arbitrary code.
    custom_components = torch.load(
        os.path.join(model_path, 'custom_components.pt'),
        map_location=device,
        weights_only=False
    )

    if is_multitask:
        # Build the multi-task model, then swap in the fine-tuned weights
        model = distilroBERTaMultiTaskTransformer(
            model_name=model_name,
            sentiment_num_classes=custom_components['sentiment_num_classes'],
            emotion_num_classes=custom_components['emotion_num_classes']
        )
        model.roberta = AutoModel.from_pretrained(model_path)
        model.sentiment_classifier.load_state_dict(custom_components['sentiment_classifier_state_dict'])
        model.emotion_classifier.load_state_dict(custom_components['emotion_classifier_state_dict'])

        test_dataset = distilroBERTaMultiTaskDataset(
            texts=test_data['texts'],
            sentiment_labels=test_data['sentiment_labels'],
            emotion_labels=test_data['emotion_labels'],
            tokenizer=tokenizer,
            max_length=eval_max_length
        )
    else:
        # Build the single-task model, then swap in the fine-tuned weights
        model = distilroBERTaSingleTaskTransformer(
            model_name=model_name,
            num_classes=custom_components['num_classes']
        )
        model.roberta = AutoModel.from_pretrained(model_path)
        model.classifier.load_state_dict(custom_components['classifier_state_dict'])

        test_dataset = distilroBERTaSingleTaskDataset(
            texts=test_data['texts'],
            labels=test_data['labels'],
            tokenizer=tokenizer,
            max_length=eval_max_length
        )

    # Move to the device only AFTER the encoder has been replaced: the freshly
    # loaded encoder lives on CPU, and moving earlier left it there while the
    # inputs were sent to the GPU.
    model = model.to(device)

    # Create test loader
    test_loader = DataLoader(test_dataset, batch_size=eval_batch_size, shuffle=False)

    # Evaluate
    model.eval()
    all_predictions = {'sentiment': [], 'emotion': []} if is_multitask else []
    all_labels = {'sentiment': [], 'emotion': []} if is_multitask else []

    with torch.no_grad():
        for batch in test_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)

            if is_multitask:
                # Multi-task predictions
                sentiment_preds = torch.argmax(outputs['sentiment_logits'], dim=-1)
                emotion_preds = torch.argmax(outputs['emotion_logits'], dim=-1)

                all_predictions['sentiment'].extend(sentiment_preds.cpu().numpy())
                all_predictions['emotion'].extend(emotion_preds.cpu().numpy())
                all_labels['sentiment'].extend(batch['sentiment_labels'].numpy())
                all_labels['emotion'].extend(batch['emotion_labels'].numpy())
            else:
                # Single-task predictions
                predictions = torch.argmax(outputs['logits'], dim=-1)
                all_predictions.extend(predictions.cpu().numpy())
                all_labels.extend(batch['labels'].numpy())

    # Calculate metrics
    if is_multitask:
        sentiment_accuracy = accuracy_score(all_labels['sentiment'], all_predictions['sentiment'])
        emotion_accuracy = accuracy_score(all_labels['emotion'], all_predictions['emotion'])
        sentiment_f1_macro = f1_score(all_labels['sentiment'], all_predictions['sentiment'], average='macro', zero_division=0)
        emotion_f1_macro = f1_score(all_labels['emotion'], all_predictions['emotion'], average='macro', zero_division=0)

        results = {
            'sentiment_accuracy': sentiment_accuracy,
            'emotion_accuracy': emotion_accuracy,
            'sentiment_f1_macro': sentiment_f1_macro,
            'emotion_f1_macro': emotion_f1_macro,
            'combined_accuracy': (sentiment_accuracy + emotion_accuracy) / 2,
            'combined_f1_macro': (sentiment_f1_macro + emotion_f1_macro) / 2
        }

        print(f"distilroBERTa Multi-task Results:")
        print(f"  Sentiment - Accuracy: {sentiment_accuracy:.4f}, F1: {sentiment_f1_macro:.4f}")
        print(f"  Emotion - Accuracy: {emotion_accuracy:.4f}, F1: {emotion_f1_macro:.4f}")
        print(f"  Combined - Accuracy: {results['combined_accuracy']:.4f}, F1: {results['combined_f1_macro']:.4f}")

    else:
        accuracy = accuracy_score(all_labels, all_predictions)
        f1_macro = f1_score(all_labels, all_predictions, average='macro', zero_division=0)

        results = {
            'accuracy': accuracy,
            'f1_macro': f1_macro
        }

        print(f"distilroBERTa {model_type.title()} Results:")
        print(f"  Accuracy: {accuracy:.4f}")
        print(f"  F1 Macro: {f1_macro:.4f}")

    return results

print("distilroBERTa evaluation functions defined")

distilroBERTa evaluation functions defined


In [8]:
# Cell 8: Ultra-Fast Hyperparameter Tuning Classes for distilroBERTa 
def create_tuning_subset(data_splits, subset_ratio=0.01):  # Even smaller: 1%
    """Draw a small random subset of every split for single-task tuning.

    Args:
        data_splits: Dict of splits; must contain ``'train'`` and at least one of
            ``'val'``, ``'validation'`` or ``'test'``. Each split is a dict with
            parallel ``'texts'`` and ``'labels'`` sequences.
        subset_ratio: Fraction of each split to keep. Every split keeps at least
            20 samples (or all of them if it has fewer) and never more than it
            contains.

    Returns:
        Dict with ``'train'``, ``'val'`` and ``'test'`` subsets. When no
        ``'test'`` split exists, the test subset is sampled from the validation
        split instead.
    """
    # ':g' keeps fractional percentages readable (0.005 -> "0.5%"); ':.0f' printed "0%".
    print(f"🔪 Creating {subset_ratio*100:g}% subset for hyperparameter tuning...")

    def sample_split(split_data, ratio):
        total = len(split_data['texts'])
        # At least 20 samples, but never more than the split holds: sampling is
        # without replacement, so a larger request would raise.
        n_samples = min(max(int(total * ratio), 20), total)
        indices = np.random.choice(total, n_samples, replace=False)

        return {
            'texts': [split_data['texts'][i] for i in indices],
            'labels': [split_data['labels'][i] for i in indices]
        }

    val_key = 'val' if 'val' in data_splits else ('validation' if 'validation' in data_splits else 'test')
    test_key = 'test' if 'test' in data_splits else val_key

    tuning_data = {
        'train': sample_split(data_splits['train'], subset_ratio),
        'val': sample_split(data_splits[val_key], subset_ratio),
        'test': sample_split(data_splits[test_key], subset_ratio)
    }

    print(f"📊 Tuning subset created:")
    print(f"  Train: {len(tuning_data['train']['texts'])} samples")
    print(f"  Val: {len(tuning_data['val']['texts'])} samples")

    return tuning_data

def create_multitask_tuning_subset(data_splits, subset_ratio=0.01):
    """Draw a small random subset of every split for multitask tuning.

    Args:
        data_splits: Dict of splits; must contain ``'train'`` and at least one of
            ``'val'``, ``'validation'`` or ``'test'``. Each split is a dict with
            parallel ``'texts'``, ``'sentiment_labels'`` and ``'emotion_labels'``
            sequences.
        subset_ratio: Fraction of each split to keep. Every split keeps at least
            20 samples (or all of them if it has fewer) and never more than it
            contains.

    Returns:
        Dict with ``'train'``, ``'val'`` and ``'test'`` subsets. When no
        ``'test'`` split exists, the test subset is sampled from the validation
        split instead.
    """
    # ':g' keeps fractional percentages readable (0.005 -> "0.5%"); ':.0f' printed "0%".
    print(f"🔪 Creating {subset_ratio*100:g}% multitask subset for hyperparameter tuning...")

    def sample_multitask_split(split_data, ratio):
        total = len(split_data['texts'])
        # At least 20 samples, but never more than the split holds: sampling is
        # without replacement, so a larger request would raise.
        n_samples = min(max(int(total * ratio), 20), total)
        indices = np.random.choice(total, n_samples, replace=False)

        return {
            'texts': [split_data['texts'][i] for i in indices],
            'sentiment_labels': [split_data['sentiment_labels'][i] for i in indices],
            'emotion_labels': [split_data['emotion_labels'][i] for i in indices]
        }

    val_key = 'val' if 'val' in data_splits else ('validation' if 'validation' in data_splits else 'test')
    test_key = 'test' if 'test' in data_splits else val_key

    tuning_data = {
        'train': sample_multitask_split(data_splits['train'], subset_ratio),
        'val': sample_multitask_split(data_splits[val_key], subset_ratio),
        'test': sample_multitask_split(data_splits[test_key], subset_ratio)
    }

    print(f"Multitask tuning subset created:")
    print(f"  Train: {len(tuning_data['train']['texts'])} samples")
    print(f"  Val: {len(tuning_data['val']['texts'])} samples")

    return tuning_data

class UltraFastdistilroBERTaHyperparameterTuner:
    """Random-search hyperparameter tuner that trades accuracy for speed.

    Each Optuna trial trains on a small random subset of the data for
    ``max_epochs_per_trial`` epochs with short sequences (``max_length=64``)
    and small batches, and is scored by validation macro F1 (the mean of the
    sentiment and emotion F1 for multitask).

    Only ``learning_rate``, ``batch_size``, ``weight_decay``,
    ``hidden_dropout_prob``, ``classifier_dropout`` (and ``alpha`` for multitask)
    are sampled. ``warmup_ratio`` and ``max_length`` are fixed, so they do NOT
    appear in ``study.best_params``.
    """

    def __init__(
        self,
        model_type: str,
        data_splits: Dict,
        n_trials: int = 5,
        model_name: str = "distilroberta-base",
        subset_ratio: float = 0.01,
        max_epochs_per_trial: int = 1
    ):
        """Build the tuning subset and print a summary of the speed settings.

        Args:
            model_type: ``"multitask"``, ``"sentiment"`` or anything else
                (treated as the emotion task when choosing the class count).
            data_splits: Full data splits to subsample from.
            n_trials: Number of Optuna trials to run.
            model_name: Model name passed into every trial's config.
            subset_ratio: Fraction of each split used for tuning.
            max_epochs_per_trial: Training epochs per trial.
        """
        self.model_type = model_type
        self.n_trials = n_trials
        self.model_name = model_name
        self.max_epochs_per_trial = max_epochs_per_trial

        print(f"Creating ultra-fast tuning setup for {model_type}")

        if model_type == "multitask":
            self.tuning_data = create_multitask_tuning_subset(data_splits, subset_ratio)
        else:
            self.tuning_data = create_tuning_subset(data_splits, subset_ratio)

        print(f"⚡ EXTREME Speed optimizations:")
        # ':g' so that sub-percent ratios (e.g. 0.005) are not reported as "0%".
        print(f"  - Using {subset_ratio*100:g}% of data ({len(self.tuning_data['train']['texts'])} samples)")
        print(f"  - Only {max_epochs_per_trial} epoch per trial")
        print(f"  - {n_trials} total trials")
        print(f"  - Small batch sizes (4-8)")
        print(f"  - No multiprocessing")
        print(f"  - Estimated time: {n_trials * max_epochs_per_trial * 1:.0f}-{n_trials * max_epochs_per_trial * 3:.0f} minutes")

    def objective(self, trial):
        """Train one trial and return its validation score (higher is better).

        Any exception raised while training is reported and converted into a
        score of 0.0 so a single failing trial does not abort the study.
        """
        # Very fast hyperparameter suggestions
        learning_rate = trial.suggest_float('learning_rate', 2e-5, 1e-4, log=True)
        batch_size = trial.suggest_categorical('batch_size', [4, 8])  # Small batches for speed
        num_epochs = self.max_epochs_per_trial
        warmup_ratio = 0.1  # Fixed for speed (not part of best_params)
        weight_decay = trial.suggest_float('weight_decay', 0.01, 0.1)
        hidden_dropout = trial.suggest_float('hidden_dropout_prob', 0.1, 0.2)
        classifier_dropout = trial.suggest_float('classifier_dropout', 0.1, 0.2)
        max_length = 64  # Shorter sequences for speed (not part of best_params)

        alpha = trial.suggest_float('alpha', 0.4, 0.6) if self.model_type == "multitask" else 0.5

        # Create ultra-speed config
        config = TrainingConfig(
            model_name=self.model_name,
            learning_rate=learning_rate,
            batch_size=batch_size,
            num_epochs=num_epochs,
            warmup_ratio=warmup_ratio,
            weight_decay=weight_decay,
            hidden_dropout_prob=hidden_dropout,
            classifier_dropout=classifier_dropout,
            max_length=max_length,
            alpha=alpha,
            task_type=self.model_type,
            output_dir=f"./ultra_fast_trial_{trial.number}"
        )

        start_time = time.time()
        trainer = None

        try:
            # Aggressive memory cleanup
            aggressive_memory_cleanup()

            if self.model_type == "multitask":
                trainer = UltraFastRoBERTaMultiTaskTrainer(config)
                history = trainer.train(self.tuning_data)

                best_sentiment_f1 = max(history['val_sentiment_f1_macro']) if history['val_sentiment_f1_macro'] else 0.0
                best_emotion_f1 = max(history['val_emotion_f1_macro']) if history['val_emotion_f1_macro'] else 0.0
                score = (best_sentiment_f1 + best_emotion_f1) / 2

            else:
                if self.model_type == "sentiment":
                    num_classes = roberta_model_config.sentiment_num_classes
                else:
                    num_classes = roberta_model_config.emotion_num_classes

                # The single-task fast trainer is named
                # UltraFastdistilroBERTaSingleTaskTrainer; the previous name
                # (UltraFastRoBERTaSingleTaskTrainer) does not exist, and the
                # resulting NameError was swallowed below as a 0.0 score.
                trainer = UltraFastdistilroBERTaSingleTaskTrainer(config, num_classes)
                history = trainer.train(self.tuning_data)

                score = max(history['val_f1_macro']) if history['val_f1_macro'] else 0.0

            elapsed = time.time() - start_time
            print(f"⚡ Trial {trial.number}: Score={score:.4f}, Time={elapsed/60:.1f}min")

            return score

        except Exception as e:
            # Best-effort: keep the study running, but report the exception type
            # so programming errors are not mistaken for bad hyperparameters.
            elapsed = time.time() - start_time
            print(f"❌ Trial {trial.number} failed after {elapsed/60:.1f}min: {type(e).__name__}: {str(e)[:100]}...")
            return 0.0

        finally:
            # Drop the trainer reference before cleaning up so its model can be freed.
            trainer = None
            aggressive_memory_cleanup()

    def tune(self):
        """Run ultra-fast hyperparameter optimization and return the Optuna study."""

        study = optuna.create_study(
            direction='maximize',
            sampler=optuna.samplers.RandomSampler(seed=42)
        )

        print(f"\n🚀 Starting ULTRA-FAST hyperparameter tuning for {self.model_type}...")
        print(f"⚡ Target: Find good hyperparameters in ~{self.n_trials * 2:.0f} minutes")
        print("=" * 60)

        start_time = time.time()
        study.optimize(self.objective, n_trials=self.n_trials)
        total_time = time.time() - start_time

        print(f"\n🏆 Ultra-fast tuning completed in {total_time/60:.1f} minutes!")
        print(f"🎯 Best score: {study.best_value:.4f}")
        print(f"📋 Best parameters:")
        for key, value in study.best_params.items():
            print(f"  {key}: {value}")

        return study

print("UltraFastdistilroBERTaHyperparameterTuner class defined")

UltraFastdistilroBERTaHyperparameterTuner class defined


In [9]:
# Cell 8B: Ultra-Fast Trainers for Speed
class UltraFastdistilroBERTaSingleTaskTrainer:
    """Stripped-down single-task trainer used for hyperparameter-tuning trials.

    Compared with a full training run it uses a plain AdamW optimizer (no
    scheduler), single-process data loading, and never writes checkpoints; it
    only records per-epoch metrics in ``training_history``.
    """

    # Tokenizers shared by all trainer instances, keyed by model name so a
    # trainer for a different model never picks up another model's tokenizer.
    _tokenizers = {}

    def __init__(self, config: TrainingConfig, num_classes: int):
        """Create the tokenizer (cached per model name), model and loss.

        Args:
            config: Training configuration; ``model_name``, the dropout fields,
                ``batch_size``, ``max_length``, ``learning_rate``,
                ``weight_decay``, ``num_epochs`` and ``task_type`` are read.
            num_classes: Number of output classes of the classifier head.
        """
        self.config = config
        self.num_classes = num_classes
        self.device = device

        # Initialize tokenizer (reuse the cached one for this model name if any)
        cache = UltraFastdistilroBERTaSingleTaskTrainer._tokenizers
        if config.model_name not in cache:
            tokenizer = AutoTokenizer.from_pretrained(config.model_name)
            if tokenizer.pad_token is None:
                tokenizer.pad_token = tokenizer.eos_token
            cache[config.model_name] = tokenizer
        self.tokenizer = cache[config.model_name]

        # Initialize model
        self.model = distilroBERTaSingleTaskTransformer(
            model_name=config.model_name,
            num_classes=num_classes,
            hidden_dropout_prob=config.hidden_dropout_prob,
            attention_dropout_prob=config.attention_dropout_prob,
            classifier_dropout=config.classifier_dropout
        ).to(self.device)

        self.loss_fn = nn.CrossEntropyLoss()
        self.training_history = {
            'train_loss': [], 'train_accuracy': [], 'val_loss': [], 'val_accuracy': [], 'val_f1_macro': []
        }

    def create_data_loaders(self, data_splits: Dict):
        """Build train/val loaders from ``data_splits`` and create the optimizer.

        ``data_splits`` must provide ``'train'`` and ``'val'`` entries, each with
        ``'texts'`` and ``'labels'``.
        """
        train_dataset = distilroBERTaSingleTaskDataset(
            texts=data_splits['train']['texts'],
            labels=data_splits['train']['labels'],
            tokenizer=self.tokenizer,
            max_length=self.config.max_length
        )

        val_dataset = distilroBERTaSingleTaskDataset(
            texts=data_splits['val']['texts'],
            labels=data_splits['val']['labels'],
            tokenizer=self.tokenizer,
            max_length=self.config.max_length
        )

        # Speed-optimized data loaders
        self.train_loader = DataLoader(
            train_dataset,
            batch_size=self.config.batch_size,
            shuffle=True,
            num_workers=0,  # No multiprocessing for speed
            pin_memory=False  # Disable pin_memory
        )

        self.val_loader = DataLoader(
            val_dataset,
            batch_size=self.config.batch_size,
            shuffle=False,
            num_workers=0,
            pin_memory=False
        )

        # Simple optimizer setup
        self.optimizer = AdamW(
            self.model.parameters(),
            lr=self.config.learning_rate,
            weight_decay=self.config.weight_decay
        )

    def train_epoch(self):
        """Run one pass over the training loader.

        Returns:
            ``(mean_batch_loss, accuracy)``; ``(0.0, 0.0)`` if the loader
            produced no samples.
        """
        self.model.train()

        total_loss = 0.0
        correct_predictions = 0
        total_predictions = 0
        num_batches = len(self.train_loader)
        # Report progress roughly four times per epoch (every batch for tiny loaders).
        progress_every = max(1, num_batches // 4)

        for batch_idx, batch in enumerate(self.train_loader):
            input_ids = batch['input_ids'].to(self.device, non_blocking=True)
            attention_mask = batch['attention_mask'].to(self.device, non_blocking=True)
            labels = batch['labels'].to(self.device, non_blocking=True)

            outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
            loss = self.loss_fn(outputs['logits'], labels)

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            # Track metrics
            total_loss += loss.item()
            predictions = torch.argmax(outputs['logits'], dim=-1)
            correct_predictions += (predictions == labels).sum().item()
            total_predictions += labels.size(0)

            # Print progress for very small datasets
            if batch_idx % progress_every == 0:
                print(f"    Batch {batch_idx + 1}/{num_batches}")

        # Guard against division by zero when the loader yielded nothing.
        if num_batches == 0 or total_predictions == 0:
            return 0.0, 0.0

        return total_loss / num_batches, correct_predictions / total_predictions

    def evaluate(self):
        """Score the model on the validation loader.

        Returns:
            ``(mean_batch_loss, accuracy, macro_f1)``; all zeros if the loader
            produced no samples.
        """
        self.model.eval()

        total_loss = 0.0
        all_predictions = []
        all_labels = []

        with torch.no_grad():
            for batch in self.val_loader:
                input_ids = batch['input_ids'].to(self.device, non_blocking=True)
                attention_mask = batch['attention_mask'].to(self.device, non_blocking=True)
                labels = batch['labels'].to(self.device, non_blocking=True)

                outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
                loss = self.loss_fn(outputs['logits'], labels)

                total_loss += loss.item()
                predictions = torch.argmax(outputs['logits'], dim=-1)

                all_predictions.extend(predictions.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        # Nothing to score: avoid dividing by a zero-length loader.
        if not all_labels:
            return 0.0, 0.0, 0.0

        accuracy = accuracy_score(all_labels, all_predictions)
        f1_macro = f1_score(all_labels, all_predictions, average='macro', zero_division=0)

        return total_loss / len(self.val_loader), accuracy, f1_macro

    def train(self, data_splits: Dict):
        """Train for ``config.num_epochs`` epochs and return the metric history.

        Returns:
            ``training_history``: dict of per-epoch lists for ``train_loss``,
            ``train_accuracy``, ``val_loss``, ``val_accuracy`` and
            ``val_f1_macro``.
        """
        print(f"🚀 Starting ultra-fast distilroBERTa training ({self.config.task_type})...")

        self.create_data_loaders(data_splits)

        best_f1 = 0.0

        for epoch in range(self.config.num_epochs):
            print(f"  📍 Epoch {epoch + 1}/{self.config.num_epochs}")

            train_loss, train_accuracy = self.train_epoch()
            val_loss, val_accuracy, val_f1_macro = self.evaluate()

            self.training_history['train_loss'].append(train_loss)
            self.training_history['train_accuracy'].append(train_accuracy)
            self.training_history['val_loss'].append(val_loss)
            self.training_history['val_accuracy'].append(val_accuracy)
            self.training_history['val_f1_macro'].append(val_f1_macro)

            print(f"    Loss: {train_loss:.4f}, Acc: {train_accuracy:.4f}, Val F1: {val_f1_macro:.4f}")

            best_f1 = max(best_f1, val_f1_macro)

        print(f"✅ Training completed! Best F1: {best_f1:.4f}")
        return self.training_history

class UltraFastRoBERTaMultiTaskTrainer:
    """Multi-task trainer used by the hyperparameter tuner.

    The tuner constructs this class with a config and calls ``train()``,
    expecting a history dict with ``'val_sentiment_f1_macro'`` and
    ``'val_emotion_f1_macro'``. An empty placeholder class cannot be built with
    a config argument, so every multitask trial failed. This wrapper delegates
    to the regular multi-task trainer and disables checkpoint writing, since
    tuning trials are throw-away runs.

    NOTE(review): assumes ``distilroBERTaMultiTaskTrainer`` (the class used for
    the initial multitask training run) is defined earlier in the notebook,
    accepts ``config=...`` and calls ``self.save_model(is_best=True)`` from
    ``train()`` — confirm against its definition.
    """

    def __init__(self, config):
        self.config = config
        # Resolved at call time so defining this class never fails on import order.
        self._trainer = distilroBERTaMultiTaskTrainer(config=config)
        # Skip per-trial checkpoint writes: only the returned metrics are used.
        self._trainer.save_model = lambda is_best=False: None

    def train(self, data_splits):
        """Train on ``data_splits`` and return the wrapped trainer's history dict."""
        return self._trainer.train(data_splits)

print("Ultra-fast trainers defined")

Ultra-fast trainers defined


In [10]:
# Cell 9: Data Loading and Initial Setup for distilroBERTa
print("🚀 STARTING ROBERTA TRAINING PIPELINE")
print(80 * "=")

# Start from a clean memory state
aggressive_memory_cleanup()

# Build the two single-task datasets, then derive the multitask dataset from them
print("\n1️⃣ Loading and processing datasets for distilroBERTa...")
sentiment_data, emotion_data = load_and_process_datasets_roberta()
multitask_data = create_multitask_data_roberta(sentiment_data, emotion_data)

# Settings shared by the later training and tuning cells
model_name = "distilroberta-base"
n_trials = 8  # Number of hyperparameter tuning trials

# One summary block: dataset sizes followed by the shared settings
print("\n".join([
    "Data loading completed!",
    f"Sentiment data: {len(sentiment_data['train']['texts'])} train samples",
    f"Emotion data: {len(emotion_data['train']['texts'])} train samples",
    f"Multitask data: {len(multitask_data['train']['texts'])} train samples",
    f"Model: {model_name}",
    f"Hyperparameter trials per model: {n_trials}",
]))

🚀 STARTING ROBERTA TRAINING PIPELINE
🧹 Memory cleaned!

1️⃣ Loading and processing datasets for distilroBERTa...
📥 Loading external datasets for RoBERTa...
✅ SST-2 dataset loaded: 67349 train samples
✅ GoEmotions dataset loaded: 43410 train samples
🔄 Processing sentiment data for RoBERTa...
✅ RoBERTa Sentiment data processed:
  Train: 7000 samples
  Val: 1500 samples
  Test: 1500 samples
🔄 Processing emotion data for RoBERTa...
✅ RoBERTa Emotion data processed:
  Train: 7000 samples
  Val: 1500 samples
  Test: 1500 samples
🔄 Creating multi-task dataset for RoBERTa...
✅ RoBERTa Multi-task data created:
  Train: 7000 samples
  Val: 1500 samples
  Test: 1500 samples
Data loading completed!
Sentiment data: 7000 train samples
Emotion data: 7000 train samples
Multitask data: 7000 train samples
Model: distilroberta-base
Hyperparameter trials per model: 8


In [None]:
# Cell 10: Fast Initial Sentiment Model Training
print(f"\n{'=' * 80}")
print("📍 PHASE 1: FAST INITIAL distilroberta TRAINING - SENTIMENT MODEL")
print(80 * "=")

# The initial run also trains on a reduced (10%) sample of each split
print("🔪 Creating 10% subset for fast initial training...")
initial_sentiment_data = create_tuning_subset(sentiment_data, subset_ratio=0.1)

# Speed-oriented settings: larger batches, fewer epochs, shorter sequences
fast_initial_config_sentiment = TrainingConfig(
    model_name=model_name,
    task_type="sentiment",
    output_dir="./initial_distilroberta_sentiment_model",
    learning_rate=2e-5,
    batch_size=16,
    num_epochs=2,
    max_length=64,
)

print("\n2️⃣ Training Fast Initial DistilRoBERTa Sentiment Model...")
print(60 * "=")

# Fit on the subset
initial_sentiment_trainer = distilroBERTaSingleTaskTrainer(
    config=fast_initial_config_sentiment,
    num_classes=roberta_model_config.sentiment_num_classes,
)
initial_sentiment_history = initial_sentiment_trainer.train(initial_sentiment_data)

# Score the best checkpoint on the complete test split
initial_sentiment_results = evaluate_distilroBERTa_model(
    model_path="./initial_distilroberta_sentiment_model/model_best",
    model_type="sentiment",
    test_data=sentiment_data['test'],
    model_name=model_name,
)

print("\n".join([
    f"\n✅ Initial Sentiment Model Results:",
    f"  Accuracy: {initial_sentiment_results['accuracy']:.4f}",
    f"  F1 Macro: {initial_sentiment_results['f1_macro']:.4f}",
    f"  (Note: Trained on 10% subset for speed)",
]))

# Release memory before the next phase
aggressive_memory_cleanup()


📍 PHASE 1: FAST INITIAL distilroberta TRAINING - SENTIMENT MODEL
🔪 Creating 10% subset for fast initial training...
🔪 Creating 10% subset for hyperparameter tuning...
📊 Tuning subset created:
  Train: 700 samples
  Val: 150 samples

2️⃣ Training Fast Initial DistilRoBERTa Sentiment Model...
Starting distilroBERTa single-task training (sentiment)...

📍 Epoch 1/2


In [None]:
# Debug Cell: Check what model is actually being used
# Loads the tokenizer and encoder for `model_name` directly, reports how long
# each load takes and how large the encoder is, then frees both objects.
# `time` is already imported in the notebook's import cell, so it is not
# re-imported here.
print("🔍 DEBUGGING MODEL LOADING...")

# Check the current model_name variable
print(f"Current model_name variable: {model_name}")

# Test loading the model directly
print(f"\n⏱️ Testing direct model loading...")

# perf_counter is monotonic, so the measured durations are not affected by
# system clock adjustments.
start_time = time.perf_counter()
test_tokenizer = AutoTokenizer.from_pretrained(model_name)
load_time_tokenizer = time.perf_counter() - start_time
print(f"Tokenizer loaded in: {load_time_tokenizer:.1f} seconds")

start_time = time.perf_counter()
test_model = AutoModel.from_pretrained(model_name)
load_time_model = time.perf_counter() - start_time
print(f"Model loaded in: {load_time_model:.1f} seconds")

# Check model size
total_params = sum(p.numel() for p in test_model.parameters())
print(f"Model parameters: {total_params:,} ({total_params/1e6:.1f}M)")

# Check model type
print(f"Model type: {type(test_model)}")
print(f"Model config: {test_model.config.model_type}")

# Expected sizes:
# distilroberta-base: ~82M parameters  
# bert-tiny: ~4M parameters

if total_params > 100e6:
    print("⚠️ WARNING: This is still a large model (>100M params)")
elif total_params > 50e6:
    print("✅ Medium-sized model (50-100M params)")
else:
    print("✅ Small model (<50M params)")

# Clean up
del test_tokenizer, test_model
torch.cuda.empty_cache()

In [None]:
# Cell 11: Initial Emotion Model Training
print("\n" + "="*80)
print("📍 PHASE 1: INITIAL distilroBERTa TRAINING - EMOTION MODEL")
print("="*80)

# Default configuration for RoBERTa emotion
default_config_emotion = TrainingConfig(
    model_name=model_name,
    batch_size=8,
    learning_rate=2e-5,
    num_epochs=3,
    max_length=128,
    task_type="emotion",
    output_dir="./initial_roberta_emotion_model"
)

print("\n3️⃣ Training Initial distilroBERTa Emotion Model...")
print("="*60)

# Train initial emotion model
initial_emotion_trainer = distilroBERTaSingleTaskTrainer(
    config=default_config_emotion,
    num_classes=roberta_model_config.emotion_num_classes
)
initial_emotion_history = initial_emotion_trainer.train(emotion_data)

# Evaluate initial emotion model.
# The checkpoint path is derived from the config's output_dir so it always
# matches where the trainer was told to save; the previous hard-coded path
# ("./initial_distilroBERTa_emotion_model/...") named a different directory
# that is never written.
initial_emotion_results = evaluate_distilroBERTa_model(
    model_path=os.path.join(default_config_emotion.output_dir, "model_best"),
    model_type="emotion",
    test_data=emotion_data['test'],
    model_name=model_name
)

print(f"\n✅ Initial Emotion Model Results:")
print(f"  Accuracy: {initial_emotion_results['accuracy']:.4f}")
print(f"  F1 Macro: {initial_emotion_results['f1_macro']:.4f}")

# Clean up memory
aggressive_memory_cleanup()

In [None]:
# Cell 12: Initial Multitask Model Training
print(f"\n{'=' * 80}")
print("📍 PHASE 1: INITIAL ROBERTA TRAINING - MULTITASK MODEL")
print(80 * "=")

# Baseline multitask settings; alpha is the task-weighting value handed to the trainer
default_config_multitask = TrainingConfig(
    model_name=model_name,
    task_type="multitask",
    output_dir="./initial_roberta_multitask_model",
    learning_rate=2e-5,
    batch_size=8,
    num_epochs=3,
    max_length=128,
    alpha=0.5,
)

print("\n4️⃣ Training Initial RoBERTa Multi-task Model...")
print(60 * "=")

# Fit the baseline multitask model on the full multitask data
initial_multitask_trainer = distilroBERTaMultiTaskTrainer(config=default_config_multitask)
initial_multitask_history = initial_multitask_trainer.train(multitask_data)

# Score the best checkpoint on the test split
initial_multitask_results = evaluate_distilroBERTa_model(
    model_path="./initial_roberta_multitask_model/model_best",
    model_type="multitask",
    test_data=multitask_data['test'],
    model_name=model_name,
)

print("\n".join([
    f"\n✅ Initial Multitask Model Results:",
    f"  Sentiment - Accuracy: {initial_multitask_results['sentiment_accuracy']:.4f}, F1: {initial_multitask_results['sentiment_f1_macro']:.4f}",
    f"  Emotion - Accuracy: {initial_multitask_results['emotion_accuracy']:.4f}, F1: {initial_multitask_results['emotion_f1_macro']:.4f}",
    f"  Combined - Accuracy: {initial_multitask_results['combined_accuracy']:.4f}, F1: {initial_multitask_results['combined_f1_macro']:.4f}",
]))

# Release memory before the next phase
aggressive_memory_cleanup()

In [None]:
# Cell 13: Initial Results Summary
print(f"\n{'=' * 80}")
print("📍 INITIAL ROBERTA RESULTS SUMMARY")
print(80 * "=")

# Single-task baselines
print("\n".join([
    f"\n📊 INITIAL ROBERTA MODEL PERFORMANCE:",
    f"  Sentiment Model:",
    f"    Accuracy: {initial_sentiment_results['accuracy']:.4f}",
    f"    F1 Macro: {initial_sentiment_results['f1_macro']:.4f}",
]))

print("\n".join([
    f"\n  Emotion Model:",
    f"    Accuracy: {initial_emotion_results['accuracy']:.4f}",
    f"    F1 Macro: {initial_emotion_results['f1_macro']:.4f}",
]))

# Multitask baseline: per-task metrics plus their averages
print("\n".join([
    f"\n  Multitask Model:",
    f"    Sentiment - Accuracy: {initial_multitask_results['sentiment_accuracy']:.4f}, F1: {initial_multitask_results['sentiment_f1_macro']:.4f}",
    f"    Emotion - Accuracy: {initial_multitask_results['emotion_accuracy']:.4f}, F1: {initial_multitask_results['emotion_f1_macro']:.4f}",
    f"    Combined - Accuracy: {initial_multitask_results['combined_accuracy']:.4f}, F1: {initial_multitask_results['combined_f1_macro']:.4f}",
]))

# Keep the baseline metrics together for later comparison
all_results = dict(
    initial_sentiment=initial_sentiment_results,
    initial_emotion=initial_emotion_results,
    initial_multitask=initial_multitask_results,
)

print(f"\n💡 These are RoBERTa baseline results. Now proceeding to hyperparameter tuning!")

In [None]:
# Cell 10: Ultra-Fast Hyperparameter Tuning - Sentiment (Updated)
print(f"\n{'=' * 80}")
print("📍 PHASE 2: ULTRA-FAST HYPERPARAMETER TUNING - SENTIMENT")
print(80 * "=")

print("\n6️⃣ ULTRA-Fast Hyperparameter Tuning for RoBERTa Sentiment Model...")
print(60 * "=")

# Sentiment tuner: 5 trials, 0.5% of the data, a single epoch per trial
sentiment_tuner = UltraFastdistilroBERTaHyperparameterTuner(
    model_type="sentiment",
    data_splits=sentiment_data,
    model_name=model_name,
    n_trials=5,
    subset_ratio=0.005,
    max_epochs_per_trial=1,
)

# Run the search; the returned study holds the best score and parameters
sentiment_study = sentiment_tuner.tune()

print("\n".join([
    f"\n✅ Sentiment Hyperparameter Tuning Completed!",
    f"🏆 Best F1 Score: {sentiment_study.best_value:.4f}",
    f"📋 Best Parameters:",
]))
for key, value in sentiment_study.best_params.items():
    print(f"  {key}: {value}")

# Release memory before the next tuning run
aggressive_memory_cleanup()

In [None]:
# Cell 11: Ultra-Fast Hyperparameter Tuning - Emotion
print("\n" + "="*80)
print("📍 PHASE 2: ULTRA-FAST HYPERPARAMETER TUNING - EMOTION")
print("="*80)

print("\n7️⃣ Fast Hyperparameter Tuning for RoBERTa Emotion Model...")
print("="*60)

# Create FAST tuner for emotion.
# Uses UltraFastdistilroBERTaHyperparameterTuner, the tuner class defined in
# Cell 8 (same constructor arguments); the previously referenced
# FastRoBERTaHyperparameterTuner is not defined in this notebook.
emotion_tuner = UltraFastdistilroBERTaHyperparameterTuner(
    model_type="emotion",
    data_splits=emotion_data,
    n_trials=n_trials,
    model_name=model_name,
    subset_ratio=0.02,  # Only 2% of data!
    max_epochs_per_trial=2  # Only 2 epochs per trial!
)

# Run hyperparameter tuning
emotion_study = emotion_tuner.tune()

print(f"\n✅ Emotion Hyperparameter Tuning Completed!")
print(f"🏆 Best F1 Score: {emotion_study.best_value:.4f}")
print(f"📋 Best Parameters:")
for key, value in emotion_study.best_params.items():
    print(f"  {key}: {value}")

# Clean up memory
aggressive_memory_cleanup()

In [None]:
# Cell 12: Ultra-Fast Hyperparameter Tuning - Multitask
print("\n" + "="*80)
print("📍 PHASE 2: ULTRA-FAST HYPERPARAMETER TUNING - MULTITASK")
print("="*80)

print("\n8️⃣ Fast Hyperparameter Tuning for RoBERTa Multi-task Model...")
print("="*60)

# Create FAST tuner for multitask.
# Uses UltraFastdistilroBERTaHyperparameterTuner, the tuner class defined in
# Cell 8 (same constructor arguments); the previously referenced
# FastRoBERTaHyperparameterTuner is not defined in this notebook.
multitask_tuner = UltraFastdistilroBERTaHyperparameterTuner(
    model_type="multitask",
    data_splits=multitask_data,
    n_trials=n_trials,
    model_name=model_name,
    subset_ratio=0.02,  # Only 2% of data!
    max_epochs_per_trial=2  # Only 2 epochs per trial!
)

# Run hyperparameter tuning
multitask_study = multitask_tuner.tune()

print(f"\n✅ Multitask Hyperparameter Tuning Completed!")
print(f"🏆 Best Combined F1 Score: {multitask_study.best_value:.4f}")
print(f"📋 Best Parameters:")
for key, value in multitask_study.best_params.items():
    print(f"  {key}: {value}")

# Clean up memory
aggressive_memory_cleanup()

In [None]:
# Cell 13: Final Sentiment Model Training with Best Parameters
# Retrains the sentiment model on the full sentiment data with the best
# hyperparameters found in `sentiment_study`, then evaluates it on the test
# split. `final_sentiment_results` is consumed by the comparison cell.
#
# NOTE(review): this cell calls `distilroBERTaSingleTaskTrainer` and
# `evaluate_distilroBERTa_model`, whereas the emotion/multitask cells call
# `RoBERTaSingleTaskTrainer` / `evaluate_roberta_model`. Confirm that both
# spellings are really defined earlier in the notebook; otherwise one of these
# cells fails with NameError on a fresh Restart & Run All.
print(
    "\n" + "=" * 80,
    "📍 PHASE 3: FINAL TRAINING - OPTIMIZED SENTIMENT MODEL",
    "=" * 80,
    sep="\n",
)
print(
    "\n9️⃣ Training Final RoBERTa Sentiment Model with Best Parameters...",
    "=" * 60,
    sep="\n",
)

# Winning hyperparameters from the sentiment search.
best_sentiment_params = sentiment_study.best_params
print("🎯 Using best hyperparameters:")
for param_name, param_value in best_sentiment_params.items():
    print(f"  {param_name}: {param_value}")

# Config for the final run: fixed settings are spelled out, tuned settings are
# copied from the search result (a missing tuned key raises KeyError).
final_sentiment_config = TrainingConfig(
    model_name=model_name,
    num_epochs=5,  # increased epoch count for final training
    max_length=best_sentiment_params.get('max_length', 128),  # 128 when not tuned
    task_type="sentiment",
    output_dir="./final_roberta_sentiment_model",
    **{
        name: best_sentiment_params[name]
        for name in (
            'learning_rate',
            'batch_size',
            'warmup_ratio',
            'weight_decay',
            'hidden_dropout_prob',
            'classifier_dropout',
        )
    },
)

print("\n🚀 Training final sentiment model:")
print("  Dataset: Full sentiment data ({} train samples)".format(len(sentiment_data['train']['texts'])))
print("  Epochs: {}".format(final_sentiment_config.num_epochs))
print("  Batch size: {}".format(final_sentiment_config.batch_size))
print("  Learning rate: {:.2e}".format(final_sentiment_config.learning_rate))

# Fit on the full sentiment splits.
final_sentiment_trainer = distilroBERTaSingleTaskTrainer(
    num_classes=roberta_model_config.sentiment_num_classes,
    config=final_sentiment_config,
)
final_sentiment_history = final_sentiment_trainer.train(sentiment_data)

# Score the saved best checkpoint on the held-out test split.
final_sentiment_results = evaluate_distilroBERTa_model(
    model_name=model_name,
    model_type="sentiment",
    model_path="./final_roberta_sentiment_model/model_best",
    test_data=sentiment_data['test'],
)

print("\n✅ Final Sentiment Model Results:")
print("  Accuracy: {accuracy:.4f}".format_map(final_sentiment_results))
print("  F1 Macro: {f1_macro:.4f}".format_map(final_sentiment_results))

# Tuning score (small subset) next to the final score (full test split).
print("\n📊 Comparison:")
print("  Tuning F1 (on subset): {:.4f}".format(sentiment_study.best_value))
print("  Final F1 (on full test): {f1_macro:.4f}".format_map(final_sentiment_results))

# Clean up memory (project helper), then report where the model lives.
aggressive_memory_cleanup()
print("💾 Final sentiment model saved to: ./final_roberta_sentiment_model/")

In [None]:
# Cell 14: Final Emotion Model Training with Best Parameters
# Retrains the emotion model on the full emotion data with the best
# hyperparameters found in `emotion_study`, then evaluates it on the test
# split. `final_emotion_results` is consumed by the comparison cell.
#
# NOTE(review): this cell calls `RoBERTaSingleTaskTrainer` and
# `evaluate_roberta_model`, whereas the sentiment cell calls
# `distilroBERTaSingleTaskTrainer` / `evaluate_distilroBERTa_model`. Confirm
# that both spellings are really defined earlier in the notebook.
print(
    "\n" + "=" * 80,
    "📍 PHASE 3: FINAL TRAINING - OPTIMIZED EMOTION MODEL",
    "=" * 80,
    sep="\n",
)
print(
    "\n🔟 Training Final RoBERTa Emotion Model with Best Parameters...",
    "=" * 60,
    sep="\n",
)

# Winning hyperparameters from the emotion search.
best_emotion_params = emotion_study.best_params
print("🎯 Using best hyperparameters:")
for param_name, param_value in best_emotion_params.items():
    print(f"  {param_name}: {param_value}")

# Config for the final run: fixed settings are spelled out, tuned settings are
# copied from the search result (a missing tuned key raises KeyError).
final_emotion_config = TrainingConfig(
    model_name=model_name,
    num_epochs=5,  # increased epoch count for final training
    max_length=best_emotion_params.get('max_length', 128),  # 128 when not tuned
    task_type="emotion",
    output_dir="./final_roberta_emotion_model",
    **{
        name: best_emotion_params[name]
        for name in (
            'learning_rate',
            'batch_size',
            'warmup_ratio',
            'weight_decay',
            'hidden_dropout_prob',
            'classifier_dropout',
        )
    },
)

print("\n🚀 Training final emotion model:")
print("  Dataset: Full emotion data ({} train samples)".format(len(emotion_data['train']['texts'])))
print("  Epochs: {}".format(final_emotion_config.num_epochs))
print("  Batch size: {}".format(final_emotion_config.batch_size))
print("  Learning rate: {:.2e}".format(final_emotion_config.learning_rate))

# Fit on the full emotion splits.
final_emotion_trainer = RoBERTaSingleTaskTrainer(
    num_classes=roberta_model_config.emotion_num_classes,
    config=final_emotion_config,
)
final_emotion_history = final_emotion_trainer.train(emotion_data)

# Score the saved best checkpoint on the held-out test split.
final_emotion_results = evaluate_roberta_model(
    model_name=model_name,
    model_type="emotion",
    model_path="./final_roberta_emotion_model/model_best",
    test_data=emotion_data['test'],
)

print("\n✅ Final Emotion Model Results:")
print("  Accuracy: {accuracy:.4f}".format_map(final_emotion_results))
print("  F1 Macro: {f1_macro:.4f}".format_map(final_emotion_results))

# Tuning score (small subset) next to the final score (full test split).
print("\n📊 Comparison:")
print("  Tuning F1 (on subset): {:.4f}".format(emotion_study.best_value))
print("  Final F1 (on full test): {f1_macro:.4f}".format_map(final_emotion_results))

# Clean up memory (project helper), then report where the model lives.
aggressive_memory_cleanup()
print("💾 Final emotion model saved to: ./final_roberta_emotion_model/")

In [None]:
# Cell 15: Final Multitask Model Training with Best Parameters
# Retrains the multi-task model on the full multitask data with the best
# hyperparameters found in `multitask_study` (including the loss-weighting
# `alpha`), then evaluates it on the test split. `final_multitask_results` is
# consumed by the comparison cell.
print(
    "\n" + "=" * 80,
    "📍 PHASE 3: FINAL TRAINING - OPTIMIZED MULTITASK MODEL",
    "=" * 80,
    sep="\n",
)
print(
    "\n1️⃣1️⃣ Training Final RoBERTa Multi-task Model with Best Parameters...",
    "=" * 60,
    sep="\n",
)

# Winning hyperparameters from the multitask search.
best_multitask_params = multitask_study.best_params
print("🎯 Using best hyperparameters:")
for param_name, param_value in best_multitask_params.items():
    print(f"  {param_name}: {param_value}")

# Config for the final run: fixed settings are spelled out, tuned settings are
# copied from the search result (a missing tuned key raises KeyError).
final_multitask_config = TrainingConfig(
    model_name=model_name,
    num_epochs=5,  # increased epoch count for final training
    max_length=best_multitask_params.get('max_length', 128),  # 128 when not tuned
    task_type="multitask",
    output_dir="./final_roberta_multitask_model",
    **{
        name: best_multitask_params[name]
        for name in (
            'learning_rate',
            'batch_size',
            'warmup_ratio',
            'weight_decay',
            'hidden_dropout_prob',
            'classifier_dropout',
            'alpha',  # multitask-specific parameter
        )
    },
)

print("\n🚀 Training final multitask model:")
print("  Dataset: Full multitask data ({} train samples)".format(len(multitask_data['train']['texts'])))
print("  Epochs: {}".format(final_multitask_config.num_epochs))
print("  Batch size: {}".format(final_multitask_config.batch_size))
print("  Learning rate: {:.2e}".format(final_multitask_config.learning_rate))
print("  Alpha (loss weighting): {:.3f}".format(final_multitask_config.alpha))

# Fit on the full multitask splits.
final_multitask_trainer = RoBERTaMultiTaskTrainer(config=final_multitask_config)
final_multitask_history = final_multitask_trainer.train(multitask_data)

# Score the saved best checkpoint on the held-out test split.
final_multitask_results = evaluate_roberta_model(
    model_name=model_name,
    model_type="multitask",
    model_path="./final_roberta_multitask_model/model_best",
    test_data=multitask_data['test'],
)

print("\n✅ Final Multitask Model Results:")
print("  Sentiment - Accuracy: {sentiment_accuracy:.4f}, F1: {sentiment_f1_macro:.4f}".format_map(final_multitask_results))
print("  Emotion - Accuracy: {emotion_accuracy:.4f}, F1: {emotion_f1_macro:.4f}".format_map(final_multitask_results))
print("  Combined - Accuracy: {combined_accuracy:.4f}, F1: {combined_f1_macro:.4f}".format_map(final_multitask_results))

# Tuning score (small subset) next to the final score (full test split).
print("\n📊 Comparison:")
print("  Tuning Combined F1 (on subset): {:.4f}".format(multitask_study.best_value))
print("  Final Combined F1 (on full test): {combined_f1_macro:.4f}".format_map(final_multitask_results))

# Clean up memory (project helper), then report where the model lives.
aggressive_memory_cleanup()
print("💾 Final multitask model saved to: ./final_roberta_multitask_model/")

In [None]:
# Cell 20: Final Results Comparison for RoBERTa (Updated with Initial Comparisons)
# Prints baseline, tuning and final metrics side by side and computes the
# baseline -> final deltas. The `*_improvement` variables defined here are
# reused further down in this cell (results summary and closing table).
print(
    "\n" + "=" * 80,
    "📍 PHASE 4: COMPREHENSIVE RESULTS COMPARISON",
    "=" * 80,
    sep="\n",
)
print("\n📊 ROBERTA COMPLETE PIPELINE RESULTS", "=" * 60, sep="\n")

# --- Baseline metrics (read from `all_results`) ---
print("\n📋 INITIAL MODEL PERFORMANCE (baseline):", "  Sentiment Model:", sep="\n")
print("    Accuracy: {accuracy:.4f}".format_map(all_results['initial_sentiment']))
print("    F1 Macro: {f1_macro:.4f}".format_map(all_results['initial_sentiment']))

print("\n  Emotion Model:")
print("    Accuracy: {accuracy:.4f}".format_map(all_results['initial_emotion']))
print("    F1 Macro: {f1_macro:.4f}".format_map(all_results['initial_emotion']))

print("\n  Multitask Model:")
print("    Sentiment - Accuracy: {sentiment_accuracy:.4f}, F1: {sentiment_f1_macro:.4f}".format_map(all_results['initial_multitask']))
print("    Emotion - Accuracy: {emotion_accuracy:.4f}, F1: {emotion_f1_macro:.4f}".format_map(all_results['initial_multitask']))
print("    Combined - Accuracy: {combined_accuracy:.4f}, F1: {combined_f1_macro:.4f}".format_map(all_results['initial_multitask']))

# --- Best objective value of each tuning study ---
print("\n🎯 HYPERPARAMETER TUNING PERFORMANCE (on small subsets):")
print("  Sentiment Best F1: {:.4f}".format(sentiment_study.best_value))
print("  Emotion Best F1: {:.4f}".format(emotion_study.best_value))
print("  Multitask Best Combined F1: {:.4f}".format(multitask_study.best_value))

# --- Metrics of the final, tuned models ---
print("\n🏆 FINAL OPTIMIZED MODEL PERFORMANCE:", "  Sentiment Model:", sep="\n")
print("    Accuracy: {accuracy:.4f}".format_map(final_sentiment_results))
print("    F1 Macro: {f1_macro:.4f}".format_map(final_sentiment_results))

print("\n  Emotion Model:")
print("    Accuracy: {accuracy:.4f}".format_map(final_emotion_results))
print("    F1 Macro: {f1_macro:.4f}".format_map(final_emotion_results))

print("\n  Multitask Model:")
print("    Sentiment - Accuracy: {sentiment_accuracy:.4f}, F1: {sentiment_f1_macro:.4f}".format_map(final_multitask_results))
print("    Emotion - Accuracy: {emotion_accuracy:.4f}, F1: {emotion_f1_macro:.4f}".format_map(final_multitask_results))
print("    Combined - Accuracy: {combined_accuracy:.4f}, F1: {combined_f1_macro:.4f}".format_map(final_multitask_results))

# --- Deltas: final minus baseline (positive means the tuned model is better) ---
print("\n📈 IMPROVEMENT FROM INITIAL TO OPTIMIZED:")

# Sentiment deltas
sentiment_acc_improvement = (
    final_sentiment_results['accuracy'] - all_results['initial_sentiment']['accuracy']
)
sentiment_f1_improvement = (
    final_sentiment_results['f1_macro'] - all_results['initial_sentiment']['f1_macro']
)

print("  Sentiment:")
print("    Accuracy: {:.4f} → {:.4f} ({:+.4f})".format(
    all_results['initial_sentiment']['accuracy'],
    final_sentiment_results['accuracy'],
    sentiment_acc_improvement,
))
print("    F1 Macro: {:.4f} → {:.4f} ({:+.4f}) {}".format(
    all_results['initial_sentiment']['f1_macro'],
    final_sentiment_results['f1_macro'],
    sentiment_f1_improvement,
    '✅' if sentiment_f1_improvement > 0 else '⚠️',
))

# Emotion deltas
emotion_acc_improvement = (
    final_emotion_results['accuracy'] - all_results['initial_emotion']['accuracy']
)
emotion_f1_improvement = (
    final_emotion_results['f1_macro'] - all_results['initial_emotion']['f1_macro']
)

print("\n  Emotion:")
print("    Accuracy: {:.4f} → {:.4f} ({:+.4f})".format(
    all_results['initial_emotion']['accuracy'],
    final_emotion_results['accuracy'],
    emotion_acc_improvement,
))
print("    F1 Macro: {:.4f} → {:.4f} ({:+.4f}) {}".format(
    all_results['initial_emotion']['f1_macro'],
    final_emotion_results['f1_macro'],
    emotion_f1_improvement,
    '✅' if emotion_f1_improvement > 0 else '⚠️',
))

# Multitask deltas (per-task accuracy plus the combined macro F1)
multitask_sent_acc_improvement = (
    final_multitask_results['sentiment_accuracy'] - all_results['initial_multitask']['sentiment_accuracy']
)
multitask_emo_acc_improvement = (
    final_multitask_results['emotion_accuracy'] - all_results['initial_multitask']['emotion_accuracy']
)
multitask_combined_f1_improvement = (
    final_multitask_results['combined_f1_macro'] - all_results['initial_multitask']['combined_f1_macro']
)

print("\n  Multitask:")
print("    Sentiment Accuracy: {:.4f} → {:.4f} ({:+.4f})".format(
    all_results['initial_multitask']['sentiment_accuracy'],
    final_multitask_results['sentiment_accuracy'],
    multitask_sent_acc_improvement,
))
print("    Emotion Accuracy: {:.4f} → {:.4f} ({:+.4f})".format(
    all_results['initial_multitask']['emotion_accuracy'],
    final_multitask_results['emotion_accuracy'],
    multitask_emo_acc_improvement,
))
print("    Combined F1: {:.4f} → {:.4f} ({:+.4f}) {}".format(
    all_results['initial_multitask']['combined_f1_macro'],
    final_multitask_results['combined_f1_macro'],
    multitask_combined_f1_improvement,
    '✅' if multitask_combined_f1_improvement > 0 else '⚠️',
))

# Create comprehensive results summary
# One nested dict gathering everything the pipeline produced: baseline
# metrics, tuning setup and best scores, final metrics, baseline -> final
# deltas, the winning hyperparameters and the model output directories.
results_summary = {
    'model_type': 'RoBERTa',
    'model_name': model_name,
    'pipeline_type': 'Complete: Initial + Hyperparameter Tuning + Final Training',
    'initial_models': {
        'sentiment': all_results['initial_sentiment'],
        'emotion': all_results['initial_emotion'],
        'multitask': all_results['initial_multitask']
    },
    'hyperparameter_tuning': {
        # NOTE(review): the search strategy is decided inside
        # FastRoBERTaHyperparameterTuner - confirm this label matches it.
        'method': 'Fast Random Search',
        # These two literals mirror the `subset_ratio` / `max_epochs_per_trial`
        # arguments passed to the tuners in the tuning cells; keep them in sync.
        'subset_ratio': 0.02,
        'trials_per_model': n_trials,
        'epochs_per_trial': 2,
        'sentiment_best_f1': float(sentiment_study.best_value),
        'emotion_best_f1': float(emotion_study.best_value),
        'multitask_best_f1': float(multitask_study.best_value)
    },
    'final_models': {
        'sentiment': final_sentiment_results,
        'emotion': final_emotion_results,
        'multitask': final_multitask_results
    },
    'improvements': {
        'sentiment': {
            'accuracy_improvement': float(sentiment_acc_improvement),
            'f1_improvement': float(sentiment_f1_improvement)
        },
        'emotion': {
            'accuracy_improvement': float(emotion_acc_improvement),
            'f1_improvement': float(emotion_f1_improvement)
        },
        'multitask': {
            'sentiment_accuracy_improvement': float(multitask_sent_acc_improvement),
            'emotion_accuracy_improvement': float(multitask_emo_acc_improvement),
            'combined_f1_improvement': float(multitask_combined_f1_improvement)
        }
    },
    'best_hyperparameters': {
        'sentiment': sentiment_study.best_params,
        'emotion': emotion_study.best_params,
        'multitask': multitask_study.best_params
    },
    'model_locations': {
        'initial_sentiment': './initial_roberta_sentiment_model/',
        'initial_emotion': './initial_roberta_emotion_model/',
        'initial_multitask': './initial_roberta_multitask_model/',
        'final_sentiment': './final_roberta_sentiment_model/',
        'final_emotion': './final_roberta_emotion_model/',
        'final_multitask': './final_roberta_multitask_model/'
    }
}


def _to_jsonable(obj):
    """Fallback encoder for ``json``: turn NumPy values into plain Python ones.

    ``json`` only calls this for objects it cannot serialize itself, so
    summaries made purely of built-in types are encoded exactly as before.

    Args:
        obj: The value the JSON encoder could not handle.

    Returns:
        A built-in scalar for NumPy scalars, or a (nested) list for arrays.

    Raises:
        TypeError: If ``obj`` is not a NumPy scalar or array.
    """
    if isinstance(obj, np.generic):
        return obj.item()
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")


# Save results
# `json` and `np` are already imported in the notebook's setup cell, so no
# import is repeated here. The summary is serialized to a string *before* the
# file is opened: if an entry cannot be encoded, the error surfaces without
# leaving a truncated JSON file on disk.
summary_json = json.dumps(results_summary, indent=2, default=_to_jsonable)
with open('comprehensive_roberta_results_summary.json', 'w') as f:
    f.write(summary_json)

# Where the artefacts ended up.
print(
    "\n📁 MODEL LOCATIONS:",
    "  📦 Initial models: ./initial_roberta_*_model/",
    "  📦 Final models: ./final_roberta_*_model/",
    sep="\n",
)
print(
    "\n📄 RESULTS SAVED:",
    "  📊 Comprehensive summary: ./comprehensive_roberta_results_summary.json",
    sep="\n",
)
print(
    "\n🎉 COMPLETE ROBERTA PIPELINE FINISHED!",
    "✅ Initial training + Fast hyperparameter tuning + Optimized final training!",
    "🚀 Now you can compare baseline vs optimized performance!",
    sep="\n",
)

# Display final summary table: one row per model with baseline F1, final F1,
# the signed delta, and a status flag (a delta of exactly 0 counts as "Worse").
print(
    "\n" + "=" * 80,
    "📋 BASELINE vs OPTIMIZED PERFORMANCE SUMMARY",
    "=" * 80,
    "{:<12} {:<12} {:<12} {:<12} {:<10}".format('Model', 'Initial F1', 'Final F1', 'Improvement', 'Status'),
    "-" * 70,
    sep="\n",
)

# Shared column layout for the three data rows.
summary_row = "{:<12} {:<12.4f} {:<12.4f} {:+12.4f} {:<10}"
print(summary_row.format(
    'Sentiment',
    all_results['initial_sentiment']['f1_macro'],
    final_sentiment_results['f1_macro'],
    sentiment_f1_improvement,
    '✅ Better' if sentiment_f1_improvement > 0 else '⚠️ Worse',
))
print(summary_row.format(
    'Emotion',
    all_results['initial_emotion']['f1_macro'],
    final_emotion_results['f1_macro'],
    emotion_f1_improvement,
    '✅ Better' if emotion_f1_improvement > 0 else '⚠️ Worse',
))
print(summary_row.format(
    'Multitask',
    all_results['initial_multitask']['combined_f1_macro'],
    final_multitask_results['combined_f1_macro'],
    multitask_combined_f1_improvement,
    '✅ Better' if multitask_combined_f1_improvement > 0 else '⚠️ Worse',
))
print(
    "=" * 80,
    "\n🤖 RoBERTa pipeline complete with baseline comparisons!",
    sep="\n",
)