In [29]:
# Cell: Aggressive Memory Management
import torch
import gc
import os

def aggressive_memory_cleanup():
    """Aggressively clear GPU memory"""
    if torch.cuda.is_available():
        # Clear cache
        torch.cuda.empty_cache()
        
        # Force garbage collection
        gc.collect()
        
        # Reset memory stats
        torch.cuda.reset_peak_memory_stats()
        torch.cuda.reset_accumulated_memory_stats()
        
        print(f"🧹 Memory cleaned!")
        print(f"  Allocated: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")
        print(f"  Cached: {torch.cuda.memory_reserved() / 1024**3:.2f} GB")

# Set memory optimization environment variables
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

print("✅ Memory management setup complete!")

✅ Memory management setup complete!


In [30]:
# Cell 1: Imports and Configuration
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torch.optim import AdamW
from torch.optim.lr_scheduler import LambdaLR
import numpy as np
import pandas as pd
import json
import os
from typing import Dict, List, Tuple, Optional, Union
import random
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import accuracy_score, f1_score
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.class_weight import compute_class_weight
from transformers import (
    AutoTokenizer, AutoModel, AutoConfig,
    get_linear_schedule_with_warmup
)
import warnings
warnings.filterwarnings('ignore')

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"CUDA Version: {torch.version.cuda}")

Using device: cuda
GPU: NVIDIA GeForce RTX 4060
CUDA Version: 12.1


In [31]:
# Cell 3: Updated Dataset Class and External Data Loading (FIXED)
from datasets import load_dataset, Dataset as HFDataset
from collections import Counter

class MultiTaskDataset(Dataset):
    
    def __init__(
        self,
        texts: List[str],
        sentiment_labels: List[int],
        emotion_labels: List[int],
        tokenizer,
        max_length: int = 512,
        sentiment_label_encoder=None,
        emotion_label_encoder=None
    ):
        self.texts = texts
        self.sentiment_labels = sentiment_labels
        self.emotion_labels = emotion_labels
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.sentiment_label_encoder = sentiment_label_encoder
        self.emotion_label_encoder = emotion_label_encoder
        
        # Validate data
        assert len(texts) == len(sentiment_labels) == len(emotion_labels), \
            "All inputs must have the same length"
    
    def __len__(self):
        return len(self.texts)
    
    def __getitem__(self, idx):
        text = str(self.texts[idx])
        sentiment_label = self.sentiment_labels[idx]
        emotion_label = self.emotion_labels[idx]
        
        # Tokenize text
        encoding = self.tokenizer(
            text,
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt'
        )
        
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'sentiment_labels': torch.tensor(sentiment_label, dtype=torch.long),
            'emotion_labels': torch.tensor(emotion_label, dtype=torch.long),
            'text': text
        }

def load_external_datasets() -> Tuple[Dict, Dict]:
    print("Loading external datasets for training...")
    
    # Load SST-2 for sentiment
    try:
        sst2_dataset = load_dataset("sst2")
        sentiment_data = {
            'train': sst2_dataset['train'],
            'validation': sst2_dataset['validation']
        }
        print(f"✅ SST-2 dataset loaded: {len(sentiment_data['train'])} train, {len(sentiment_data['validation'])} val")
    except Exception as e:
        print(f"⚠️ Could not load SST-2: {e}. Using dummy data.")
        sentiment_data = _create_dummy_sentiment_data()
    
    # Load GoEmotions for emotion
    try:
        emotions_dataset = load_dataset("go_emotions", "simplified")
        emotion_data = {
            'train': emotions_dataset['train'],
            'validation': emotions_dataset['validation']
        }
        print(f"✅ GoEmotions dataset loaded: {len(emotion_data['train'])} train, {len(emotion_data['validation'])} val")
    except Exception as e:
        print(f"⚠️ Could not load GoEmotions: {e}. Using dummy data.")
        emotion_data = _create_dummy_emotion_data()
    
    return sentiment_data, emotion_data

def _create_dummy_sentiment_data() -> Dict:
    """Create dummy sentiment data for testing"""
    dummy_texts = [
        "I love this product!", "This is terrible", "It's okay",
        "Amazing quality", "Worst experience ever", "Not bad"
    ] * 200
    dummy_labels = [1, 0, 1, 1, 0, 1] * 200
    
    dummy_data = {
        'sentence': dummy_texts,
        'label': dummy_labels
    }
    
    dataset = HFDataset.from_dict(dummy_data)
    return {'train': dataset, 'validation': dataset.select(range(200))}

def _create_dummy_emotion_data() -> Dict:
    """Create dummy emotion data for testing"""
    dummy_texts = [
        "I'm so happy!", "This is sad", "I'm angry", "That's scary",
        "What a surprise!", "This is neutral", "I love this!", "Great stuff"
    ] * 200
    dummy_labels = [0, 1, 2, 3, 4, 5, 0, 0] * 200  # Map to 6 classes
    
    dummy_data = {
        'text': dummy_texts,
        'labels': dummy_labels
    }
    
    dataset = HFDataset.from_dict(dummy_data)
    return {'train': dataset, 'validation': dataset.select(range(200))}

def prepare_external_data_for_multitask(
    sentiment_data: Dict,
    emotion_data: Dict,
    max_samples: int = 10000
) -> Tuple[Dict, LabelEncoder, LabelEncoder]:
    """
    Prepare external datasets for multitask training
    """
    print("🔄 Preparing external datasets for multitask training...")
    
    # Filter emotion data to first 6 classes only (to match your Reddit data)
    def filter_emotion_classes(example):
        # Handle both single-label and multi-label
        if isinstance(example['labels'], list):
            return example['labels'] and example['labels'][0] in range(6)
        else:
            return example['labels'] in range(6)
    
    emotion_data['train'] = emotion_data['train'].filter(filter_emotion_classes)
    emotion_data['validation'] = emotion_data['validation'].filter(filter_emotion_classes)
    
    # Extract texts and labels
    # Sentiment (SST-2)
    sentiment_texts = sentiment_data['train']['sentence'][:max_samples]
    sentiment_labels = sentiment_data['train']['label'][:max_samples]
    
    # Emotion (GoEmotions) 
    emotion_texts = emotion_data['train']['text'][:max_samples]
    emotion_labels_raw = emotion_data['train']['labels'][:max_samples]
    
    # Handle multi-label to single-label conversion for emotions
    emotion_labels = []
    for label in emotion_labels_raw:
        if isinstance(label, list):
            emotion_labels.append(label[0] if label else 0)
        else:
            emotion_labels.append(label)
    
    # Create label encoders based on your Reddit data classes
    sentiment_encoder = LabelEncoder()
    emotion_encoder = LabelEncoder()
    
    # Fit with the classes that match your Reddit data
    # SST-2: 0=negative, 1=positive. We need: Negative, Neutral, Positive
    # Map SST labels to 3-class: 0->0 (Negative), 1->2 (Positive), add 1 (Neutral) artificially
    sentiment_encoder.classes_ = np.array(['Negative', 'Neutral', 'Positive'])
    
    # GoEmotions: Map to your 6 classes
    emotion_encoder.classes_ = np.array(['Anger', 'Fear', 'Joy', 'No Emotion', 'Sadness', 'Surprise'])
    
    # Convert SST labels: 0->0 (Negative), 1->2 (Positive)
    # We'll add some neutral examples by randomly converting some to class 1
    converted_sentiment_labels = []
    for label in sentiment_labels:
        if label == 0:  # Negative
            converted_sentiment_labels.append(0)
        elif label == 1:  # Positive
            # Randomly assign some as neutral (class 1) to have all 3 classes
            if np.random.random() < 0.1:  # 10% chance
                converted_sentiment_labels.append(1)  # Neutral
            else:
                converted_sentiment_labels.append(2)  # Positive
    
    # Ensure we have all 3 sentiment classes
    if 1 not in converted_sentiment_labels:
        # Force some examples to be neutral
        neutral_indices = np.random.choice(len(converted_sentiment_labels), size=50, replace=False)
        for idx in neutral_indices:
            converted_sentiment_labels[idx] = 1
    
    # Balance the datasets - use minimum length
    min_length = min(len(sentiment_texts), len(emotion_texts))
    
    final_texts = sentiment_texts[:min_length]
    final_sentiment_labels = converted_sentiment_labels[:min_length]
    final_emotion_labels = emotion_labels[:min_length]
    
    # Create train/val splits
    split_idx = int(0.8 * min_length)
    
    data_splits = {
        'train': {
            'texts': final_texts[:split_idx],
            'sentiment_labels': final_sentiment_labels[:split_idx],
            'emotion_labels': final_emotion_labels[:split_idx]
        },
        'val': {
            'texts': final_texts[split_idx:],
            'sentiment_labels': final_sentiment_labels[split_idx:],
            'emotion_labels': final_emotion_labels[split_idx:]
        }
    }
    
    print(f"✅ External data prepared:")
    print(f"  Train samples: {len(data_splits['train']['texts'])}")
    print(f"  Validation samples: {len(data_splits['val']['texts'])}")
    print(f"  Sentiment classes: {list(sentiment_encoder.classes_)}")
    print(f"  Emotion classes: {list(emotion_encoder.classes_)}")
    
    # Print class distribution
    train_sentiment_counts = Counter(data_splits['train']['sentiment_labels'])
    train_emotion_counts = Counter(data_splits['train']['emotion_labels'])
    
    print(f"\n📈 Training set class distribution:")
    for i, class_name in enumerate(sentiment_encoder.classes_):
        count = train_sentiment_counts.get(i, 0)
        print(f"  Sentiment '{class_name}': {count} samples")
    
    for i, class_name in enumerate(emotion_encoder.classes_):
        count = train_emotion_counts.get(i, 0)
        print(f"  Emotion '{class_name}': {count} samples")
    
    return data_splits, sentiment_encoder, emotion_encoder

def prepare_reddit_data_for_evaluation(
    df: pd.DataFrame,
    sentiment_encoder: LabelEncoder,
    emotion_encoder: LabelEncoder,
    sentiment_column: str = 'sentiment',
    emotion_column: str = 'emotion',
    text_column: str = 'text_content'
) -> Dict:
    """
    Prepare Reddit data for evaluation only (not training)
    """
    print("🔄 Preparing Reddit data for evaluation...")
    
    # Extract data
    texts = df[text_column].tolist()
    sentiment_labels_text = df[sentiment_column].tolist()
    emotion_labels_text = df[emotion_column].tolist()
    
    # Transform labels using pre-fitted encoders
    try:
        sentiment_labels = sentiment_encoder.transform(sentiment_labels_text)
    except ValueError as e:
        print(f"⚠️ Sentiment label mismatch: {e}")
        # Handle unknown labels by mapping them to existing classes
        sentiment_labels = []
        for label in sentiment_labels_text:
            if label in sentiment_encoder.classes_:
                sentiment_labels.append(sentiment_encoder.transform([label])[0])
            else:
                print(f"⚠️ Unknown sentiment label '{label}', mapping to 'Neutral'")
                sentiment_labels.append(sentiment_encoder.transform(['Neutral'])[0])
        sentiment_labels = np.array(sentiment_labels)
    
    try:
        emotion_labels = emotion_encoder.transform(emotion_labels_text)
    except ValueError as e:
        print(f"⚠️ Emotion label mismatch: {e}")
        # Handle unknown labels
        emotion_labels = []
        for label in emotion_labels_text:
            if label in emotion_encoder.classes_:
                emotion_labels.append(emotion_encoder.transform([label])[0])
            else:
                print(f"⚠️ Unknown emotion label '{label}', mapping to 'No Emotion'")
                emotion_labels.append(emotion_encoder.transform(['No Emotion'])[0])
        emotion_labels = np.array(emotion_labels)
    
    evaluation_data = {
        'texts': texts,
        'sentiment_labels': sentiment_labels.tolist(),
        'emotion_labels': emotion_labels.tolist()
    }
    
    print(f"✅ Reddit evaluation data prepared: {len(texts)} samples")
    
    return evaluation_data

def create_stratified_sampler(sentiment_labels: List[int], emotion_labels: List[int]) -> WeightedRandomSampler:
    """
    Create a weighted random sampler for stratified sampling
    considering both sentiment and emotion class distributions
    """
    # Combine labels to create compound classes for stratification
    compound_labels = [f"{s}_{e}" for s, e in zip(sentiment_labels, emotion_labels)]
    
    # Calculate class weights
    unique_labels = list(set(compound_labels))
    
    # FIX: Convert to numpy array as required by compute_class_weight
    unique_labels_array = np.array(unique_labels)
    
    class_weights = compute_class_weight(
        'balanced',
        classes=unique_labels_array,  # Now it's a numpy array
        y=compound_labels
    )
    
    # Create weight dictionary
    weight_dict = dict(zip(unique_labels, class_weights))
    
    # Assign weights to each sample
    sample_weights = [weight_dict[label] for label in compound_labels]
    
    return WeightedRandomSampler(
        weights=sample_weights,
        num_samples=len(sample_weights),
        replacement=True
    )

print("✅ Updated dataset preparation functions defined!")

✅ Updated dataset preparation functions defined!


In [32]:
# Cell: Ultra Memory-Optimized Training
def run_ultra_lightweight_training(
    reddit_data_path: str = "annotated_reddit_posts.csv",
    model_name: str = "microsoft/deberta-base",
    output_dir: str = "./multitask_model_ultra_light",
    max_external_samples: int = 1000  # Very small dataset
):
    """
    Ultra-lightweight training for 8GB GPU
    """
    print("🚀 Starting Ultra-Lightweight Training")
    print("=" * 50)
    
    # Aggressive memory cleanup first
    aggressive_memory_cleanup()
    
    # Load external datasets
    print("\n1️⃣ Loading minimal external datasets...")
    sentiment_data, emotion_data = load_external_datasets()
    
    # Prepare very small external data
    external_data_splits, sentiment_encoder, emotion_encoder = prepare_external_data_for_multitask(
        sentiment_data, emotion_data, max_samples=max_external_samples
    )
    
    # Load Reddit data
    print("\n2️⃣ Loading Reddit data...")
    reddit_df = pd.read_csv(reddit_data_path)
    reddit_evaluation_data = prepare_reddit_data_for_evaluation(
        reddit_df, sentiment_encoder, emotion_encoder
    )
    
    # Ultra-lightweight config
    config = TrainingConfig(
        model_name=model_name,
        output_dir=output_dir,
        num_epochs=2,       # Minimal epochs
        batch_size=1,       # Smallest possible batch
        learning_rate=2e-5,
        warmup_ratio=0.05,  # Minimal warmup
        weight_decay=0.01,
        max_grad_norm=1.0,
        alpha=0.5,
        adaptive_alpha=False,  # Disable to save memory
        hidden_dropout_prob=0.1,
        attention_dropout_prob=0.1,
        classifier_dropout=0.1,
        max_length=128,     # Very short sequences
        save_total_limit=1  # Keep only 1 checkpoint
    )
    
    print(f"\n3️⃣ Ultra-lightweight config:")
    print(f"  Batch size: {config.batch_size}")
    print(f"  Max length: {config.max_length}")
    print(f"  Training samples: {len(external_data_splits['train']['texts'])}")
    print(f"  Epochs: {config.num_epochs}")
    
    # Clear memory before model
    aggressive_memory_cleanup()
    
    # Initialize trainer
    print(f"\n4️⃣ Initializing trainer...")
    trainer = MultiTaskTrainer(
        config=config,
        sentiment_num_classes=len(sentiment_encoder.classes_),
        emotion_num_classes=len(emotion_encoder.classes_)
    )
    
    # Setup with gradient checkpointing
    print(f"\n5️⃣ Setting up with memory optimizations...")
    trainer.setup(external_data_splits, sentiment_encoder, emotion_encoder)
    
    # Enable gradient checkpointing to save memory
    if hasattr(trainer.model.shared_encoder, 'gradient_checkpointing_enable'):
        trainer.model.shared_encoder.gradient_checkpointing_enable()
        print("✅ Gradient checkpointing enabled")
    
    # Train with memory monitoring
    print(f"\n6️⃣ Training with memory monitoring...")
    try:
        history = trainer.train()
        print("✅ Training completed!")
    except RuntimeError as e:
        if "out of memory" in str(e):
            print(f"❌ Still out of memory: {e}")
            print("💡 Try restarting kernel and using even smaller batch_size=1")
            return None, None
        else:
            raise e
    
    # Clear memory before evaluation
    aggressive_memory_cleanup()
    
    # Evaluate
    print(f"\n7️⃣ Evaluating...")
    evaluator = MultiTaskEvaluator(
        model=trainer.model,
        tokenizer=trainer.tokenizer,
        sentiment_encoder=sentiment_encoder,
        emotion_encoder=emotion_encoder,
        device=device
    )
    
    reddit_results = evaluator.evaluate_dataset(
        texts=reddit_evaluation_data['texts'],
        sentiment_labels=reddit_evaluation_data['sentiment_labels'],
        emotion_labels=reddit_evaluation_data['emotion_labels'],
        batch_size=1  # Ultra small batch for evaluation
    )
    
    # Save model
    save_model_and_encoders(
        model=trainer.model,
        tokenizer=trainer.tokenizer,
        sentiment_encoder=sentiment_encoder,
        emotion_encoder=emotion_encoder,
        output_dir=os.path.join(output_dir, 'final_model')
    )
    
    # Print results
    print(f"\n📈 Results:")
    print(f"Sentiment Accuracy: {reddit_results['sentiment']['accuracy']:.4f}")
    print(f"Emotion Accuracy: {reddit_results['emotion']['accuracy']:.4f}")
    
    # Final cleanup
    aggressive_memory_cleanup()
    
    return trainer.model, reddit_results

print("✅ Ultra-lightweight training function ready!")

✅ Ultra-lightweight training function ready!


In [33]:
# Cell 5: Training Utilities (FIXED)
class TrainingConfig:
    """Configuration class for training parameters"""
    
    def __init__(  # ← FIXED: was _init_ now __init__
        self,
        model_name: str = "microsoft/deberta-base",
        max_length: int = 256,  # Reduced from 512
        batch_size: int = 4,    # Much smaller batch size
        learning_rate: float = 2e-5,
        num_epochs: int = 3,    # Reduced epochs
        warmup_ratio: float = 0.1,
        weight_decay: float = 0.01,
        max_grad_norm: float = 1.0,
        alpha: float = 0.5,
        hidden_dropout_prob: float = 0.1,
        attention_dropout_prob: float = 0.1,
        classifier_dropout: float = 0.1,
        adaptive_alpha: bool = True,
        save_strategy: str = "epoch",
        evaluation_strategy: str = "epoch",
        output_dir: str = "./multitask_model",
        logging_steps: int = 20,  # Reduced logging frequency
        save_total_limit: int = 1  # Keep only 1 checkpoint
    ):
        self.model_name = model_name
        self.max_length = max_length
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.num_epochs = num_epochs
        self.warmup_ratio = warmup_ratio
        self.weight_decay = weight_decay
        self.max_grad_norm = max_grad_norm
        self.alpha = alpha
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_dropout_prob = attention_dropout_prob
        self.classifier_dropout = classifier_dropout
        self.adaptive_alpha = adaptive_alpha
        self.save_strategy = save_strategy
        self.evaluation_strategy = evaluation_strategy
        self.output_dir = output_dir
        self.logging_steps = logging_steps
        self.save_total_limit = save_total_limit

def create_optimizer_and_scheduler(
    model: nn.Module,
    config: TrainingConfig,
    num_training_steps: int
) -> Tuple[AdamW, LambdaLR]:
    """
    Create optimizer and learning rate scheduler
    """
    # Separate parameters for different learning rates
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {
            "params": [p for n, p in model.named_parameters() 
                      if not any(nd in n for nd in no_decay)],
            "weight_decay": config.weight_decay,
        },
        {
            "params": [p for n, p in model.named_parameters() 
                      if any(nd in n for nd in no_decay)],
            "weight_decay": 0.0,
        },
    ]
    
    # AdamW optimizer
    optimizer = AdamW(
        optimizer_grouped_parameters,
        lr=config.learning_rate,
        eps=1e-8
    )
    
    # Linear warmup scheduler
    num_warmup_steps = int(num_training_steps * config.warmup_ratio)
    
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=num_warmup_steps,
        num_training_steps=num_training_steps
    )
    
    return optimizer, scheduler

class EarlyStopping:
    """Early stopping utility"""
    
    def __init__(self, patience: int = 3, min_delta: float = 0.001):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_score = None
        
    def __call__(self, score: float) -> bool:
        """Returns True if training should be stopped"""
        if self.best_score is None:
            self.best_score = score
        elif score < self.best_score + self.min_delta:
            self.counter += 1
            if self.counter >= self.patience:
                return True
        else:
            self.best_score = score
            self.counter = 0
        return False

class ModelCheckpointer:
    """Model checkpointing utility"""
    
    def __init__(self, output_dir: str, save_total_limit: int = 3):
        self.output_dir = output_dir
        self.save_total_limit = save_total_limit
        self.saved_checkpoints = []
        os.makedirs(output_dir, exist_ok=True)
    
    def save_checkpoint(
        self,
        model: nn.Module,
        tokenizer,
        optimizer: AdamW,
        scheduler: LambdaLR,
        epoch: int,
        metrics: Dict,
        is_best: bool = False
    ):
        """Save model checkpoint"""
        checkpoint_dir = os.path.join(self.output_dir, f"checkpoint-epoch-{epoch}")
        os.makedirs(checkpoint_dir, exist_ok=True)
        
        # Save model and tokenizer
        model.save_pretrained(checkpoint_dir)
        tokenizer.save_pretrained(checkpoint_dir)
        
        # Save training state
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
            'metrics': metrics
        }, os.path.join(checkpoint_dir, 'training_state.pt'))
        
        # Save best model separately
        if is_best:
            best_dir = os.path.join(self.output_dir, 'best_model')
            os.makedirs(best_dir, exist_ok=True)
            model.save_pretrained(best_dir)
            tokenizer.save_pretrained(best_dir)
        
        # Manage checkpoint limit
        self.saved_checkpoints.append(checkpoint_dir)
        if len(self.saved_checkpoints) > self.save_total_limit:
            old_checkpoint = self.saved_checkpoints.pop(0)
            if os.path.exists(old_checkpoint) and 'best_model' not in old_checkpoint:
                import shutil
                shutil.rmtree(old_checkpoint)

print("✅ Training utilities defined!")

✅ Training utilities defined!


In [34]:
# Cell 6: Training Loop
class MultiTaskTrainer:
    """
    Main trainer class for multitask learning
    """
    
    def __init__(
        self,
        config: TrainingConfig,
        sentiment_num_classes: int,
        emotion_num_classes: int
    ):
        self.config = config
        self.sentiment_num_classes = sentiment_num_classes
        self.emotion_num_classes = emotion_num_classes
        self.device = device
        
        # Initialize components
        self.tokenizer = AutoTokenizer.from_pretrained(config.model_name)
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token
        
        self.model = None
        self.loss_fn = None
        self.optimizer = None
        self.scheduler = None
        self.alpha_scheduler = None
        self.early_stopping = None
        self.checkpointer = None
        
        # Training history
        self.training_history = {
            'epoch': [],
            'train_loss': [],
            'train_sentiment_loss': [],
            'train_emotion_loss': [],
            'val_loss': [],
            'val_sentiment_loss': [],
            'val_emotion_loss': [],
            'val_sentiment_accuracy': [],
            'val_emotion_accuracy': [],
            'alpha': [],
            'learning_rate': []
        }
    
    def setup(
        self,
        data_splits: Dict,
        sentiment_encoder: LabelEncoder,
        emotion_encoder: LabelEncoder
    ):
        """Setup model, loss function, and training components"""
        
        # Initialize model
        self.model = MultiTaskTransformer(
            model_name=self.config.model_name,
            sentiment_num_classes=self.sentiment_num_classes,
            emotion_num_classes=self.emotion_num_classes,
            hidden_dropout_prob=self.config.hidden_dropout_prob,
            attention_dropout_prob=self.config.attention_dropout_prob,
            classifier_dropout=self.config.classifier_dropout
        ).to(self.device)
        
        # Compute class weights
        sentiment_weights = compute_class_weights_from_labels(
            data_splits['train']['sentiment_labels'], self.device
        )
        emotion_weights = compute_class_weights_from_labels(
            data_splits['train']['emotion_labels'], self.device
        )
        
        # Initialize loss function
        self.loss_fn = MultiTaskLoss(
            alpha=self.config.alpha,
            sentiment_class_weights=sentiment_weights,
            emotion_class_weights=emotion_weights,
            device=self.device
        )
        
        # Create datasets
        self.train_dataset = MultiTaskDataset(
            texts=data_splits['train']['texts'],
            sentiment_labels=data_splits['train']['sentiment_labels'],
            emotion_labels=data_splits['train']['emotion_labels'],
            tokenizer=self.tokenizer,
            max_length=self.config.max_length,
            sentiment_label_encoder=sentiment_encoder,
            emotion_label_encoder=emotion_encoder
        )
        
        self.val_dataset = MultiTaskDataset(
            texts=data_splits['val']['texts'],
            sentiment_labels=data_splits['val']['sentiment_labels'],
            emotion_labels=data_splits['val']['emotion_labels'],
            tokenizer=self.tokenizer,
            max_length=self.config.max_length,
            sentiment_label_encoder=sentiment_encoder,
            emotion_label_encoder=emotion_encoder
        )
        
        # Create data loaders
        train_sampler = create_stratified_sampler(
            data_splits['train']['sentiment_labels'],
            data_splits['train']['emotion_labels']
        ) if len(data_splits['train']['texts']) > 50 else None
        
        self.train_loader = DataLoader(
            self.train_dataset,
            batch_size=self.config.batch_size,
            sampler=train_sampler,
            shuffle=(train_sampler is None),
            num_workers=0,
            pin_memory=True
        )
        
        self.val_loader = DataLoader(
            self.val_dataset,
            batch_size=self.config.batch_size,
            shuffle=False,
            num_workers=0,
            pin_memory=True
        )
        
        # Setup optimizer and scheduler
        num_training_steps = len(self.train_loader) * self.config.num_epochs
        self.optimizer, self.scheduler = create_optimizer_and_scheduler(
            self.model, self.config, num_training_steps
        )
        
        # Initialize utilities
        if self.config.adaptive_alpha:
            self.alpha_scheduler = AdaptiveAlphaScheduler(
                initial_alpha=self.config.alpha
            )
        
        self.early_stopping = EarlyStopping(patience=3, min_delta=0.001)
        self.checkpointer = ModelCheckpointer(
            self.config.output_dir,
            self.config.save_total_limit
        )
        
        print(f"✅ Setup complete!")
        print(f"  Model: {self.config.model_name}")
        print(f"  Training samples: {len(self.train_dataset)}")
        print(f"  Validation samples: {len(self.val_dataset)}")
        print(f"  Training steps per epoch: {len(self.train_loader)}")
        print(f"  Total training steps: {num_training_steps}")
    
    def train_epoch(self) -> Dict[str, float]:
        """Train for one epoch"""
        self.model.train()
        
        total_loss = 0.0
        total_sentiment_loss = 0.0
        total_emotion_loss = 0.0
        num_batches = 0
        
        for batch_idx, batch in enumerate(self.train_loader):
            # Move batch to device
            input_ids = batch['input_ids'].to(self.device)
            attention_mask = batch['attention_mask'].to(self.device)
            sentiment_labels = batch['sentiment_labels'].to(self.device)
            emotion_labels = batch['emotion_labels'].to(self.device)
            
            # Forward pass
            outputs = self.model(
                input_ids=input_ids,
                attention_mask=attention_mask
            )
            
            # Calculate loss
            loss_dict = self.loss_fn(
                sentiment_logits=outputs['sentiment_logits'],
                emotion_logits=outputs['emotion_logits'],
                sentiment_labels=sentiment_labels,
                emotion_labels=emotion_labels
            )
            
            loss = loss_dict['total_loss']
            
            # Backward pass
            self.optimizer.zero_grad()
            loss.backward()
            
            # Gradient clipping
            torch.nn.utils.clip_grad_norm_(
                self.model.parameters(),
                self.config.max_grad_norm
            )
            
            self.optimizer.step()
            self.scheduler.step()
            
            # Accumulate losses
            total_loss += loss.item()
            total_sentiment_loss += loss_dict['sentiment_loss'].item()
            total_emotion_loss += loss_dict['emotion_loss'].item()
            num_batches += 1
            
            # Logging
            if (batch_idx + 1) % self.config.logging_steps == 0:
                avg_loss = total_loss / num_batches
                current_lr = self.scheduler.get_last_lr()[0]
                print(f"  Batch {batch_idx + 1}/{len(self.train_loader)} | "
                      f"Loss: {avg_loss:.4f} | "
                      f"LR: {current_lr:.2e} | "
                      f"Alpha: {self.loss_fn.alpha:.3f}")
        
        return {
            'train_loss': total_loss / num_batches,
            'train_sentiment_loss': total_sentiment_loss / num_batches,
            'train_emotion_loss': total_emotion_loss / num_batches
        }
    
    def evaluate(self) -> Dict[str, float]:
        """Evaluate on validation set"""
        self.model.eval()
        
        total_loss = 0.0
        total_sentiment_loss = 0.0
        total_emotion_loss = 0.0
        
        sentiment_predictions = []
        sentiment_true_labels = []
        emotion_predictions = []
        emotion_true_labels = []
        
        with torch.no_grad():
            for batch in self.val_loader:
                # Move batch to device
                input_ids = batch['input_ids'].to(self.device)
                attention_mask = batch['attention_mask'].to(self.device)
                sentiment_labels = batch['sentiment_labels'].to(self.device)
                emotion_labels = batch['emotion_labels'].to(self.device)
                
                # Forward pass
                outputs = self.model(
                    input_ids=input_ids,
                    attention_mask=attention_mask
                )
                
                # Calculate loss
                loss_dict = self.loss_fn(
                    sentiment_logits=outputs['sentiment_logits'],
                    emotion_logits=outputs['emotion_logits'],
                    sentiment_labels=sentiment_labels,
                    emotion_labels=emotion_labels
                )
                
                # Accumulate losses
                total_loss += loss_dict['total_loss'].item()
                total_sentiment_loss += loss_dict['sentiment_loss'].item()
                total_emotion_loss += loss_dict['emotion_loss'].item()
                
                # Predictions
                sentiment_preds = torch.argmax(outputs['sentiment_logits'], dim=-1)
                emotion_preds = torch.argmax(outputs['emotion_logits'], dim=-1)
                
                sentiment_predictions.extend(sentiment_preds.cpu().numpy())
                sentiment_true_labels.extend(sentiment_labels.cpu().numpy())
                emotion_predictions.extend(emotion_preds.cpu().numpy())
                emotion_true_labels.extend(emotion_labels.cpu().numpy())
        
        # Calculate metrics
        num_batches = len(self.val_loader)
        sentiment_accuracy = accuracy_score(sentiment_true_labels, sentiment_predictions)
        emotion_accuracy = accuracy_score(emotion_true_labels, emotion_predictions)
        
        return {
            'val_loss': total_loss / num_batches,
            'val_sentiment_loss': total_sentiment_loss / num_batches,
            'val_emotion_loss': total_emotion_loss / num_batches,
            'val_sentiment_accuracy': sentiment_accuracy,
            'val_emotion_accuracy': emotion_accuracy,
            'sentiment_predictions': sentiment_predictions,
            'sentiment_true_labels': sentiment_true_labels,
            'emotion_predictions': emotion_predictions,
            'emotion_true_labels': emotion_true_labels
        }
    
    def train(self) -> Dict[str, List]:
        """Main training loop"""
        print(f"🚀 Starting training for {self.config.num_epochs} epochs...")
        
        best_combined_score = 0.0
        
        for epoch in range(self.config.num_epochs):
            print(f"\n📍 Epoch {epoch + 1}/{self.config.num_epochs}")
            print("-" * 50)
            
            # Train for one epoch
            train_metrics = self.train_epoch()
            
            # Evaluate
            val_metrics = self.evaluate()
            
            # Update alpha if adaptive
            if self.alpha_scheduler:
                new_alpha = self.alpha_scheduler.step(
                    val_metrics['val_sentiment_accuracy'],
                    val_metrics['val_emotion_accuracy']
                )
                self.loss_fn.update_alpha(new_alpha)
            
            # Calculate combined score for checkpointing
            combined_score = (
                val_metrics['val_sentiment_accuracy'] + 
                val_metrics['val_emotion_accuracy']
            ) / 2
            
            is_best = combined_score > best_combined_score
            if is_best:
                best_combined_score = combined_score
            
            # Log metrics
            current_lr = self.scheduler.get_last_lr()[0]
            
            print(f"📊 Epoch {epoch + 1} Results:")
            print(f"  Train Loss: {train_metrics['train_loss']:.4f}")
            print(f"  Val Loss: {val_metrics['val_loss']:.4f}")
            print(f"  Sentiment Accuracy: {val_metrics['val_sentiment_accuracy']:.4f}")
            print(f"  Emotion Accuracy: {val_metrics['val_emotion_accuracy']:.4f}")
            print(f"  Combined Score: {combined_score:.4f}")
            print(f"  Alpha: {self.loss_fn.alpha:.3f}")
            print(f"  Learning Rate: {current_lr:.2e}")
            
            # Save history
            self.training_history['epoch'].append(epoch + 1)
            self.training_history['train_loss'].append(train_metrics['train_loss'])
            self.training_history['train_sentiment_loss'].append(train_metrics['train_sentiment_loss'])
            self.training_history['train_emotion_loss'].append(train_metrics['train_emotion_loss'])
            self.training_history['val_loss'].append(val_metrics['val_loss'])
            self.training_history['val_sentiment_loss'].append(val_metrics['val_sentiment_loss'])
            self.training_history['val_emotion_loss'].append(val_metrics['val_emotion_loss'])
            self.training_history['val_sentiment_accuracy'].append(val_metrics['val_sentiment_accuracy'])
            self.training_history['val_emotion_accuracy'].append(val_metrics['val_emotion_accuracy'])
            self.training_history['alpha'].append(self.loss_fn.alpha)
            self.training_history['learning_rate'].append(current_lr)
            
            # Save checkpoint
            if self.config.save_strategy == "epoch":
                self.checkpointer.save_checkpoint(
                    model=self.model,
                    tokenizer=self.tokenizer,
                    optimizer=self.optimizer,
                    scheduler=self.scheduler,
                    epoch=epoch + 1,
                    metrics=val_metrics,
                    is_best=is_best
                )
            
            # Early stopping
            if self.early_stopping(combined_score):
                print(f"⏹️ Early stopping triggered at epoch {epoch + 1}")
                break
        
        print(f"\n🎉 Training completed!")
        print(f"Best combined score: {best_combined_score:.4f}")
        
        return self.training_history

print("✅ Training loop defined!")

✅ Training loop defined!


In [36]:
# Cell 2: Multitask Model Architecture (FIXED)
class MultiTaskTransformer(nn.Module):
    """
    Multitask Learning Framework for Sentiment and Emotion Classification
    
    Features:
    - Shared transformer encoder (BERTweet, DeBERTa)
    - Task-specific attention heads
    - Parallel classification heads
    - Dropout for regularization
    """
    
    def __init__(
        self,
        model_name: str = "microsoft/deberta-base",
        sentiment_num_classes: int = 3,
        emotion_num_classes: int = 6,
        hidden_dropout_prob: float = 0.1,
        attention_dropout_prob: float = 0.1,
        classifier_dropout: float = 0.1,
        freeze_encoder: bool = False
    ):
        super(MultiTaskTransformer, self).__init__()
        
        self.model_name = model_name
        self.sentiment_num_classes = sentiment_num_classes
        self.emotion_num_classes = emotion_num_classes
        
        # Load configuration and adjust dropout
        config = AutoConfig.from_pretrained(model_name)
        config.hidden_dropout_prob = hidden_dropout_prob
        config.attention_probs_dropout_prob = attention_dropout_prob
        
        # Shared transformer encoder
        self.shared_encoder = AutoModel.from_pretrained(
            model_name,
            config=config,
            ignore_mismatched_sizes=True
        )
        
        # Freeze encoder if specified
        if freeze_encoder:
            for param in self.shared_encoder.parameters():
                param.requires_grad = False
        
        hidden_size = self.shared_encoder.config.hidden_size
        
        # Task-specific attention layers
        self.sentiment_attention = nn.MultiheadAttention(
            embed_dim=hidden_size,
            num_heads=8,
            dropout=attention_dropout_prob,
            batch_first=True
        )
        
        self.emotion_attention = nn.MultiheadAttention(
            embed_dim=hidden_size,
            num_heads=8,
            dropout=attention_dropout_prob,
            batch_first=True
        )
        
        # Shared attention for common features
        self.shared_attention = nn.MultiheadAttention(
            embed_dim=hidden_size,
            num_heads=8,
            dropout=attention_dropout_prob,
            batch_first=True
        )
        
        # Layer normalization
        self.sentiment_norm = nn.LayerNorm(hidden_size)
        self.emotion_norm = nn.LayerNorm(hidden_size)
        self.shared_norm = nn.LayerNorm(hidden_size)
        
        # Dropout layers
        self.sentiment_dropout = nn.Dropout(classifier_dropout)
        self.emotion_dropout = nn.Dropout(classifier_dropout)
        self.shared_dropout = nn.Dropout(classifier_dropout)
        
        # Classification heads
        self.sentiment_classifier = nn.Sequential(
            nn.Linear(hidden_size * 2, hidden_size),  # *2 for shared + task-specific
            nn.ReLU(),
            nn.Dropout(classifier_dropout),
            nn.Linear(hidden_size, sentiment_num_classes)
        )
        
        self.emotion_classifier = nn.Sequential(
            nn.Linear(hidden_size * 2, hidden_size),  # *2 for shared + task-specific
            nn.ReLU(),
            nn.Dropout(classifier_dropout),
            nn.Linear(hidden_size, emotion_num_classes)
        )
        
        # Initialize weights
        self._init_weights()
    
    def _init_weights(self):
        """Initialize classification head weights"""
        for module in [self.sentiment_classifier, self.emotion_classifier]:
            for layer in module:
                if isinstance(layer, nn.Linear):
                    nn.init.xavier_uniform_(layer.weight)
                    nn.init.zeros_(layer.bias)
    
    def forward(
        self,
        input_ids: torch.Tensor,
        attention_mask: torch.Tensor,
        task: Optional[str] = None
    ) -> Dict[str, torch.Tensor]:
        """
        Forward pass
        
        Args:
            input_ids: Token IDs [batch_size, seq_len]
            attention_mask: Attention mask [batch_size, seq_len]
            task: Optional task specification ("sentiment", "emotion", or None for both)
        
        Returns:
            Dictionary containing logits for requested tasks
        """
        # Shared encoder
        encoder_outputs = self.shared_encoder(
            input_ids=input_ids,
            attention_mask=attention_mask,
            return_dict=True
        )
        
        # Get sequence output [batch_size, seq_len, hidden_size]
        sequence_output = encoder_outputs.last_hidden_state
        
        # Apply shared attention to capture common linguistic features
        shared_attended, _ = self.shared_attention(
            sequence_output, sequence_output, sequence_output,
            key_padding_mask=~attention_mask.bool()
        )
        shared_attended = self.shared_norm(shared_attended + sequence_output)
        shared_attended = self.shared_dropout(shared_attended)
        
        # Pool shared features (use [CLS] token or mean pooling)
        shared_pooled = shared_attended[:, 0, :]  # [CLS] token
        
        outputs = {}
        
        # Sentiment branch
        if task is None or task == "sentiment":
            # Task-specific attention for sentiment
            sentiment_attended, sentiment_weights = self.sentiment_attention(
                sequence_output, sequence_output, sequence_output,
                key_padding_mask=~attention_mask.bool()
            )
            sentiment_attended = self.sentiment_norm(sentiment_attended + sequence_output)
            sentiment_attended = self.sentiment_dropout(sentiment_attended)
            
            # Pool sentiment features
            sentiment_pooled = sentiment_attended[:, 0, :]  # [CLS] token
            
            # Combine shared and task-specific features
            sentiment_features = torch.cat([shared_pooled, sentiment_pooled], dim=-1)
            
            # Sentiment classification
            sentiment_logits = self.sentiment_classifier(sentiment_features)
            outputs["sentiment_logits"] = sentiment_logits
            outputs["sentiment_attention_weights"] = sentiment_weights
        
        # Emotion branch
        if task is None or task == "emotion":
            # Task-specific attention for emotion
            emotion_attended, emotion_weights = self.emotion_attention(
                sequence_output, sequence_output, sequence_output,
                key_padding_mask=~attention_mask.bool()
            )
            emotion_attended = self.emotion_norm(emotion_attended + sequence_output)
            emotion_attended = self.emotion_dropout(emotion_attended)
            
            # Pool emotion features
            emotion_pooled = emotion_attended[:, 0, :]  # [CLS] token
            
            # Combine shared and task-specific features
            emotion_features = torch.cat([shared_pooled, emotion_pooled], dim=-1)
            
            # Emotion classification
            emotion_logits = self.emotion_classifier(emotion_features)
            outputs["emotion_logits"] = emotion_logits
            outputs["emotion_attention_weights"] = emotion_weights
        
        return outputs
    
    # ✅ ADD THESE MISSING HUGGING FACE COMPATIBLE METHODS
    def save_pretrained(self, save_directory: str):
        """Save the model in Hugging Face compatible format"""
        import os
        import json
        
        os.makedirs(save_directory, exist_ok=True)
        
        # Save model state dict
        model_path = os.path.join(save_directory, "pytorch_model.bin")
        torch.save(self.state_dict(), model_path)
        
        # Save config
        config = {
            "model_name": self.model_name,
            "sentiment_num_classes": self.sentiment_num_classes,
            "emotion_num_classes": self.emotion_num_classes,
            "model_type": "MultiTaskTransformer"
        }
        config_path = os.path.join(save_directory, "config.json")
        with open(config_path, 'w') as f:
            json.dump(config, f, indent=2)
        
        print(f"Model saved to {save_directory}")
    
    @classmethod
    def from_pretrained(cls, model_path: str, **kwargs):
        """Load the model in Hugging Face compatible format"""
        import os
        import json
        
        # Load config
        config_path = os.path.join(model_path, "config.json")
        with open(config_path, 'r') as f:
            config = json.load(f)
        
        # Create model instance
        model = cls(
            model_name=config["model_name"],
            sentiment_num_classes=config["sentiment_num_classes"],
            emotion_num_classes=config["emotion_num_classes"],
            **kwargs
        )
        
        # Load state dict
        model_file = os.path.join(model_path, "pytorch_model.bin")
        state_dict = torch.load(model_file, map_location='cpu')
        model.load_state_dict(state_dict)
        
        print(f"Model loaded from {model_path}")
        return model

# Model configuration options
MODEL_CONFIGS = {
    "bertweet": {
        "name": "vinai/bertweet-base",
        "description": "BERTweet optimized for social media text"
    },
    "deberta": {
        "name": "microsoft/deberta-base",
        "description": "DeBERTa with enhanced attention mechanism"
    }
}

print("✅ Multitask model architecture defined!")
print("Available models:", list(MODEL_CONFIGS.keys()))

✅ Multitask model architecture defined!
Available models: ['bertweet', 'deberta']


In [37]:
# Cell 4: Loss Function with Weighting
class MultiTaskLoss(nn.Module):
    """
    Weighted loss function for multitask learning
    """
    
    def __init__(
        self,
        alpha: float = 0.5,
        sentiment_class_weights: Optional[torch.Tensor] = None,
        emotion_class_weights: Optional[torch.Tensor] = None,
        device: torch.device = None
    ):
        """
        Args:
            alpha: Weight parameter between sentiment and emotion loss (0.3-0.7)
            sentiment_class_weights: Class weights for sentiment imbalance
            emotion_class_weights: Class weights for emotion imbalance
        """
        super(MultiTaskLoss, self).__init__()
        
        self.alpha = alpha
        self.device = device or torch.device('cpu')
        
        # Initialize loss functions with class weights
        self.sentiment_loss_fn = nn.CrossEntropyLoss(
            weight=sentiment_class_weights.to(self.device) if sentiment_class_weights is not None else None
        )
        self.emotion_loss_fn = nn.CrossEntropyLoss(
            weight=emotion_class_weights.to(self.device) if emotion_class_weights is not None else None
        )
    
    def forward(
        self,
        sentiment_logits: torch.Tensor,
        emotion_logits: torch.Tensor,
        sentiment_labels: torch.Tensor,
        emotion_labels: torch.Tensor
    ) -> Dict[str, torch.Tensor]:
        """
        Calculate weighted multitask loss
        
        Returns:
            Dictionary containing individual and combined losses
        """
        # Calculate individual losses
        sentiment_loss = self.sentiment_loss_fn(sentiment_logits, sentiment_labels)
        emotion_loss = self.emotion_loss_fn(emotion_logits, emotion_labels)
        
        # Weighted combination
        total_loss = self.alpha * sentiment_loss + (1 - self.alpha) * emotion_loss
        
        return {
            'total_loss': total_loss,
            'sentiment_loss': sentiment_loss,
            'emotion_loss': emotion_loss,
            'alpha': self.alpha
        }
    
    def update_alpha(self, new_alpha: float):
        """Update alpha parameter during training"""
        self.alpha = max(0.3, min(0.7, new_alpha))  # Constrain to [0.3, 0.7]

def compute_class_weights_from_labels(labels: List[int], device: torch.device) -> torch.Tensor:
    """Compute class weights for imbalanced datasets"""
    unique_labels = np.unique(labels)
    class_weights = compute_class_weight(
        'balanced',
        classes=unique_labels,
        y=labels
    )
    return torch.FloatTensor(class_weights).to(device)

class AdaptiveAlphaScheduler:
    """
    Adaptive alpha scheduler that adjusts the loss weighting based on task performance
    """
    
    def __init__(self, initial_alpha: float = 0.5, adaptation_rate: float = 0.1):
        self.alpha = initial_alpha
        self.adaptation_rate = adaptation_rate
        self.sentiment_history = []
        self.emotion_history = []
    
    def step(self, sentiment_accuracy: float, emotion_accuracy: float) -> float:
        """
        Adjust alpha based on relative task performance
        Better performing task gets lower weight to balance learning
        """
        self.sentiment_history.append(sentiment_accuracy)
        self.emotion_history.append(emotion_accuracy)
        
        if len(self.sentiment_history) >= 2:
            # Calculate performance difference
            sentiment_trend = sentiment_accuracy - np.mean(self.sentiment_history[-3:])
            emotion_trend = emotion_accuracy - np.mean(self.emotion_history[-3:])
            
            # Adjust alpha: if sentiment is improving faster, decrease its weight
            if sentiment_trend > emotion_trend:
                self.alpha -= self.adaptation_rate
            elif emotion_trend > sentiment_trend:
                self.alpha += self.adaptation_rate
            
            # Constrain alpha to [0.3, 0.7]
            self.alpha = max(0.3, min(0.7, self.alpha))
        
        return self.alpha

print("✅ Loss functions and schedulers defined!")

✅ Loss functions and schedulers defined!


In [38]:
# Cell 7: Evaluation Functions
class MultiTaskEvaluator:
    """
    Simplified evaluation for multitask models
    """
    
    def __init__(
        self,
        model: MultiTaskTransformer,
        tokenizer,
        sentiment_encoder: LabelEncoder,
        emotion_encoder: LabelEncoder,
        device: torch.device
    ):
        self.model = model
        self.tokenizer = tokenizer
        self.sentiment_encoder = sentiment_encoder
        self.emotion_encoder = emotion_encoder
        self.device = device
        
        self.model.eval()
    
    def evaluate_dataset(
        self,
        texts: List[str],
        sentiment_labels: List[int],
        emotion_labels: List[int],
        batch_size: int = 32
    ) -> Dict[str, any]:
        """
        Evaluate model on a dataset
        """
        dataset = MultiTaskDataset(
            texts=texts,
            sentiment_labels=sentiment_labels,
            emotion_labels=emotion_labels,
            tokenizer=self.tokenizer,
            max_length=512,
            sentiment_label_encoder=self.sentiment_encoder,
            emotion_label_encoder=self.emotion_encoder
        )
        
        dataloader = DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=False,
            num_workers=0
        )
        
        sentiment_predictions = []
        emotion_predictions = []
        sentiment_true_labels = []
        emotion_true_labels = []
        
        with torch.no_grad():
            for batch in dataloader:
                input_ids = batch['input_ids'].to(self.device)
                attention_mask = batch['attention_mask'].to(self.device)
                
                outputs = self.model(
                    input_ids=input_ids,
                    attention_mask=attention_mask
                )
                
                # Get predictions
                sentiment_preds = torch.argmax(outputs['sentiment_logits'], dim=-1)
                emotion_preds = torch.argmax(outputs['emotion_logits'], dim=-1)
                
                # Store results
                sentiment_predictions.extend(sentiment_preds.cpu().numpy())
                emotion_predictions.extend(emotion_preds.cpu().numpy())
                sentiment_true_labels.extend(batch['sentiment_labels'].numpy())
                emotion_true_labels.extend(batch['emotion_labels'].numpy())
        
        # Calculate metrics
        results = self._calculate_metrics(
            sentiment_predictions=sentiment_predictions,
            emotion_predictions=emotion_predictions,
            sentiment_true_labels=sentiment_true_labels,
            emotion_true_labels=emotion_true_labels
        )
        
        return results
    
    def _calculate_metrics(
        self,
        sentiment_predictions: List[int],
        emotion_predictions: List[int],
        sentiment_true_labels: List[int],
        emotion_true_labels: List[int]
    ) -> Dict[str, any]:
        """Calculate simplified metrics: only accuracy and macro F1"""
        
        # Sentiment metrics
        sentiment_accuracy = accuracy_score(sentiment_true_labels, sentiment_predictions)
        sentiment_f1_macro = f1_score(sentiment_true_labels, sentiment_predictions, average='macro', zero_division=0)
        
        # Emotion metrics
        emotion_accuracy = accuracy_score(emotion_true_labels, emotion_predictions)
        emotion_f1_macro = f1_score(emotion_true_labels, emotion_predictions, average='macro', zero_division=0)
        
        return {
            'sentiment': {
                'accuracy': sentiment_accuracy,
                'f1_macro': sentiment_f1_macro,
                'predictions': sentiment_predictions,
                'true_labels': sentiment_true_labels
            },
            'emotion': {
                'accuracy': emotion_accuracy,
                'f1_macro': emotion_f1_macro,
                'predictions': emotion_predictions,
                'true_labels': emotion_true_labels
            },
            'combined': {
                'average_accuracy': (sentiment_accuracy + emotion_accuracy) / 2,
                'average_f1': (sentiment_f1_macro + emotion_f1_macro) / 2
            }
        }

print("✅ Simplified evaluation functions defined!")

✅ Simplified evaluation functions defined!


In [39]:
# Cell 8: Inference Functions
class MultiTaskPredictor:
    """
    Inference class for multitask model
    """
    
    def __init__(
        self,
        model_path: str,
        sentiment_encoder_path: str,
        emotion_encoder_path: str,
        device: torch.device = None
    ):
        self.device = device or torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        
        # Load tokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        
        # Load model
        self.model = MultiTaskTransformer.from_pretrained(model_path)
        self.model.to(self.device)
        self.model.eval()
        
        # Load label encoders
        import joblib
        self.sentiment_encoder = joblib.load(sentiment_encoder_path)
        self.emotion_encoder = joblib.load(emotion_encoder_path)
        
        print(f"✅ Model loaded successfully!")
        print(f"Device: {self.device}")
        print(f"Sentiment classes: {list(self.sentiment_encoder.classes_)}")
        print(f"Emotion classes: {list(self.emotion_encoder.classes_)}")
    
    def predict_single(
        self,
        text: str,
        return_probabilities: bool = True,
        return_attention: bool = False
    ) -> Dict[str, any]:
        """
        Predict sentiment and emotion for a single text
        """
        # Tokenize
        inputs = self.tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            padding="max_length",
            max_length=512
        )
        
        # Move to device
        inputs = {k: v.to(self.device) for k, v in inputs.items()}
        
        with torch.no_grad():
            outputs = self.model(**inputs)
            
            # Get predictions
            sentiment_logits = outputs['sentiment_logits']
            emotion_logits = outputs['emotion_logits']
            
            sentiment_probs = F.softmax(sentiment_logits, dim=-1)
            emotion_probs = F.softmax(emotion_logits, dim=-1)
            
            sentiment_pred_id = torch.argmax(sentiment_logits, dim=-1).item()
            emotion_pred_id = torch.argmax(emotion_logits, dim=-1).item()
            
            # Decode predictions
            sentiment_label = self.sentiment_encoder.inverse_transform([sentiment_pred_id])[0]
            emotion_label = self.emotion_encoder.inverse_transform([emotion_pred_id])[0]
            
            result = {
                'text': text,
                'sentiment': {
                    'label': sentiment_label,
                    'confidence': sentiment_probs[0][sentiment_pred_id].item(),
                    'class_id': sentiment_pred_id
                },
                'emotion': {
                    'label': emotion_label,
                    'confidence': emotion_probs[0][emotion_pred_id].item(),
                    'class_id': emotion_pred_id
                }
            }
            
            if return_probabilities:
                result['sentiment']['probabilities'] = {
                    class_name: prob.item() for class_name, prob in 
                    zip(self.sentiment_encoder.classes_, sentiment_probs[0])
                }
                result['emotion']['probabilities'] = {
                    class_name: prob.item() for class_name, prob in 
                    zip(self.emotion_encoder.classes_, emotion_probs[0])
                }
            
            if return_attention:
                result['sentiment']['attention_weights'] = outputs['sentiment_attention_weights']
                result['emotion']['attention_weights'] = outputs['emotion_attention_weights']
        
        return result
    
    def predict_batch(
        self,
        texts: List[str],
        batch_size: int = 32,
        return_probabilities: bool = False
    ) -> List[Dict[str, any]]:
        """
        Predict sentiment and emotion for a batch of texts
        """
        results = []
        
        for i in range(0, len(texts), batch_size):
            batch_texts = texts[i:i + batch_size]
            
            # Tokenize batch
            inputs = self.tokenizer(
                batch_texts,
                return_tensors="pt",
                truncation=True,
                padding="max_length",
                max_length=512
            )
            
            # Move to device
            inputs = {k: v.to(self.device) for k, v in inputs.items()}
            
            with torch.no_grad():
                outputs = self.model(**inputs)
                
                sentiment_logits = outputs['sentiment_logits']
                emotion_logits = outputs['emotion_logits']
                
                sentiment_probs = F.softmax(sentiment_logits, dim=-1)
                emotion_probs = F.softmax(emotion_logits, dim=-1)
                
                sentiment_preds = torch.argmax(sentiment_logits, dim=-1)
                emotion_preds = torch.argmax(emotion_logits, dim=-1)
                
                # Process each item in batch
                for j in range(len(batch_texts)):
                    sentiment_pred_id = sentiment_preds[j].item()
                    emotion_pred_id = emotion_preds[j].item()
                    
                    sentiment_label = self.sentiment_encoder.inverse_transform([sentiment_pred_id])[0]
                    emotion_label = self.emotion_encoder.inverse_transform([emotion_pred_id])[0]
                    
                    result = {
                        'text': batch_texts[j],
                        'sentiment': {
                            'label': sentiment_label,
                            'confidence': sentiment_probs[j][sentiment_pred_id].item(),
                            'class_id': sentiment_pred_id
                        },
                        'emotion': {
                            'label': emotion_label,
                            'confidence': emotion_probs[j][emotion_pred_id].item(),
                            'class_id': emotion_pred_id
                        }
                    }
                    
                    if return_probabilities:
                        result['sentiment']['probabilities'] = {
                            class_name: prob.item() for class_name, prob in 
                            zip(self.sentiment_encoder.classes_, sentiment_probs[j])
                        }
                        result['emotion']['probabilities'] = {
                            class_name: prob.item() for class_name, prob in 
                            zip(self.emotion_encoder.classes_, emotion_probs[j])
                        }
                    
                    results.append(result)
        
        return results

def save_model_and_encoders(
    model: MultiTaskTransformer,
    tokenizer,
    sentiment_encoder: LabelEncoder,
    emotion_encoder: LabelEncoder,
    output_dir: str
):
    """Save complete model with encoders"""
    import joblib
    
    os.makedirs(output_dir, exist_ok=True)
    
    # Save model and tokenizer
    model.save_pretrained(output_dir)
    tokenizer.save_pretrained(output_dir)
    
    # Save encoders
    joblib.dump(sentiment_encoder, os.path.join(output_dir, 'sentiment_encoder.pkl'))
    joblib.dump(emotion_encoder, os.path.join(output_dir, 'emotion_encoder.pkl'))
    
    # Save model configuration
    config = {
        'sentiment_classes': list(sentiment_encoder.classes_),
        'emotion_classes': list(emotion_encoder.classes_),
        'sentiment_num_classes': len(sentiment_encoder.classes_),
        'emotion_num_classes': len(emotion_encoder.classes_)
    }
    
    with open(os.path.join(output_dir, 'model_config.json'), 'w') as f:
        json.dump(config, f, indent=2)
    
    print(f"✅ Model and encoders saved to: {output_dir}")

print("✅ Inference functions defined!")

✅ Inference functions defined!


# DeBERTa Training

In [17]:
# Clear everything first
aggressive_memory_cleanup()

# Run ultra-lightweight training
print("🚀 Starting Ultra-Lightweight Training for 8GB GPU...")
model, results = run_ultra_lightweight_training(
    reddit_data_path="annotated_reddit_posts.csv",
    model_name="microsoft/deberta-base",
    output_dir="./multitask_model_ultra_light",
    max_external_samples=500  # Very small training set
)

if model is not None:
    print("\n🎉 Training successful!")
    print(f"Sentiment Accuracy: {results['sentiment']['accuracy']:.4f}")
    print(f"Emotion Accuracy: {results['emotion']['accuracy']:.4f}")
else:
    print("\n💡 If still failing, try:")
    print("1. Restart kernel completely")
    print("2. Use CPU training: device = torch.device('cpu')")
    print("3. Use an even smaller model like 'distilbert-base-uncased'")

🧹 Memory cleaned!
  Allocated: 2.24 GB
  Cached: 4.82 GB
🚀 Starting Ultra-Lightweight Training for 8GB GPU...
🚀 Starting Ultra-Lightweight Training
🧹 Memory cleaned!
  Allocated: 2.24 GB
  Cached: 2.51 GB

1️⃣ Loading minimal external datasets...
Loading external datasets for training...
✅ SST-2 dataset loaded: 67349 train, 872 val
✅ GoEmotions dataset loaded: 43410 train, 5426 val
🔄 Preparing external datasets for multitask training...
✅ External data prepared:
  Train samples: 400
  Validation samples: 100
  Sentiment classes: [np.str_('Negative'), np.str_('Neutral'), np.str_('Positive')]
  Emotion classes: [np.str_('Anger'), np.str_('Fear'), np.str_('Joy'), np.str_('No Emotion'), np.str_('Sadness'), np.str_('Surprise')]

📈 Training set class distribution:
  Sentiment 'Negative': 192 samples
  Sentiment 'Neutral': 22 samples
  Sentiment 'Positive': 186 samples
  Emotion 'Anger': 129 samples
  Emotion 'Fear': 69 samples
  Emotion 'Joy': 50 samples
  Emotion 'No Emotion': 58 samples
  

### Hyperparameter Tuning

In [18]:
# Cell 10: Fixed Hyperparameter Tuning for External Datasets
import optuna
from optuna.samplers import TPESampler
from optuna.pruners import MedianPruner

class MultiTaskHyperparameterTuner:
    """
    Hyperparameter tuning for multitask learning using Optuna
    Now properly uses external datasets for training!
    """
    
    def __init__(
        self,
        reddit_data_path: str,
        n_trials: int = 15,  # Reduced for faster tuning
        model_name: str = "microsoft/deberta-base",
        max_external_samples: int = 2000  # Small for fast tuning
    ):
        self.reddit_data_path = reddit_data_path
        self.n_trials = n_trials
        self.model_name = model_name
        self.max_external_samples = max_external_samples
        
        # Validate model choice
        if model_name not in [config["name"] for config in MODEL_CONFIGS.values()]:
            available_models = [config["name"] for config in MODEL_CONFIGS.values()]
            raise ValueError(f"Model must be one of: {available_models}")
        
        # Load external datasets for training (like your main training)
        print("🔄 Loading external datasets for hyperparameter tuning...")
        sentiment_data, emotion_data = load_external_datasets()
        
        # Prepare external data splits
        self.external_data_splits, self.sentiment_encoder, self.emotion_encoder = prepare_external_data_for_multitask(
            sentiment_data, emotion_data, max_samples=max_external_samples
        )
        
        # Load Reddit data for evaluation
        reddit_df = pd.read_csv(reddit_data_path)
        self.reddit_evaluation_data = prepare_reddit_data_for_evaluation(
            reddit_df, self.sentiment_encoder, self.emotion_encoder
        )
        
        print(f"✅ Hyperparameter tuner initialized")
        print(f"Model: {model_name}")
        print(f"Training data: {len(self.external_data_splits['train']['texts'])} external samples")
        print(f"Evaluation data: {len(self.reddit_evaluation_data['texts'])} Reddit samples")
        print(f"Trials: {n_trials}")
    
    def objective(self, trial):
        """Optuna objective function"""
        
        # Sample hyperparameters - focused on key ones
        learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-4, log=True)
        batch_size = trial.suggest_categorical('batch_size', [2, 4, 8])  # Small for memory
        alpha = trial.suggest_float('alpha', 0.3, 0.7)
        hidden_dropout = trial.suggest_float('hidden_dropout_prob', 0.05, 0.3)
        classifier_dropout = trial.suggest_float('classifier_dropout', 0.1, 0.4)
        weight_decay = trial.suggest_float('weight_decay', 0.001, 0.1)
        warmup_ratio = trial.suggest_float('warmup_ratio', 0.05, 0.2)
        num_epochs = trial.suggest_int('num_epochs', 2, 5)  # Few epochs for speed
        max_length = trial.suggest_categorical('max_length', [128, 256])  # Memory optimization
        
        # Create configuration
        config = TrainingConfig(
            model_name=self.model_name,
            batch_size=batch_size,
            learning_rate=learning_rate,
            num_epochs=num_epochs,
            warmup_ratio=warmup_ratio,
            weight_decay=weight_decay,
            alpha=alpha,
            hidden_dropout_prob=hidden_dropout,
            classifier_dropout=classifier_dropout,
            max_length=max_length,
            adaptive_alpha=False,  # Disable for consistent comparison
            output_dir=f"./temp_trial_{trial.number}",
            save_strategy="no",  # Don't save during tuning
            save_total_limit=1
        )
        
        try:
            # Clear memory before trial
            aggressive_memory_cleanup()
            
            # Initialize trainer
            trainer = MultiTaskTrainer(
                config=config,
                sentiment_num_classes=len(self.sentiment_encoder.classes_),
                emotion_num_classes=len(self.emotion_encoder.classes_)
            )
            
            # Setup with external data (not Reddit data!)
            trainer.setup(self.external_data_splits, self.sentiment_encoder, self.emotion_encoder)
            
            # Train model on external data
            history = trainer.train()
            
            # Clear memory before evaluation
            aggressive_memory_cleanup()
            
            # Evaluate on Reddit data (the real test!)
            evaluator = MultiTaskEvaluator(
                model=trainer.model,
                tokenizer=trainer.tokenizer,
                sentiment_encoder=self.sentiment_encoder,
                emotion_encoder=self.emotion_encoder,
                device=device
            )
            
            reddit_results = evaluator.evaluate_dataset(
                texts=self.reddit_evaluation_data['texts'],
                sentiment_labels=self.reddit_evaluation_data['sentiment_labels'],
                emotion_labels=self.reddit_evaluation_data['emotion_labels'],
                batch_size=2  # Small batch for evaluation
            )
            
            # Combined score based on Reddit evaluation (what we really care about)
            combined_score = (
                reddit_results['sentiment']['accuracy'] + 
                reddit_results['emotion']['accuracy']
            ) / 2
            
            print(f"Trial {trial.number}: Combined Score = {combined_score:.4f} "
                  f"(Sentiment: {reddit_results['sentiment']['accuracy']:.4f}, "
                  f"Emotion: {reddit_results['emotion']['accuracy']:.4f})")
            
            # Clean up
            del trainer, evaluator
            aggressive_memory_cleanup()
            
            return combined_score
            
        except Exception as e:
            print(f"Trial {trial.number} failed: {e}")
            aggressive_memory_cleanup()
            return 0.0
    
    def tune(self) -> optuna.Study:
        """Run hyperparameter optimization"""
        
        study = optuna.create_study(
            direction='maximize',
            sampler=TPESampler(seed=42),
            pruner=MedianPruner(n_startup_trials=3, n_warmup_steps=2)
        )
        
        print(f"🔍 Starting hyperparameter optimization...")
        print(f"This will run {self.n_trials} trials - each training and evaluating a model")
        print("=" * 60)
        
        study.optimize(self.objective, n_trials=self.n_trials)
        
        # Print results
        print(f"\n🏆 Optimization completed!")
        print(f"Best trial: {study.best_trial.number}")
        print(f"Best combined score: {study.best_value:.4f}")
        print(f"Best parameters:")
        for key, value in study.best_params.items():
            print(f"  {key}: {value}")
        
        return study

def run_hyperparameter_tuning_fixed(
    reddit_data_path: str = "annotated_reddit_posts.csv",
    n_trials: int = 15,
    model_name: str = "microsoft/deberta-base"
):
    """Run hyperparameter tuning and train final model with best params"""
    
    print("🚀 Starting Hyperparameter Tuning with External Training Data")
    print("=" * 60)
    
    # Run tuning
    tuner = MultiTaskHyperparameterTuner(
        reddit_data_path=reddit_data_path,
        n_trials=n_trials,
        model_name=model_name,
        max_external_samples=2000  # Keep small for fast tuning
    )
    
    study = tuner.tune()
    
    # Train final model with best parameters
    print(f"\n🚀 Training final model with best hyperparameters...")
    print("=" * 60)
    
    best_params = study.best_params
    
    # Run the optimized training
    model, results = run_ultra_lightweight_training(
        reddit_data_path=reddit_data_path,
        model_name=model_name,
        output_dir="./multitask_model_optimized",
        max_external_samples=5000  # Use more data for final model
    )
    
    # Apply best hyperparameters to final training config
    print(f"\n📋 Best hyperparameters found:")
    for key, value in best_params.items():
        print(f"  {key}: {value}")
    
    # Save tuning results
    import pickle
    os.makedirs("./multitask_model_optimized", exist_ok=True)
    with open("./multitask_model_optimized/hyperparameter_study.pkl", 'wb') as f:
        pickle.dump(study, f)
    
    print(f"\n📈 Final optimized results:")
    print(f"Sentiment Accuracy: {results['sentiment']['accuracy']:.4f}")
    print(f"Emotion Accuracy: {results['emotion']['accuracy']:.4f}")
    
    return model, results, study

print("✅ Fixed hyperparameter tuning functions defined!")

✅ Fixed hyperparameter tuning functions defined!


In [19]:
# Cell: Run Hyperparameter Tuning
print("Starting Hyperparameter Optimization...")

# Run tuning (this will take some time - each trial trains a model)
model_optimized, results_optimized, study = run_hyperparameter_tuning_fixed(
    reddit_data_path="annotated_reddit_posts.csv",
    n_trials=15,  # Adjust based on how much time you want to spend
    model_name="microsoft/deberta-base"
)

print("\nOptimization Complete!")
print("=" * 50)
print("📊 Before vs After Optimization:")
print(f"Original  - Sentiment: 0.5579, Emotion: 0.0842")
print(f"Optimized - Sentiment: {results_optimized['sentiment']['accuracy']:.4f}, Emotion: {results_optimized['emotion']['accuracy']:.4f}")

Starting Hyperparameter Optimization...
🚀 Starting Hyperparameter Tuning with External Training Data
🔄 Loading external datasets for hyperparameter tuning...
Loading external datasets for training...
✅ SST-2 dataset loaded: 67349 train, 872 val
✅ GoEmotions dataset loaded: 43410 train, 5426 val
🔄 Preparing external datasets for multitask training...


[I 2025-07-11 22:32:11,946] A new study created in memory with name: no-name-a6add346-b4ec-4566-8e1e-0b0b9a6df221


✅ External data prepared:
  Train samples: 1600
  Validation samples: 400
  Sentiment classes: [np.str_('Negative'), np.str_('Neutral'), np.str_('Positive')]
  Emotion classes: [np.str_('Anger'), np.str_('Fear'), np.str_('Joy'), np.str_('No Emotion'), np.str_('Sadness'), np.str_('Surprise')]

📈 Training set class distribution:
  Sentiment 'Negative': 721 samples
  Sentiment 'Neutral': 92 samples
  Sentiment 'Positive': 787 samples
  Emotion 'Anger': 522 samples
  Emotion 'Fear': 267 samples
  Emotion 'Joy': 186 samples
  Emotion 'No Emotion': 231 samples
  Emotion 'Sadness': 281 samples
  Emotion 'Surprise': 113 samples
🔄 Preparing Reddit data for evaluation...
✅ Reddit evaluation data prepared: 95 samples
✅ Hyperparameter tuner initialized
Model: microsoft/deberta-base
Training data: 1600 external samples
Evaluation data: 95 Reddit samples
Trials: 15
🔍 Starting hyperparameter optimization...
This will run 15 trials - each training and evaluating a model
🧹 Memory cleaned!
  Allocated: 

[I 2025-07-11 22:44:17,935] Trial 0 finished with value: 0.3368421052631579 and parameters: {'learning_rate': 2.368863950364079e-05, 'batch_size': 2, 'alpha': 0.36240745617697456, 'hidden_dropout_prob': 0.08899863008405066, 'classifier_dropout': 0.11742508365045984, 'weight_decay': 0.08675143843171859, 'warmup_ratio': 0.14016725176148134, 'num_epochs': 4, 'max_length': 256}. Best is trial 0 with value: 0.3368421052631579.


Trial 0: Combined Score = 0.3368 (Sentiment: 0.4421, Emotion: 0.2316)
🧹 Memory cleaned!
  Allocated: 3.36 GB
  Cached: 3.66 GB
🧹 Memory cleaned!
  Allocated: 3.36 GB
  Cached: 3.66 GB
✅ Setup complete!
  Model: microsoft/deberta-base
  Training samples: 1600
  Validation samples: 400
  Training steps per epoch: 800
  Total training steps: 1600
🚀 Starting training for 2 epochs...

📍 Epoch 1/2
--------------------------------------------------
  Batch 20/800 | Loss: 2.4552 | LR: 6.02e-06 | Alpha: 0.422
  Batch 40/800 | Loss: 2.2762 | LR: 1.20e-05 | Alpha: 0.422
  Batch 60/800 | Loss: 2.0872 | LR: 1.81e-05 | Alpha: 0.422
  Batch 80/800 | Loss: 2.0518 | LR: 2.41e-05 | Alpha: 0.422
  Batch 100/800 | Loss: 2.0283 | LR: 3.01e-05 | Alpha: 0.422
  Batch 120/800 | Loss: 1.9495 | LR: 3.61e-05 | Alpha: 0.422
  Batch 140/800 | Loss: 1.9547 | LR: 4.21e-05 | Alpha: 0.422
  Batch 160/800 | Loss: 1.9497 | LR: 4.81e-05 | Alpha: 0.422
  Batch 180/800 | Loss: 1.9626 | LR: 5.42e-05 | Alpha: 0.422
  Batch 2

[I 2025-07-11 22:56:12,242] Trial 1 finished with value: 0.17368421052631577 and parameters: {'learning_rate': 6.798962421591133e-05, 'batch_size': 2, 'alpha': 0.4216968971838151, 'hidden_dropout_prob': 0.18118910790805948, 'classifier_dropout': 0.22958350559263474, 'weight_decay': 0.029831684879606152, 'warmup_ratio': 0.14177793420835694, 'num_epochs': 2, 'max_length': 256}. Best is trial 0 with value: 0.3368421052631579.


🧹 Memory cleaned!
  Allocated: 3.36 GB
  Cached: 3.66 GB
✅ Setup complete!
  Model: microsoft/deberta-base
  Training samples: 1600
  Validation samples: 400
  Training steps per epoch: 800
  Total training steps: 4000
🚀 Starting training for 5 epochs...

📍 Epoch 1/5
--------------------------------------------------
  Batch 20/800 | Loss: 2.0603 | LR: 2.39e-06 | Alpha: 0.537
  Batch 40/800 | Loss: 1.9639 | LR: 4.78e-06 | Alpha: 0.537
  Batch 60/800 | Loss: 1.9266 | LR: 7.18e-06 | Alpha: 0.537
  Batch 80/800 | Loss: 1.9171 | LR: 9.57e-06 | Alpha: 0.537
  Batch 100/800 | Loss: 1.8591 | LR: 1.20e-05 | Alpha: 0.537
  Batch 120/800 | Loss: 1.8642 | LR: 1.44e-05 | Alpha: 0.537
  Batch 140/800 | Loss: 1.8883 | LR: 1.67e-05 | Alpha: 0.537
  Batch 160/800 | Loss: 1.8616 | LR: 1.91e-05 | Alpha: 0.537
  Batch 180/800 | Loss: 1.8507 | LR: 2.15e-05 | Alpha: 0.537
  Batch 200/800 | Loss: 1.8490 | LR: 2.39e-05 | Alpha: 0.537
  Batch 220/800 | Loss: 1.8083 | LR: 2.63e-05 | Alpha: 0.537
  Batch 240/80

[I 2025-07-11 23:10:52,839] Trial 2 finished with value: 0.4263157894736842 and parameters: {'learning_rate': 2.858051065806938e-05, 'batch_size': 2, 'alpha': 0.5369658275448169, 'hidden_dropout_prob': 0.061612603179999434, 'classifier_dropout': 0.28226345557043153, 'weight_decay': 0.017881888245041864, 'warmup_ratio': 0.05975773894779193, 'num_epochs': 5, 'max_length': 128}. Best is trial 2 with value: 0.4263157894736842.


Trial 2: Combined Score = 0.4263 (Sentiment: 0.5789, Emotion: 0.2737)
🧹 Memory cleaned!
  Allocated: 3.36 GB
  Cached: 3.66 GB
🧹 Memory cleaned!
  Allocated: 3.36 GB
  Cached: 3.66 GB
✅ Setup complete!
  Model: microsoft/deberta-base
  Training samples: 1600
  Validation samples: 400
  Training steps per epoch: 400
  Total training steps: 1600
🚀 Starting training for 4 epochs...

📍 Epoch 1/4
--------------------------------------------------
  Batch 20/400 | Loss: 1.8216 | LR: 2.84e-06 | Alpha: 0.349
  Batch 40/400 | Loss: 1.7900 | LR: 5.68e-06 | Alpha: 0.349
  Batch 60/400 | Loss: 1.7716 | LR: 8.52e-06 | Alpha: 0.349
  Batch 80/400 | Loss: 1.7328 | LR: 1.14e-05 | Alpha: 0.349
  Batch 100/400 | Loss: 1.7410 | LR: 1.42e-05 | Alpha: 0.349
  Batch 120/400 | Loss: 1.7334 | LR: 1.70e-05 | Alpha: 0.349
  Batch 140/400 | Loss: 1.7176 | LR: 1.99e-05 | Alpha: 0.349
  Batch 160/400 | Loss: 1.6998 | LR: 1.99e-05 | Alpha: 0.349
  Batch 180/400 | Loss: 1.6841 | LR: 1.96e-05 | Alpha: 0.349
  Batch 2

[I 2025-07-12 00:12:16,150] Trial 3 finished with value: 0.368421052631579 and parameters: {'learning_rate': 2.0165721691808572e-05, 'batch_size': 4, 'alpha': 0.3488152939379115, 'hidden_dropout_prob': 0.17379422752781754, 'classifier_dropout': 0.11031655633456552, 'weight_decay': 0.09102271980579943, 'warmup_ratio': 0.08881699724000255, 'num_epochs': 4, 'max_length': 256}. Best is trial 2 with value: 0.4263157894736842.


🧹 Memory cleaned!
  Allocated: 3.36 GB
  Cached: 3.66 GB
🧹 Memory cleaned!
  Allocated: 3.36 GB
  Cached: 3.66 GB
✅ Setup complete!
  Model: microsoft/deberta-base
  Training samples: 1600
  Validation samples: 400
  Training steps per epoch: 400
  Total training steps: 800
🚀 Starting training for 2 epochs...

📍 Epoch 1/2
--------------------------------------------------
  Batch 20/400 | Loss: 1.6111 | LR: 1.41e-05 | Alpha: 0.676
  Batch 40/400 | Loss: 1.5572 | LR: 2.82e-05 | Alpha: 0.676
  Batch 60/400 | Loss: 1.4706 | LR: 3.47e-05 | Alpha: 0.676
  Batch 80/400 | Loss: 1.4811 | LR: 3.38e-05 | Alpha: 0.676
  Batch 100/400 | Loss: 1.5272 | LR: 3.29e-05 | Alpha: 0.676
  Batch 120/400 | Loss: 1.5479 | LR: 3.19e-05 | Alpha: 0.676
  Batch 140/400 | Loss: 1.5466 | LR: 3.10e-05 | Alpha: 0.676
  Batch 160/400 | Loss: 1.5442 | LR: 3.00e-05 | Alpha: 0.676
  Batch 180/400 | Loss: 1.5515 | LR: 2.91e-05 | Alpha: 0.676
  Batch 200/400 | Loss: 1.5606 | LR: 2.82e-05 | Alpha: 0.676
  Batch 220/400 | L

[I 2025-07-12 00:44:35,625] Trial 4 finished with value: 0.1894736842105263 and parameters: {'learning_rate': 3.521358805467871e-05, 'batch_size': 4, 'alpha': 0.6757995766256756, 'hidden_dropout_prob': 0.2737068376069122, 'classifier_dropout': 0.2793699936433256, 'weight_decay': 0.09226554926728857, 'warmup_ratio': 0.06327387530778793, 'num_epochs': 2, 'max_length': 256}. Best is trial 2 with value: 0.4263157894736842.


🧹 Memory cleaned!
  Allocated: 3.36 GB
  Cached: 3.66 GB
✅ Setup complete!
  Model: microsoft/deberta-base
  Training samples: 1600
  Validation samples: 400
  Training steps per epoch: 400
  Total training steps: 2000
🚀 Starting training for 5 epochs...

📍 Epoch 1/5
--------------------------------------------------
  Batch 20/400 | Loss: 2.1181 | LR: 4.01e-06 | Alpha: 0.412
  Batch 40/400 | Loss: 1.9063 | LR: 8.02e-06 | Alpha: 0.412
  Batch 60/400 | Loss: 1.7944 | LR: 1.20e-05 | Alpha: 0.412
  Batch 80/400 | Loss: 1.7587 | LR: 1.60e-05 | Alpha: 0.412
  Batch 100/400 | Loss: 1.7623 | LR: 2.01e-05 | Alpha: 0.412
  Batch 120/400 | Loss: 1.7479 | LR: 2.41e-05 | Alpha: 0.412
  Batch 140/400 | Loss: 1.7126 | LR: 2.42e-05 | Alpha: 0.412
  Batch 160/400 | Loss: 1.7053 | LR: 2.40e-05 | Alpha: 0.412
  Batch 180/400 | Loss: 1.6797 | LR: 2.37e-05 | Alpha: 0.412
  Batch 200/400 | Loss: 1.6813 | LR: 2.35e-05 | Alpha: 0.412
  Batch 220/400 | Loss: 1.6654 | LR: 2.32e-05 | Alpha: 0.412
  Batch 240/40

[I 2025-07-12 00:53:09,460] Trial 5 finished with value: 0.39473684210526316 and parameters: {'learning_rate': 2.4472440973990114e-05, 'batch_size': 4, 'alpha': 0.41237380387495226, 'hidden_dropout_prob': 0.18567402078956213, 'classifier_dropout': 0.1422772674924288, 'weight_decay': 0.08041750109464993, 'warmup_ratio': 0.06118259655196563, 'num_epochs': 5, 'max_length': 128}. Best is trial 2 with value: 0.4263157894736842.


🧹 Memory cleaned!
  Allocated: 3.36 GB
  Cached: 3.66 GB
🧹 Memory cleaned!
  Allocated: 3.36 GB
  Cached: 3.66 GB
✅ Setup complete!
  Model: microsoft/deberta-base
  Training samples: 1600
  Validation samples: 400
  Training steps per epoch: 800
  Total training steps: 3200
🚀 Starting training for 4 epochs...

📍 Epoch 1/4
--------------------------------------------------
  Batch 20/800 | Loss: 2.1773 | LR: 3.53e-07 | Alpha: 0.609
  Batch 40/800 | Loss: 2.2202 | LR: 7.06e-07 | Alpha: 0.609
  Batch 60/800 | Loss: 2.1256 | LR: 1.06e-06 | Alpha: 0.609
  Batch 80/800 | Loss: 2.0827 | LR: 1.41e-06 | Alpha: 0.609
  Batch 100/800 | Loss: 2.0332 | LR: 1.76e-06 | Alpha: 0.609
  Batch 120/800 | Loss: 1.9595 | LR: 2.12e-06 | Alpha: 0.609
  Batch 140/800 | Loss: 1.9284 | LR: 2.47e-06 | Alpha: 0.609
  Batch 160/800 | Loss: 1.9076 | LR: 2.82e-06 | Alpha: 0.609
  Batch 180/800 | Loss: 1.8841 | LR: 3.18e-06 | Alpha: 0.609
  Batch 200/800 | Loss: 1.8678 | LR: 3.53e-06 | Alpha: 0.609
  Batch 220/800 | 

[I 2025-07-12 01:07:13,178] Trial 6 finished with value: 0.3473684210526316 and parameters: {'learning_rate': 1.012796325733148e-05, 'batch_size': 2, 'alpha': 0.6085081386743783, 'hidden_dropout_prob': 0.06851116293352259, 'classifier_dropout': 0.2075397185632818, 'weight_decay': 0.012471036892987841, 'warmup_ratio': 0.17946551388133902, 'num_epochs': 4, 'max_length': 128}. Best is trial 2 with value: 0.4263157894736842.


Trial 6: Combined Score = 0.3474 (Sentiment: 0.5789, Emotion: 0.1158)
🧹 Memory cleaned!
  Allocated: 3.36 GB
  Cached: 3.66 GB
🧹 Memory cleaned!
  Allocated: 3.36 GB
  Cached: 3.66 GB
✅ Setup complete!
  Model: microsoft/deberta-base
  Training samples: 1600
  Validation samples: 400
  Training steps per epoch: 400
  Total training steps: 1600
🚀 Starting training for 4 epochs...

📍 Epoch 1/4
--------------------------------------------------
  Batch 20/400 | Loss: 1.7603 | LR: 1.56e-06 | Alpha: 0.655
  Batch 40/400 | Loss: 1.6183 | LR: 3.12e-06 | Alpha: 0.655
  Batch 60/400 | Loss: 1.5385 | LR: 4.69e-06 | Alpha: 0.655
  Batch 80/400 | Loss: 1.4815 | LR: 6.25e-06 | Alpha: 0.655
  Batch 100/400 | Loss: 1.4667 | LR: 7.81e-06 | Alpha: 0.655
  Batch 120/400 | Loss: 1.4496 | LR: 9.37e-06 | Alpha: 0.655
  Batch 140/400 | Loss: 1.4191 | LR: 1.09e-05 | Alpha: 0.655
  Batch 160/400 | Loss: 1.4158 | LR: 1.25e-05 | Alpha: 0.655
  Batch 180/400 | Loss: 1.3917 | LR: 1.41e-05 | Alpha: 0.655
  Batch 2

[I 2025-07-12 01:16:39,068] Trial 7 finished with value: 0.3736842105263158 and parameters: {'learning_rate': 2.0463613363481594e-05, 'batch_size': 4, 'alpha': 0.6548850970305305, 'hidden_dropout_prob': 0.16805373129048734, 'classifier_dropout': 0.13587827378149053, 'weight_decay': 0.07161123393507651, 'warmup_ratio': 0.16411775729253464, 'num_epochs': 4, 'max_length': 128}. Best is trial 2 with value: 0.4263157894736842.


🧹 Memory cleaned!
  Allocated: 3.36 GB
  Cached: 3.66 GB
🧹 Memory cleaned!
  Allocated: 3.36 GB
  Cached: 3.66 GB
✅ Setup complete!
  Model: microsoft/deberta-base
  Training samples: 1600
  Validation samples: 400
  Training steps per epoch: 800
  Total training steps: 1600
🚀 Starting training for 2 epochs...

📍 Epoch 1/2
--------------------------------------------------
  Batch 20/800 | Loss: 2.2357 | LR: 2.24e-06 | Alpha: 0.313
  Batch 40/800 | Loss: 2.2114 | LR: 4.49e-06 | Alpha: 0.313
  Batch 60/800 | Loss: 2.1841 | LR: 6.73e-06 | Alpha: 0.313
  Batch 80/800 | Loss: 2.1039 | LR: 8.98e-06 | Alpha: 0.313
  Batch 100/800 | Loss: 2.0801 | LR: 1.12e-05 | Alpha: 0.313
  Batch 120/800 | Loss: 2.0184 | LR: 1.35e-05 | Alpha: 0.313
  Batch 140/800 | Loss: 1.9915 | LR: 1.57e-05 | Alpha: 0.313
  Batch 160/800 | Loss: 1.9786 | LR: 1.80e-05 | Alpha: 0.313
  Batch 180/800 | Loss: 1.9980 | LR: 2.02e-05 | Alpha: 0.313
  Batch 200/800 | Loss: 1.9838 | LR: 2.24e-05 | Alpha: 0.313
  Batch 220/800 | 

[I 2025-07-12 01:23:02,826] Trial 8 finished with value: 0.16842105263157894 and parameters: {'learning_rate': 3.332213575546236e-05, 'batch_size': 2, 'alpha': 0.3125716742746937, 'hidden_dropout_prob': 0.20910260281594512, 'classifier_dropout': 0.19430679432289802, 'weight_decay': 0.051348498425305575, 'warmup_ratio': 0.18613497108891397, 'num_epochs': 2, 'max_length': 256}. Best is trial 2 with value: 0.4263157894736842.


Trial 8: Combined Score = 0.1684 (Sentiment: 0.2211, Emotion: 0.1158)
🧹 Memory cleaned!
  Allocated: 3.36 GB
  Cached: 3.66 GB
🧹 Memory cleaned!
  Allocated: 3.36 GB
  Cached: 3.66 GB
✅ Setup complete!
  Model: microsoft/deberta-base
  Training samples: 1600
  Validation samples: 400
  Training steps per epoch: 400
  Total training steps: 800
🚀 Starting training for 2 epochs...

📍 Epoch 1/2
--------------------------------------------------
  Batch 20/400 | Loss: 1.9237 | LR: 2.49e-06 | Alpha: 0.672
  Batch 40/400 | Loss: 1.8148 | LR: 4.98e-06 | Alpha: 0.672
  Batch 60/400 | Loss: 1.7651 | LR: 7.47e-06 | Alpha: 0.672
  Batch 80/400 | Loss: 1.7332 | LR: 9.96e-06 | Alpha: 0.672
  Batch 100/400 | Loss: 1.7455 | LR: 1.25e-05 | Alpha: 0.672
  Batch 120/400 | Loss: 1.7221 | LR: 1.49e-05 | Alpha: 0.672
  Batch 140/400 | Loss: 1.7057 | LR: 1.68e-05 | Alpha: 0.672
  Batch 160/400 | Loss: 1.6521 | LR: 1.63e-05 | Alpha: 0.672
  Batch 180/400 | Loss: 1.6408 | LR: 1.58e-05 | Alpha: 0.672
  Batch 20

[I 2025-07-12 01:26:49,271] Trial 9 finished with value: 0.32105263157894737 and parameters: {'learning_rate': 1.693550554929792e-05, 'batch_size': 4, 'alpha': 0.6718790609370292, 'hidden_dropout_prob': 0.25203009489110423, 'classifier_dropout': 0.2900211269531271, 'weight_decay': 0.08727459842858405, 'warmup_ratio': 0.17055081153486717, 'num_epochs': 2, 'max_length': 128}. Best is trial 2 with value: 0.4263157894736842.


🧹 Memory cleaned!
  Allocated: 3.36 GB
  Cached: 3.66 GB
✅ Setup complete!
  Model: microsoft/deberta-base
  Training samples: 1600
  Validation samples: 400
  Training steps per epoch: 200
  Total training steps: 1000
🚀 Starting training for 5 epochs...

📍 Epoch 1/5
--------------------------------------------------
  Batch 20/200 | Loss: 2.8077 | LR: 1.15e-05 | Alpha: 0.528
  Batch 40/200 | Loss: 2.3659 | LR: 2.30e-05 | Alpha: 0.528
  Batch 60/200 | Loss: 2.1232 | LR: 3.45e-05 | Alpha: 0.528
  Batch 80/200 | Loss: 2.0062 | LR: 4.61e-05 | Alpha: 0.528
  Batch 100/200 | Loss: 1.9342 | LR: 5.76e-05 | Alpha: 0.528
  Batch 120/200 | Loss: 1.8811 | LR: 5.88e-05 | Alpha: 0.528
  Batch 140/200 | Loss: 1.8708 | LR: 5.75e-05 | Alpha: 0.528
  Batch 160/200 | Loss: 1.8535 | LR: 5.61e-05 | Alpha: 0.528
  Batch 180/200 | Loss: 1.8445 | LR: 5.48e-05 | Alpha: 0.528
  Batch 200/200 | Loss: 1.8310 | LR: 5.35e-05 | Alpha: 0.528
📊 Epoch 1 Results:
  Train Loss: 1.8310
  Val Loss: 2.9437
  Sentiment Accu

[I 2025-07-12 01:47:06,341] Trial 10 finished with value: 0.17368421052631577 and parameters: {'learning_rate': 5.987384184453176e-05, 'batch_size': 8, 'alpha': 0.527827263346871, 'hidden_dropout_prob': 0.11240633271029743, 'classifier_dropout': 0.38923847118213384, 'weight_decay': 0.004833794796157115, 'warmup_ratio': 0.1041967994662714, 'num_epochs': 5, 'max_length': 128}. Best is trial 2 with value: 0.4263157894736842.


Trial 10: Combined Score = 0.1737 (Sentiment: 0.2211, Emotion: 0.1263)
🧹 Memory cleaned!
  Allocated: 3.36 GB
  Cached: 3.66 GB
🧹 Memory cleaned!
  Allocated: 3.36 GB
  Cached: 3.66 GB
✅ Setup complete!
  Model: microsoft/deberta-base
  Training samples: 1600
  Validation samples: 400
  Training steps per epoch: 200
  Total training steps: 1000
🚀 Starting training for 5 epochs...

📍 Epoch 1/5
--------------------------------------------------
  Batch 20/200 | Loss: 2.2936 | LR: 1.66e-05 | Alpha: 0.476
  Batch 40/200 | Loss: 1.9937 | LR: 3.32e-05 | Alpha: 0.476
  Batch 60/200 | Loss: 1.8716 | LR: 4.37e-05 | Alpha: 0.476
  Batch 80/200 | Loss: 1.8119 | LR: 4.28e-05 | Alpha: 0.476
  Batch 100/200 | Loss: 1.8144 | LR: 4.18e-05 | Alpha: 0.476
  Batch 120/200 | Loss: 1.7658 | LR: 4.09e-05 | Alpha: 0.476
  Batch 140/200 | Loss: 1.7280 | LR: 4.00e-05 | Alpha: 0.476
  Batch 160/200 | Loss: 1.6971 | LR: 3.91e-05 | Alpha: 0.476
  Batch 180/200 | Loss: 1.6687 | LR: 3.81e-05 | Alpha: 0.476
  Batch 

[I 2025-07-12 02:14:35,402] Trial 11 finished with value: 0.34736842105263155 and parameters: {'learning_rate': 4.402635555643861e-05, 'batch_size': 8, 'alpha': 0.47597968181131295, 'hidden_dropout_prob': 0.12926204858999552, 'classifier_dropout': 0.3224490909495544, 'weight_decay': 0.048707736636669656, 'warmup_ratio': 0.053010363186629, 'num_epochs': 5, 'max_length': 128}. Best is trial 2 with value: 0.4263157894736842.


🧹 Memory cleaned!
  Allocated: 3.36 GB
  Cached: 3.66 GB
✅ Setup complete!
  Model: microsoft/deberta-base
  Training samples: 1600
  Validation samples: 400
  Training steps per epoch: 800
  Total training steps: 4000
🚀 Starting training for 5 epochs...

📍 Epoch 1/5
--------------------------------------------------
  Batch 20/800 | Loss: 2.1519 | LR: 7.75e-07 | Alpha: 0.541
  Batch 40/800 | Loss: 1.9872 | LR: 1.55e-06 | Alpha: 0.541
  Batch 60/800 | Loss: 1.9896 | LR: 2.33e-06 | Alpha: 0.541
  Batch 80/800 | Loss: 1.8751 | LR: 3.10e-06 | Alpha: 0.541
  Batch 100/800 | Loss: 1.8404 | LR: 3.88e-06 | Alpha: 0.541
  Batch 120/800 | Loss: 1.7883 | LR: 4.65e-06 | Alpha: 0.541
  Batch 140/800 | Loss: 1.7871 | LR: 5.43e-06 | Alpha: 0.541
  Batch 160/800 | Loss: 1.7767 | LR: 6.20e-06 | Alpha: 0.541
  Batch 180/800 | Loss: 1.7853 | LR: 6.98e-06 | Alpha: 0.541
  Batch 200/800 | Loss: 1.7541 | LR: 7.75e-06 | Alpha: 0.541
  Batch 220/800 | Loss: 1.7388 | LR: 8.53e-06 | Alpha: 0.541
  Batch 240/80

[I 2025-07-12 02:23:20,758] Trial 12 finished with value: 0.3368421052631579 and parameters: {'learning_rate': 1.3567901359939668e-05, 'batch_size': 2, 'alpha': 0.5414951563175963, 'hidden_dropout_prob': 0.22238415028016056, 'classifier_dropout': 0.17185882911732656, 'weight_decay': 0.058092639497543494, 'warmup_ratio': 0.08764864857717905, 'num_epochs': 5, 'max_length': 128}. Best is trial 2 with value: 0.4263157894736842.


Trial 12: Combined Score = 0.3368 (Sentiment: 0.5263, Emotion: 0.1474)
🧹 Memory cleaned!
  Allocated: 3.36 GB
  Cached: 3.66 GB
🧹 Memory cleaned!
  Allocated: 3.36 GB
  Cached: 3.66 GB
✅ Setup complete!
  Model: microsoft/deberta-base
  Training samples: 1600
  Validation samples: 400
  Training steps per epoch: 400
  Total training steps: 1200
🚀 Starting training for 3 epochs...

📍 Epoch 1/3
--------------------------------------------------
  Batch 20/400 | Loss: 2.4159 | LR: 2.15e-05 | Alpha: 0.461
  Batch 40/400 | Loss: 2.2261 | LR: 4.31e-05 | Alpha: 0.461
  Batch 60/400 | Loss: 2.0970 | LR: 6.46e-05 | Alpha: 0.461
  Batch 80/400 | Loss: 2.0756 | LR: 8.62e-05 | Alpha: 0.461
  Batch 100/400 | Loss: 2.0575 | LR: 9.84e-05 | Alpha: 0.461
  Batch 120/400 | Loss: 1.9941 | LR: 9.66e-05 | Alpha: 0.461
  Batch 140/400 | Loss: 2.0086 | LR: 9.48e-05 | Alpha: 0.461
  Batch 160/400 | Loss: 2.0283 | LR: 9.30e-05 | Alpha: 0.461
  Batch 180/400 | Loss: 2.0306 | LR: 9.12e-05 | Alpha: 0.461
  Batch 

[I 2025-07-12 02:26:42,675] Trial 13 finished with value: 0.17368421052631577 and parameters: {'learning_rate': 9.908696623769013e-05, 'batch_size': 4, 'alpha': 0.460532761225948, 'hidden_dropout_prob': 0.13596253859670623, 'classifier_dropout': 0.3588816179337121, 'weight_decay': 0.025765020727265504, 'warmup_ratio': 0.07705191423702486, 'num_epochs': 3, 'max_length': 128}. Best is trial 2 with value: 0.4263157894736842.


Trial 13: Combined Score = 0.1737 (Sentiment: 0.2211, Emotion: 0.1263)
🧹 Memory cleaned!
  Allocated: 3.36 GB
  Cached: 3.66 GB
🧹 Memory cleaned!
  Allocated: 3.36 GB
  Cached: 3.66 GB
✅ Setup complete!
  Model: microsoft/deberta-base
  Training samples: 1600
  Validation samples: 400
  Training steps per epoch: 200
  Total training steps: 1000
🚀 Starting training for 5 epochs...

📍 Epoch 1/5
--------------------------------------------------
  Batch 20/200 | Loss: 2.1462 | LR: 4.89e-06 | Alpha: 0.571
  Batch 40/200 | Loss: 1.7950 | LR: 9.79e-06 | Alpha: 0.571
  Batch 60/200 | Loss: 1.6766 | LR: 1.47e-05 | Alpha: 0.571
  Batch 80/200 | Loss: 1.6119 | LR: 1.96e-05 | Alpha: 0.571
  Batch 100/200 | Loss: 1.5773 | LR: 2.45e-05 | Alpha: 0.571
  Batch 120/200 | Loss: 1.5420 | LR: 2.72e-05 | Alpha: 0.571
  Batch 140/200 | Loss: 1.4893 | LR: 2.65e-05 | Alpha: 0.571
  Batch 160/200 | Loss: 1.4625 | LR: 2.59e-05 | Alpha: 0.571
  Batch 180/200 | Loss: 1.4416 | LR: 2.53e-05 | Alpha: 0.571
  Batch 

[I 2025-07-12 02:40:52,434] Trial 14 finished with value: 0.34210526315789475 and parameters: {'learning_rate': 2.7410638932887623e-05, 'batch_size': 8, 'alpha': 0.5710146980775144, 'hidden_dropout_prob': 0.05724867212399054, 'classifier_dropout': 0.25660486731910687, 'weight_decay': 0.06780121012459085, 'warmup_ratio': 0.11228099245449503, 'num_epochs': 5, 'max_length': 128}. Best is trial 2 with value: 0.4263157894736842.


Trial 14: Combined Score = 0.3421 (Sentiment: 0.5684, Emotion: 0.1158)
🧹 Memory cleaned!
  Allocated: 3.36 GB
  Cached: 3.66 GB

🏆 Optimization completed!
Best trial: 2
Best combined score: 0.4263
Best parameters:
  learning_rate: 2.858051065806938e-05
  batch_size: 2
  alpha: 0.5369658275448169
  hidden_dropout_prob: 0.061612603179999434
  classifier_dropout: 0.28226345557043153
  weight_decay: 0.017881888245041864
  warmup_ratio: 0.05975773894779193
  num_epochs: 5
  max_length: 128

🚀 Training final model with best hyperparameters...
🚀 Starting Ultra-Lightweight Training
🧹 Memory cleaned!
  Allocated: 3.36 GB
  Cached: 3.66 GB

1️⃣ Loading minimal external datasets...
Loading external datasets for training...
✅ SST-2 dataset loaded: 67349 train, 872 val
✅ GoEmotions dataset loaded: 43410 train, 5426 val
🔄 Preparing external datasets for multitask training...
✅ External data prepared:
  Train samples: 4000
  Validation samples: 1000
  Sentiment classes: [np.str_('Negative'), np.str_(

In [20]:
print(f"\n🏆 Optimization completed!")
print(f"Best trial: {study.best_trial.number}")
print(f"Best combined score: {study.best_value:.4f}")
print(f"Best parameters:")
for key, value in study.best_params.items():
    print(f"  {key}: {value}")


🏆 Optimization completed!
Best trial: 2
Best combined score: 0.4263
Best parameters:
  learning_rate: 2.858051065806938e-05
  batch_size: 2
  alpha: 0.5369658275448169
  hidden_dropout_prob: 0.061612603179999434
  classifier_dropout: 0.28226345557043153
  weight_decay: 0.017881888245041864
  warmup_ratio: 0.05975773894779193
  num_epochs: 5
  max_length: 128


#### EVAL

In [16]:
# Cell: Create Fixed Predictor Class
class FixedMultiTaskPredictor:
    """
    Fixed inference class for multitask model - handles token_type_ids issue
    """
    
    def __init__(
        self,
        model_path: str,
        sentiment_encoder_path: str,
        emotion_encoder_path: str,
        device: torch.device = None
    ):
        self.device = device or torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        
        # Load tokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        
        # Load model
        self.model = MultiTaskTransformer.from_pretrained(model_path)
        self.model.to(self.device)
        self.model.eval()
        
        # Load label encoders
        import joblib
        self.sentiment_encoder = joblib.load(sentiment_encoder_path)
        self.emotion_encoder = joblib.load(emotion_encoder_path)
        
        print(f"✅ Fixed Model loaded successfully!")
        print(f"Device: {self.device}")
        print(f"Sentiment classes: {list(self.sentiment_encoder.classes_)}")
        print(f"Emotion classes: {list(self.emotion_encoder.classes_)}")
    
    def predict_batch(
        self,
        texts: List[str],
        batch_size: int = 32,
        return_probabilities: bool = False
    ) -> List[Dict[str, any]]:
        """
        Predict sentiment and emotion for a batch of texts - FIXED VERSION
        """
        results = []
        
        for i in range(0, len(texts), batch_size):
            batch_texts = texts[i:i + batch_size]
            
            # Tokenize batch
            inputs = self.tokenizer(
                batch_texts,
                return_tensors="pt",
                truncation=True,
                padding="max_length",
                max_length=512
            )
            
            # FIXED: Only pass the required inputs, filter out token_type_ids
            model_inputs = {
                'input_ids': inputs['input_ids'].to(self.device),
                'attention_mask': inputs['attention_mask'].to(self.device)
            }
            
            with torch.no_grad():
                outputs = self.model(**model_inputs)  # Only pass filtered inputs
                
                sentiment_logits = outputs['sentiment_logits']
                emotion_logits = outputs['emotion_logits']
                
                sentiment_probs = F.softmax(sentiment_logits, dim=-1)
                emotion_probs = F.softmax(emotion_logits, dim=-1)
                
                sentiment_preds = torch.argmax(sentiment_logits, dim=-1)
                emotion_preds = torch.argmax(emotion_logits, dim=-1)
                
                # Process each item in batch
                for j in range(len(batch_texts)):
                    sentiment_pred_id = sentiment_preds[j].item()
                    emotion_pred_id = emotion_preds[j].item()
                    
                    sentiment_label = self.sentiment_encoder.inverse_transform([sentiment_pred_id])[0]
                    emotion_label = self.emotion_encoder.inverse_transform([emotion_pred_id])[0]
                    
                    result = {
                        'text': batch_texts[j],
                        'sentiment': {
                            'label': sentiment_label,
                            'confidence': sentiment_probs[j][sentiment_pred_id].item(),
                            'class_id': sentiment_pred_id
                        },
                        'emotion': {
                            'label': emotion_label,
                            'confidence': emotion_probs[j][emotion_pred_id].item(),
                            'class_id': emotion_pred_id
                        }
                    }
                    
                    if return_probabilities:
                        result['sentiment']['probabilities'] = {
                            class_name: prob.item() for class_name, prob in 
                            zip(self.sentiment_encoder.classes_, sentiment_probs[j])
                        }
                        result['emotion']['probabilities'] = {
                            class_name: prob.item() for class_name, prob in 
                            zip(self.emotion_encoder.classes_, emotion_probs[j])
                        }
                    
                    results.append(result)
        
        return results

print("✅ Fixed predictor class created!")

✅ Fixed predictor class created!


In [40]:
# Cell: Run Evaluation with Fixed Predictor
def evaluate_model_final():
    """
    Final evaluation using the fixed predictor
    """
    print("🚀 Final Evaluation with Fixed Predictor")
    print("=" * 50)
    
    # Model paths
    model_path = "multitask_model_optimized/final_model"
    sentiment_encoder_path = "multitask_model_optimized/final_model/sentiment_encoder.pkl"
    emotion_encoder_path = "multitask_model_optimized/final_model/emotion_encoder.pkl"
    data_path = "annotated_reddit_posts.csv"
    
    # Load the model with FIXED predictor
    print("📥 Loading model with fixed predictor...")
    predictor = FixedMultiTaskPredictor(  # Using the new class!
        model_path=model_path,
        sentiment_encoder_path=sentiment_encoder_path,
        emotion_encoder_path=emotion_encoder_path
    )
    
    # Load the annotated data
    print("📥 Loading annotated data...")
    df = pd.read_csv(data_path)
    
    # Prepare data
    texts = df['text_content'].tolist()
    true_sentiments = df['sentiment'].tolist()
    true_emotions = df['emotion'].tolist()
    
    print(f"📊 Data loaded: {len(texts)} samples")
    
    # Run inference
    print("\n🔮 Running inference...")
    predictions = predictor.predict_batch(texts, batch_size=8)
    
    # Extract predictions
    pred_sentiments = [pred['sentiment']['label'] for pred in predictions]
    pred_emotions = [pred['emotion']['label'] for pred in predictions]
    
    # Calculate metrics
    print("\n📊 Calculating metrics...")
    sentiment_accuracy = accuracy_score(true_sentiments, pred_sentiments)
    sentiment_f1_macro = f1_score(true_sentiments, pred_sentiments, average='macro', zero_division=0)
    
    emotion_accuracy = accuracy_score(true_emotions, pred_emotions)
    emotion_f1_macro = f1_score(true_emotions, pred_emotions, average='macro', zero_division=0)
    
    # Print results
    print("\n🎯 FINAL EVALUATION RESULTS")
    print("=" * 50)
    print(f"📊 SENTIMENT CLASSIFICATION:")
    print(f"   Accuracy:    {sentiment_accuracy:.4f} ({sentiment_accuracy*100:.2f}%)")
    print(f"   Macro F1:    {sentiment_f1_macro:.4f}")
    
    print(f"\n😊 EMOTION CLASSIFICATION:")
    print(f"   Accuracy:    {emotion_accuracy:.4f} ({emotion_accuracy*100:.2f}%)")
    print(f"   Macro F1:    {emotion_f1_macro:.4f}")
    
    print(f"\n🏆 COMBINED PERFORMANCE:")
    print(f"   Average Accuracy: {(sentiment_accuracy + emotion_accuracy)/2:.4f}")
    print(f"   Average Macro F1: {(sentiment_f1_macro + emotion_f1_macro)/2:.4f}")
    
    # Show some example predictions
    print(f"\n📝 EXAMPLE PREDICTIONS:")
    for i in range(min(5, len(predictions))):
        correct_sent = "✅" if pred_sentiments[i] == true_sentiments[i] else "❌"
        correct_emot = "✅" if pred_emotions[i] == true_emotions[i] else "❌"
        
        print(f"\nSample {i+1}:")
        print(f"  Text: {texts[i][:80]}...")
        print(f"  True:  Sentiment={true_sentiments[i]}, Emotion={true_emotions[i]}")
        print(f"  Pred:  Sentiment={pred_sentiments[i]} {correct_sent}, Emotion={pred_emotions[i]} {correct_emot}")
        print(f"  Conf:  Sentiment={predictions[i]['sentiment']['confidence']:.3f}, Emotion={predictions[i]['emotion']['confidence']:.3f}")
    
    return {
        'sentiment_accuracy': sentiment_accuracy,
        'sentiment_f1_macro': sentiment_f1_macro,
        'emotion_accuracy': emotion_accuracy,
        'emotion_f1_macro': emotion_f1_macro,
        'predictions': predictions
    }

In [19]:
# Run the final evaluation
print("🔧 Running final evaluation...")
results = evaluate_model_final()

🔧 Running final evaluation...
🚀 Final Evaluation with Fixed Predictor
📥 Loading model with fixed predictor...
Model loaded from multitask_model_optimized/final_model
✅ Fixed Model loaded successfully!
Device: cuda
Sentiment classes: [np.str_('Negative'), np.str_('Neutral'), np.str_('Positive')]
Emotion classes: [np.str_('Anger'), np.str_('Fear'), np.str_('Joy'), np.str_('No Emotion'), np.str_('Sadness'), np.str_('Surprise')]
📥 Loading annotated data...
📊 Data loaded: 95 samples

🔮 Running inference...

📊 Calculating metrics...

🎯 FINAL EVALUATION RESULTS
📊 SENTIMENT CLASSIFICATION:
   Accuracy:    0.5895 (58.95%)
   Macro F1:    0.3723

😊 EMOTION CLASSIFICATION:
   Accuracy:    0.1474 (14.74%)
   Macro F1:    0.1260

🏆 COMBINED PERFORMANCE:
   Average Accuracy: 0.3684
   Average Macro F1: 0.2492

📝 EXAMPLE PREDICTIONS:

Sample 1:
  Text: ya screw username really looking forward note 7 iphone 6plus issue screen userna...
  True:  Sentiment=Negative, Emotion=Sadness
  Pred:  Sentiment=Ne

## BERTweet Training

In [25]:
# Cell: Fixed MultiTaskEvaluator for BERTweet
class FixedMultiTaskEvaluator:
    """
    Fixed evaluation class that handles different max_length values
    """
    
    def __init__(
        self,
        model: MultiTaskTransformer,
        tokenizer,
        sentiment_encoder: LabelEncoder,
        emotion_encoder: LabelEncoder,
        device: torch.device,
        max_length: int = 128  # Make it configurable
    ):
        self.model = model
        self.tokenizer = tokenizer
        self.sentiment_encoder = sentiment_encoder
        self.emotion_encoder = emotion_encoder
        self.device = device
        self.max_length = max_length
        
        self.model.eval()
    
    def evaluate_dataset(
        self,
        texts: List[str],
        sentiment_labels: List[int],
        emotion_labels: List[int],
        batch_size: int = 32
    ) -> Dict[str, any]:
        """
        Evaluate model on a dataset with fixed max_length
        """
        dataset = MultiTaskDataset(
            texts=texts,
            sentiment_labels=sentiment_labels,
            emotion_labels=emotion_labels,
            tokenizer=self.tokenizer,
            max_length=self.max_length,  # Use the same max_length as training
            sentiment_label_encoder=self.sentiment_encoder,
            emotion_label_encoder=self.emotion_encoder
        )
        
        dataloader = DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=False,
            num_workers=0
        )
        
        sentiment_predictions = []
        emotion_predictions = []
        sentiment_true_labels = []
        emotion_true_labels = []
        
        with torch.no_grad():
            for batch in dataloader:
                # Only pass the required inputs to avoid token_type_ids issue
                model_inputs = {
                    'input_ids': batch['input_ids'].to(self.device),
                    'attention_mask': batch['attention_mask'].to(self.device)
                }
                
                outputs = self.model(**model_inputs)
                
                # Get predictions
                sentiment_preds = torch.argmax(outputs['sentiment_logits'], dim=-1)
                emotion_preds = torch.argmax(outputs['emotion_logits'], dim=-1)
                
                # Store results
                sentiment_predictions.extend(sentiment_preds.cpu().numpy())
                emotion_predictions.extend(emotion_preds.cpu().numpy())
                sentiment_true_labels.extend(batch['sentiment_labels'].numpy())
                emotion_true_labels.extend(batch['emotion_labels'].numpy())
        
        # Calculate metrics
        results = self._calculate_metrics(
            sentiment_predictions=sentiment_predictions,
            emotion_predictions=emotion_predictions,
            sentiment_true_labels=sentiment_true_labels,
            emotion_true_labels=emotion_true_labels
        )
        
        return results
    
    def _calculate_metrics(
        self,
        sentiment_predictions: List[int],
        emotion_predictions: List[int],
        sentiment_true_labels: List[int],
        emotion_true_labels: List[int]
    ) -> Dict[str, any]:
        """Calculate simplified metrics: only accuracy and macro F1"""
        
        # Sentiment metrics
        sentiment_accuracy = accuracy_score(sentiment_true_labels, sentiment_predictions)
        sentiment_f1_macro = f1_score(sentiment_true_labels, sentiment_predictions, average='macro', zero_division=0)
        
        # Emotion metrics
        emotion_accuracy = accuracy_score(emotion_true_labels, emotion_predictions)
        emotion_f1_macro = f1_score(emotion_true_labels, emotion_predictions, average='macro', zero_division=0)
        
        return {
            'sentiment': {
                'accuracy': sentiment_accuracy,
                'f1_macro': sentiment_f1_macro,
                'predictions': sentiment_predictions,
                'true_labels': sentiment_true_labels
            },
            'emotion': {
                'accuracy': emotion_accuracy,
                'f1_macro': emotion_f1_macro,
                'predictions': emotion_predictions,
                'true_labels': emotion_true_labels
            },
            'combined': {
                'average_accuracy': (sentiment_accuracy + emotion_accuracy) / 2,
                'average_f1': (sentiment_f1_macro + emotion_f1_macro) / 2
            }
        }

print("✅ Fixed MultiTaskEvaluator defined!")

✅ Fixed MultiTaskEvaluator defined!


In [26]:
# Cell: Fixed BERTweet Training Function
def run_ultra_lightweight_training_fixed(
    reddit_data_path: str = "annotated_reddit_posts.csv",
    model_name: str = "vinai/bertweet-base",
    output_dir: str = "./bertweet_model_ultra_light",
    max_external_samples: int = 500
):
    """
    Fixed ultra-lightweight training for BERTweet
    """
    print("🚀 Starting Fixed Ultra-Lightweight Training")
    print("=" * 50)
    
    # Aggressive memory cleanup first
    aggressive_memory_cleanup()
    
    # Load external datasets
    print("\n1️⃣ Loading minimal external datasets...")
    sentiment_data, emotion_data = load_external_datasets()
    
    # Prepare very small external data
    external_data_splits, sentiment_encoder, emotion_encoder = prepare_external_data_for_multitask(
        sentiment_data, emotion_data, max_samples=max_external_samples
    )
    
    # Load Reddit data
    print("\n2️⃣ Loading Reddit data...")
    reddit_df = pd.read_csv(reddit_data_path)
    reddit_evaluation_data = prepare_reddit_data_for_evaluation(
        reddit_df, sentiment_encoder, emotion_encoder
    )
    
    # Ultra-lightweight config with consistent max_length
    config = TrainingConfig(
        model_name=model_name,
        output_dir=output_dir,
        num_epochs=2,       # Minimal epochs
        batch_size=1,       # Smallest possible batch
        learning_rate=2e-5,
        warmup_ratio=0.05,  # Minimal warmup
        weight_decay=0.01,
        max_grad_norm=1.0,
        alpha=0.5,
        adaptive_alpha=False,  # Disable to save memory
        hidden_dropout_prob=0.1,
        attention_dropout_prob=0.1,
        classifier_dropout=0.1,
        max_length=128,     # Keep consistent max_length
        save_total_limit=1  # Keep only 1 checkpoint
    )
    
    print(f"\n3️⃣ Ultra-lightweight config:")
    print(f"  Model: {model_name}")
    print(f"  Batch size: {config.batch_size}")
    print(f"  Max length: {config.max_length}")
    print(f"  Training samples: {len(external_data_splits['train']['texts'])}")
    print(f"  Epochs: {config.num_epochs}")
    
    # Clear memory before model
    aggressive_memory_cleanup()
    
    # Initialize trainer
    print(f"\n4️⃣ Initializing trainer...")
    trainer = MultiTaskTrainer(
        config=config,
        sentiment_num_classes=len(sentiment_encoder.classes_),
        emotion_num_classes=len(emotion_encoder.classes_)
    )
    
    # Setup with gradient checkpointing
    print(f"\n5️⃣ Setting up with memory optimizations...")
    trainer.setup(external_data_splits, sentiment_encoder, emotion_encoder)
    
    # Enable gradient checkpointing to save memory
    if hasattr(trainer.model.shared_encoder, 'gradient_checkpointing_enable'):
        trainer.model.shared_encoder.gradient_checkpointing_enable()
        print("✅ Gradient checkpointing enabled")
    
    # Train with memory monitoring
    print(f"\n6️⃣ Training with memory monitoring...")
    try:
        history = trainer.train()
        print("✅ Training completed!")
    except RuntimeError as e:
        if "out of memory" in str(e):
            print(f"❌ Still out of memory: {e}")
            print("💡 Try restarting kernel and using even smaller batch_size=1")
            return None, None
        else:
            raise e
    
    # Clear memory before evaluation
    aggressive_memory_cleanup()
    
    # Evaluate with FIXED evaluator
    print(f"\n7️⃣ Evaluating with fixed evaluator...")
    evaluator = FixedMultiTaskEvaluator(  # Use the fixed evaluator
        model=trainer.model,
        tokenizer=trainer.tokenizer,
        sentiment_encoder=sentiment_encoder,
        emotion_encoder=emotion_encoder,
        device=device,
        max_length=config.max_length  # Use the same max_length as training
    )
    
    reddit_results = evaluator.evaluate_dataset(
        texts=reddit_evaluation_data['texts'],
        sentiment_labels=reddit_evaluation_data['sentiment_labels'],
        emotion_labels=reddit_evaluation_data['emotion_labels'],
        batch_size=1  # Ultra small batch for evaluation
    )
    
    # Save model
    save_model_and_encoders(
        model=trainer.model,
        tokenizer=trainer.tokenizer,
        sentiment_encoder=sentiment_encoder,
        emotion_encoder=emotion_encoder,
        output_dir=os.path.join(output_dir, 'final_model')
    )
    
    # Print results
    print(f"\n📈 Results:")
    print(f"Sentiment Accuracy: {reddit_results['sentiment']['accuracy']:.4f}")
    print(f"Emotion Accuracy: {reddit_results['emotion']['accuracy']:.4f}")
    
    # Final cleanup
    aggressive_memory_cleanup()
    
    return trainer.model, reddit_results

print("✅ Fixed ultra-lightweight training function ready!")

✅ Fixed ultra-lightweight training function ready!


In [23]:
# Cell: Restart BERTweet Training with Fixed Function
print("🚀 Restarting BERTweet Training with Fixed Function")
print("=" * 60)

# Clear memory first
aggressive_memory_cleanup()

# Run fixed ultra-lightweight training with BERTweet
print("🔥 Step 1: Fixed Ultra-Lightweight BERTweet Training")
print("-" * 50)

bertweet_model, bertweet_results = run_ultra_lightweight_training_fixed(
    reddit_data_path="annotated_reddit_posts.csv",
    model_name="vinai/bertweet-base",  # Using BERTweet instead of DeBERTa
    output_dir="./bertweet_model_ultra_light",
    max_external_samples=500  # Small training set for initial training
)

if bertweet_model is not None:
    print("\n🎉 BERTweet Initial Training Successful!")
    print(f"📊 Initial Results:")
    print(f"   Sentiment Accuracy: {bertweet_results['sentiment']['accuracy']:.4f}")
    print(f"   Emotion Accuracy: {bertweet_results['emotion']['accuracy']:.4f}")
    print(f"   Combined Score: {(bertweet_results['sentiment']['accuracy'] + bertweet_results['emotion']['accuracy'])/2:.4f}")
else:
    print("\n❌ Initial training failed!")
    print("💡 Troubleshooting options:")
    print("1. Restart kernel completely")
    print("2. Use CPU training: device = torch.device('cpu')")
    print("3. Reduce batch size further")

🚀 Restarting BERTweet Training with Fixed Function
🧹 Memory cleaned!
  Allocated: 2.16 GB
  Cached: 2.92 GB
🔥 Step 1: Fixed Ultra-Lightweight BERTweet Training
--------------------------------------------------
🚀 Starting Fixed Ultra-Lightweight Training
🧹 Memory cleaned!
  Allocated: 2.16 GB
  Cached: 2.35 GB

1️⃣ Loading minimal external datasets...
Loading external datasets for training...
✅ SST-2 dataset loaded: 67349 train, 872 val
✅ GoEmotions dataset loaded: 43410 train, 5426 val
🔄 Preparing external datasets for multitask training...
✅ External data prepared:
  Train samples: 400
  Validation samples: 100
  Sentiment classes: [np.str_('Negative'), np.str_('Neutral'), np.str_('Positive')]
  Emotion classes: [np.str_('Anger'), np.str_('Fear'), np.str_('Joy'), np.str_('No Emotion'), np.str_('Sadness'), np.str_('Surprise')]

📈 Training set class distribution:
  Sentiment 'Negative': 192 samples
  Sentiment 'Neutral': 24 samples
  Sentiment 'Positive': 184 samples
  Emotion 'Anger':

In [42]:
# Cell: Import Required Libraries for BERTweet Hyperparameter Tuning
import optuna
from optuna.samplers import TPESampler
from optuna.pruners import MedianPruner
import os
import shutil


In [44]:
# Cell: Fixed Hyperparameter Tuning for BERTweet (With Imports)
class BERTweetHyperparameterTuner:
    """
    Hyperparameter tuning for BERTweet using the same Bayesian optimization method as DeBERTa
    """
    
    def __init__(
        self,
        reddit_data_path: str,
        n_trials: int = 15,
        model_name: str = "vinai/bertweet-base",
        max_external_samples: int = 2000
    ):
        self.reddit_data_path = reddit_data_path
        self.n_trials = n_trials
        self.model_name = model_name
        self.max_external_samples = max_external_samples
        
        # Load external datasets for training (same as DeBERTa)
        print("🔄 Loading external datasets for BERTweet hyperparameter tuning...")
        sentiment_data, emotion_data = load_external_datasets()
        
        # Prepare external data splits
        self.external_data_splits, self.sentiment_encoder, self.emotion_encoder = prepare_external_data_for_multitask(
            sentiment_data, emotion_data, max_samples=max_external_samples
        )
        
        # Load Reddit data for evaluation
        reddit_df = pd.read_csv(reddit_data_path)
        self.reddit_evaluation_data = prepare_reddit_data_for_evaluation(
            reddit_df, self.sentiment_encoder, self.emotion_encoder
        )
        
        print(f"✅ BERTweet Hyperparameter tuner initialized")
        print(f"Model: {model_name}")
        print(f"Training data: {len(self.external_data_splits['train']['texts'])} external samples")
        print(f"Evaluation data: {len(self.reddit_evaluation_data['texts'])} Reddit samples")
        print(f"Trials: {n_trials}")
    
    def objective(self, trial):
        """Optuna objective function for BERTweet - same as DeBERTa"""
        
        # Sample hyperparameters - same ranges as DeBERTa
        learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-4, log=True)
        batch_size = trial.suggest_categorical('batch_size', [2, 4, 8])
        alpha = trial.suggest_float('alpha', 0.3, 0.7)
        hidden_dropout = trial.suggest_float('hidden_dropout_prob', 0.05, 0.3)
        classifier_dropout = trial.suggest_float('classifier_dropout', 0.1, 0.4)
        weight_decay = trial.suggest_float('weight_decay', 0.001, 0.1)
        warmup_ratio = trial.suggest_float('warmup_ratio', 0.05, 0.2)
        num_epochs = trial.suggest_int('num_epochs', 2, 5)
        max_length = trial.suggest_categorical('max_length', [128, 256])
        
        # Create configuration
        config = TrainingConfig(
            model_name=self.model_name,  # BERTweet model
            batch_size=batch_size,
            learning_rate=learning_rate,
            num_epochs=num_epochs,
            warmup_ratio=warmup_ratio,
            weight_decay=weight_decay,
            alpha=alpha,
            hidden_dropout_prob=hidden_dropout,
            classifier_dropout=classifier_dropout,
            max_length=max_length,
            adaptive_alpha=False,
            output_dir=f"./temp_bertweet_trial_{trial.number}",
            save_strategy="no",
            save_total_limit=1
        )
        
        try:
            # Clear memory before trial
            aggressive_memory_cleanup()
            
            # Initialize trainer
            trainer = MultiTaskTrainer(
                config=config,
                sentiment_num_classes=len(self.sentiment_encoder.classes_),
                emotion_num_classes=len(self.emotion_encoder.classes_)
            )
            
            # Setup with external data
            trainer.setup(self.external_data_splits, self.sentiment_encoder, self.emotion_encoder)
            
            # Train model on external data
            history = trainer.train()
            
            # Clear memory before evaluation
            aggressive_memory_cleanup()
            
            # Evaluate on Reddit data using FIXED evaluator
            evaluator = FixedMultiTaskEvaluator(
                model=trainer.model,
                tokenizer=trainer.tokenizer,
                sentiment_encoder=self.sentiment_encoder,
                emotion_encoder=self.emotion_encoder,
                device=device,
                max_length=config.max_length  # Use same max_length as training
            )
            
            reddit_results = evaluator.evaluate_dataset(
                texts=self.reddit_evaluation_data['texts'],
                sentiment_labels=self.reddit_evaluation_data['sentiment_labels'],
                emotion_labels=self.reddit_evaluation_data['emotion_labels'],
                batch_size=2  # Small batch for evaluation
            )
            
            # Combined score based on Reddit evaluation
            combined_score = (
                reddit_results['sentiment']['accuracy'] + 
                reddit_results['emotion']['accuracy']
            ) / 2
            
            print(f"Trial {trial.number}: Combined Score = {combined_score:.4f} "
                  f"(Sentiment: {reddit_results['sentiment']['accuracy']:.4f}, "
                  f"Emotion: {reddit_results['emotion']['accuracy']:.4f})")
            
            # Clean up
            del trainer, evaluator
            aggressive_memory_cleanup()
            
            return combined_score
            
        except Exception as e:
            print(f"Trial {trial.number} failed: {e}")
            aggressive_memory_cleanup()
            return 0.0
    
    def tune(self) -> optuna.Study:
        """Run hyperparameter optimization - same method as DeBERTa"""
        
        study = optuna.create_study(
            direction='maximize',
            sampler=TPESampler(seed=42),
            pruner=MedianPruner(n_startup_trials=3, n_warmup_steps=2)
        )
        
        print(f"🔍 Starting BERTweet hyperparameter optimization...")
        print(f"This will run {self.n_trials} trials - each training and evaluating a model")
        print("=" * 60)
        
        study.optimize(self.objective, n_trials=self.n_trials)
        
        # Print results
        print(f"\n🏆 BERTweet Optimization completed!")
        print(f"Best trial: {study.best_trial.number}")
        print(f"Best combined score: {study.best_value:.4f}")
        print(f"Best parameters:")
        for key, value in study.best_params.items():
            print(f"  {key}: {value}")
        
        return study

print("✅ BERTweet hyperparameter tuning class defined!")

✅ BERTweet hyperparameter tuning class defined!


In [None]:
# Cell: Run BERTweet Hyperparameter Tuning
print("\n🔍 Step 2: BERTweet Hyperparameter Optimization (Same Method as DeBERTa)")
print("-" * 50)

# Clear memory first
aggressive_memory_cleanup()

# Run BERTweet hyperparameter tuning using the same Bayesian optimization method
print("🔍 Starting BERTweet hyperparameter optimization...")

try:
    # Initialize BERTweet tuner
    bertweet_tuner = BERTweetHyperparameterTuner(
        reddit_data_path="annotated_reddit_posts.csv",
        n_trials=15,  # Same number of trials as DeBERTa
        model_name="vinai/bertweet-base",
        max_external_samples=2000
    )
    
    # Run the same Bayesian optimization process
    bertweet_study = bertweet_tuner.tune()
    
    print("\n✅ BERTweet hyperparameter optimization completed!")
    
except Exception as e:
    print(f"❌ BERTweet hyperparameter optimization failed: {e}")
    print("💡 This might be due to memory constraints or other issues")
    bertweet_study = None


🔍 Step 2: BERTweet Hyperparameter Optimization (Same Method as DeBERTa)
--------------------------------------------------
🧹 Memory cleaned!
  Allocated: 0.02 GB
  Cached: 2.35 GB
🔍 Starting BERTweet hyperparameter optimization...
🔄 Loading external datasets for BERTweet hyperparameter tuning...
Loading external datasets for training...
✅ SST-2 dataset loaded: 67349 train, 872 val
✅ GoEmotions dataset loaded: 43410 train, 5426 val
🔄 Preparing external datasets for multitask training...
✅ External data prepared:
  Train samples: 1600
  Validation samples: 400
  Sentiment classes: [np.str_('Negative'), np.str_('Neutral'), np.str_('Positive')]
  Emotion classes: [np.str_('Anger'), np.str_('Fear'), np.str_('Joy'), np.str_('No Emotion'), np.str_('Sadness'), np.str_('Surprise')]

📈 Training set class distribution:
  Sentiment 'Negative': 721 samples
  Sentiment 'Neutral': 101 samples
  Sentiment 'Positive': 778 samples
  Emotion 'Anger': 522 samples
  Emotion 'Fear': 267 samples
  Emotion '

[I 2025-07-12 11:28:39,047] A new study created in memory with name: no-name-55e08ef5-017c-4c93-8c3f-fc8a65e3be63


🔍 Starting BERTweet hyperparameter optimization...
This will run 15 trials - each training and evaluating a model
🧹 Memory cleaned!
  Allocated: 0.02 GB
  Cached: 0.04 GB


[I 2025-07-12 11:28:42,250] Trial 0 finished with value: 0.0 and parameters: {'learning_rate': 2.368863950364079e-05, 'batch_size': 2, 'alpha': 0.36240745617697456, 'hidden_dropout_prob': 0.08899863008405066, 'classifier_dropout': 0.11742508365045984, 'weight_decay': 0.08675143843171859, 'warmup_ratio': 0.14016725176148134, 'num_epochs': 4, 'max_length': 256}. Best is trial 0 with value: 0.0.


✅ Setup complete!
  Model: vinai/bertweet-base
  Training samples: 1600
  Validation samples: 400
  Training steps per epoch: 800
  Total training steps: 3200
🚀 Starting training for 4 epochs...

📍 Epoch 1/4
--------------------------------------------------
Trial 0 failed: The expanded size of the tensor (256) must match the existing size (130) at non-singleton dimension 1.  Target sizes: [2, 256].  Tensor sizes: [1, 130]
🧹 Memory cleaned!
  Allocated: 0.56 GB
  Cached: 0.60 GB
🧹 Memory cleaned!
  Allocated: 0.02 GB
  Cached: 0.04 GB


[I 2025-07-12 11:28:45,078] Trial 1 finished with value: 0.0 and parameters: {'learning_rate': 6.798962421591133e-05, 'batch_size': 2, 'alpha': 0.4216968971838151, 'hidden_dropout_prob': 0.18118910790805948, 'classifier_dropout': 0.22958350559263474, 'weight_decay': 0.029831684879606152, 'warmup_ratio': 0.14177793420835694, 'num_epochs': 2, 'max_length': 256}. Best is trial 0 with value: 0.0.


✅ Setup complete!
  Model: vinai/bertweet-base
  Training samples: 1600
  Validation samples: 400
  Training steps per epoch: 800
  Total training steps: 1600
🚀 Starting training for 2 epochs...

📍 Epoch 1/2
--------------------------------------------------
Trial 1 failed: The expanded size of the tensor (256) must match the existing size (130) at non-singleton dimension 1.  Target sizes: [2, 256].  Tensor sizes: [1, 130]
🧹 Memory cleaned!
  Allocated: 0.56 GB
  Cached: 0.60 GB
🧹 Memory cleaned!
  Allocated: 0.02 GB
  Cached: 0.04 GB
✅ Setup complete!
  Model: vinai/bertweet-base
  Training samples: 1600
  Validation samples: 400
  Training steps per epoch: 800
  Total training steps: 4000
🚀 Starting training for 5 epochs...

📍 Epoch 1/5
--------------------------------------------------
  Batch 20/800 | Loss: 2.2177 | LR: 2.39e-06 | Alpha: 0.537
  Batch 40/800 | Loss: 2.1162 | LR: 4.78e-06 | Alpha: 0.537
  Batch 60/800 | Loss: 2.0796 | LR: 7.18e-06 | Alpha: 0.537
  Batch 80/800 | Los