In [22]:
# Cell 1: Imports and Configuration
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torch.optim import AdamW
from torch.optim.lr_scheduler import LambdaLR
import numpy as np
import pandas as pd
import json
import os
from typing import Dict, List, Tuple, Optional, Union
import random
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import accuracy_score, f1_score
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.class_weight import compute_class_weight
from transformers import (
    AutoTokenizer, AutoModel, AutoConfig,
    get_linear_schedule_with_warmup
)
import warnings
warnings.filterwarnings('ignore')

# Device configuration.
# `device` is a module-level global: later cells read it directly
# (MultiTaskTrainer.__init__ stores it as `self.device`).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")
# GPU details are only queried when CUDA is available; index 0 is the first
# CUDA device visible to this process.
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"CUDA Version: {torch.version.cuda}")

Using device: cuda
GPU: NVIDIA GeForce RTX 4060
CUDA Version: 12.1


In [23]:
# Cell 2: Multitask Model Architecture
class MultiTaskTransformer(nn.Module):
    """
    Multitask model for joint sentiment and emotion classification.

    Architecture:
    - A shared pretrained transformer encoder loaded with ``AutoModel``
      (the notebook lists BERTweet and DeBERTa as options).
    - Three self-attention blocks applied to the encoder output: one shared
      block and one per task. Each block is followed by a residual
      connection, LayerNorm and dropout, and is pooled by taking the vector
      at sequence position 0.
    - Two parallel classification heads. Each head receives the shared pooled
      vector concatenated with its own task-specific pooled vector.
    """

    # Task names accepted by ``forward``; ``task=None`` selects both.
    _TASKS = ("sentiment", "emotion")

    def __init__(
        self,
        model_name: str = "microsoft/deberta-base",
        sentiment_num_classes: int = 3,
        emotion_num_classes: int = 6,
        hidden_dropout_prob: float = 0.1,
        attention_dropout_prob: float = 0.1,
        classifier_dropout: float = 0.1,
        freeze_encoder: bool = False
    ):
        """
        Args:
            model_name: Hugging Face identifier of the shared encoder.
            sentiment_num_classes: Output size of the sentiment head.
            emotion_num_classes: Output size of the emotion head.
            hidden_dropout_prob: Written to ``config.hidden_dropout_prob``
                before the encoder is loaded.
            attention_dropout_prob: Written to
                ``config.attention_probs_dropout_prob`` and also used as the
                dropout of the three attention blocks defined here.
            classifier_dropout: Dropout applied after each attention block
                and inside both classification heads.
            freeze_encoder: When True, gradients are disabled for every
                encoder parameter; the layers defined here stay trainable.
        """
        super(MultiTaskTransformer, self).__init__()

        self.model_name = model_name
        self.sentiment_num_classes = sentiment_num_classes
        self.emotion_num_classes = emotion_num_classes

        # Load configuration and adjust dropout
        config = AutoConfig.from_pretrained(model_name)
        config.hidden_dropout_prob = hidden_dropout_prob
        config.attention_probs_dropout_prob = attention_dropout_prob

        # Shared transformer encoder
        self.shared_encoder = AutoModel.from_pretrained(
            model_name,
            config=config,
            ignore_mismatched_sizes=True
        )

        # Freeze encoder if specified
        if freeze_encoder:
            for param in self.shared_encoder.parameters():
                param.requires_grad = False

        hidden_size = self.shared_encoder.config.hidden_size

        # Attention blocks. Modules are created in the same order as before
        # (sentiment, emotion, shared) so that seeded initialisation draws
        # the same random numbers.
        self.sentiment_attention = self._build_attention(hidden_size, attention_dropout_prob)
        self.emotion_attention = self._build_attention(hidden_size, attention_dropout_prob)
        self.shared_attention = self._build_attention(hidden_size, attention_dropout_prob)

        # Layer normalization
        self.sentiment_norm = nn.LayerNorm(hidden_size)
        self.emotion_norm = nn.LayerNorm(hidden_size)
        self.shared_norm = nn.LayerNorm(hidden_size)

        # Dropout layers
        self.sentiment_dropout = nn.Dropout(classifier_dropout)
        self.emotion_dropout = nn.Dropout(classifier_dropout)
        self.shared_dropout = nn.Dropout(classifier_dropout)

        # Classification heads (input is shared + task-specific => 2 * hidden)
        self.sentiment_classifier = self._build_head(
            hidden_size, classifier_dropout, sentiment_num_classes
        )
        self.emotion_classifier = self._build_head(
            hidden_size, classifier_dropout, emotion_num_classes
        )

        # Initialize weights
        self._init_weights()

    @staticmethod
    def _build_attention(hidden_size: int, dropout: float) -> nn.MultiheadAttention:
        """Create one batch-first, 8-head self-attention block."""
        # NOTE(review): 8 heads presumably requires hidden_size to be a
        # multiple of 8 (true for 768-wide base models) - confirm for others.
        return nn.MultiheadAttention(
            embed_dim=hidden_size,
            num_heads=8,
            dropout=dropout,
            batch_first=True
        )

    @staticmethod
    def _build_head(hidden_size: int, dropout: float, num_classes: int) -> nn.Sequential:
        """Create a two-layer MLP head mapping 2 * hidden_size to num_classes logits."""
        return nn.Sequential(
            nn.Linear(hidden_size * 2, hidden_size),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_size, num_classes)
        )

    def _init_weights(self):
        """Xavier-initialise the Linear weights of both heads and zero their biases."""
        for module in [self.sentiment_classifier, self.emotion_classifier]:
            for layer in module:
                if isinstance(layer, nn.Linear):
                    nn.init.xavier_uniform_(layer.weight)
                    nn.init.zeros_(layer.bias)

    @staticmethod
    def _attend_and_pool(
        attention: nn.Module,
        norm: nn.Module,
        dropout: nn.Module,
        sequence_output: torch.Tensor,
        padding_mask: torch.Tensor
    ):
        """
        Run one self-attention block and pool its output.

        Applies ``attention`` with ``sequence_output`` as query, key and
        value, adds the residual, normalises, applies dropout and returns
        ``(vector at position 0, attention weights)``.
        """
        attended, weights = attention(
            sequence_output, sequence_output, sequence_output,
            key_padding_mask=padding_mask
        )
        attended = dropout(norm(attended + sequence_output))
        # Pool with the first token of the sequence
        return attended[:, 0, :], weights

    def forward(
        self,
        input_ids: torch.Tensor,
        attention_mask: torch.Tensor,
        task: Optional[str] = None
    ) -> Dict[str, torch.Tensor]:
        """
        Forward pass

        Args:
            input_ids: Token IDs [batch_size, seq_len]
            attention_mask: Attention mask [batch_size, seq_len]
            task: Optional task specification ("sentiment", "emotion", or None for both)

        Returns:
            Dictionary with ``<task>_logits`` and ``<task>_attention_weights``
            for every requested task.

        Raises:
            ValueError: If ``task`` is neither None nor a known task name.
                Previously an unknown name silently produced an empty dict.
        """
        if task is not None and task not in self._TASKS:
            raise ValueError(
                f"Unknown task {task!r}; expected one of {self._TASKS} or None"
            )

        # Shared encoder
        encoder_outputs = self.shared_encoder(
            input_ids=input_ids,
            attention_mask=attention_mask,
            return_dict=True
        )

        # Sequence output [batch_size, seq_len, hidden_size]
        sequence_output = encoder_outputs.last_hidden_state

        # True marks positions the attention blocks must ignore; computed
        # once and shared by all three blocks.
        padding_mask = ~attention_mask.bool()

        # Shared attention captures features common to both tasks
        shared_pooled, _ = self._attend_and_pool(
            self.shared_attention, self.shared_norm, self.shared_dropout,
            sequence_output, padding_mask
        )

        outputs = {}

        # Sentiment branch
        if task is None or task == "sentiment":
            sentiment_pooled, sentiment_weights = self._attend_and_pool(
                self.sentiment_attention, self.sentiment_norm, self.sentiment_dropout,
                sequence_output, padding_mask
            )
            sentiment_features = torch.cat([shared_pooled, sentiment_pooled], dim=-1)
            outputs["sentiment_logits"] = self.sentiment_classifier(sentiment_features)
            outputs["sentiment_attention_weights"] = sentiment_weights

        # Emotion branch
        if task is None or task == "emotion":
            emotion_pooled, emotion_weights = self._attend_and_pool(
                self.emotion_attention, self.emotion_norm, self.emotion_dropout,
                sequence_output, padding_mask
            )
            emotion_features = torch.cat([shared_pooled, emotion_pooled], dim=-1)
            outputs["emotion_logits"] = self.emotion_classifier(emotion_features)
            outputs["emotion_attention_weights"] = emotion_weights

        return outputs

# Model configuration options.
# Maps a short alias to the Hugging Face model identifier ("name") and a
# human-readable description.
# NOTE(review): in the cells visible here only the print below reads this
# dict, and TrainingConfig defaults to "roberta-base", which is not listed -
# confirm which encoder is actually intended.
MODEL_CONFIGS = {
    "bertweet": {
        "name": "vinai/bertweet-base",
        "description": "BERTweet optimized for social media text"
    },
    "deberta": {
        "name": "microsoft/deberta-base",
        "description": "DeBERTa with enhanced attention mechanism"
    }
}

# Cell-completion banner plus the list of available aliases.
print("✅ Multitask model architecture defined!")
print("Available models:", list(MODEL_CONFIGS.keys()))

✅ Multitask model architecture defined!
Available models: ['bertweet', 'deberta']


In [None]:
# Cell 3: Updated Dataset Class and External Data Loading (FIXED)
from datasets import load_dataset, Dataset as HFDataset
from collections import Counter

class MultiTaskDataset(Dataset):
    """
    Map-style dataset yielding one tokenised text together with its
    sentiment label and its emotion label.

    Each item is a dict with keys ``input_ids``, ``attention_mask``,
    ``sentiment_labels``, ``emotion_labels`` and ``text``.
    """

    def __init__(
        self,
        texts: List[str],
        sentiment_labels: List[int],
        emotion_labels: List[int],
        tokenizer,
        max_length: int = 512,
        sentiment_label_encoder=None,
        emotion_label_encoder=None
    ):
        """
        Args:
            texts: Raw input texts.
            sentiment_labels: Integer sentiment label per text.
            emotion_labels: Integer emotion label per text.
            tokenizer: Callable invoked as
                ``tokenizer(text, truncation=..., padding=..., max_length=..., return_tensors='pt')``.
            max_length: Every sample is padded/truncated to this length.
            sentiment_label_encoder: Stored on the instance, not used here.
            emotion_label_encoder: Stored on the instance, not used here.

        Raises:
            AssertionError: If the three sequences differ in length.
        """
        # Raw data
        self.texts = texts
        self.sentiment_labels = sentiment_labels
        self.emotion_labels = emotion_labels

        # Tokenisation settings
        self.tokenizer = tokenizer
        self.max_length = max_length

        # Encoders are only carried along for callers that need them
        self.sentiment_label_encoder = sentiment_label_encoder
        self.emotion_label_encoder = emotion_label_encoder

        # All three sequences must line up index by index
        assert len(texts) == len(sentiment_labels) == len(emotion_labels), \
            "All inputs must have the same length"

    def __len__(self):
        """Number of samples."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenise sample ``idx`` and return it with both labels as long tensors."""
        sample_text = str(self.texts[idx])
        sentiment_target = self.sentiment_labels[idx]
        emotion_target = self.emotion_labels[idx]

        # The tokenizer returns tensors with a leading batch dimension of 1
        tokenized = self.tokenizer(
            sample_text,
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt'
        )

        item = {}
        # Flatten to drop the batch dimension
        item['input_ids'] = tokenized['input_ids'].flatten()
        item['attention_mask'] = tokenized['attention_mask'].flatten()
        item['sentiment_labels'] = torch.tensor(sentiment_target, dtype=torch.long)
        item['emotion_labels'] = torch.tensor(emotion_target, dtype=torch.long)
        item['text'] = sample_text
        return item

def load_external_datasets() -> Tuple[Dict, Dict]:
    """
    Load the external training corpora: SST-2 (sentiment) and GoEmotions
    (emotion, "simplified" configuration).

    Each corpus is loaded independently. If anything goes wrong while loading
    one of them, the error is printed and small dummy data produced by the
    matching ``_create_dummy_*`` helper is used instead.

    Returns:
        ``(sentiment_data, emotion_data)``, each a dict with the keys
        ``'train'`` and ``'validation'``.
    """
    print("📁 Loading external datasets for training...")

    wanted_splits = ('train', 'validation')

    # --- Sentiment: SST-2 ---
    try:
        sst2_splits = load_dataset("sst2")
        sentiment_data = {split: sst2_splits[split] for split in wanted_splits}
        print(f"✅ SST-2 dataset loaded: {len(sentiment_data['train'])} train, {len(sentiment_data['validation'])} val")
    except Exception as e:
        # Best-effort fallback so the rest of the notebook can still run
        print(f"⚠️ Could not load SST-2: {e}. Using dummy data.")
        sentiment_data = _create_dummy_sentiment_data()

    # --- Emotion: GoEmotions ---
    try:
        go_emotions_splits = load_dataset("go_emotions", "simplified")
        emotion_data = {split: go_emotions_splits[split] for split in wanted_splits}
        print(f"✅ GoEmotions dataset loaded: {len(emotion_data['train'])} train, {len(emotion_data['validation'])} val")
    except Exception as e:
        # Best-effort fallback so the rest of the notebook can still run
        print(f"⚠️ Could not load GoEmotions: {e}. Using dummy data.")
        emotion_data = _create_dummy_emotion_data()

    return sentiment_data, emotion_data

def _create_dummy_sentiment_data() -> Dict:
    """
    Build a tiny synthetic binary-sentiment dataset for testing.

    Six (sentence, label) pairs are repeated 200 times (1200 rows) using the
    column names ``'sentence'`` and ``'label'``. The validation split is the
    first 200 rows of the training split.
    """
    repeats = 200
    examples = [
        ("I love this product!", 1),
        ("This is terrible", 0),
        ("It's okay", 1),
        ("Amazing quality", 1),
        ("Worst experience ever", 0),
        ("Not bad", 1),
    ]
    sentences = [sentence for sentence, _ in examples] * repeats
    labels = [label for _, label in examples] * repeats

    dataset = HFDataset.from_dict({'sentence': sentences, 'label': labels})
    return {'train': dataset, 'validation': dataset.select(range(200))}

def _create_dummy_emotion_data() -> Dict:
    """
    Build a tiny synthetic six-class emotion dataset for testing.

    Eight (text, label) pairs with labels in 0..5 are repeated 200 times
    (1600 rows) using the column names ``'text'`` and ``'labels'``. The
    validation split is the first 200 rows of the training split.
    """
    repeats = 200
    examples = [
        ("I'm so happy!", 0),
        ("This is sad", 1),
        ("I'm angry", 2),
        ("That's scary", 3),
        ("What a surprise!", 4),
        ("This is neutral", 5),
        ("I love this!", 0),
        ("Great stuff", 0),
    ]
    texts = [text for text, _ in examples] * repeats
    labels = [label for _, label in examples] * repeats

    dataset = HFDataset.from_dict({'text': texts, 'labels': labels})
    return {'train': dataset, 'validation': dataset.select(range(200))}

def prepare_external_data_for_multitask(
    sentiment_data: Dict,
    emotion_data: Dict,
    max_samples: int = 10000,
    random_state: Optional[int] = None
) -> Tuple[Dict, LabelEncoder, LabelEncoder]:
    """
    Prepare external datasets for multitask training.

    Steps:
    1. Keep only emotion examples whose (first) label id is in 0..5.
    2. Take up to ``max_samples`` rows from each training split.
    3. Map binary sentiment labels to three classes: 0 -> 0 (Negative),
       1 -> 2 (Positive), with roughly 10% of the positives relabelled at
       random as 1 (Neutral) so that all three classes occur.
    4. Truncate both tasks to the same length and split 80/20 (no shuffle).

    Args:
        sentiment_data: Dict with a ``'train'`` split exposing the columns
            ``'sentence'`` and ``'label'``.
        emotion_data: Dict with ``'train'`` and ``'validation'`` splits that
            support ``.filter(fn)`` and expose ``'text'`` and ``'labels'``.
            Side effect (unchanged from the original): both splits in this
            dict are replaced by their filtered versions.
        max_samples: Maximum number of rows taken from each training split.
        random_state: Optional seed for the random neutral relabelling. When
            None, numpy's global random state is used, as before.

    Returns:
        ``(data_splits, sentiment_encoder, emotion_encoder)`` where
        ``data_splits`` has ``'train'`` and ``'val'`` entries, each holding
        ``'texts'``, ``'sentiment_labels'`` and ``'emotion_labels'``.

    NOTE(review): the returned texts are the *sentiment* texts; the emotion
    labels come from unrelated emotion-corpus rows that merely share the same
    index. The emotion targets therefore do not describe the returned texts -
    confirm whether this pairing is intended.

    NOTE(review): label ids 0..5 are used directly as indices into
    ['Anger', 'Fear', 'Joy', 'No Emotion', 'Sadness', 'Surprise']. In the
    published GoEmotions label list, ids 0..5 appear to be different emotions
    (admiration, amusement, anger, annoyance, approval, caring) - verify the
    id-to-name mapping before trusting the emotion class names.
    """
    print("🔄 Preparing external datasets for multitask training...")

    # Both np.random and RandomState expose random_sample() and choice(), so
    # the same code serves the seeded and the legacy (global state) path.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    # Filter emotion data to first 6 classes only (to match the Reddit data)
    def filter_emotion_classes(example):
        labels = example['labels']
        # Handle both multi-label (list) and single-label (int) rows
        if isinstance(labels, list):
            return bool(labels) and labels[0] in range(6)
        return labels in range(6)

    emotion_data['train'] = emotion_data['train'].filter(filter_emotion_classes)
    emotion_data['validation'] = emotion_data['validation'].filter(filter_emotion_classes)

    # Extract texts and labels
    raw_sentiment_texts = sentiment_data['train']['sentence'][:max_samples]
    raw_sentiment_labels = sentiment_data['train']['label'][:max_samples]

    emotion_texts = emotion_data['train']['text'][:max_samples]
    emotion_labels_raw = emotion_data['train']['labels'][:max_samples]

    # Multi-label rows are reduced to their first label
    emotion_labels = [
        (label[0] if label else 0) if isinstance(label, list) else label
        for label in emotion_labels_raw
    ]

    # Label encoders with fixed class lists matching the Reddit data
    sentiment_encoder = LabelEncoder()
    emotion_encoder = LabelEncoder()
    sentiment_encoder.classes_ = np.array(['Negative', 'Neutral', 'Positive'])
    emotion_encoder.classes_ = np.array(['Anger', 'Fear', 'Joy', 'No Emotion', 'Sadness', 'Surprise'])

    # Convert binary labels to three classes. Texts and labels are filtered
    # together so that a row with any other label value (e.g. -1 for
    # "unlabelled") is dropped as a whole instead of shifting the alignment.
    sentiment_texts = []
    converted_sentiment_labels = []
    for text, label in zip(raw_sentiment_texts, raw_sentiment_labels):
        if label == 0:  # Negative
            converted = 0
        elif label == 1:  # Positive, ~10% relabelled as Neutral
            converted = 1 if rng.random_sample() < 0.1 else 2
        else:
            continue
        sentiment_texts.append(text)
        converted_sentiment_labels.append(converted)

    # Ensure the Neutral class occurs at least once. The forced count is
    # 10% of the data capped at 50 (identical to the former fixed 50 once
    # there are 500+ rows) so small datasets no longer crash the sampler.
    n_sentiment = len(converted_sentiment_labels)
    if n_sentiment > 0 and 1 not in converted_sentiment_labels:
        n_forced = min(50, max(1, n_sentiment // 10))
        for idx in rng.choice(n_sentiment, size=n_forced, replace=False):
            converted_sentiment_labels[idx] = 1

    # Balance the datasets - use minimum length
    min_length = min(len(sentiment_texts), len(emotion_texts))

    final_texts = sentiment_texts[:min_length]
    final_sentiment_labels = converted_sentiment_labels[:min_length]
    final_emotion_labels = emotion_labels[:min_length]

    # Create train/val splits (first 80% / last 20%)
    split_idx = int(0.8 * min_length)

    data_splits = {
        'train': {
            'texts': final_texts[:split_idx],
            'sentiment_labels': final_sentiment_labels[:split_idx],
            'emotion_labels': final_emotion_labels[:split_idx]
        },
        'val': {
            'texts': final_texts[split_idx:],
            'sentiment_labels': final_sentiment_labels[split_idx:],
            'emotion_labels': final_emotion_labels[split_idx:]
        }
    }

    print(f"✅ External data prepared:")
    print(f"  Train samples: {len(data_splits['train']['texts'])}")
    print(f"  Validation samples: {len(data_splits['val']['texts'])}")
    print(f"  Sentiment classes: {list(sentiment_encoder.classes_)}")
    print(f"  Emotion classes: {list(emotion_encoder.classes_)}")

    # Print class distribution
    train_sentiment_counts = Counter(data_splits['train']['sentiment_labels'])
    train_emotion_counts = Counter(data_splits['train']['emotion_labels'])

    print(f"\n📈 Training set class distribution:")
    for i, class_name in enumerate(sentiment_encoder.classes_):
        count = train_sentiment_counts.get(i, 0)
        print(f"  Sentiment '{class_name}': {count} samples")

    for i, class_name in enumerate(emotion_encoder.classes_):
        count = train_emotion_counts.get(i, 0)
        print(f"  Emotion '{class_name}': {count} samples")

    return data_splits, sentiment_encoder, emotion_encoder

def prepare_reddit_data_for_evaluation(
    df: pd.DataFrame,
    sentiment_encoder: LabelEncoder,
    emotion_encoder: LabelEncoder,
    sentiment_column: str = 'sentiment',
    emotion_column: str = 'emotion',
    text_column: str = 'text_content'
) -> Dict:
    """
    Encode the Reddit data with already-fitted label encoders so it can be
    used for evaluation (never for training).

    Each label column is first transformed in one call. If the encoder
    rejects it with ``ValueError``, labels are encoded one by one and every
    label missing from ``encoder.classes_`` is reported and replaced by a
    default class: ``'Neutral'`` for sentiment, ``'No Emotion'`` for emotion.

    Args:
        df: Frame holding the text and both label columns.
        sentiment_encoder: Encoder providing ``transform`` and ``classes_``.
        emotion_encoder: Encoder providing ``transform`` and ``classes_``.
        sentiment_column: Name of the sentiment label column.
        emotion_column: Name of the emotion label column.
        text_column: Name of the text column.

    Returns:
        Dict with the keys ``'texts'``, ``'sentiment_labels'`` and
        ``'emotion_labels'`` (plain Python lists).
    """
    print("🔄 Preparing Reddit data for evaluation...")

    # Pull the three columns out as plain lists
    texts = df[text_column].tolist()
    sentiment_labels_text = df[sentiment_column].tolist()
    emotion_labels_text = df[emotion_column].tolist()

    # --- Sentiment: bulk transform, per-label fallback on unseen labels ---
    try:
        sentiment_labels = sentiment_encoder.transform(sentiment_labels_text)
    except ValueError as e:
        print(f"⚠️ Sentiment label mismatch: {e}")
        recovered_sentiment = []
        for label in sentiment_labels_text:
            if label not in sentiment_encoder.classes_:
                print(f"⚠️ Unknown sentiment label '{label}', mapping to 'Neutral'")
                encodable = 'Neutral'
            else:
                encodable = label
            recovered_sentiment.append(sentiment_encoder.transform([encodable])[0])
        sentiment_labels = np.array(recovered_sentiment)

    # --- Emotion: same strategy with 'No Emotion' as the default class ---
    try:
        emotion_labels = emotion_encoder.transform(emotion_labels_text)
    except ValueError as e:
        print(f"⚠️ Emotion label mismatch: {e}")
        recovered_emotion = []
        for label in emotion_labels_text:
            if label not in emotion_encoder.classes_:
                print(f"⚠️ Unknown emotion label '{label}', mapping to 'No Emotion'")
                encodable = 'No Emotion'
            else:
                encodable = label
            recovered_emotion.append(emotion_encoder.transform([encodable])[0])
        emotion_labels = np.array(recovered_emotion)

    print(f"✅ Reddit evaluation data prepared: {len(texts)} samples")

    return dict(
        texts=texts,
        sentiment_labels=sentiment_labels.tolist(),
        emotion_labels=emotion_labels.tolist()
    )

def create_stratified_sampler(sentiment_labels: List[int], emotion_labels: List[int]) -> WeightedRandomSampler:
    """
    Build a ``WeightedRandomSampler`` that balances the joint
    (sentiment, emotion) label combinations.

    Every sample is keyed by the string ``"<sentiment>_<emotion>"``. Each
    distinct key receives sklearn's "balanced" class weight, and every sample
    is drawn (with replacement) with the weight of its key. One epoch draws
    as many samples as there are inputs.
    """
    # One compound key per sample
    compound_labels = [f"{s}_{e}" for s, e in zip(sentiment_labels, emotion_labels)]

    # Distinct keys; compute_class_weight requires `classes` as a numpy array
    distinct_keys = list(set(compound_labels))
    balanced_weights = compute_class_weight(
        'balanced',
        classes=np.array(distinct_keys),
        y=compound_labels
    )

    # key -> weight lookup, then expand to one weight per sample
    weight_by_key = {key: weight for key, weight in zip(distinct_keys, balanced_weights)}
    per_sample_weights = [weight_by_key[key] for key in compound_labels]

    return WeightedRandomSampler(
        weights=per_sample_weights,
        num_samples=len(per_sample_weights),
        replacement=True
    )

print("✅ Updated dataset preparation functions defined!")

✅ Updated dataset preparation functions defined!


In [25]:
# Cell 4: Loss Function with Weighting
class MultiTaskLoss(nn.Module):
    """
    Convex combination of two cross-entropy losses:

        total = alpha * sentiment_loss + (1 - alpha) * emotion_loss
    """

    def __init__(
        self,
        alpha: float = 0.5,
        sentiment_class_weights: Optional[torch.Tensor] = None,
        emotion_class_weights: Optional[torch.Tensor] = None,
        device: torch.device = None
    ):
        """
        Args:
            alpha: Weight of the sentiment loss; the emotion loss gets
                ``1 - alpha``. Stored as given (only ``update_alpha`` clamps).
            sentiment_class_weights: Optional per-class weights for the
                sentiment cross-entropy.
            emotion_class_weights: Optional per-class weights for the
                emotion cross-entropy.
            device: Device the class weights are moved to (CPU when omitted).
        """
        super().__init__()

        self.alpha = alpha
        self.device = device or torch.device('cpu')

        # One cross-entropy criterion per task, each with optional class weights
        self.sentiment_loss_fn = nn.CrossEntropyLoss(
            weight=self._on_device(sentiment_class_weights)
        )
        self.emotion_loss_fn = nn.CrossEntropyLoss(
            weight=self._on_device(emotion_class_weights)
        )

    def _on_device(self, class_weights: Optional[torch.Tensor]) -> Optional[torch.Tensor]:
        """Move ``class_weights`` to ``self.device``; ``None`` passes through."""
        if class_weights is None:
            return None
        return class_weights.to(self.device)

    def forward(
        self,
        sentiment_logits: torch.Tensor,
        emotion_logits: torch.Tensor,
        sentiment_labels: torch.Tensor,
        emotion_labels: torch.Tensor
    ) -> Dict[str, torch.Tensor]:
        """
        Compute both task losses and their weighted sum.

        Returns:
            Dict with ``'total_loss'``, ``'sentiment_loss'`` and
            ``'emotion_loss'`` (tensors) plus ``'alpha'`` (the float weight
            that was applied).
        """
        per_task_sentiment = self.sentiment_loss_fn(sentiment_logits, sentiment_labels)
        per_task_emotion = self.emotion_loss_fn(emotion_logits, emotion_labels)

        # Weighted combination
        combined = self.alpha * per_task_sentiment + (1 - self.alpha) * per_task_emotion

        return {
            'total_loss': combined,
            'sentiment_loss': per_task_sentiment,
            'emotion_loss': per_task_emotion,
            'alpha': self.alpha
        }

    def update_alpha(self, new_alpha: float):
        """Set ``alpha`` to ``new_alpha`` clamped to the range [0.3, 0.7]."""
        capped = min(0.7, new_alpha)
        self.alpha = max(0.3, capped)

def compute_class_weights_from_labels(
    labels: List[int],
    device: torch.device,
    num_classes: Optional[int] = None
) -> torch.Tensor:
    """
    Compute "balanced" class weights for imbalanced datasets.

    The weight of a class that occurs in ``labels`` is
    ``n_samples / (n_present_classes * count_of_class)``, i.e. the same
    heuristic scikit-learn uses for ``class_weight='balanced'``.

    Args:
        labels: Integer class label of every sample.
        device: Device on which the returned tensor is created.
        num_classes: Total number of classes the loss expects. When None
            (the default, previous behaviour) the result has one entry per
            *distinct label present*, ordered by label value. When given, the
            result always has ``num_classes`` entries indexed by class id;
            classes absent from ``labels`` get the neutral weight 1.0. Pass
            this whenever the tensor feeds ``nn.CrossEntropyLoss(weight=...)``
            so that a class missing from the training labels cannot shorten
            the weight vector.

    Returns:
        1-D float32 tensor of class weights on ``device``.

    Raises:
        ValueError: If ``num_classes`` is given and a label lies outside
            ``[0, num_classes)``.
    """
    label_array = np.asarray(labels)
    present_classes, counts = np.unique(label_array, return_counts=True)

    # n_samples / (n_classes * count) for every class that actually occurs
    balanced = label_array.size / (present_classes.size * counts.astype(np.float64))

    if num_classes is None:
        weights = balanced
    else:
        if present_classes.size and (
            present_classes.min() < 0 or present_classes.max() >= num_classes
        ):
            raise ValueError(
                f"labels must lie in [0, {num_classes}); "
                f"found values from {present_classes.min()} to {present_classes.max()}"
            )
        # Absent classes keep the neutral weight 1.0
        weights = np.ones(num_classes, dtype=np.float64)
        weights[present_classes.astype(np.int64)] = balanced

    return torch.tensor(weights, dtype=torch.float32, device=device)

class AdaptiveAlphaScheduler:
    """
    Adjusts the sentiment/emotion loss weight ``alpha`` from the accuracies
    observed for both tasks.

    The task that is currently improving faster gets a lower loss weight so
    that the other task can catch up.
    """

    def __init__(self, initial_alpha: float = 0.5, adaptation_rate: float = 0.1):
        """
        Args:
            initial_alpha: Starting weight of the sentiment loss.
            adaptation_rate: Amount added to / subtracted from ``alpha`` per step.
        """
        self.alpha = initial_alpha
        self.adaptation_rate = adaptation_rate
        # Accuracy history per task, one entry per call to step()
        self.sentiment_history = []
        self.emotion_history = []

    def step(self, sentiment_accuracy: float, emotion_accuracy: float) -> float:
        """
        Record the latest accuracies and return the (possibly updated) alpha.

        The first call only records the values. From the second call on, each
        task's trend is its latest accuracy minus the mean of its last three
        recorded accuracies (the latest one included). ``alpha`` is lowered
        when the sentiment trend is larger, raised when the emotion trend is
        larger, left alone on a tie, and finally clamped to [0.3, 0.7].
        """
        self.sentiment_history.append(sentiment_accuracy)
        self.emotion_history.append(emotion_accuracy)

        # Not enough history yet: leave alpha untouched (and unclamped)
        if len(self.sentiment_history) < 2:
            return self.alpha

        sentiment_trend = sentiment_accuracy - np.mean(self.sentiment_history[-3:])
        emotion_trend = emotion_accuracy - np.mean(self.emotion_history[-3:])

        if sentiment_trend > emotion_trend:
            # Sentiment is improving faster -> give it less weight
            self.alpha -= self.adaptation_rate
        elif emotion_trend > sentiment_trend:
            # Emotion is improving faster -> shift weight back to sentiment
            self.alpha += self.adaptation_rate

        # Constrain alpha to [0.3, 0.7]
        self.alpha = max(0.3, min(0.7, self.alpha))
        return self.alpha

print("✅ Loss functions and schedulers defined!")

✅ Loss functions and schedulers defined!


In [26]:
# Cell 5: Training Utilities
class TrainingConfig:
    """
    Plain container for training hyperparameters.

    Every constructor argument is stored unchanged as an instance attribute
    of the same name:

    - Model / data: ``model_name``, ``max_length``, ``batch_size``
    - Optimisation: ``learning_rate``, ``num_epochs``, ``warmup_ratio``,
      ``weight_decay``, ``max_grad_norm``
    - Multitask loss: ``alpha``, ``adaptive_alpha``
    - Regularisation: ``hidden_dropout_prob``, ``attention_dropout_prob``,
      ``classifier_dropout``
    - Bookkeeping: ``save_strategy``, ``evaluation_strategy``,
      ``output_dir``, ``logging_steps``, ``save_total_limit``
    """

    def __init__(
        self,
        model_name: str = "roberta-base",
        max_length: int = 512,
        batch_size: int = 16,
        learning_rate: float = 2e-5,
        num_epochs: int = 5,
        warmup_ratio: float = 0.1,
        weight_decay: float = 0.01,
        max_grad_norm: float = 1.0,
        alpha: float = 0.5,
        hidden_dropout_prob: float = 0.1,
        attention_dropout_prob: float = 0.1,
        classifier_dropout: float = 0.1,
        adaptive_alpha: bool = True,
        save_strategy: str = "epoch",
        evaluation_strategy: str = "epoch",
        output_dir: str = "./multitask_model",
        logging_steps: int = 50,
        save_total_limit: int = 3
    ):
        # Snapshot the arguments before any other local variable exists, so
        # the mapping holds exactly `self` plus the parameters in signature
        # order.
        settings = dict(locals())
        del settings['self']

        # Mirror every argument onto the instance under its own name
        for option_name, option_value in settings.items():
            setattr(self, option_name, option_value)

def create_optimizer_and_scheduler(
    model: nn.Module,
    config: TrainingConfig,
    num_training_steps: int
) -> Tuple[AdamW, LambdaLR]:
    """
    Create the AdamW optimizer and a linear warmup/decay LR scheduler.

    Parameters are split into two groups (order preserved: decayed first):
    - group 0: weight decay ``config.weight_decay``
    - group 1: no weight decay - biases and normalisation weights

    A parameter is exempt from decay when its name contains ``"bias"`` or
    ``"LayerNorm.weight"`` *or* when it is 1-D. The dimensionality check is
    needed because a purely name-based filter only recognises norm layers that
    are literally named ``LayerNorm``; norm layers stored under other
    attribute names (``sentiment_norm``, ``emotion_norm``, ``shared_norm`` in
    this notebook's model) would otherwise be weight-decayed.

    Args:
        model: Model whose ``named_parameters()`` are optimised.
        config: Provides ``weight_decay``, ``learning_rate`` and
            ``warmup_ratio``.
        num_training_steps: Total number of scheduler steps; the first
            ``int(num_training_steps * warmup_ratio)`` are warmup.

    Returns:
        ``(optimizer, scheduler)``
    """
    no_decay = ["bias", "LayerNorm.weight"]

    def _exempt_from_decay(name: str, param: torch.Tensor) -> bool:
        # 1-D tensors are biases and normalisation scales
        return param.ndim < 2 or any(nd in name for nd in no_decay)

    decay_params, no_decay_params = [], []
    for name, param in model.named_parameters():
        bucket = no_decay_params if _exempt_from_decay(name, param) else decay_params
        bucket.append(param)

    optimizer_grouped_parameters = [
        {
            "params": decay_params,
            "weight_decay": config.weight_decay,
        },
        {
            "params": no_decay_params,
            "weight_decay": 0.0,
        },
    ]

    # AdamW optimizer
    optimizer = AdamW(
        optimizer_grouped_parameters,
        lr=config.learning_rate,
        eps=1e-8
    )

    # Linear warmup followed by linear decay
    num_warmup_steps = int(num_training_steps * config.warmup_ratio)

    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=num_warmup_steps,
        num_training_steps=num_training_steps
    )

    return optimizer, scheduler

class EarlyStopping:
    """
    Early stopping on a score where higher is better.

    Training should stop once ``patience`` calls in a row have failed to beat
    the best score seen so far by at least ``min_delta``.
    """

    def __init__(self, patience: int = 3, min_delta: float = 0.001):
        """
        Args:
            patience: Number of consecutive non-improving calls tolerated.
            min_delta: Minimum gain over the best score that counts as an
                improvement.
        """
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_score = None

    def __call__(self, score: float) -> bool:
        """Record ``score``; return True if training should be stopped."""
        # The very first score just becomes the baseline
        if self.best_score is None:
            self.best_score = score
            return False

        stalled = score < self.best_score + self.min_delta
        if not stalled:
            # Real improvement: new baseline, patience starts over
            self.best_score = score
            self.counter = 0
            return False

        self.counter += 1
        return self.counter >= self.patience

class ModelCheckpointer:
    """
    Saves per-epoch checkpoints under ``output_dir`` and keeps only the most
    recent ``save_total_limit`` of them. The best model is additionally
    stored in ``<output_dir>/best_model`` and is never rotated away.
    """

    # File used for the weights of models without a ``save_pretrained`` method
    _FALLBACK_WEIGHTS_NAME = "pytorch_model.bin"

    def __init__(self, output_dir: str, save_total_limit: int = 3):
        """
        Args:
            output_dir: Root directory for all checkpoints (created if missing).
            save_total_limit: Maximum number of epoch checkpoints kept on disk.
        """
        self.output_dir = output_dir
        self.save_total_limit = save_total_limit
        # Checkpoint directories written by this instance, oldest first
        self.saved_checkpoints = []
        os.makedirs(output_dir, exist_ok=True)

    @classmethod
    def _save_model(cls, model: nn.Module, directory: str) -> None:
        """
        Write the model weights into ``directory``.

        Uses ``model.save_pretrained`` when the model provides it. A plain
        ``nn.Module`` (such as this notebook's ``MultiTaskTransformer``) has
        no such method, so its ``state_dict`` is saved instead.
        """
        save_pretrained = getattr(model, 'save_pretrained', None)
        if callable(save_pretrained):
            save_pretrained(directory)
        else:
            torch.save(
                model.state_dict(),
                os.path.join(directory, cls._FALLBACK_WEIGHTS_NAME)
            )

    def save_checkpoint(
        self,
        model: nn.Module,
        tokenizer,
        optimizer: AdamW,
        scheduler: LambdaLR,
        epoch: int,
        metrics: Dict,
        is_best: bool = False
    ):
        """
        Save model, tokenizer and training state for ``epoch``.

        Args:
            model: Model to save (see ``_save_model`` for the format).
            tokenizer: Object providing ``save_pretrained(directory)``.
            optimizer: Optimizer whose ``state_dict`` is stored.
            scheduler: LR scheduler whose ``state_dict`` is stored.
            epoch: Epoch number; determines the directory name
                ``checkpoint-epoch-<epoch>``.
            metrics: Arbitrary metrics stored alongside the training state.
            is_best: When True, model and tokenizer are also written to
                ``<output_dir>/best_model``.
        """
        checkpoint_dir = os.path.join(self.output_dir, f"checkpoint-epoch-{epoch}")
        os.makedirs(checkpoint_dir, exist_ok=True)

        # Save model and tokenizer
        self._save_model(model, checkpoint_dir)
        tokenizer.save_pretrained(checkpoint_dir)

        # Save training state (everything needed to resume)
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
            'metrics': metrics
        }, os.path.join(checkpoint_dir, 'training_state.pt'))

        # Save best model separately
        if is_best:
            best_dir = os.path.join(self.output_dir, 'best_model')
            os.makedirs(best_dir, exist_ok=True)
            self._save_model(model, best_dir)
            tokenizer.save_pretrained(best_dir)

        # Manage checkpoint limit. A directory that is saved again (same
        # epoch) must appear only once, otherwise rotating out its older
        # entry would delete the checkpoint that was just written.
        if checkpoint_dir in self.saved_checkpoints:
            self.saved_checkpoints.remove(checkpoint_dir)
        self.saved_checkpoints.append(checkpoint_dir)
        if len(self.saved_checkpoints) > self.save_total_limit:
            old_checkpoint = self.saved_checkpoints.pop(0)
            if os.path.exists(old_checkpoint) and 'best_model' not in old_checkpoint:
                import shutil
                shutil.rmtree(old_checkpoint)

print("✅ Training utilities defined!")

✅ Training utilities defined!


In [27]:
# Cell 6: Training Loop
class MultiTaskTrainer:
    """
    Main trainer class for multitask learning

    Owns the tokenizer, model, loss function, optimizer/scheduler and data
    loaders for joint sentiment + emotion training. Call ``setup`` once with
    the prepared data splits, then ``train``; per-epoch metrics accumulate in
    ``training_history``.
    """

    # Validation metrics copied from ``evaluate`` into ``training_history``.
    _HISTORY_VAL_KEYS = (
        'val_loss',
        'val_sentiment_loss',
        'val_emotion_loss',
        'val_sentiment_accuracy',
        'val_emotion_accuracy',
    )

    def __init__(
        self,
        config: TrainingConfig,
        sentiment_num_classes: int,
        emotion_num_classes: int
    ):
        """
        Args:
            config: Training configuration (model name, batch size, epochs, ...).
            sentiment_num_classes: Size of the sentiment classification head.
            emotion_num_classes: Size of the emotion classification head.
        """
        self.config = config
        self.sentiment_num_classes = sentiment_num_classes
        self.emotion_num_classes = emotion_num_classes
        self.device = device

        # Initialize components
        self.tokenizer = AutoTokenizer.from_pretrained(config.model_name)
        if self.tokenizer.pad_token is None:
            # Fall back to the EOS token when the tokenizer has no pad token.
            self.tokenizer.pad_token = self.tokenizer.eos_token

        # Populated by setup()
        self.model = None
        self.loss_fn = None
        self.optimizer = None
        self.scheduler = None
        self.alpha_scheduler = None
        self.early_stopping = None
        self.checkpointer = None

        # Training history: one list entry per completed epoch
        self.training_history = {
            'epoch': [],
            'train_loss': [],
            'train_sentiment_loss': [],
            'train_emotion_loss': [],
            'val_loss': [],
            'val_sentiment_loss': [],
            'val_emotion_loss': [],
            'val_sentiment_accuracy': [],
            'val_emotion_accuracy': [],
            'alpha': [],
            'learning_rate': []
        }

    def setup(
        self,
        data_splits: Dict,
        sentiment_encoder: LabelEncoder,
        emotion_encoder: LabelEncoder
    ):
        """Setup model, loss function, and training components

        Args:
            data_splits: Mapping with ``'train'`` and ``'val'`` entries, each
                providing ``'texts'``, ``'sentiment_labels'`` and
                ``'emotion_labels'``.
            sentiment_encoder: Label encoder handed to the datasets.
            emotion_encoder: Label encoder handed to the datasets.
        """
        train_split = data_splits['train']
        val_split = data_splits['val']

        # Initialize model
        self.model = MultiTaskTransformer(
            model_name=self.config.model_name,
            sentiment_num_classes=self.sentiment_num_classes,
            emotion_num_classes=self.emotion_num_classes,
            hidden_dropout_prob=self.config.hidden_dropout_prob,
            attention_dropout_prob=self.config.attention_dropout_prob,
            classifier_dropout=self.config.classifier_dropout
        ).to(self.device)

        # Class weights are derived from the training labels only
        sentiment_weights = compute_class_weights_from_labels(
            train_split['sentiment_labels'], self.device
        )
        emotion_weights = compute_class_weights_from_labels(
            train_split['emotion_labels'], self.device
        )

        # Initialize loss function
        self.loss_fn = MultiTaskLoss(
            alpha=self.config.alpha,
            sentiment_class_weights=sentiment_weights,
            emotion_class_weights=emotion_weights,
            device=self.device
        )

        # Create datasets (train and validation share every setting but the data)
        def build_dataset(split):
            return MultiTaskDataset(
                texts=split['texts'],
                sentiment_labels=split['sentiment_labels'],
                emotion_labels=split['emotion_labels'],
                tokenizer=self.tokenizer,
                max_length=self.config.max_length,
                sentiment_label_encoder=sentiment_encoder,
                emotion_label_encoder=emotion_encoder
            )

        self.train_dataset = build_dataset(train_split)
        self.val_dataset = build_dataset(val_split)

        # The stratified sampler is only used above 50 training samples;
        # smaller sets fall back to plain shuffling.
        train_sampler = None
        if len(train_split['texts']) > 50:
            train_sampler = create_stratified_sampler(
                train_split['sentiment_labels'],
                train_split['emotion_labels']
            )

        # Pinned host memory only helps host-to-GPU copies; skip it on CPU.
        pin_memory = self.device.type == 'cuda'

        self.train_loader = DataLoader(
            self.train_dataset,
            batch_size=self.config.batch_size,
            sampler=train_sampler,
            shuffle=(train_sampler is None),  # sampler and shuffle are mutually exclusive
            num_workers=0,
            pin_memory=pin_memory
        )

        self.val_loader = DataLoader(
            self.val_dataset,
            batch_size=self.config.batch_size,
            shuffle=False,
            num_workers=0,
            pin_memory=pin_memory
        )

        # Setup optimizer and scheduler (one scheduler step per batch)
        num_training_steps = len(self.train_loader) * self.config.num_epochs
        self.optimizer, self.scheduler = create_optimizer_and_scheduler(
            self.model, self.config, num_training_steps
        )

        # Initialize utilities
        if self.config.adaptive_alpha:
            self.alpha_scheduler = AdaptiveAlphaScheduler(
                initial_alpha=self.config.alpha
            )

        self.early_stopping = EarlyStopping(patience=3, min_delta=0.001)
        self.checkpointer = ModelCheckpointer(
            self.config.output_dir,
            self.config.save_total_limit
        )

        print(f"✅ Setup complete!")
        print(f"  Model: {self.config.model_name}")
        print(f"  Training samples: {len(self.train_dataset)}")
        print(f"  Validation samples: {len(self.val_dataset)}")
        print(f"  Training steps per epoch: {len(self.train_loader)}")
        print(f"  Total training steps: {num_training_steps}")

    def _run_batch(self, batch):
        """Move one batch to the device, run the model and compute the losses.

        Returns:
            Tuple ``(outputs, loss_dict, sentiment_labels, emotion_labels)``;
            the labels are the tensors after being moved to ``self.device``.
        """
        input_ids = batch['input_ids'].to(self.device)
        attention_mask = batch['attention_mask'].to(self.device)
        sentiment_labels = batch['sentiment_labels'].to(self.device)
        emotion_labels = batch['emotion_labels'].to(self.device)

        outputs = self.model(
            input_ids=input_ids,
            attention_mask=attention_mask
        )

        loss_dict = self.loss_fn(
            sentiment_logits=outputs['sentiment_logits'],
            emotion_logits=outputs['emotion_logits'],
            sentiment_labels=sentiment_labels,
            emotion_labels=emotion_labels
        )
        return outputs, loss_dict, sentiment_labels, emotion_labels

    def train_epoch(self) -> Dict[str, float]:
        """Train for one epoch

        Returns:
            Mean per-batch ``train_loss``, ``train_sentiment_loss`` and
            ``train_emotion_loss`` for the epoch.
        """
        self.model.train()

        total_loss = 0.0
        total_sentiment_loss = 0.0
        total_emotion_loss = 0.0
        num_batches = 0

        for batch_idx, batch in enumerate(self.train_loader):
            _, loss_dict, _, _ = self._run_batch(batch)
            loss = loss_dict['total_loss']

            # Backward pass
            self.optimizer.zero_grad()
            loss.backward()

            # Gradient clipping
            torch.nn.utils.clip_grad_norm_(
                self.model.parameters(),
                self.config.max_grad_norm
            )

            self.optimizer.step()
            self.scheduler.step()

            # Accumulate losses
            total_loss += loss.item()
            total_sentiment_loss += loss_dict['sentiment_loss'].item()
            total_emotion_loss += loss_dict['emotion_loss'].item()
            num_batches += 1

            # Logging
            if (batch_idx + 1) % self.config.logging_steps == 0:
                avg_loss = total_loss / num_batches
                current_lr = self.scheduler.get_last_lr()[0]
                print(f"  Batch {batch_idx + 1}/{len(self.train_loader)} | "
                      f"Loss: {avg_loss:.4f} | "
                      f"LR: {current_lr:.2e} | "
                      f"Alpha: {self.loss_fn.alpha:.3f}")

        # Avoid ZeroDivisionError when the loader yields no batches.
        denominator = max(num_batches, 1)
        return {
            'train_loss': total_loss / denominator,
            'train_sentiment_loss': total_sentiment_loss / denominator,
            'train_emotion_loss': total_emotion_loss / denominator
        }

    def evaluate(self) -> Dict[str, float]:
        """Evaluate on validation set

        Returns:
            Mean per-batch validation losses, per-task accuracies, and the raw
            prediction / true-label lists for both tasks.
        """
        self.model.eval()

        total_loss = 0.0
        total_sentiment_loss = 0.0
        total_emotion_loss = 0.0

        sentiment_predictions = []
        sentiment_true_labels = []
        emotion_predictions = []
        emotion_true_labels = []

        with torch.no_grad():
            for batch in self.val_loader:
                outputs, loss_dict, sentiment_labels, emotion_labels = self._run_batch(batch)

                # Accumulate losses
                total_loss += loss_dict['total_loss'].item()
                total_sentiment_loss += loss_dict['sentiment_loss'].item()
                total_emotion_loss += loss_dict['emotion_loss'].item()

                # Predictions
                sentiment_preds = torch.argmax(outputs['sentiment_logits'], dim=-1)
                emotion_preds = torch.argmax(outputs['emotion_logits'], dim=-1)

                sentiment_predictions.extend(sentiment_preds.cpu().numpy())
                sentiment_true_labels.extend(sentiment_labels.cpu().numpy())
                emotion_predictions.extend(emotion_preds.cpu().numpy())
                emotion_true_labels.extend(emotion_labels.cpu().numpy())

        # Calculate metrics; guard the denominator against an empty loader.
        num_batches = max(len(self.val_loader), 1)
        sentiment_accuracy = accuracy_score(sentiment_true_labels, sentiment_predictions)
        emotion_accuracy = accuracy_score(emotion_true_labels, emotion_predictions)

        return {
            'val_loss': total_loss / num_batches,
            'val_sentiment_loss': total_sentiment_loss / num_batches,
            'val_emotion_loss': total_emotion_loss / num_batches,
            'val_sentiment_accuracy': sentiment_accuracy,
            'val_emotion_accuracy': emotion_accuracy,
            'sentiment_predictions': sentiment_predictions,
            'sentiment_true_labels': sentiment_true_labels,
            'emotion_predictions': emotion_predictions,
            'emotion_true_labels': emotion_true_labels
        }

    def train(self) -> Dict[str, List]:
        """Main training loop

        Runs up to ``config.num_epochs`` epochs of train + evaluate, updates
        alpha when an alpha scheduler is configured, checkpoints when
        ``config.save_strategy == "epoch"``, and stops early when the
        early-stopping helper says so.

        Returns:
            The accumulated ``training_history`` dictionary.
        """
        print(f"🚀 Starting training for {self.config.num_epochs} epochs...")

        best_combined_score = 0.0

        for epoch in range(self.config.num_epochs):
            print(f"\n📍 Epoch {epoch + 1}/{self.config.num_epochs}")
            print("-" * 50)

            # Train for one epoch
            train_metrics = self.train_epoch()

            # Evaluate
            val_metrics = self.evaluate()

            # Update alpha if adaptive. Note: the alpha printed and recorded
            # below is therefore the value *after* this update.
            if self.alpha_scheduler:
                new_alpha = self.alpha_scheduler.step(
                    val_metrics['val_sentiment_accuracy'],
                    val_metrics['val_emotion_accuracy']
                )
                self.loss_fn.update_alpha(new_alpha)

            # Calculate combined score for checkpointing
            combined_score = (
                val_metrics['val_sentiment_accuracy'] +
                val_metrics['val_emotion_accuracy']
            ) / 2

            # The first epoch always counts as best so that best_model is
            # written even when the initial score is exactly 0.0.
            is_best = epoch == 0 or combined_score > best_combined_score
            if is_best:
                best_combined_score = combined_score

            # Log metrics
            current_lr = self.scheduler.get_last_lr()[0]

            print(f"📊 Epoch {epoch + 1} Results:")
            print(f"  Train Loss: {train_metrics['train_loss']:.4f}")
            print(f"  Val Loss: {val_metrics['val_loss']:.4f}")
            print(f"  Sentiment Accuracy: {val_metrics['val_sentiment_accuracy']:.4f}")
            print(f"  Emotion Accuracy: {val_metrics['val_emotion_accuracy']:.4f}")
            print(f"  Combined Score: {combined_score:.4f}")
            print(f"  Alpha: {self.loss_fn.alpha:.3f}")
            print(f"  Learning Rate: {current_lr:.2e}")

            # Save history
            epoch_record = {'epoch': epoch + 1}
            epoch_record.update(train_metrics)
            epoch_record.update({key: val_metrics[key] for key in self._HISTORY_VAL_KEYS})
            epoch_record['alpha'] = self.loss_fn.alpha
            epoch_record['learning_rate'] = current_lr
            for key, value in epoch_record.items():
                self.training_history[key].append(value)

            # Save checkpoint
            if self.config.save_strategy == "epoch":
                self.checkpointer.save_checkpoint(
                    model=self.model,
                    tokenizer=self.tokenizer,
                    optimizer=self.optimizer,
                    scheduler=self.scheduler,
                    epoch=epoch + 1,
                    metrics=val_metrics,
                    is_best=is_best
                )

            # Early stopping
            if self.early_stopping(combined_score):
                print(f"⏹️ Early stopping triggered at epoch {epoch + 1}")
                break

        print(f"\n🎉 Training completed!")
        print(f"Best combined score: {best_combined_score:.4f}")

        return self.training_history

# Confirmation message printed once the trainer class in this cell has been defined.
print("✅ Training loop defined!")

✅ Training loop defined!


In [28]:
# Cell 7: Evaluation Functions
class MultiTaskEvaluator:
    """
    Simplified evaluation for multitask models

    Runs a trained model over a labelled dataset and reports accuracy and
    macro F1 for the sentiment and emotion tasks, plus their averages.
    """

    def __init__(
        self,
        model: MultiTaskTransformer,
        tokenizer,
        sentiment_encoder: LabelEncoder,
        emotion_encoder: LabelEncoder,
        device: torch.device
    ):
        """
        Args:
            model: Trained multitask model; it is put into eval mode.
            tokenizer: Tokenizer handed to the evaluation dataset.
            sentiment_encoder: Label encoder handed to the evaluation dataset.
            emotion_encoder: Label encoder handed to the evaluation dataset.
            device: Device the input tensors are moved to.
        """
        self.model = model
        self.tokenizer = tokenizer
        self.sentiment_encoder = sentiment_encoder
        self.emotion_encoder = emotion_encoder
        self.device = device

        self.model.eval()

    def evaluate_dataset(
        self,
        texts: List[str],
        sentiment_labels: List[int],
        emotion_labels: List[int],
        batch_size: int = 32,
        max_length: int = 512
    ) -> Dict[str, Dict]:
        """
        Evaluate model on a dataset

        Args:
            texts: Input texts.
            sentiment_labels: Ground-truth sentiment labels, one per text.
            emotion_labels: Ground-truth emotion labels, one per text.
            batch_size: Number of samples per forward pass.
            max_length: Forwarded to ``MultiTaskDataset`` as ``max_length``.
                Defaults to 512; pass the training value to evaluate with the
                same sequence length the model was trained on.

        Returns:
            Dict with ``'sentiment'``, ``'emotion'`` and ``'combined'`` entries
            as produced by ``_calculate_metrics``.
        """
        # The model object may have been switched back to train mode since
        # __init__ (e.g. by a trainer sharing it); dropout must be off here.
        self.model.eval()

        dataset = MultiTaskDataset(
            texts=texts,
            sentiment_labels=sentiment_labels,
            emotion_labels=emotion_labels,
            tokenizer=self.tokenizer,
            max_length=max_length,
            sentiment_label_encoder=self.sentiment_encoder,
            emotion_label_encoder=self.emotion_encoder
        )

        dataloader = DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=False,
            num_workers=0
        )

        sentiment_predictions = []
        emotion_predictions = []
        sentiment_true_labels = []
        emotion_true_labels = []

        with torch.no_grad():
            for batch in dataloader:
                input_ids = batch['input_ids'].to(self.device)
                attention_mask = batch['attention_mask'].to(self.device)

                outputs = self.model(
                    input_ids=input_ids,
                    attention_mask=attention_mask
                )

                # Get predictions
                sentiment_preds = torch.argmax(outputs['sentiment_logits'], dim=-1)
                emotion_preds = torch.argmax(outputs['emotion_logits'], dim=-1)

                # Store results; labels never left the CPU, so no .cpu() needed
                sentiment_predictions.extend(sentiment_preds.cpu().numpy())
                emotion_predictions.extend(emotion_preds.cpu().numpy())
                sentiment_true_labels.extend(batch['sentiment_labels'].numpy())
                emotion_true_labels.extend(batch['emotion_labels'].numpy())

        # Calculate metrics
        return self._calculate_metrics(
            sentiment_predictions=sentiment_predictions,
            emotion_predictions=emotion_predictions,
            sentiment_true_labels=sentiment_true_labels,
            emotion_true_labels=emotion_true_labels
        )

    def _calculate_metrics(
        self,
        sentiment_predictions: List[int],
        emotion_predictions: List[int],
        sentiment_true_labels: List[int],
        emotion_true_labels: List[int]
    ) -> Dict[str, Dict]:
        """Calculate simplified metrics: only accuracy and macro F1

        Returns:
            ``{'sentiment': {...}, 'emotion': {...}, 'combined': {...}}`` where
            each task entry holds ``accuracy``, ``f1_macro``, ``predictions``
            and ``true_labels``, and ``combined`` holds the two-task averages.
        """

        def task_metrics(true_labels, predictions):
            return {
                'accuracy': accuracy_score(true_labels, predictions),
                # zero_division=0: classes that are never predicted score 0
                'f1_macro': f1_score(true_labels, predictions, average='macro', zero_division=0),
                'predictions': predictions,
                'true_labels': true_labels
            }

        sentiment = task_metrics(sentiment_true_labels, sentiment_predictions)
        emotion = task_metrics(emotion_true_labels, emotion_predictions)

        return {
            'sentiment': sentiment,
            'emotion': emotion,
            'combined': {
                'average_accuracy': (sentiment['accuracy'] + emotion['accuracy']) / 2,
                'average_f1': (sentiment['f1_macro'] + emotion['f1_macro']) / 2
            }
        }

# Confirmation message printed once the evaluator class in this cell has been defined.
print("✅ Simplified evaluation functions defined!")

✅ Simplified evaluation functions defined!


In [29]:
# Cell 8: Inference Functions
class MultiTaskPredictor:
    """
    Inference class for multitask model

    Loads a saved model, its tokenizer and the two label encoders, and turns
    raw text into decoded sentiment / emotion predictions.
    """

    # Only these tokenizer outputs are passed to the model, mirroring how
    # MultiTaskTrainer and MultiTaskEvaluator call it. Tokenizers may also
    # return extras such as token_type_ids that were never used in training.
    _MODEL_INPUT_KEYS = ('input_ids', 'attention_mask')

    def __init__(
        self,
        model_path: str,
        sentiment_encoder_path: str,
        emotion_encoder_path: str,
        device: torch.device = None
    ):
        """
        Args:
            model_path: Directory holding the saved model and tokenizer.
            sentiment_encoder_path: Path of the saved sentiment label encoder.
            emotion_encoder_path: Path of the saved emotion label encoder.
            device: Target device; defaults to CUDA when available, else CPU.
        """
        self.device = device or torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        # Load tokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)

        # Load model
        self.model = MultiTaskTransformer.from_pretrained(model_path)
        self.model.to(self.device)
        self.model.eval()

        # Load label encoders
        # NOTE(security): joblib.load unpickles arbitrary objects; only load
        # encoder files that come from a trusted source.
        import joblib
        self.sentiment_encoder = joblib.load(sentiment_encoder_path)
        self.emotion_encoder = joblib.load(emotion_encoder_path)

        print(f"✅ Model loaded successfully!")
        print(f"Device: {self.device}")
        print(f"Sentiment classes: {list(self.sentiment_encoder.classes_)}")
        print(f"Emotion classes: {list(self.emotion_encoder.classes_)}")

    def _infer(self, tokenizer_input):
        """Tokenize ``tokenizer_input`` (a string or list of strings) and run the model.

        Returns:
            Tuple ``(outputs, sentiment_probs, emotion_probs, sentiment_preds,
            emotion_preds)``: the raw model output dict, softmax probabilities
            per task and argmax class ids per task.
        """
        encoded = self.tokenizer(
            tokenizer_input,
            return_tensors="pt",
            truncation=True,
            padding="max_length",
            max_length=512
        )

        # Move to device, keeping only the inputs the model was trained with
        model_inputs = {
            key: encoded[key].to(self.device) for key in self._MODEL_INPUT_KEYS
        }

        with torch.no_grad():
            outputs = self.model(**model_inputs)

        sentiment_logits = outputs['sentiment_logits']
        emotion_logits = outputs['emotion_logits']

        return (
            outputs,
            F.softmax(sentiment_logits, dim=-1),
            F.softmax(emotion_logits, dim=-1),
            torch.argmax(sentiment_logits, dim=-1),
            torch.argmax(emotion_logits, dim=-1),
        )

    @staticmethod
    def _task_result(encoder, probs_row, pred_id: int, return_probabilities: bool) -> Dict:
        """Build the result entry for one task and one input row."""
        entry = {
            'label': encoder.inverse_transform([pred_id])[0],
            'confidence': probs_row[pred_id].item(),
            'class_id': pred_id
        }
        if return_probabilities:
            entry['probabilities'] = {
                class_name: prob.item()
                for class_name, prob in zip(encoder.classes_, probs_row)
            }
        return entry

    def predict_single(
        self,
        text: str,
        return_probabilities: bool = True,
        return_attention: bool = False
    ) -> Dict[str, object]:
        """
        Predict sentiment and emotion for a single text

        Args:
            text: Input text.
            return_probabilities: Include the per-class probability mapping.
            return_attention: Include the model's task attention weights.

        Returns:
            Dict with ``'text'``, ``'sentiment'`` and ``'emotion'`` entries; each
            task entry holds ``label``, ``confidence`` and ``class_id`` (plus
            ``probabilities`` / ``attention_weights`` when requested).
        """
        outputs, sentiment_probs, emotion_probs, sentiment_preds, emotion_preds = self._infer(text)

        result = {
            'text': text,
            'sentiment': self._task_result(
                self.sentiment_encoder, sentiment_probs[0],
                sentiment_preds.item(), return_probabilities
            ),
            'emotion': self._task_result(
                self.emotion_encoder, emotion_probs[0],
                emotion_preds.item(), return_probabilities
            )
        }

        if return_attention:
            result['sentiment']['attention_weights'] = outputs['sentiment_attention_weights']
            result['emotion']['attention_weights'] = outputs['emotion_attention_weights']

        return result

    def predict_batch(
        self,
        texts: List[str],
        batch_size: int = 32,
        return_probabilities: bool = False
    ) -> List[Dict[str, object]]:
        """
        Predict sentiment and emotion for a batch of texts

        Args:
            texts: Input texts.
            batch_size: Number of texts per forward pass.
            return_probabilities: Include the per-class probability mapping.

        Returns:
            One result dict per input text, in input order. An empty input
            yields an empty list.
        """
        results = []

        for start in range(0, len(texts), batch_size):
            batch_texts = texts[start:start + batch_size]
            _, sentiment_probs, emotion_probs, sentiment_preds, emotion_preds = self._infer(batch_texts)

            # Process each item in batch
            for row, text in enumerate(batch_texts):
                results.append({
                    'text': text,
                    'sentiment': self._task_result(
                        self.sentiment_encoder, sentiment_probs[row],
                        sentiment_preds[row].item(), return_probabilities
                    ),
                    'emotion': self._task_result(
                        self.emotion_encoder, emotion_probs[row],
                        emotion_preds[row].item(), return_probabilities
                    )
                })

        return results

def save_model_and_encoders(
    model: MultiTaskTransformer,
    tokenizer,
    sentiment_encoder: LabelEncoder,
    emotion_encoder: LabelEncoder,
    output_dir: str
):
    """Save complete model with encoders

    Writes the model and tokenizer via their ``save_pretrained`` methods, the
    two label encoders as ``sentiment_encoder.pkl`` / ``emotion_encoder.pkl``
    (joblib), and a ``model_config.json`` listing the class names and counts.

    Args:
        model: Model to persist.
        tokenizer: Tokenizer to persist.
        sentiment_encoder: Fitted sentiment label encoder.
        emotion_encoder: Fitted emotion label encoder.
        output_dir: Target directory (created if missing).
    """
    import joblib

    os.makedirs(output_dir, exist_ok=True)

    # Save model and tokenizer
    model.save_pretrained(output_dir)
    tokenizer.save_pretrained(output_dir)

    # Save encoders
    joblib.dump(sentiment_encoder, os.path.join(output_dir, 'sentiment_encoder.pkl'))
    joblib.dump(emotion_encoder, os.path.join(output_dir, 'emotion_encoder.pkl'))

    # .tolist() converts NumPy scalars to native Python values; list() alone
    # would keep e.g. np.int64 items, which json.dump cannot serialize.
    sentiment_classes = np.asarray(sentiment_encoder.classes_).tolist()
    emotion_classes = np.asarray(emotion_encoder.classes_).tolist()

    # Save model configuration
    config = {
        'sentiment_classes': sentiment_classes,
        'emotion_classes': emotion_classes,
        'sentiment_num_classes': len(sentiment_classes),
        'emotion_num_classes': len(emotion_classes)
    }

    with open(os.path.join(output_dir, 'model_config.json'), 'w', encoding='utf-8') as f:
        json.dump(config, f, indent=2)

    print(f"✅ Model and encoders saved to: {output_dir}")

# Confirmation message printed once the inference helpers in this cell have been defined.
print("✅ Inference functions defined!")

✅ Inference functions defined!


In [30]:
# Cell 9: Updated Main Execution Function

def run_multitask_training(
    reddit_data_path: str = "annotated_reddit_posts.csv",
    model_name: str = "microsoft/deberta-base",
    output_dir: str = "./multitask_model",
    config_overrides: Dict = None,
    max_external_samples: int = 10000
) -> Tuple[MultiTaskTransformer, Dict]:
    """
    Main function to run complete multitask training pipeline
    - Trains on external datasets (SST-2 + GoEmotions)
    - Evaluates on Reddit data

    Args:
        reddit_data_path: CSV file with the annotated Reddit evaluation data.
        model_name: Encoder name; must be one of the names in ``MODEL_CONFIGS``.
        output_dir: Root directory for checkpoints, final model and results.
        config_overrides: Optional ``{attribute: value}`` mapping applied to the
            ``TrainingConfig`` after it is built. Unknown attributes are
            reported and skipped.
        max_external_samples: Forwarded to the external-data preparation step
            as ``max_samples``.

    Returns:
        Tuple ``(trained_model, reddit_results)``.

    Raises:
        ValueError: If ``model_name`` is not listed in ``MODEL_CONFIGS``.
    """
    print("🚀 Starting Multitask Learning Pipeline")
    print("=" * 60)
    print("📋 Training Strategy:")
    print("  • Train on: SST-2 (sentiment) + GoEmotions (emotion)")
    print("  • Evaluate on: Reddit Note 7 data")
    print("=" * 60)

    # Validate model choice
    available_models = [model_config["name"] for model_config in MODEL_CONFIGS.values()]
    if model_name not in available_models:
        raise ValueError(f"Model must be one of: {available_models}")

    # Load external datasets for training
    print("\n1️⃣ Loading external datasets for training...")
    sentiment_data, emotion_data = load_external_datasets()

    # Prepare external data for multitask training
    print("\n2️⃣ Preparing external data for multitask training...")
    external_data_splits, sentiment_encoder, emotion_encoder = prepare_external_data_for_multitask(
        sentiment_data, emotion_data, max_samples=max_external_samples
    )

    # Load Reddit data for evaluation
    print("\n3️⃣ Loading Reddit data for evaluation...")
    reddit_df = pd.read_csv(reddit_data_path)
    print(f"Loaded {len(reddit_df)} Reddit samples for evaluation")

    # Prepare Reddit data for evaluation using the same encoders
    reddit_evaluation_data = prepare_reddit_data_for_evaluation(
        reddit_df, sentiment_encoder, emotion_encoder
    )

    # Create training configuration
    config = TrainingConfig(
        model_name=model_name,
        output_dir=output_dir,
        num_epochs=5,  # Reduced since we have more data
        batch_size=16,
        learning_rate=2e-5,
        warmup_ratio=0.1,
        weight_decay=0.01,
        max_grad_norm=1.0,
        alpha=0.5,
        adaptive_alpha=True,
        hidden_dropout_prob=0.1,
        attention_dropout_prob=0.1,
        classifier_dropout=0.1
    )

    # Apply any configuration overrides
    if config_overrides:
        for key, value in config_overrides.items():
            if hasattr(config, key):
                setattr(config, key, value)
                print(f"Updated config.{key} = {value}")
            else:
                # Surface typos instead of silently dropping the override.
                print(f"⚠️ Ignoring unknown config override: {key}")

    # An override may have redirected the output directory. Use the effective
    # value everywhere so checkpoints, final model and results share one root.
    output_dir = config.output_dir
    final_model_dir = os.path.join(output_dir, 'final_model')

    print(f"\n4️⃣ Initializing multitask trainer...")
    # Initialize trainer
    trainer = MultiTaskTrainer(
        config=config,
        sentiment_num_classes=len(sentiment_encoder.classes_),
        emotion_num_classes=len(emotion_encoder.classes_)
    )

    # Setup training with external data
    print(f"\n5️⃣ Setting up training...")
    trainer.setup(external_data_splits, sentiment_encoder, emotion_encoder)

    # Train model on external data (the history stays available on
    # trainer.training_history; it is not part of this function's return value)
    print(f"\n6️⃣ Training model on external datasets...")
    trainer.train()

    # Save final model
    save_model_and_encoders(
        model=trainer.model,
        tokenizer=trainer.tokenizer,
        sentiment_encoder=sentiment_encoder,
        emotion_encoder=emotion_encoder,
        output_dir=final_model_dir
    )

    # Evaluate on Reddit data
    print(f"\n7️⃣ Evaluating on Reddit data...")
    evaluator = MultiTaskEvaluator(
        model=trainer.model,
        tokenizer=trainer.tokenizer,
        sentiment_encoder=sentiment_encoder,
        emotion_encoder=emotion_encoder,
        device=device
    )

    reddit_results = evaluator.evaluate_dataset(
        texts=reddit_evaluation_data['texts'],
        sentiment_labels=reddit_evaluation_data['sentiment_labels'],
        emotion_labels=reddit_evaluation_data['emotion_labels']
    )

    # Print results summary
    print(f"\n📈 Final Results Summary:")
    print("=" * 60)
    print(f"📊 Training Data: SST-2 + GoEmotions ({len(external_data_splits['train']['texts'])} samples)")
    print(f"📊 Evaluation Data: Reddit Note 7 ({len(reddit_evaluation_data['texts'])} samples)")
    print("=" * 60)
    print(f"Sentiment Classification (on Reddit data):")
    print(f"  Accuracy: {reddit_results['sentiment']['accuracy']:.4f}")
    print(f"  F1-Score (Macro): {reddit_results['sentiment']['f1_macro']:.4f}")
    print(f"")
    print(f"Emotion Classification (on Reddit data):")
    print(f"  Accuracy: {reddit_results['emotion']['accuracy']:.4f}")
    print(f"  F1-Score (Macro): {reddit_results['emotion']['f1_macro']:.4f}")
    print(f"")
    print(f"Combined Performance:")
    print(f"  Average Accuracy: {reddit_results['combined']['average_accuracy']:.4f}")
    print(f"  Average F1-Score: {reddit_results['combined']['average_f1']:.4f}")

    # Save detailed results
    os.makedirs(output_dir, exist_ok=True)
    results_file = os.path.join(output_dir, 'evaluation_results.json')

    # Convert numpy arrays/scalars to native Python values for JSON serialization
    def convert_for_json(obj):
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        elif isinstance(obj, np.integer):
            return int(obj)
        elif isinstance(obj, np.floating):
            return float(obj)
        elif isinstance(obj, dict):
            return {key: convert_for_json(value) for key, value in obj.items()}
        elif isinstance(obj, list):
            return [convert_for_json(item) for item in obj]
        else:
            return obj

    serializable_results = convert_for_json(reddit_results)

    with open(results_file, 'w') as f:
        json.dump(serializable_results, f, indent=2)

    print(f"✅ Training completed successfully!")
    print(f"📁 Output saved to: {output_dir}")
    print(f"📁 Final model: {final_model_dir}")
    print(f"📁 Results: {results_file}")

    return trainer.model, reddit_results

# Example usage and testing function (updated)
def test_multitask_model(
    model_path: str = "./multitask_model/final_model",
    test_texts: Optional[List[str]] = None
):
    """Run a trained multitask model on sample texts and print its predictions.

    Args:
        model_path: Directory holding the saved model. The label encoders are
            looked up inside it as ``sentiment_encoder.pkl`` and
            ``emotion_encoder.pkl``. Defaults to the original hard-coded
            location, so existing ``test_multitask_model()`` calls behave as
            before; pass the ``final_model`` folder of another training run
            to test that run instead.
        test_texts: Texts to classify. When omitted, a built-in set of six
            example sentences is used.

    Returns:
        None. Results are printed. If ``model_path`` does not exist, a warning
        is printed and no model is loaded.
    """
    print("\n🧪 Testing Multitask Model")
    print("=" * 40)

    # Sample test cases
    if test_texts is None:
        test_texts = [
            "I absolutely love this product! It's amazing and makes me so happy! 😍",
            "This is terrible... I hate it so much. It makes me really angry! 😠",
            "The service was okay, nothing special. Just neutral feelings about it.",
            "I'm so excited about this! Can't wait to try it out! 🎉",
            "This is really scary and makes me worried about the future. 😰",
            "What a surprise! I never expected this to happen!"
        ]

    # Bail out early when there is nothing to load.
    if not os.path.exists(model_path):
        print(f"⚠️ Model not found at {model_path}")
        print("Please run training first!")
        return

    # `device` is the notebook-level torch device chosen in the first cell.
    predictor = MultiTaskPredictor(
        model_path=model_path,
        sentiment_encoder_path=os.path.join(model_path, 'sentiment_encoder.pkl'),
        emotion_encoder_path=os.path.join(model_path, 'emotion_encoder.pkl'),
        device=device
    )

    print("\n🔮 Predictions:")
    print("-" * 60)

    for text in test_texts:
        result = predictor.predict_single(text, return_probabilities=True)

        print(f"Text: {text}")
        print(f"Sentiment: {result['sentiment']['label']} "
              f"(confidence: {result['sentiment']['confidence']:.3f})")
        print(f"Emotion: {result['emotion']['label']} "
              f"(confidence: {result['emotion']['confidence']:.3f})")
        print("-" * 60)

# Announce that the training/testing entry points are defined and show the
# call that starts training.
for _banner_line in (
    "✅ Updated main execution functions defined!",
    "\n🎯 Ready to start multitask learning!",
    "\nTo begin training with DeBERTa on external datasets:",
    "model, results = run_multitask_training(model_name='microsoft/deberta-base')",
):
    print(_banner_line)

✅ Updated main execution functions defined!

🎯 Ready to start multitask learning!

To begin training with DeBERTa on external datasets:
model, results = run_multitask_training(model_name='microsoft/deberta-base')


In [13]:
# Cell 10: Hyperparameter Tuning with Optuna (Updated)
import optuna
from optuna.samplers import TPESampler
from optuna.pruners import MedianPruner

class MultiTaskHyperparameterTuner:
    """
    Hyperparameter tuning for multitask learning using Optuna

    The CSV at ``data_path`` is loaded and split once with
    ``prepare_multitask_data``. Every Optuna trial then trains a fresh
    ``MultiTaskTrainer`` on those same splits and is scored by the mean of the
    last recorded validation sentiment and emotion accuracies.

    Notes:
        - ``cv_folds`` is stored and printed but not otherwise used by this
          class: each trial is a single train/validate run, not a
          cross-validation.
        - A trial that raises is reported and scored ``0.0`` so the search
          keeps going.
    """

    def __init__(
        self,
        data_path: str,
        n_trials: int = 20,
        cv_folds: int = 3,
        model_name: str = "microsoft/deberta-base"
    ):
        """Validate the model choice and prepare the data shared by all trials.

        Args:
            data_path: CSV file read with ``pd.read_csv`` and handed to
                ``prepare_multitask_data``.
            n_trials: Number of Optuna trials run by :meth:`tune`.
            cv_folds: Stored for reference only (see class notes).
            model_name: Encoder name; must match the ``"name"`` of one of the
                entries in ``MODEL_CONFIGS``.

        Raises:
            ValueError: If ``model_name`` is not listed in ``MODEL_CONFIGS``.
        """
        self.data_path = data_path
        self.n_trials = n_trials
        self.cv_folds = cv_folds
        self.model_name = model_name

        # Validate model choice
        available_models = [config["name"] for config in MODEL_CONFIGS.values()]
        if model_name not in available_models:
            raise ValueError(f"Model must be one of: {available_models}")

        # Load and prepare data
        df = pd.read_csv(data_path)
        self.data_splits, self.sentiment_encoder, self.emotion_encoder = prepare_multitask_data(df)

        print(f"✅ Hyperparameter tuner initialized")
        print(f"Model: {model_name}")
        print(f"Data: {len(df)} samples")
        print(f"Trials: {n_trials}")
        print(f"CV Folds: {cv_folds}")

    def objective(self, trial):
        """Optuna objective function

        Samples one hyperparameter set, trains a model with it and returns the
        score to maximize.

        Args:
            trial: The Optuna trial used to sample hyperparameters.

        Returns:
            Mean of the last ``val_sentiment_accuracy`` and
            ``val_emotion_accuracy`` entries of the training history, or
            ``0.0`` if anything in the trial raised.
        """

        # Sample hyperparameters
        learning_rate = trial.suggest_float('learning_rate', 1e-5, 5e-4, log=True)
        batch_size = trial.suggest_categorical('batch_size', [8, 16, 32])
        alpha = trial.suggest_float('alpha', 0.3, 0.7)
        hidden_dropout = trial.suggest_float('hidden_dropout_prob', 0.05, 0.3)
        classifier_dropout = trial.suggest_float('classifier_dropout', 0.1, 0.5)
        weight_decay = trial.suggest_float('weight_decay', 0.01, 0.3)
        warmup_ratio = trial.suggest_float('warmup_ratio', 0.05, 0.2)
        num_epochs = trial.suggest_int('num_epochs', 3, 8)

        # Create configuration
        config = TrainingConfig(
            model_name=self.model_name,
            batch_size=batch_size,
            learning_rate=learning_rate,
            num_epochs=num_epochs,
            warmup_ratio=warmup_ratio,
            weight_decay=weight_decay,
            alpha=alpha,
            hidden_dropout_prob=hidden_dropout,
            classifier_dropout=classifier_dropout,
            adaptive_alpha=False,  # Disable for consistent comparison
            output_dir=f"./temp_trial_{trial.number}",
            save_strategy="no"  # Don't save during tuning
        )

        trainer = None
        try:
            # Initialize trainer
            trainer = MultiTaskTrainer(
                config=config,
                sentiment_num_classes=len(self.sentiment_encoder.classes_),
                emotion_num_classes=len(self.emotion_encoder.classes_)
            )

            # Every trial uses the same splits prepared in __init__.
            trainer.setup(self.data_splits, self.sentiment_encoder, self.emotion_encoder)

            # Train model
            history = trainer.train()

            # Calculate final combined score
            final_sentiment_acc = history['val_sentiment_accuracy'][-1]
            final_emotion_acc = history['val_emotion_accuracy'][-1]
            return (final_sentiment_acc + final_emotion_acc) / 2

        except Exception as e:
            # Best effort: report the failure and give the trial the lowest
            # score so the remaining trials still run.
            print(f"Trial {trial.number} failed: {e}")
            return 0.0

        finally:
            # Clean up on success AND failure, so a crashed trial (e.g. out of
            # memory) does not leave its allocations cached for the next one.
            trainer = None
            torch.cuda.empty_cache()

    def tune(self) -> optuna.Study:
        """Run hyperparameter optimization

        Returns:
            The finished ``optuna.Study`` after ``n_trials`` calls to
            :meth:`objective`; the best trial is also printed.
        """

        # NOTE(review): objective() never calls trial.report() or
        # trial.should_prune(), so the MedianPruner configured here has no
        # intermediate values to act on and no trial is pruned.
        study = optuna.create_study(
            direction='maximize',
            sampler=TPESampler(seed=42),
            pruner=MedianPruner(n_startup_trials=5, n_warmup_steps=3)
        )

        print(f"🔍 Starting hyperparameter optimization...")
        study.optimize(self.objective, n_trials=self.n_trials)

        # Print results
        print(f"\n🏆 Optimization completed!")
        print(f"Best trial: {study.best_trial.number}")
        print(f"Best score: {study.best_value:.4f}")
        print(f"Best parameters:")
        for key, value in study.best_params.items():
            print(f"  {key}: {value}")

        return study

def run_hyperparameter_tuning(
    data_path: str = "annotated_reddit_posts.csv",
    n_trials: int = 20,
    model_name: str = "microsoft/deberta-base",
    output_dir: str = "./multitask_model_optimized"
):
    """Run hyperparameter tuning and train final model with best params

    Args:
        data_path: CSV used by the tuner and forwarded to the final training
            run.
        n_trials: Number of Optuna trials.
        model_name: Encoder name forwarded to the tuner and to the final
            training run.
        output_dir: Directory for the final model and the pickled study.
            Defaults to the previously hard-coded location.

    Returns:
        Tuple ``(model, results, study)``: the two values returned by
        ``run_multitask_training`` plus the Optuna study.
    """
    import pickle

    # Run tuning
    tuner = MultiTaskHyperparameterTuner(
        data_path=data_path,
        n_trials=n_trials,
        model_name=model_name
    )

    study = tuner.tune()

    # Train final model with best parameters
    print(f"\n🚀 Training final model with best hyperparameters...")

    best_params = study.best_params
    # The executed training cell passes the CSV as `reddit_data_path`, so the
    # same keyword is used here.
    # NOTE(review): confirm run_multitask_training accepts `config_overrides`.
    model, results = run_multitask_training(
        reddit_data_path=data_path,
        model_name=model_name,
        output_dir=output_dir,
        config_overrides=best_params
    )

    # Save tuning results next to the final model; make sure the directory
    # exists rather than relying on the training run having created it.
    os.makedirs(output_dir, exist_ok=True)
    study_file = os.path.join(output_dir, "hyperparameter_study.pkl")
    with open(study_file, 'wb') as f:
        pickle.dump(study, f)

    return model, results, study

# Confirmation that this cell ran: the tuner class and run_hyperparameter_tuning
# are now defined (nothing is tuned or trained yet).
print("✅ Hyperparameter tuning functions defined!")

✅ Hyperparameter tuning functions defined!


# DeBERTa Training

In [31]:
# Cell 11: Run Training
# Train DeBERTa on the external datasets (SST-2 for sentiment, GoEmotions for
# emotion) and evaluate on the annotated Reddit posts passed below.
# NOTE(review): the output saved with this cell stops after "Setting up
# training..." with an InvalidParameterError from compute_class_weight
# ('classes' was given as a list instead of a numpy.ndarray). That call is not
# in this cell (presumably in the trainer setup code); it has to be fixed
# there before `model_deberta` / `results_deberta` exist.
print("Starting Multitask Training with DeBERTa on External Datasets...")
model_deberta, results_deberta = run_multitask_training(
    reddit_data_path="annotated_reddit_posts.csv",
    model_name="microsoft/deberta-base",
    output_dir="./multitask_model_deberta_external",
    max_external_samples=15000  # Use more external data for better training
)

Starting Multitask Training with DeBERTa on External Datasets...
🚀 Starting Multitask Learning Pipeline
📋 Training Strategy:
  • Train on: SST-2 (sentiment) + GoEmotions (emotion)
  • Evaluate on: Reddit Note 7 data

1️⃣ Loading external datasets for training...
📁 Loading external datasets for training...
✅ SST-2 dataset loaded: 67349 train, 872 val
✅ GoEmotions dataset loaded: 43410 train, 5426 val

2️⃣ Preparing external data for multitask training...
🔄 Preparing external datasets for multitask training...


Filter: 100%|██████████| 43410/43410 [00:00<00:00, 199127.19 examples/s]
Filter: 100%|██████████| 5426/5426 [00:00<00:00, 175026.10 examples/s]


✅ External data prepared:
  Train samples: 10896
  Validation samples: 2725
  Sentiment classes: [np.str_('Negative'), np.str_('Neutral'), np.str_('Positive')]
  Emotion classes: [np.str_('Anger'), np.str_('Fear'), np.str_('Joy'), np.str_('No Emotion'), np.str_('Sadness'), np.str_('Surprise')]

📈 Training set class distribution:
  Sentiment 'Negative': 4888 samples
  Sentiment 'Neutral': 650 samples
  Sentiment 'Positive': 5358 samples
  Emotion 'Anger': 3279 samples
  Emotion 'Fear': 1794 samples
  Emotion 'Joy': 1238 samples
  Emotion 'No Emotion': 1735 samples
  Emotion 'Sadness': 2091 samples
  Emotion 'Surprise': 759 samples

3️⃣ Loading Reddit data for evaluation...
Loaded 95 Reddit samples for evaluation
🔄 Preparing Reddit data for evaluation...
✅ Reddit evaluation data prepared: 95 samples

4️⃣ Initializing multitask trainer...

5️⃣ Setting up training...


InvalidParameterError: The 'classes' parameter of compute_class_weight must be an instance of 'numpy.ndarray'. Got ['2_0', '1_1', '1_5', '1_0', '0_2', '1_2', '0_0', '1_4', '1_3', '0_5', '0_4', '2_2', '2_1', '2_4', '2_3', '0_1', '0_3', '2_5'] instead.

In [None]:
# Compare results
# Collect the DeBERTa summary lines first, then emit them in a single call.
_deberta_report = (
    "\n📊 Model Comparison:",
    "=" * 50,
    "DeBERTa Results:",
    f"  Sentiment Accuracy: {results_deberta['sentiment']['accuracy']:.4f}",
    f"  Sentiment F1 (Macro): {results_deberta['sentiment']['f1_macro']:.4f}",
    f"  Emotion Accuracy: {results_deberta['emotion']['accuracy']:.4f}",
    f"  Emotion F1 (Macro): {results_deberta['emotion']['f1_macro']:.4f}",
    f"  Combined Score: {results_deberta['combined']['average_f1']:.4f}",
)
print(*_deberta_report, sep="\n")

In [None]:
# Test the trained model
# NOTE(review): test_multitask_model() looks for "./multitask_model/final_model",
# while the DeBERTa training cell writes to "./multitask_model_deberta_external".
# Confirm the path; otherwise this call only prints the "Model not found" warning.
test_multitask_model()

In [None]:
# Training with BERTweet (optimized for social media text)
print("\n🚀 Starting Multitask Training with BERTweet...")
# Same keyword as the DeBERTa training cell: the CSV is passed as
# `reddit_data_path`.
model_bertweet, results_bertweet = run_multitask_training(
    reddit_data_path="annotated_reddit_posts.csv",
    model_name="vinai/bertweet-base",
    output_dir="./multitask_model_bertweet"
)


print("\nBERTweet Results:")
print(f"  Sentiment Accuracy: {results_bertweet['sentiment']['accuracy']:.4f}")
print(f"  Sentiment F1 (Macro): {results_bertweet['sentiment']['f1_macro']:.4f}")
print(f"  Emotion Accuracy: {results_bertweet['emotion']['accuracy']:.4f}")
print(f"  Emotion F1 (Macro): {results_bertweet['emotion']['f1_macro']:.4f}")
print(f"  Combined Score: {results_bertweet['combined']['average_f1']:.4f}")

# Report which model has the higher combined score (average_f1); a tie goes to
# BERTweet. Requires `results_deberta` from the DeBERTa training cell.
better_model = "deberta" if results_deberta['combined']['average_f1'] > results_bertweet['combined']['average_f1'] else "bertweet"
print(f"\n🏆 Better performing model: {better_model}")

# Test the trained model
# NOTE(review): this uses test_multitask_model()'s default model location, not
# the output_dir of either training run above — confirm the intended path.
test_multitask_model()

In [None]:
# Cell 12: Optional - Run Hyperparameter Tuning
# Uncomment to run hyperparameter optimization for either model

# # Tune DeBERTa
# print("🔍 Starting Hyperparameter Optimization for DeBERTa...")
# optimized_model_deberta, optimized_results_deberta, study_deberta = run_hyperparameter_tuning(
#     data_path="annotated_reddit_posts.csv",
#     n_trials=15,
#     model_name="microsoft/deberta-base"
# )

# # Tune BERTweet
# print("🔍 Starting Hyperparameter Optimization for BERTweet...")
# optimized_model_bertweet, optimized_results_bertweet, study_bertweet = run_hyperparameter_tuning(
#     data_path="annotated_reddit_posts.csv",
#     n_trials=15,
#     model_name="vinai/bertweet-base"
# )

# Closing banner and suggested next steps for the reader; purely
# informational, nothing is computed here.
print("✅ Multitask Learning Framework Complete!")
print("""
🎯 What you can do now:
1. Use the trained models for inference
2. Compare DeBERTa vs BERTweet performance
3. Fine-tune on additional data  
4. Adjust loss weighting (alpha parameter)
5. Try different attention mechanisms
""")