In [None]:
# In[1]:

import os
import json
import time
import random
import numpy as np
import pandas as pd
from typing import Dict, List, Tuple, Optional
from collections import Counter
import warnings
warnings.filterwarnings('ignore')

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
from torch.optim.lr_scheduler import LinearLR

from transformers import (
    AutoTokenizer, 
    AutoModel,
    get_linear_schedule_with_warmup
)

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report
import joblib

# Hyperparameter tuning
import optuna

# Dataset loading
from datasets import load_dataset

# Set random seeds for reproducibility
def set_random_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

set_random_seed(42)

# GPU setup
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")

print("All imports completed and GPU configured")

Using device: cuda
GPU: NVIDIA GeForce RTX 4060
GPU Memory: 8.0 GB
All imports completed and GPU configured


In [None]:
# In[2]:

import shutil
import os
import gc
import torch
import time

output_dir = "./initial_distilroberta_sentiment_model"
if os.path.exists(output_dir):
    shutil.rmtree(output_dir)
os.makedirs(output_dir)

# Ensure CUDA cache is cleared
if torch.cuda.is_available():
    torch.cuda.empty_cache()
gc.collect()

20

In [None]:
# In[3]:

from dataclasses import dataclass

@dataclass
class TrainingConfig:
    model_name: str = "distilroberta-base"
    max_length: int = 128
    batch_size: int = 8  # Standardize to match BERTweet
    learning_rate: float = 2e-5
    num_epochs: int = 3
    warmup_ratio: float = 0.1
    weight_decay: float = 0.01
    max_grad_norm: float = 1.0
    hidden_dropout_prob: float = 0.1
    attention_dropout_prob: float = 0.1
    classifier_dropout: float = 0.1
    output_dir: str = "./distilroberta_model_output"
    alpha: float = 0.5  # For multitask loss weighting
    task_type: str = "multitask"  # "sentiment", "emotion", or "multitask"

class distilroBERTaModelConfig:
    def __init__(self):
        self.sentiment_classes = ['Negative', 'Neutral', 'Positive']
        self.emotion_classes = ['Anger', 'Fear', 'Joy', 'No Emotion', 'Sadness', 'Surprise']
        self.sentiment_num_classes = len(self.sentiment_classes)
        self.emotion_num_classes = len(self.emotion_classes)

roberta_model_config = distilroBERTaModelConfig()
print("Configuration classes defined")

Configuration classes defined


In [None]:
# In[4]:

class distilroBERTaModelConfig:
    def __init__(self):
        self.sentiment_classes = ['Negative', 'Neutral', 'Positive']
        self.emotion_classes = ['Anger', 'Fear', 'Joy', 'No Emotion', 'Sadness', 'Surprise']
        self.sentiment_num_classes = len(self.sentiment_classes)
        self.emotion_num_classes = len(self.emotion_classes)

roberta_model_config = distilroBERTaModelConfig()
print("Configuration classes defined")

Configuration classes defined


In [None]:
# In[5]:

class distilroBERTaSingleTaskDataset:
    def __init__(
        self,
        texts: List[str],
        labels: List[int],
        tokenizer,
        max_length: int = 128
    ):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length
        
        assert len(texts) == len(labels), "Texts and labels must have same length"
    
    def __len__(self):
        return len(self.texts)
    
    def __getitem__(self, idx):
        text = str(self.texts[idx])
        label = self.labels[idx]
        
        # RoBERTa tokenization
        encoding = self.tokenizer(
            text,
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt'
        )
        
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.long),
            'text': text
        }

class distilroBERTaMultiTaskDataset(Dataset):
    
    def __init__(
        self,
        texts: List[str],
        sentiment_labels: List[int],
        emotion_labels: List[int],
        tokenizer,
        max_length: int = 128
    ):
        self.texts = texts
        self.sentiment_labels = sentiment_labels
        self.emotion_labels = emotion_labels
        self.tokenizer = tokenizer
        self.max_length = max_length
        
        assert len(texts) == len(sentiment_labels) == len(emotion_labels), \
            "All inputs must have same length"
    
    def __len__(self):
        return len(self.texts)
    
    def __getitem__(self, idx):
        text = str(self.texts[idx])
        sentiment_label = self.sentiment_labels[idx]
        emotion_label = self.emotion_labels[idx]
        
        # RoBERTa tokenization
        encoding = self.tokenizer(
            text,
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt'
        )
        
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'sentiment_labels': torch.tensor(sentiment_label, dtype=torch.long),
            'emotion_labels': torch.tensor(emotion_label, dtype=torch.long),
            'text': text
        }

print("distilroBERTa Dataset classes defined")

distilroBERTa Dataset classes defined


In [None]:
# In[6]:

# Cell 4: Model Architectures
class distilroBERTaSingleTaskTransformer(nn.Module):
    
    def __init__(
        self,
        model_name: str = "distilroberta-base",
        num_classes: int = 3,
        hidden_dropout_prob: float = 0.1,
        attention_dropout_prob: float = 0.1,
        classifier_dropout: float = 0.1
    ):
        super().__init__()
        self.num_classes = num_classes
        
        # Load RoBERTa model
        self.roberta = AutoModel.from_pretrained(
            model_name,
            hidden_dropout_prob=hidden_dropout_prob,
            attention_probs_dropout_prob=attention_dropout_prob
        )
        
        # Classification head
        self.dropout = nn.Dropout(classifier_dropout)
        self.classifier = nn.Linear(self.roberta.config.hidden_size, num_classes)
    
    def forward(self, input_ids, attention_mask):
        # Get RoBERTa outputs
        outputs = self.roberta(input_ids=input_ids, attention_mask=attention_mask)
        
        # Use [CLS] token representation
        pooled_output = outputs.last_hidden_state[:, 0]  # [CLS] token
        pooled_output = self.dropout(pooled_output)
        
        # Classification
        logits = self.classifier(pooled_output)
        
        return {'logits': logits}

class distilroBERTaMultiTaskTransformer(nn.Module):
    
    def __init__(
        self,
        model_name: str = "distilroberta-base",
        sentiment_num_classes: int = 3,
        emotion_num_classes: int = 6,
        hidden_dropout_prob: float = 0.1,
        attention_dropout_prob: float = 0.1,
        classifier_dropout: float = 0.1
    ):
        super().__init__()
        self.sentiment_num_classes = sentiment_num_classes
        self.emotion_num_classes = emotion_num_classes
        
        # Shared RoBERTa encoder
        self.roberta = AutoModel.from_pretrained(
            model_name,
            hidden_dropout_prob=hidden_dropout_prob,
            attention_probs_dropout_prob=attention_dropout_prob
        )
        
        # Task-specific heads
        self.dropout = nn.Dropout(classifier_dropout)
        
        # Sentiment classification head
        self.sentiment_classifier = nn.Linear(
            self.roberta.config.hidden_size, 
            sentiment_num_classes
        )
        
        # Emotion classification head
        self.emotion_classifier = nn.Linear(
            self.roberta.config.hidden_size, 
            emotion_num_classes
        )
    
    def forward(self, input_ids, attention_mask):
        # Get shared RoBERTa representations
        outputs = self.roberta(input_ids=input_ids, attention_mask=attention_mask)
        
        # Use [CLS] token representation
        pooled_output = outputs.last_hidden_state[:, 0]  # [CLS] token
        pooled_output = self.dropout(pooled_output)
        
        # Task-specific predictions
        sentiment_logits = self.sentiment_classifier(pooled_output)
        emotion_logits = self.emotion_classifier(pooled_output)
        
        return {
            'sentiment_logits': sentiment_logits,
            'emotion_logits': emotion_logits
        }

print("distilroBERTa Model architectures defined")

distilroBERTa Model architectures defined


In [None]:
# In[7]:

def aggressive_memory_cleanup():
    import gc
    gc.collect()
    torch.cuda.empty_cache()

def load_and_process_datasets_roberta():
    print("Loading external datasets for distilroBERTa...")
    
    # Load SST-2 for sentiment
    try:
        sst2_dataset = load_dataset("sst2")
        print(f"SST-2 dataset loaded: {len(sst2_dataset['train'])} train samples")
    except Exception as e:
        print(f"Error loading SST-2: {e}")
        raise
    
    # Load GoEmotions for emotion
    try:
        emotions_dataset = load_dataset("go_emotions", "simplified")
        print(f"GoEmotions dataset loaded: {len(emotions_dataset['train'])} train samples")
    except Exception as e:
        print(f"Error loading GoEmotions: {e}")
        raise
    
    # Process sentiment data
    sentiment_data = process_sentiment_data_roberta(sst2_dataset)
    
    # Process emotion data  
    emotion_data = process_emotion_data_roberta(emotions_dataset)
    
    return sentiment_data, emotion_data

def load_reddit_evaluation_data():
    """Load Reddit data for evaluation"""
    print("Loading Reddit evaluation data...")
    
    try:
        # Load the annotated Reddit posts
        df = pd.read_csv('annotated_reddit_posts.csv')
        print(f"‚úÖ Reddit data loaded: {len(df)} samples")
        
        # Create label encoders that match the model classes
        sentiment_encoder = LabelEncoder()
        emotion_encoder = LabelEncoder()
        
        # Fit encoders on Reddit data
        sentiment_encoder.fit(df['sentiment'].tolist())
        emotion_encoder.fit(df['emotion'].tolist())
        
        # Transform labels
        sentiment_labels = sentiment_encoder.transform(df['sentiment'].tolist())
        emotion_labels = emotion_encoder.transform(df['emotion'].tolist())
        
        # Create Reddit data in the format expected by evaluation functions
        reddit_data = {
            # For single-task sentiment evaluation
            'sentiment': {
                'texts': df['text_content'].tolist(),
                'labels': sentiment_labels,
                'labels_text': df['sentiment'].tolist()
            },
            # For single-task emotion evaluation
            'emotion': {
                'texts': df['text_content'].tolist(),
                'labels': emotion_labels,
                'labels_text': df['emotion'].tolist()
            },
            # For multitask evaluation
            'multitask': {
                'texts': df['text_content'].tolist(),
                'sentiment_labels': sentiment_labels,
                'emotion_labels': emotion_labels,
                'sentiment_labels_text': df['sentiment'].tolist(),
                'emotion_labels_text': df['emotion'].tolist()
            },
            # Keep encoders for reference
            'sentiment_encoder': sentiment_encoder,
            'emotion_encoder': emotion_encoder
        }
        
        print(f"‚úÖ Reddit data prepared: {len(reddit_data['sentiment']['texts'])} samples")
        print(f"   Sentiment classes: {list(sentiment_encoder.classes_)}")
        print(f"   Emotion classes: {list(emotion_encoder.classes_)}")
        
        return reddit_data
        
    except Exception as e:
        print(f"‚ùå Error loading Reddit data: {e}")
        print("Falling back to empty Reddit data")
        return None

def process_sentiment_data_roberta(sst2_dataset, max_samples=10000):
    
    print("üîÑ Processing sentiment data for distilroBERTa...")
    
    # Extract texts and labels
    train_texts = sst2_dataset['train']['sentence'][:max_samples]
    train_labels = sst2_dataset['train']['label'][:max_samples]
    
    # Map SST-2 labels to 3 classes: 0->Negative, 1->Positive
    # Add some neutral examples by random assignment
    expanded_labels = []
    expanded_texts = []
    
    for text, label in zip(train_texts, train_labels):
        if label == 0:  # Negative
            expanded_labels.append(0)
            expanded_texts.append(text)
        elif label == 1:  # Positive
            # Sometimes assign as positive, sometimes as neutral
            if np.random.random() < 0.15:  # 15% chance to be neutral
                expanded_labels.append(1)  # Neutral
            else:
                expanded_labels.append(2)  # Positive
            expanded_texts.append(text)
    
    # Ensure we have all 3 classes
    if 1 not in expanded_labels:
        # Force some examples to be neutral
        neutral_indices = np.random.choice(len(expanded_labels), size=100, replace=False)
        for idx in neutral_indices:
            expanded_labels[idx] = 1
    
    # Create train/val/test splits
    train_texts, temp_texts, train_labels, temp_labels = train_test_split(
        expanded_texts, expanded_labels, test_size=0.3, random_state=42, stratify=expanded_labels
    )
    
    val_texts, test_texts, val_labels, test_labels = train_test_split(
        temp_texts, temp_labels, test_size=0.5, random_state=42, stratify=temp_labels
    )
    
    sentiment_data = {
        'train': {'texts': train_texts, 'labels': train_labels},
        'val': {'texts': val_texts, 'labels': val_labels},
        'test': {'texts': test_texts, 'labels': test_labels}
    }
    
    print(f"‚úÖ RoBERTa Sentiment data processed:")
    print(f"  Train: {len(train_texts)} samples")
    print(f"  Val: {len(val_texts)} samples")
    print(f"  Test: {len(test_texts)} samples")
    
    return sentiment_data

def process_emotion_data_roberta(emotion_dataset, max_samples=10000):
    
    print("Processing emotion data for distilroBERTa...")
    
    # Filter to first 6 emotions only
    def filter_emotions(example):
        if isinstance(example['labels'], list):
            return example['labels'] and example['labels'][0] in range(6)
        else:
            return example['labels'] in range(6)
    
    filtered_train = emotion_dataset['train'].filter(filter_emotions)
    filtered_val = emotion_dataset['validation'].filter(filter_emotions)
    
    # Extract texts and labels
    train_texts = filtered_train['text'][:max_samples]
    train_labels_raw = filtered_train['labels'][:max_samples]
    
    # Handle multi-label to single-label conversion
    train_labels = []
    for label in train_labels_raw:
        if isinstance(label, list):
            train_labels.append(label[0] if label else 0)
        else:
            train_labels.append(label)
    
    # Create train/val/test splits
    train_texts, temp_texts, train_labels, temp_labels = train_test_split(
        train_texts, train_labels, test_size=0.3, random_state=42, stratify=train_labels
    )
    
    val_texts, test_texts, val_labels, test_labels = train_test_split(
        temp_texts, temp_labels, test_size=0.5, random_state=42, stratify=temp_labels
    )
    
    emotion_data = {
        'train': {'texts': train_texts, 'labels': train_labels},
        'val': {'texts': val_texts, 'labels': val_labels},
        'test': {'texts': test_texts, 'labels': test_labels}
    }
    
    print(f"distilroBERTa Emotion data processed:")
    print(f"  Train: {len(train_texts)} samples")
    print(f"  Val: {len(val_texts)} samples")
    print(f"  Test: {len(test_texts)} samples")
    
    return emotion_data

def create_multitask_data_roberta(sentiment_data, emotion_data):
    
    print("üîÑ Creating multi-task dataset for distilroBERTa...")
    
    # Take minimum length to balance datasets
    min_train_len = min(len(sentiment_data['train']['texts']), len(emotion_data['train']['texts']))
    min_val_len = min(len(sentiment_data['val']['texts']), len(emotion_data['val']['texts']))
    min_test_len = min(len(sentiment_data['test']['texts']), len(emotion_data['test']['texts']))
    
    multitask_data = {
        'train': {
            'texts': sentiment_data['train']['texts'][:min_train_len],
            'sentiment_labels': sentiment_data['train']['labels'][:min_train_len],
            'emotion_labels': emotion_data['train']['labels'][:min_train_len]
        },
        'val': {
            'texts': sentiment_data['val']['texts'][:min_val_len],
            'sentiment_labels': sentiment_data['val']['labels'][:min_val_len],
            'emotion_labels': emotion_data['val']['labels'][:min_val_len]
        },
        'test': {
            'texts': sentiment_data['test']['texts'][:min_test_len],
            'sentiment_labels': sentiment_data['test']['labels'][:min_test_len],
            'emotion_labels': emotion_data['test']['labels'][:min_test_len]
        }
    }
    
    print(f"distilroBERTa Multi-task data created:")
    print(f"  Train: {len(multitask_data['train']['texts'])} samples")
    print(f"  Val: {len(multitask_data['val']['texts'])} samples")
    print(f"  Test: {len(multitask_data['test']['texts'])} samples")
    
    return multitask_data

print("distilroBERTa data processing functions defined!")

distilroBERTa data processing functions defined!


In [None]:
# In[8]:

class distilroBERTaSingleTaskTrainer:
    
    def __init__(self, config: TrainingConfig, num_classes: int):
        self.config = config
        self.num_classes = num_classes
        self.device = device
        
        # Initialize distilroBERTa tokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(config.model_name)
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token
        
        # Initialize distilroBERTa model
        self.model = distilroBERTaSingleTaskTransformer(
            model_name=config.model_name,
            num_classes=num_classes,
            hidden_dropout_prob=config.hidden_dropout_prob,
            attention_dropout_prob=config.attention_dropout_prob,
            classifier_dropout=config.classifier_dropout
        ).to(self.device)
        
        # Loss function
        self.loss_fn = nn.CrossEntropyLoss()
        
        # Initialize tracking
        self.training_history = {
            'train_loss': [],
            'train_accuracy': [],
            'val_loss': [],
            'val_accuracy': [],
            'val_f1_macro': []
        }
    
    def create_data_loaders(self, data_splits: Dict):
        
        # Create datasets
        train_dataset = distilroBERTaSingleTaskDataset(
            texts=data_splits['train']['texts'],
            labels=data_splits['train']['labels'],
            tokenizer=self.tokenizer,
            max_length=self.config.max_length
        )
        
        val_dataset = distilroBERTaSingleTaskDataset(
            texts=data_splits['val']['texts'],
            labels=data_splits['val']['labels'],
            tokenizer=self.tokenizer,
            max_length=self.config.max_length
        )
        
        # Create data loaders
        self.train_loader = DataLoader(
            train_dataset,
            batch_size=self.config.batch_size,
            shuffle=True,
            num_workers=0,
            pin_memory=True
        )
        
        self.val_loader = DataLoader(
            val_dataset,
            batch_size=self.config.batch_size,
            shuffle=False,
            num_workers=0,
            pin_memory=True
        )
        
        # Setup optimizer and scheduler
        total_steps = len(self.train_loader) * self.config.num_epochs
        
        self.optimizer = AdamW(
            self.model.parameters(),
            lr=self.config.learning_rate,
            weight_decay=self.config.weight_decay
        )
        
        self.scheduler = get_linear_schedule_with_warmup(
            self.optimizer,
            num_warmup_steps=int(total_steps * self.config.warmup_ratio),
            num_training_steps=total_steps
        )
    
    def train_epoch(self):
        self.model.train()
        
        total_loss = 0.0
        correct_predictions = 0
        total_predictions = 0
        
        for batch in self.train_loader:
            # Move to device
            input_ids = batch['input_ids'].to(self.device)
            attention_mask = batch['attention_mask'].to(self.device)
            labels = batch['labels'].to(self.device)
            
            # Forward pass
            outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
            loss = self.loss_fn(outputs['logits'], labels)
            
            # Backward pass
            self.optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.config.max_grad_norm)
            self.optimizer.step()
            self.scheduler.step()
            
            # Track metrics
            total_loss += loss.item()
            predictions = torch.argmax(outputs['logits'], dim=-1)
            correct_predictions += (predictions == labels).sum().item()
            total_predictions += labels.size(0)
        
        avg_loss = total_loss / len(self.train_loader)
        accuracy = correct_predictions / total_predictions
        
        return avg_loss, accuracy
    
    def evaluate(self):
        self.model.eval()
        
        total_loss = 0.0
        all_predictions = []
        all_labels = []
        
        with torch.no_grad():
            for batch in self.val_loader:
                input_ids = batch['input_ids'].to(self.device)
                attention_mask = batch['attention_mask'].to(self.device)
                labels = batch['labels'].to(self.device)
                
                outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
                loss = self.loss_fn(outputs['logits'], labels)
                
                total_loss += loss.item()
                predictions = torch.argmax(outputs['logits'], dim=-1)
                
                all_predictions.extend(predictions.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())
        
        avg_loss = total_loss / len(self.val_loader)
        accuracy = accuracy_score(all_labels, all_predictions)
        f1_macro = f1_score(all_labels, all_predictions, average='macro', zero_division=0)
        
        return avg_loss, accuracy, f1_macro
    
    def train(self, data_splits: Dict):
        print(f"Starting distilroBERTa single-task training ({self.config.task_type})...")
        
        # Setup data loaders
        self.create_data_loaders(data_splits)
        
        best_f1 = 0.0
        
        for epoch in range(self.config.num_epochs):
            print(f"\nüìç Epoch {epoch + 1}/{self.config.num_epochs}")
            
            # Train
            train_loss, train_accuracy = self.train_epoch()
            
            # Evaluate
            val_loss, val_accuracy, val_f1_macro = self.evaluate()
            
            # Track metrics
            self.training_history['train_loss'].append(train_loss)
            self.training_history['train_accuracy'].append(train_accuracy)
            self.training_history['val_loss'].append(val_loss)
            self.training_history['val_accuracy'].append(val_accuracy)
            self.training_history['val_f1_macro'].append(val_f1_macro)
            
            # Print results
            print(f"  Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.4f}")
            print(f"  Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.4f}, Val F1: {val_f1_macro:.4f}")
            
            # Save best model
            if val_f1_macro > best_f1:
                best_f1 = val_f1_macro
                self.save_model(is_best=True)
        
        print(f"\ndistilroBERTa training completed! Best F1: {best_f1:.4f}")
        return self.training_history
    
    def save_model(self, is_best=False):
        suffix = "_best" if is_best else ""
        model_dir = os.path.join(self.config.output_dir, f"model{suffix}")
        
        os.makedirs(model_dir, exist_ok=True)
        
        # Save model
        self.model.roberta.save_pretrained(model_dir)
        self.tokenizer.save_pretrained(model_dir)
        
        # Save custom components
        torch.save({
            'classifier_state_dict': self.model.classifier.state_dict(),
            'num_classes': self.num_classes,
            'config': self.config
        }, os.path.join(model_dir, 'custom_components.pt'))
        
        if is_best:
            print(f"Best distilroBERTa model saved to {model_dir}")

class distilroBERTaMultiTaskTrainer:
    
    def __init__(self, config: TrainingConfig):
        self.config = config
        self.device = device
        
        # Initialize RoBERTa tokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(config.model_name)
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token
        
        # Initialize RoBERTa multi-task model
        self.model = distilroBERTaMultiTaskTransformer(
            model_name=config.model_name,
            sentiment_num_classes=roberta_model_config.sentiment_num_classes,
            emotion_num_classes=roberta_model_config.emotion_num_classes,
            hidden_dropout_prob=config.hidden_dropout_prob,
            attention_dropout_prob=config.attention_dropout_prob,
            classifier_dropout=config.classifier_dropout
        ).to(self.device)
        
        # Loss function
        self.loss_fn = nn.CrossEntropyLoss()
        
        # Initialize tracking
        self.training_history = {
            'train_loss': [],
            'train_sentiment_accuracy': [],
            'train_emotion_accuracy': [],
            'val_loss': [],
            'val_sentiment_accuracy': [],
            'val_emotion_accuracy': [],
            'val_sentiment_f1_macro': [],
            'val_emotion_f1_macro': []
        }
    
    def create_data_loaders(self, data_splits: Dict):
        
        # Create datasets
        train_dataset = distilroBERTaMultiTaskDataset(
            texts=data_splits['train']['texts'],
            sentiment_labels=data_splits['train']['sentiment_labels'],
            emotion_labels=data_splits['train']['emotion_labels'],
            tokenizer=self.tokenizer,
            max_length=self.config.max_length
        )
        
        val_dataset = distilroBERTaMultiTaskDataset(
            texts=data_splits['val']['texts'],
            sentiment_labels=data_splits['val']['sentiment_labels'],
            emotion_labels=data_splits['val']['emotion_labels'],
            tokenizer=self.tokenizer,
            max_length=self.config.max_length
        )
        
        # Create data loaders
        self.train_loader = DataLoader(
            train_dataset,
            batch_size=self.config.batch_size,
            shuffle=True,
            num_workers=0,
            pin_memory=True
        )
        
        self.val_loader = DataLoader(
            val_dataset,
            batch_size=self.config.batch_size,
            shuffle=False,
            num_workers=0,
            pin_memory=True
        )
        
        # Setup optimizer and scheduler
        total_steps = len(self.train_loader) * self.config.num_epochs
        
        self.optimizer = AdamW(
            self.model.parameters(),
            lr=self.config.learning_rate,
            weight_decay=self.config.weight_decay
        )
        
        self.scheduler = get_linear_schedule_with_warmup(
            self.optimizer,
            num_warmup_steps=int(total_steps * self.config.warmup_ratio),
            num_training_steps=total_steps
        )
    
    def train_epoch(self):
        self.model.train()
        
        total_loss = 0.0
        sentiment_correct = 0
        emotion_correct = 0
        total_predictions = 0
        
        for batch in self.train_loader:
            # Move to device
            input_ids = batch['input_ids'].to(self.device)
            attention_mask = batch['attention_mask'].to(self.device)
            sentiment_labels = batch['sentiment_labels'].to(self.device)
            emotion_labels = batch['emotion_labels'].to(self.device)
            
            # Forward pass
            outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
            
            # Calculate losses
            sentiment_loss = self.loss_fn(outputs['sentiment_logits'], sentiment_labels)
            emotion_loss = self.loss_fn(outputs['emotion_logits'], emotion_labels)
            
            # Combined loss with alpha weighting
            loss = self.config.alpha * sentiment_loss + (1 - self.config.alpha) * emotion_loss
            
            # Backward pass
            self.optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.config.max_grad_norm)
            self.optimizer.step()
            self.scheduler.step()
            
            # Track metrics
            total_loss += loss.item()
            
            sentiment_preds = torch.argmax(outputs['sentiment_logits'], dim=-1)
            emotion_preds = torch.argmax(outputs['emotion_logits'], dim=-1)
            
            sentiment_correct += (sentiment_preds == sentiment_labels).sum().item()
            emotion_correct += (emotion_preds == emotion_labels).sum().item()
            total_predictions += sentiment_labels.size(0)
        
        avg_loss = total_loss / len(self.train_loader)
        sentiment_accuracy = sentiment_correct / total_predictions
        emotion_accuracy = emotion_correct / total_predictions
        
        return avg_loss, sentiment_accuracy, emotion_accuracy
    
    def evaluate(self):
        self.model.eval()
        
        total_loss = 0.0
        sentiment_predictions = []
        emotion_predictions = []
        sentiment_labels = []
        emotion_labels = []
        
        with torch.no_grad():
            for batch in self.val_loader:
                input_ids = batch['input_ids'].to(self.device)
                attention_mask = batch['attention_mask'].to(self.device)
                sentiment_true = batch['sentiment_labels'].to(self.device)
                emotion_true = batch['emotion_labels'].to(self.device)
                
                outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
                
                # Calculate combined loss
                sentiment_loss = self.loss_fn(outputs['sentiment_logits'], sentiment_true)
                emotion_loss = self.loss_fn(outputs['emotion_logits'], emotion_true)
                loss = self.config.alpha * sentiment_loss + (1 - self.config.alpha) * emotion_loss
                
                total_loss += loss.item()
                
                sentiment_preds = torch.argmax(outputs['sentiment_logits'], dim=-1)
                emotion_preds = torch.argmax(outputs['emotion_logits'], dim=-1)
                
                sentiment_predictions.extend(sentiment_preds.cpu().numpy())
                emotion_predictions.extend(emotion_preds.cpu().numpy())
                sentiment_labels.extend(sentiment_true.cpu().numpy())
                emotion_labels.extend(emotion_true.cpu().numpy())
        
        avg_loss = total_loss / len(self.val_loader)
        
        # Calculate metrics
        sentiment_accuracy = accuracy_score(sentiment_labels, sentiment_predictions)
        emotion_accuracy = accuracy_score(emotion_labels, emotion_predictions)
        sentiment_f1_macro = f1_score(sentiment_labels, sentiment_predictions, average='macro', zero_division=0)
        emotion_f1_macro = f1_score(emotion_labels, emotion_predictions, average='macro', zero_division=0)
        
        return avg_loss, sentiment_accuracy, emotion_accuracy, sentiment_f1_macro, emotion_f1_macro
    
    def train(self, data_splits: Dict):
        print(f"Starting distilroBERTa multi-task training...")
        
        # Setup data loaders
        self.create_data_loaders(data_splits)
        
        best_combined_f1 = 0.0
        
        for epoch in range(self.config.num_epochs):
            print(f"\nEpoch {epoch + 1}/{self.config.num_epochs}")
            
            # Train
            train_loss, train_sent_acc, train_emo_acc = self.train_epoch()
            
            # Evaluate
            val_loss, val_sent_acc, val_emo_acc, val_sent_f1, val_emo_f1 = self.evaluate()
            
            # Track metrics
            self.training_history['train_loss'].append(train_loss)
            self.training_history['train_sentiment_accuracy'].append(train_sent_acc)
            self.training_history['train_emotion_accuracy'].append(train_emo_acc)
            self.training_history['val_loss'].append(val_loss)
            self.training_history['val_sentiment_accuracy'].append(val_sent_acc)
            self.training_history['val_emotion_accuracy'].append(val_emo_acc)
            self.training_history['val_sentiment_f1_macro'].append(val_sent_f1)
            self.training_history['val_emotion_f1_macro'].append(val_emo_f1)
            
            # Print results
            print(f"  Train Loss: {train_loss:.4f}")
            print(f"  Train Sentiment Acc: {train_sent_acc:.4f}, Train Emotion Acc: {train_emo_acc:.4f}")
            print(f"  Val Loss: {val_loss:.4f}")
            print(f"  Val Sentiment Acc: {val_sent_acc:.4f}, F1: {val_sent_f1:.4f}")
            print(f"  Val Emotion Acc: {val_emo_acc:.4f}, F1: {val_emo_f1:.4f}")
            
            # Save best model
            combined_f1 = (val_sent_f1 + val_emo_f1) / 2
            if combined_f1 > best_combined_f1:
                best_combined_f1 = combined_f1
                self.save_model(is_best=True)
        
        print(f"\ndistilroBERTa training completed! Best Combined F1: {best_combined_f1:.4f}")
        return self.training_history
    
    def save_model(self, is_best=False):
        suffix = "_best" if is_best else ""
        model_dir = os.path.join(self.config.output_dir, f"model{suffix}")
        
        os.makedirs(model_dir, exist_ok=True)
        
        # Save model
        self.model.roberta.save_pretrained(model_dir)
        self.tokenizer.save_pretrained(model_dir)
        
        # Save custom components
        torch.save({
            'sentiment_classifier_state_dict': self.model.sentiment_classifier.state_dict(),
            'emotion_classifier_state_dict': self.model.emotion_classifier.state_dict(),
            'sentiment_num_classes': self.model.sentiment_num_classes,
            'emotion_num_classes': self.model.emotion_num_classes,
            'config': self.config
        }, os.path.join(model_dir, 'custom_components.pt'))
        
        if is_best:
            print(f"Best distilroBERTa model saved to {model_dir}")

print("distilroBERTa Training classes defined")

distilroBERTa Training classes defined


In [None]:
# In[9]:

def evaluate_distilroberta_model(model_path: str, model_type: str, test_data: Dict, model_name: str, reddit_data: Dict = None):
    # Initialize tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    
    # Initialize the appropriate model architecture
    if model_type == "multitask":
        model = distilroBERTaMultiTaskTransformer(
            model_name=model_name,
            sentiment_num_classes=roberta_model_config.sentiment_num_classes,
            emotion_num_classes=roberta_model_config.emotion_num_classes
        )
    else:
        num_classes = (roberta_model_config.sentiment_num_classes 
                      if model_type == "sentiment" 
                      else roberta_model_config.emotion_num_classes)
        model = distilroBERTaSingleTaskTransformer(
            model_name=model_name,
            num_classes=num_classes
        )
    
    # Load the saved state dict
    custom_components = torch.load(os.path.join(model_path, 'custom_components.pt'))
    
    if model_type == "multitask":
        model.sentiment_classifier.load_state_dict(custom_components['sentiment_classifier_state_dict'])
        model.emotion_classifier.load_state_dict(custom_components['emotion_classifier_state_dict'])
    else:
        model.classifier.load_state_dict(custom_components['classifier_state_dict'])
    
    # Load the base model weights
    base_model = AutoModel.from_pretrained(model_path)
    model.roberta = base_model
    
    # Make sure model is on the correct device
    model = model.to(device)
    model.eval()
    
    # Evaluate on general dataset
    general_results = evaluate_on_dataset(model, model_type, test_data, tokenizer, "General Dataset")
    
    # Evaluate on Reddit dataset if available
    reddit_results = None
    if reddit_data is not None:
        reddit_results = evaluate_on_dataset(model, model_type, reddit_data, tokenizer, "Reddit Dataset")
    
    return {
        'general': general_results,
        'reddit': reddit_results
    }

def evaluate_on_dataset(model, model_type: str, data: Dict, tokenizer, dataset_name: str):
    """Evaluate model on a specific dataset"""
    print(f"Evaluating on {dataset_name}...")
    
    # Create dataset and dataloader
    if model_type == "multitask":
        dataset = distilroBERTaMultiTaskDataset(
            texts=data['texts'],
            sentiment_labels=data['sentiment_labels'],
            emotion_labels=data['emotion_labels'],
            tokenizer=tokenizer,
            max_length=128
        )
    else:
        dataset = distilroBERTaSingleTaskDataset(
            texts=data['texts'],
            labels=data['labels'],
            tokenizer=tokenizer,
            max_length=128
        )
    
    dataloader = DataLoader(
        dataset,
        batch_size=32,
        shuffle=False,
        num_workers=0,
        pin_memory=True
    )
    
    all_predictions = []
    all_labels = []
    
    with torch.no_grad():
        for batch in dataloader:
            # Move everything to the same device
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            
            if model_type == "multitask":
                sentiment_labels = batch['sentiment_labels'].to(device)
                emotion_labels = batch['emotion_labels'].to(device)
                
                outputs = model(input_ids=input_ids, attention_mask=attention_mask)
                sentiment_preds = torch.argmax(outputs['sentiment_logits'], dim=-1)
                emotion_preds = torch.argmax(outputs['emotion_logits'], dim=-1)
                
                # Move predictions back to CPU for sklearn metrics
                all_predictions.extend([
                    sentiment_preds.cpu().numpy(),
                    emotion_preds.cpu().numpy()
                ])
                all_labels.extend([
                    sentiment_labels.cpu().numpy(),
                    emotion_labels.cpu().numpy()
                ])
            else:
                labels = batch['labels'].to(device)
                outputs = model(input_ids=input_ids, attention_mask=attention_mask)
                predictions = torch.argmax(outputs['logits'], dim=-1)
                
                # Move predictions back to CPU for sklearn metrics
                all_predictions.extend(predictions.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())
    
    # Calculate metrics
    if model_type == "multitask":
        sentiment_accuracy = accuracy_score(all_labels[0], all_predictions[0])
        sentiment_f1 = f1_score(all_labels[0], all_predictions[0], average='macro')
        emotion_accuracy = accuracy_score(all_labels[1], all_predictions[1])
        emotion_f1 = f1_score(all_labels[1], all_predictions[1], average='macro')
        
        return {
            'sentiment_accuracy': sentiment_accuracy,
            'sentiment_f1_macro': sentiment_f1,
            'emotion_accuracy': emotion_accuracy,
            'emotion_f1_macro': emotion_f1,
            'combined_accuracy': (sentiment_accuracy + emotion_accuracy) / 2,
            'combined_f1_macro': (sentiment_f1 + emotion_f1) / 2
        }
    else:
        return {
            'accuracy': accuracy_score(all_labels, all_predictions),
            'f1_macro': f1_score(all_labels, all_predictions, average='macro')
        }
    
print("distilroBERTa evaluation functions defined")

distilroBERTa evaluation functions defined


In [None]:
# In[10]:

def create_tuning_subset(data_splits, subset_ratio=0.01):  # Even smaller: 1%
    print(f"üî™ Creating {subset_ratio*100:.0f}% subset for hyperparameter tuning...")
    
    def sample_split(split_data, ratio):
        n_samples = int(len(split_data['texts']) * ratio)
        if n_samples < 20:  # Minimum 20 samples
            n_samples = min(20, len(split_data['texts']))
        indices = np.random.choice(len(split_data['texts']), n_samples, replace=False)
        
        return {
            'texts': [split_data['texts'][i] for i in indices],
            'labels': [split_data['labels'][i] for i in indices]
        }
    
    val_key = 'val' if 'val' in data_splits else ('validation' if 'validation' in data_splits else 'test')
    
    tuning_data = {
        'train': sample_split(data_splits['train'], subset_ratio),
        'val': sample_split(data_splits[val_key], subset_ratio),
        'test': sample_split(data_splits['test'], subset_ratio) if 'test' in data_splits else sample_split(data_splits[val_key], subset_ratio)
    }
    
    print(f"üìä Tuning subset created:")
    print(f"  Train: {len(tuning_data['train']['texts'])} samples")
    print(f"  Val: {len(tuning_data['val']['texts'])} samples")
    
    return tuning_data

def create_multitask_tuning_subset(data_splits, subset_ratio=0.01):
    print(f"üî™ Creating {subset_ratio*100:.0f}% multitask subset for hyperparameter tuning...")
    
    def sample_multitask_split(split_data, ratio):
        n_samples = int(len(split_data['texts']) * ratio)
        if n_samples < 20:
            n_samples = min(20, len(split_data['texts']))
        indices = np.random.choice(len(split_data['texts']), n_samples, replace=False)
        
        return {
            'texts': [split_data['texts'][i] for i in indices],
            'sentiment_labels': [split_data['sentiment_labels'][i] for i in indices],
            'emotion_labels': [split_data['emotion_labels'][i] for i in indices]
        }
    
    val_key = 'val' if 'val' in data_splits else ('validation' if 'validation' in data_splits else 'test')
    
    tuning_data = {
        'train': sample_multitask_split(data_splits['train'], subset_ratio),
        'val': sample_multitask_split(data_splits[val_key], subset_ratio),
        'test': sample_multitask_split(data_splits['test'], subset_ratio) if 'test' in data_splits else sample_multitask_split(data_splits[val_key], subset_ratio)
    }
    
    print(f"Multitask tuning subset created:")
    print(f"  Train: {len(tuning_data['train']['texts'])} samples")
    print(f"  Val: {len(tuning_data['val']['texts'])} samples")
    
    return tuning_data

In [None]:
# In[11]:

class distilroBERTaHyperparameterTuner:
    
    def __init__(
        self,
        model_type: str,  # "sentiment", "emotion", "multitask"
        data_splits: Dict,
        n_trials: int = 15,
        model_name: str = "distilroberta-base"
    ):
        self.model_type = model_type
        self.data_splits = data_splits
        self.n_trials = n_trials
        self.model_name = model_name
        
        print(f"distilroBERTa hyperparameter tuner initialized for {model_type}")
        print(f"Using Random Search for optimization")
    
    def objective(self, trial):
        
        # Sample hyperparameters 
        learning_rate = trial.suggest_float('learning_rate', 2e-5, 1e-4, log=True)
        batch_size = trial.suggest_categorical('batch_size', [16, 32])  # Changed from [4, 8, 16]
        num_epochs = trial.suggest_int('num_epochs', 3, 6)  # Keep this the same
        warmup_ratio = trial.suggest_float('warmup_ratio', 0.1, 0.2)  # Narrowed range
        weight_decay = trial.suggest_float('weight_decay', 0.01, 0.1)  # Adjusted range
        hidden_dropout = trial.suggest_float('hidden_dropout_prob', 0.1, 0.3)  # Same range
        classifier_dropout = trial.suggest_float('classifier_dropout', 0.1, 0.3)  # Adjusted upper bound
        max_length = 128  # Fixed value instead of tuning parameter
        
        # Multi-task specific parameter
        alpha = trial.suggest_float('alpha', 0.4, 0.6) if self.model_type == "multitask" else 0.5  # Narrowed range
        
        # Create config
        config = TrainingConfig(
            model_name=self.model_name,
            learning_rate=learning_rate,
            batch_size=batch_size,
            num_epochs=num_epochs,
            warmup_ratio=warmup_ratio,
            weight_decay=weight_decay,
            hidden_dropout_prob=hidden_dropout,
            classifier_dropout=classifier_dropout,
            max_length=max_length,
            alpha=alpha,
            task_type=self.model_type,
            output_dir=f"./distilroberta_trial_{trial.number}"
        )
        
        try:
            # Clear memory
            aggressive_memory_cleanup()
            
            # Train model
            if self.model_type == "multitask":
                trainer = distilroBERTaMultiTaskTrainer(config)
                history = trainer.train(self.data_splits)
                
                # Return combined F1 score from general dataset (not Reddit)
                best_sentiment_f1 = max(history['val_sentiment_f1_macro'])
                best_emotion_f1 = max(history['val_emotion_f1_macro'])
                combined_f1 = (best_sentiment_f1 + best_emotion_f1) / 2
                
                print(f"Trial {trial.number}: Combined F1 = {combined_f1:.4f}")
                return combined_f1
                
            else:
                # Single task training
                if self.model_type == "sentiment":
                    num_classes = roberta_model_config.sentiment_num_classes
                else:  # emotion
                    num_classes = roberta_model_config.emotion_num_classes
                
                trainer = distilroBERTaSingleTaskTrainer(config, num_classes)
                history = trainer.train(self.data_splits)
                
                # Return best F1 score from general dataset (not Reddit)
                best_f1 = max(history['val_f1_macro'])
                print(f"Trial {trial.number}: F1 = {best_f1:.4f}")
                return best_f1
                
        except Exception as e:
            print(f"Trial {trial.number} failed: {e}")
            return 0.0
        
        finally:
            # Clean up
            aggressive_memory_cleanup()
    
    def tune(self):
        
        # Create study with Random Search
        study = optuna.create_study(
            direction='maximize',
            sampler=optuna.samplers.RandomSampler(seed=42)
        )
        
        print(f"\nüîç Starting hyperparameter optimization for {self.model_type}...")
        print(f"üéØ Random Search: {self.n_trials} trials")
        print("=" * 60)
        
        # Run optimization
        study.optimize(self.objective, n_trials=self.n_trials)
        
        # Print results
        print(f"\nüèÜ Optimization completed for {self.model_type}!")
        print(f"Best trial: {study.best_trial.number}")
        print(f"Best F1 score: {study.best_value:.4f}")
        print(f"Best parameters:")
        for key, value in study.best_params.items():
            print(f"  {key}: {value}")
        
        return study

print("distilroBERTa Hyperparameter Tuning classes defined")

distilroBERTa Hyperparameter Tuning classes defined


In [None]:
# In[12]:

class UltraFastdistilroBERTaSingleTaskTrainer:
    
    def __init__(self, config: TrainingConfig, num_classes: int):
        self.config = config
        self.num_classes = num_classes
        self.device = device
        
        # Initialize tokenizer (reuse if possible)
        if not hasattr(self, '_tokenizer_cache'):
            UltraFastdistilroBERTaSingleTaskTrainer._tokenizer_cache = AutoTokenizer.from_pretrained(config.model_name)
            if UltraFastdistilroBERTaSingleTaskTrainer._tokenizer_cache.pad_token is None:
                UltraFastdistilroBERTaSingleTaskTrainer._tokenizer_cache.pad_token = UltraFastdistilroBERTaSingleTaskTrainer._tokenizer_cache.eos_token
        
        self.tokenizer = UltraFastdistilroBERTaSingleTaskTrainer._tokenizer_cache
        
        # Initialize model
        self.model = distilroBERTaSingleTaskTransformer(
            model_name=config.model_name,
            num_classes=num_classes,
            hidden_dropout_prob=config.hidden_dropout_prob,
            attention_dropout_prob=config.attention_dropout_prob,
            classifier_dropout=config.classifier_dropout
        ).to(self.device)
        
        self.loss_fn = nn.CrossEntropyLoss()
        self.training_history = {
            'train_loss': [], 'train_accuracy': [], 'val_loss': [], 'val_accuracy': [], 'val_f1_macro': []
        }
    
    def create_data_loaders(self, data_splits: Dict):
        train_dataset = distilroBERTaSingleTaskDataset(
            texts=data_splits['train']['texts'],
            labels=data_splits['train']['labels'],
            tokenizer=self.tokenizer,
            max_length=self.config.max_length
        )
        
        val_dataset = distilroBERTaSingleTaskDataset(
            texts=data_splits['val']['texts'],
            labels=data_splits['val']['labels'],
            tokenizer=self.tokenizer,
            max_length=self.config.max_length
        )
        
        # Speed-optimized data loaders
        self.train_loader = DataLoader(
            train_dataset,
            batch_size=self.config.batch_size,
            shuffle=True,
            num_workers=0,  # No multiprocessing for speed
            pin_memory=False  # Disable pin_memory
        )
        
        self.val_loader = DataLoader(
            val_dataset,
            batch_size=self.config.batch_size,
            shuffle=False,
            num_workers=0,
            pin_memory=False
        )
        
        # Simple optimizer setup
        self.optimizer = AdamW(
            self.model.parameters(),
            lr=self.config.learning_rate,
            weight_decay=self.config.weight_decay
        )
    
    def train_epoch(self):
        self.model.train()
        
        total_loss = 0.0
        correct_predictions = 0
        total_predictions = 0
        
        for batch_idx, batch in enumerate(self.train_loader):
            input_ids = batch['input_ids'].to(self.device, non_blocking=True)
            attention_mask = batch['attention_mask'].to(self.device, non_blocking=True)
            labels = batch['labels'].to(self.device, non_blocking=True)
            
            outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
            loss = self.loss_fn(outputs['logits'], labels)
            
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            
            # Track metrics
            total_loss += loss.item()
            predictions = torch.argmax(outputs['logits'], dim=-1)
            correct_predictions += (predictions == labels).sum().item()
            total_predictions += labels.size(0)
            
            # Print progress for very small datasets
            if batch_idx % max(1, len(self.train_loader) // 4) == 0:
                print(f"    Batch {batch_idx + 1}/{len(self.train_loader)}")
        
        return total_loss / len(self.train_loader), correct_predictions / total_predictions
    
    def evaluate(self):
        self.model.eval()
        
        total_loss = 0.0
        all_predictions = []
        all_labels = []
        
        with torch.no_grad():
            for batch in self.val_loader:
                input_ids = batch['input_ids'].to(self.device, non_blocking=True)
                attention_mask = batch['attention_mask'].to(self.device, non_blocking=True)
                labels = batch['labels'].to(self.device, non_blocking=True)
                
                outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
                loss = self.loss_fn(outputs['logits'], labels)
                
                total_loss += loss.item()
                predictions = torch.argmax(outputs['logits'], dim=-1)
                
                all_predictions.extend(predictions.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())
        
        accuracy = accuracy_score(all_labels, all_predictions)
        f1_macro = f1_score(all_labels, all_predictions, average='macro', zero_division=0)
        
        return total_loss / len(self.val_loader), accuracy, f1_macro
    
    def train(self, data_splits: Dict):
        print(f"üöÄ Starting ultra-fast distilroBERTa training ({self.config.task_type})...")
        
        self.create_data_loaders(data_splits)
        
        best_f1 = 0.0
        
        for epoch in range(self.config.num_epochs):
            print(f"  üìç Epoch {epoch + 1}/{self.config.num_epochs}")
            
            train_loss, train_accuracy = self.train_epoch()
            val_loss, val_accuracy, val_f1_macro = self.evaluate()
            
            self.training_history['train_loss'].append(train_loss)
            self.training_history['train_accuracy'].append(train_accuracy)
            self.training_history['val_loss'].append(val_loss)
            self.training_history['val_accuracy'].append(val_accuracy)
            self.training_history['val_f1_macro'].append(val_f1_macro)
            
            print(f"    Loss: {train_loss:.4f}, Acc: {train_accuracy:.4f}, Val F1: {val_f1_macro:.4f}")
            
            if val_f1_macro > best_f1:
                best_f1 = val_f1_macro
        
        print(f"‚úÖ Training completed! Best F1: {best_f1:.4f}")
        return self.training_history

print("Ultra-fast trainers defined")

Ultra-fast trainers defined


In [None]:
# In[12]:

class UltraFastdistilroBERTaSingleTaskTrainer:
    
    def __init__(self, config: TrainingConfig, num_classes: int):
        self.config = config
        self.num_classes = num_classes
        self.device = device
        
        # Initialize tokenizer (reuse if possible)
        if not hasattr(self, '_tokenizer_cache'):
            UltraFastdistilroBERTaSingleTaskTrainer._tokenizer_cache = AutoTokenizer.from_pretrained(config.model_name)
            if UltraFastdistilroBERTaSingleTaskTrainer._tokenizer_cache.pad_token is None:
                UltraFastdistilroBERTaSingleTaskTrainer._tokenizer_cache.pad_token = UltraFastdistilroBERTaSingleTaskTrainer._tokenizer_cache.eos_token
        
        self.tokenizer = UltraFastdistilroBERTaSingleTaskTrainer._tokenizer_cache
        
        # Initialize model
        self.model = distilroBERTaSingleTaskTransformer(
            model_name=config.model_name,
            num_classes=num_classes,
            hidden_dropout_prob=config.hidden_dropout_prob,
            attention_dropout_prob=config.attention_dropout_prob,
            classifier_dropout=config.classifier_dropout
        ).to(self.device)
        
        self.loss_fn = nn.CrossEntropyLoss()
        self.training_history = {
            'train_loss': [], 'train_accuracy': [], 'val_loss': [], 'val_accuracy': [], 'val_f1_macro': []
        }
    
    def create_data_loaders(self, data_splits: Dict):
        train_dataset = distilroBERTaSingleTaskDataset(
            texts=data_splits['train']['texts'],
            labels=data_splits['train']['labels'],
            tokenizer=self.tokenizer,
            max_length=self.config.max_length
        )
        
        val_dataset = distilroBERTaSingleTaskDataset(
            texts=data_splits['val']['texts'],
            labels=data_splits['val']['labels'],
            tokenizer=self.tokenizer,
            max_length=self.config.max_length
        )
        
        # Speed-optimized data loaders
        self.train_loader = DataLoader(
            train_dataset,
            batch_size=self.config.batch_size,
            shuffle=True,
            num_workers=0,  # No multiprocessing for speed
            pin_memory=False  # Disable pin_memory
        )
        
        self.val_loader = DataLoader(
            val_dataset,
            batch_size=self.config.batch_size,
            shuffle=False,
            num_workers=0,
            pin_memory=False
        )
        
        # Simple optimizer setup
        self.optimizer = AdamW(
            self.model.parameters(),
            lr=self.config.learning_rate,
            weight_decay=self.config.weight_decay
        )
    
    def train_epoch(self):
        self.model.train()
        
        total_loss = 0.0
        correct_predictions = 0
        total_predictions = 0
        
        for batch_idx, batch in enumerate(self.train_loader):
            input_ids = batch['input_ids'].to(self.device, non_blocking=True)
            attention_mask = batch['attention_mask'].to(self.device, non_blocking=True)
            labels = batch['labels'].to(self.device, non_blocking=True)
            
            outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
            loss = self.loss_fn(outputs['logits'], labels)
            
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            
            # Track metrics
            total_loss += loss.item()
            predictions = torch.argmax(outputs['logits'], dim=-1)
            correct_predictions += (predictions == labels).sum().item()
            total_predictions += labels.size(0)
            
            # Print progress for very small datasets
            if batch_idx % max(1, len(self.train_loader) // 4) == 0:
                print(f"    Batch {batch_idx + 1}/{len(self.train_loader)}")
        
        return total_loss / len(self.train_loader), correct_predictions / total_predictions
    
    def evaluate(self):
        self.model.eval()
        
        total_loss = 0.0
        all_predictions = []
        all_labels = []
        
        with torch.no_grad():
            for batch in self.val_loader:
                input_ids = batch['input_ids'].to(self.device, non_blocking=True)
                attention_mask = batch['attention_mask'].to(self.device, non_blocking=True)
                labels = batch['labels'].to(self.device, non_blocking=True)
                
                outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
                loss = self.loss_fn(outputs['logits'], labels)
                
                total_loss += loss.item()
                predictions = torch.argmax(outputs['logits'], dim=-1)
                
                all_predictions.extend(predictions.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())
        
        accuracy = accuracy_score(all_labels, all_predictions)
        f1_macro = f1_score(all_labels, all_predictions, average='macro', zero_division=0)
        
        return total_loss / len(self.val_loader), accuracy, f1_macro
    
    def train(self, data_splits: Dict):
        print(f"üöÄ Starting ultra-fast distilroBERTa training ({self.config.task_type})...")
        
        self.create_data_loaders(data_splits)
        
        best_f1 = 0.0
        
        for epoch in range(self.config.num_epochs):
            print(f"  üìç Epoch {epoch + 1}/{self.config.num_epochs}")
            
            train_loss, train_accuracy = self.train_epoch()
            val_loss, val_accuracy, val_f1_macro = self.evaluate()
            
            self.training_history['train_loss'].append(train_loss)
            self.training_history['train_accuracy'].append(train_accuracy)
            self.training_history['val_loss'].append(val_loss)
            self.training_history['val_accuracy'].append(val_accuracy)
            self.training_history['val_f1_macro'].append(val_f1_macro)
            
            print(f"    Loss: {train_loss:.4f}, Acc: {train_accuracy:.4f}, Val F1: {val_f1_macro:.4f}")
            
            if val_f1_macro > best_f1:
                best_f1 = val_f1_macro
        
        print(f"‚úÖ Training completed! Best F1: {best_f1:.4f}")
        return self.training_history

print("Ultra-fast trainers defined")

Ultra-fast trainers defined


In [None]:
# In[13]:

print("STARTING distilroBERTa TRAINING PIPELINE")
print("=" * 80)

# Clear memory before starting
aggressive_memory_cleanup()

# Load and process datasets for distilroBERTa
print("\nLoading and processing datasets for distilroBERTa...")
sentiment_data, emotion_data = load_and_process_datasets_roberta()
multitask_data = create_multitask_data_roberta(sentiment_data, emotion_data)

# Load Reddit evaluation data
print("\nLoading Reddit evaluation data...")
reddit_data = load_reddit_evaluation_data()

# Model configurations
model_name = "distilroberta-base"
n_trials = 15  # Number of hyperparameter tuning trials

print("Data loading completed!")
print(f"Sentiment data: {len(sentiment_data['train']['texts'])} train samples")
print(f"Emotion data: {len(emotion_data['train']['texts'])} train samples")
print(f"Multitask data: {len(multitask_data['train']['texts'])} train samples")
if reddit_data:
    print(f"Reddit data: {len(reddit_data['sentiment']['texts'])} evaluation samples")
print(f"Model: {model_name}")
print(f"Hyperparameter trials per model: {n_trials}")

STARTING distilroBERTa TRAINING PIPELINE

Loading and processing datasets for distilroBERTa...
Loading external datasets for distilroBERTa...
SST-2 dataset loaded: 67349 train samples
GoEmotions dataset loaded: 43410 train samples
üîÑ Processing sentiment data for distilroBERTa...
‚úÖ RoBERTa Sentiment data processed:
  Train: 7000 samples
  Val: 1500 samples
  Test: 1500 samples
Processing emotion data for distilroBERTa...
distilroBERTa Emotion data processed:
  Train: 7000 samples
  Val: 1500 samples
  Test: 1500 samples
üîÑ Creating multi-task dataset for distilroBERTa...
distilroBERTa Multi-task data created:
  Train: 7000 samples
  Val: 1500 samples
  Test: 1500 samples

Loading Reddit evaluation data...
Loading Reddit evaluation data...
‚úÖ Reddit data loaded: 95 samples
‚úÖ Reddit data prepared: 95 samples
   Sentiment classes: ['Negative', 'Neutral', 'Positive']
   Emotion classes: ['Anger', 'Fear', 'Joy', 'No Emotion', 'Sadness', 'Surprise']
Data loading completed!
Sentiment

In [None]:
# In[ ]:

# Cell 9: Initial Sentiment Model Training
print("\n" + "="*80)
print("PHASE 1: INITIAL DISTILROBERTA TRAINING - SENTIMENT MODEL")
print("="*80)

# Initialize results dictionary
all_results = {}

# Default configuration for sentiment
default_config_sentiment = TrainingConfig(
    model_name=model_name,
    batch_size=8,
    learning_rate=2e-5,
    num_epochs=3,
    max_length=128,
    task_type="sentiment",
    output_dir="./initial_distilroberta_sentiment_model"
)

print("\n2Ô∏è‚É£ Training Initial distilroBERTa Sentiment Model...")
print("="*60)

# Train initial sentiment model
initial_sentiment_trainer = distilroBERTaSingleTaskTrainer(
    config=default_config_sentiment,
    num_classes=roberta_model_config.sentiment_num_classes
)
initial_sentiment_history = initial_sentiment_trainer.train(sentiment_data)

# Evaluate initial sentiment model on both general and Reddit datasets
initial_sentiment_results = evaluate_distilroberta_model(
    model_path="./initial_distilroberta_sentiment_model/model_best",
    model_type="sentiment",
    test_data=sentiment_data['test'],
    model_name=model_name,
    reddit_data=reddit_data['sentiment'] if reddit_data else None
)

# Store results
all_results['initial_sentiment'] = initial_sentiment_results

print(f"\n‚úÖ Initial Sentiment Model Results:")
print(f"  General Dataset:")
print(f"    Accuracy: {initial_sentiment_results['general']['accuracy']:.4f}")
print(f"    F1 Macro: {initial_sentiment_results['general']['f1_macro']:.4f}")
if initial_sentiment_results['reddit']:
    print(f"  Reddit Dataset:")
    print(f"    Accuracy: {initial_sentiment_results['reddit']['accuracy']:.4f}")
    print(f"    F1 Macro: {initial_sentiment_results['reddit']['f1_macro']:.4f}")

# Clean up memory
aggressive_memory_cleanup()


PHASE 1: INITIAL DISTILROBERTA TRAINING - SENTIMENT MODEL

2Ô∏è‚É£ Training Initial distilroBERTa Sentiment Model...
Starting distilroBERTa single-task training (sentiment)...

üìç Epoch 1/3
  Train Loss: 0.6889, Train Acc: 0.7306
  Val Loss: 0.5309, Val Acc: 0.8127, Val F1: 0.5669
Best distilroBERTa model saved to ./initial_distilroberta_sentiment_model\model_best

üìç Epoch 2/3
  Train Loss: 0.4799, Train Acc: 0.8426
  Val Loss: 0.5704, Val Acc: 0.8273, Val F1: 0.5772
Best distilroBERTa model saved to ./initial_distilroberta_sentiment_model\model_best

üìç Epoch 3/3
  Train Loss: 0.4000, Train Acc: 0.8766
  Val Loss: 0.6107, Val Acc: 0.8327, Val F1: 0.5811
Best distilroBERTa model saved to ./initial_distilroberta_sentiment_model\model_best

distilroBERTa training completed! Best F1: 0.5811
Evaluating on General Dataset...
Evaluating on Reddit Dataset...

‚úÖ Initial Sentiment Model Results:
  General Dataset:
    Accuracy: 0.8333
    F1 Macro: 0.5812
  Reddit Dataset:
    Accurac

In [None]:
# In[ ]:

print("\n" + "="*80)
print("üìç PHASE 1: INITIAL DISTILROBERTA TRAINING - EMOTION MODEL")
print("="*80)

# Default configuration for emotion
default_config_emotion = TrainingConfig(
    model_name=model_name,
    batch_size=8,
    learning_rate=2e-5,
    num_epochs=3,
    max_length=128,
    task_type="emotion",
    output_dir="./initial_distilroberta_emotion_model"
)

print("\nTraining Initial distilroBERTa Emotion Model...")
print("="*60)

# Train initial emotion model
initial_emotion_trainer = distilroBERTaSingleTaskTrainer(
    config=default_config_emotion,
    num_classes=roberta_model_config.emotion_num_classes
)
initial_emotion_history = initial_emotion_trainer.train(emotion_data)

# Evaluate initial emotion model on both general and Reddit datasets
initial_emotion_results = evaluate_distilroberta_model(
    model_path="./initial_distilroberta_emotion_model/model_best",
    model_type="emotion",
    test_data=emotion_data['test'],
    model_name=model_name,
    reddit_data=reddit_data['emotion'] if reddit_data else None
)
all_results['initial_emotion'] = initial_emotion_results

print(f"\n‚úÖ Initial Emotion Model Results:")
print(f"  General Dataset:")
print(f"    Accuracy: {initial_emotion_results['general']['accuracy']:.4f}")
print(f"    F1 Macro: {initial_emotion_results['general']['f1_macro']:.4f}")
if initial_emotion_results['reddit']:
    print(f"  Reddit Dataset:")
    print(f"    Accuracy: {initial_emotion_results['reddit']['accuracy']:.4f}")
    print(f"    F1 Macro: {initial_emotion_results['reddit']['f1_macro']:.4f}")

# Clean up memory
aggressive_memory_cleanup()


üìç PHASE 1: INITIAL DISTILROBERTA TRAINING - EMOTION MODEL

Training Initial distilroBERTa Emotion Model...
Starting distilroBERTa single-task training (emotion)...

üìç Epoch 1/3
  Train Loss: 1.0259, Train Acc: 0.5974
  Val Loss: 0.7381, Val Acc: 0.7313, Val F1: 0.6966
Best distilroBERTa model saved to ./initial_distilroberta_emotion_model\model_best

üìç Epoch 2/3
  Train Loss: 0.6113, Train Acc: 0.7810
  Val Loss: 0.7366, Val Acc: 0.7473, Val F1: 0.7110
Best distilroBERTa model saved to ./initial_distilroberta_emotion_model\model_best

üìç Epoch 3/3
  Train Loss: 0.4374, Train Acc: 0.8453
  Val Loss: 0.7902, Val Acc: 0.7600, Val F1: 0.7301
Best distilroBERTa model saved to ./initial_distilroberta_emotion_model\model_best

distilroBERTa training completed! Best F1: 0.7301
Evaluating on General Dataset...
Evaluating on Reddit Dataset...

‚úÖ Initial Emotion Model Results:
  General Dataset:
    Accuracy: 0.7593
    F1 Macro: 0.7211
  Reddit Dataset:
    Accuracy: 0.1368
    F1 

In [None]:
# In[ ]:

print("\n" + "="*80)
print("üìç PHASE 1: INITIAL DISTILROBERTA TRAINING - MULTITASK MODEL")
print("="*80)

# Default configuration for multitask
default_config_multitask = TrainingConfig(
    model_name=model_name,
    batch_size=8,
    learning_rate=2e-5,
    num_epochs=3,
    max_length=128,
    alpha=0.5,
    task_type="multitask",
    output_dir="./initial_distilroberta_multitask_model"
)

print("\n4Ô∏è‚É£ Training Initial distilroBERTa Multi-task Model...")
print("="*60)

# Train initial multitask model
initial_multitask_trainer = distilroBERTaMultiTaskTrainer(config=default_config_multitask)
initial_multitask_history = initial_multitask_trainer.train(multitask_data)

# Evaluate initial multitask model on both general and Reddit datasets
initial_multitask_results = evaluate_distilroberta_model(
    model_path="./initial_distilroberta_multitask_model/model_best",
    model_type="multitask",
    test_data=multitask_data['test'],
    model_name=model_name,
    reddit_data=reddit_data['multitask'] if reddit_data else None
)
all_results['initial_multitask'] = initial_multitask_results

print(f"\n‚úÖ Initial Multitask Model Results:")
print(f"  General Dataset:")
print(f"    Sentiment - Accuracy: {initial_multitask_results['general']['sentiment_accuracy']:.4f}, F1: {initial_multitask_results['general']['sentiment_f1_macro']:.4f}")
print(f"    Emotion - Accuracy: {initial_multitask_results['general']['emotion_accuracy']:.4f}, F1: {initial_multitask_results['general']['emotion_f1_macro']:.4f}")
print(f"    Combined - Accuracy: {initial_multitask_results['general']['combined_accuracy']:.4f}, F1: {initial_multitask_results['general']['combined_f1_macro']:.4f}")
if initial_multitask_results['reddit']:
    print(f"  Reddit Dataset:")
    print(f"    Sentiment - Accuracy: {initial_multitask_results['reddit']['sentiment_accuracy']:.4f}, F1: {initial_multitask_results['reddit']['sentiment_f1_macro']:.4f}")
    print(f"    Emotion - Accuracy: {initial_multitask_results['reddit']['emotion_accuracy']:.4f}, F1: {initial_multitask_results['reddit']['emotion_f1_macro']:.4f}")
    print(f"    Combined - Accuracy: {initial_multitask_results['reddit']['combined_accuracy']:.4f}, F1: {initial_multitask_results['reddit']['combined_f1_macro']:.4f}")

# Clean up memory
aggressive_memory_cleanup()


üìç PHASE 1: INITIAL DISTILROBERTA TRAINING - MULTITASK MODEL

4Ô∏è‚É£ Training Initial distilroBERTa Multi-task Model...
Starting distilroBERTa multi-task training...

Epoch 1/3
  Train Loss: 1.2160
  Train Sentiment Acc: 0.6973, Train Emotion Acc: 0.2806
  Val Loss: 1.0991
  Val Sentiment Acc: 0.8227, F1: 0.5739
  Val Emotion Acc: 0.3020, F1: 0.0773
Best distilroBERTa model saved to ./initial_distilroberta_multitask_model\model_best

Epoch 2/3
  Train Loss: 1.0836
  Train Sentiment Acc: 0.8416, Train Emotion Acc: 0.2963
  Val Loss: 1.0987
  Val Sentiment Acc: 0.8260, F1: 0.5761
  Val Emotion Acc: 0.2987, F1: 0.0859
Best distilroBERTa model saved to ./initial_distilroberta_multitask_model\model_best

Epoch 3/3
  Train Loss: 1.0172
  Train Sentiment Acc: 0.8769, Train Emotion Acc: 0.3074
  Val Loss: 1.1242
  Val Sentiment Acc: 0.8280, F1: 0.5778
  Val Emotion Acc: 0.2960, F1: 0.1044
Best distilroBERTa model saved to ./initial_distilroberta_multitask_model\model_best

distilroBERTa tr

In [None]:
# In[ ]:

print("\n" + "="*80)
print("üìç INITIAL distilroBERTa RESULTS SUMMARY")
print("="*80)

print(f"\nüìä INITIAL distilroBERTa MODEL PERFORMANCE:")
print(f"  Sentiment Model:")
print(f"    General Dataset - Accuracy: {initial_sentiment_results['general']['accuracy']:.4f}, F1: {initial_sentiment_results['general']['f1_macro']:.4f}")
if initial_sentiment_results.get('reddit'):
    print(f"    Reddit Dataset - Accuracy: {initial_sentiment_results['reddit']['accuracy']:.4f}, F1: {initial_sentiment_results['reddit']['f1_macro']:.4f}")

print(f"\n  Emotion Model:")
print(f"    General Dataset - Accuracy: {initial_emotion_results['general']['accuracy']:.4f}, F1: {initial_emotion_results['general']['f1_macro']:.4f}")
if initial_emotion_results.get('reddit'):
    print(f"    Reddit Dataset - Accuracy: {initial_emotion_results['reddit']['accuracy']:.4f}, F1: {initial_emotion_results['reddit']['f1_macro']:.4f}")

print(f"\n  Multitask Model:")
print(f"    General Dataset:")
print(f"      Sentiment - Accuracy: {initial_multitask_results['general']['sentiment_accuracy']:.4f}, F1: {initial_multitask_results['general']['sentiment_f1_macro']:.4f}")
print(f"      Emotion - Accuracy: {initial_multitask_results['general']['emotion_accuracy']:.4f}, F1: {initial_multitask_results['general']['emotion_f1_macro']:.4f}")
print(f"      Combined - Accuracy: {initial_multitask_results['general']['combined_accuracy']:.4f}, F1: {initial_multitask_results['general']['combined_f1_macro']:.4f}")
if initial_multitask_results.get('reddit'):
    print(f"    Reddit Dataset:")
    print(f"      Sentiment - Accuracy: {initial_multitask_results['reddit']['sentiment_accuracy']:.4f}, F1: {initial_multitask_results['reddit']['sentiment_f1_macro']:.4f}")
    print(f"      Emotion - Accuracy: {initial_multitask_results['reddit']['emotion_accuracy']:.4f}, F1: {initial_multitask_results['reddit']['emotion_f1_macro']:.4f}")
    print(f"      Combined - Accuracy: {initial_multitask_results['reddit']['combined_accuracy']:.4f}, F1: {initial_multitask_results['reddit']['combined_f1_macro']:.4f}")

# Store results for later comparison
all_results = {
    'initial_sentiment': initial_sentiment_results,
    'initial_emotion': initial_emotion_results,
    'initial_multitask': initial_multitask_results
}

print(f"\nüí° These are distilroBERTa baseline results. Now proceeding to hyperparameter tuning!")


üìç INITIAL distilroBERTa RESULTS SUMMARY

üìä INITIAL distilroBERTa MODEL PERFORMANCE:
  Sentiment Model:
    General Dataset - Accuracy: 0.8333, F1: 0.5812
    Reddit Dataset - Accuracy: 0.5474, F1: 0.2841

  Emotion Model:
    General Dataset - Accuracy: 0.7593, F1: 0.7211
    Reddit Dataset - Accuracy: 0.1368, F1: 0.0717

  Multitask Model:
    General Dataset:
      Sentiment - Accuracy: 0.7500, F1: 0.5153
      Emotion - Accuracy: 0.4375, F1: 0.1014
      Combined - Accuracy: 0.5938, F1: 0.3084
    Reddit Dataset:
      Sentiment - Accuracy: 0.6250, F1: 0.2614
      Emotion - Accuracy: 0.3125, F1: 0.1067
      Combined - Accuracy: 0.4688, F1: 0.1841

üí° These are distilroBERTa baseline results. Now proceeding to hyperparameter tuning!


In [None]:
# In[ ]:

print("\n" + "="*80)
print("üìç PHASE 2: HYPERPARAMETER TUNING - SENTIMENT")
print("="*80)

print("\n6Ô∏è‚É£ Hyperparameter Tuning for distilroBERTa Sentiment Model...")
print("="*60)

# Create tuner for sentiment
sentiment_tuner = distilroBERTaHyperparameterTuner(
    model_type="sentiment",
    data_splits=sentiment_data,
    n_trials=5,
    model_name=model_name
)
sentiment_study = sentiment_tuner.tune()

[I 2025-08-12 12:43:26,655] A new study created in memory with name: no-name-d84629e3-d252-473c-8250-b5cad7c4d515



üìç PHASE 2: HYPERPARAMETER TUNING - SENTIMENT

6Ô∏è‚É£ Hyperparameter Tuning for distilroBERTa Sentiment Model...
distilroBERTa hyperparameter tuner initialized for sentiment
Using Random Search for optimization

üîç Starting hyperparameter optimization for sentiment...
üéØ Random Search: 5 trials
Starting distilroBERTa single-task training (sentiment)...

üìç Epoch 1/5
  Train Loss: 0.7439, Train Acc: 0.6879
  Val Loss: 0.5852, Val Acc: 0.7800, Val F1: 0.5445
Best distilroBERTa model saved to ./distilroberta_trial_0\model_best

üìç Epoch 2/5
  Train Loss: 0.4983, Train Acc: 0.8301
  Val Loss: 0.5147, Val Acc: 0.8173, Val F1: 0.5698
Best distilroBERTa model saved to ./distilroberta_trial_0\model_best

üìç Epoch 3/5
  Train Loss: 0.3907, Train Acc: 0.8716
  Val Loss: 0.5497, Val Acc: 0.8233, Val F1: 0.5736
Best distilroBERTa model saved to ./distilroberta_trial_0\model_best

üìç Epoch 4/5
  Train Loss: 0.3117, Train Acc: 0.8933
  Val Loss: 0.6296, Val Acc: 0.8320, Val F1: 0.580

[I 2025-08-12 12:50:46,294] Trial 0 finished with value: 0.5855951746401464 and parameters: {'learning_rate': 3.65445235521325e-05, 'batch_size': 16, 'num_epochs': 5, 'warmup_ratio': 0.11560186404424366, 'weight_decay': 0.02403950683025824, 'hidden_dropout_prob': 0.1116167224336399, 'classifier_dropout': 0.273235229154987}. Best is trial 0 with value: 0.5855951746401464.


Starting distilroBERTa single-task training (sentiment)...

üìç Epoch 1/6
  Train Loss: 0.7399, Train Acc: 0.6867
  Val Loss: 0.5359, Val Acc: 0.8067, Val F1: 0.5630
Best distilroBERTa model saved to ./distilroberta_trial_1\model_best

üìç Epoch 2/6
  Train Loss: 0.5456, Train Acc: 0.8121
  Val Loss: 0.9844, Val Acc: 0.7500, Val F1: 0.5191

üìç Epoch 3/6
  Train Loss: 0.4262, Train Acc: 0.8630
  Val Loss: 0.6071, Val Acc: 0.8207, Val F1: 0.5724
Best distilroBERTa model saved to ./distilroberta_trial_1\model_best

üìç Epoch 4/6
  Train Loss: 0.3540, Train Acc: 0.8854
  Val Loss: 0.6633, Val Acc: 0.8293, Val F1: 0.5784
Best distilroBERTa model saved to ./distilroberta_trial_1\model_best

üìç Epoch 5/6
  Train Loss: 0.3024, Train Acc: 0.8993
  Val Loss: 0.7458, Val Acc: 0.8227, Val F1: 0.5738

üìç Epoch 6/6
  Train Loss: 0.2600, Train Acc: 0.9084
  Val Loss: 0.7806, Val Acc: 0.8167, Val F1: 0.5807


[I 2025-08-12 12:59:31,122] Trial 1 finished with value: 0.5806643674908627 and parameters: {'learning_rate': 5.262490902114904e-05, 'batch_size': 16, 'num_epochs': 6, 'warmup_ratio': 0.18324426408004219, 'weight_decay': 0.029110519961044856, 'hidden_dropout_prob': 0.1363649934414201, 'classifier_dropout': 0.13668090197068677}. Best is trial 0 with value: 0.5855951746401464.


Best distilroBERTa model saved to ./distilroberta_trial_1\model_best

distilroBERTa training completed! Best F1: 0.5807
Trial 1: F1 = 0.5807
Starting distilroBERTa single-task training (sentiment)...

üìç Epoch 1/4
  Train Loss: 0.7661, Train Acc: 0.6771
  Val Loss: 0.6164, Val Acc: 0.8020, Val F1: 0.5587
Best distilroBERTa model saved to ./distilroberta_trial_2\model_best

üìç Epoch 2/4
  Train Loss: 0.5315, Train Acc: 0.8170
  Val Loss: 0.5395, Val Acc: 0.8120, Val F1: 0.5658
Best distilroBERTa model saved to ./distilroberta_trial_2\model_best

üìç Epoch 3/4
  Train Loss: 0.4271, Train Acc: 0.8553
  Val Loss: 0.6255, Val Acc: 0.8147, Val F1: 0.5681
Best distilroBERTa model saved to ./distilroberta_trial_2\model_best

üìç Epoch 4/4
  Train Loss: 0.3532, Train Acc: 0.8787
  Val Loss: 0.6113, Val Acc: 0.8160, Val F1: 0.5691


[I 2025-08-12 13:04:52,455] Trial 2 finished with value: 0.5691273080660836 and parameters: {'learning_rate': 3.2635193912846855e-05, 'batch_size': 16, 'num_epochs': 4, 'warmup_ratio': 0.16118528947223795, 'weight_decay': 0.022554447458683766, 'hidden_dropout_prob': 0.15842892970704364, 'classifier_dropout': 0.17327236865873835}. Best is trial 0 with value: 0.5855951746401464.


Best distilroBERTa model saved to ./distilroberta_trial_2\model_best

distilroBERTa training completed! Best F1: 0.5691
Trial 2: F1 = 0.5691
Starting distilroBERTa single-task training (sentiment)...

üìç Epoch 1/5
  Train Loss: 0.7509, Train Acc: 0.6826
  Val Loss: 0.5549, Val Acc: 0.7987, Val F1: 0.5568
Best distilroBERTa model saved to ./distilroberta_trial_3\model_best

üìç Epoch 2/5
  Train Loss: 0.5633, Train Acc: 0.7991
  Val Loss: 0.5537, Val Acc: 0.8040, Val F1: 0.5615
Best distilroBERTa model saved to ./distilroberta_trial_3\model_best

üìç Epoch 3/5
  Train Loss: 0.4671, Train Acc: 0.8411
  Val Loss: 0.5817, Val Acc: 0.8313, Val F1: 0.5801
Best distilroBERTa model saved to ./distilroberta_trial_3\model_best

üìç Epoch 4/5
  Train Loss: 0.4049, Train Acc: 0.8633
  Val Loss: 0.6469, Val Acc: 0.8213, Val F1: 0.5736

üìç Epoch 5/5
  Train Loss: 0.3681, Train Acc: 0.8733
  Val Loss: 0.6281, Val Acc: 0.8247, Val F1: 0.5755

distilroBERTa training completed! Best F1: 0.5801
Tr

[I 2025-08-12 13:09:50,320] Trial 3 finished with value: 0.5800826875346377 and parameters: {'learning_rate': 4.166863122305896e-05, 'batch_size': 16, 'num_epochs': 5, 'warmup_ratio': 0.15924145688620425, 'weight_decay': 0.014180537144799797, 'hidden_dropout_prob': 0.22150897038028766, 'classifier_dropout': 0.1341048247374583}. Best is trial 0 with value: 0.5855951746401464.


Starting distilroBERTa single-task training (sentiment)...

üìç Epoch 1/6
  Train Loss: 0.8615, Train Acc: 0.5690
  Val Loss: 0.5654, Val Acc: 0.8027, Val F1: 0.5591
Best distilroBERTa model saved to ./distilroberta_trial_4\model_best

üìç Epoch 2/6
  Train Loss: 0.5895, Train Acc: 0.7833
  Val Loss: 0.5260, Val Acc: 0.8180, Val F1: 0.5704
Best distilroBERTa model saved to ./distilroberta_trial_4\model_best

üìç Epoch 3/6
  Train Loss: 0.5234, Train Acc: 0.8067
  Val Loss: 0.5082, Val Acc: 0.8193, Val F1: 0.5714
Best distilroBERTa model saved to ./distilroberta_trial_4\model_best

üìç Epoch 4/6
  Train Loss: 0.4782, Train Acc: 0.8313
  Val Loss: 0.5179, Val Acc: 0.8293, Val F1: 0.5786
Best distilroBERTa model saved to ./distilroberta_trial_4\model_best

üìç Epoch 5/6
  Train Loss: 0.4435, Train Acc: 0.8429
  Val Loss: 0.5165, Val Acc: 0.8267, Val F1: 0.5772

üìç Epoch 6/6


[I 2025-08-12 13:20:27,219] Trial 4 finished with value: 0.5786103330220977 and parameters: {'learning_rate': 2.2207471217033647e-05, 'batch_size': 32, 'num_epochs': 6, 'warmup_ratio': 0.13046137691733709, 'weight_decay': 0.018790490260574548, 'hidden_dropout_prob': 0.23684660530243137, 'classifier_dropout': 0.18803049874792027}. Best is trial 0 with value: 0.5855951746401464.


  Train Loss: 0.4227, Train Acc: 0.8511
  Val Loss: 0.5189, Val Acc: 0.8287, Val F1: 0.5783

distilroBERTa training completed! Best F1: 0.5786
Trial 4: F1 = 0.5786

üèÜ Optimization completed for sentiment!
Best trial: 0
Best F1 score: 0.5856
Best parameters:
  learning_rate: 3.65445235521325e-05
  batch_size: 16
  num_epochs: 5
  warmup_ratio: 0.11560186404424366
  weight_decay: 0.02403950683025824
  hidden_dropout_prob: 0.1116167224336399
  classifier_dropout: 0.273235229154987


In [None]:
# In[23]:

# Cell 13: Hyperparameter Tuning - Emotion
print("\n" + "="*80)
print("üìç PHASE 2: HYPERPARAMETER TUNING - EMOTION")
print("="*80)

print("\n7Ô∏è‚É£ Hyperparameter Tuning for distilroBERTa Emotion Mo cdel...")
print("="*60)

# Create tuner for emotion
emotion_tuner = distilroBERTaHyperparameterTuner(
    model_type="emotion",
    data_splits=emotion_data,
    n_trials=5,
    model_name=model_name
)
emotion_study = emotion_tuner.tune()

[I 2025-08-12 13:20:27,240] A new study created in memory with name: no-name-33f46561-b71b-4be8-abe2-1ef452c215da



üìç PHASE 2: HYPERPARAMETER TUNING - EMOTION

7Ô∏è‚É£ Hyperparameter Tuning for distilroBERTa Emotion Mo cdel...
distilroBERTa hyperparameter tuner initialized for emotion
Using Random Search for optimization

üîç Starting hyperparameter optimization for emotion...
üéØ Random Search: 5 trials
Starting distilroBERTa single-task training (emotion)...

üìç Epoch 1/5
  Train Loss: 1.1190, Train Acc: 0.5633
  Val Loss: 0.7822, Val Acc: 0.7073, Val F1: 0.6354
Best distilroBERTa model saved to ./distilroberta_trial_0\model_best

üìç Epoch 2/5
  Train Loss: 0.6473, Train Acc: 0.7630
  Val Loss: 0.7377, Val Acc: 0.7507, Val F1: 0.7227
Best distilroBERTa model saved to ./distilroberta_trial_0\model_best

üìç Epoch 3/5
  Train Loss: 0.4602, Train Acc: 0.8341
  Val Loss: 0.8189, Val Acc: 0.7473, Val F1: 0.7112

üìç Epoch 4/5
  Train Loss: 0.3166, Train Acc: 0.8906
  Val Loss: 0.8873, Val Acc: 0.7447, Val F1: 0.7135

üìç Epoch 5/5
  Train Loss: 0.2081, Train Acc: 0.9300
  Val Loss: 0.9370,

[I 2025-08-12 13:24:46,762] Trial 0 finished with value: 0.7266554701446585 and parameters: {'learning_rate': 3.65445235521325e-05, 'batch_size': 16, 'num_epochs': 5, 'warmup_ratio': 0.11560186404424366, 'weight_decay': 0.02403950683025824, 'hidden_dropout_prob': 0.1116167224336399, 'classifier_dropout': 0.273235229154987}. Best is trial 0 with value: 0.7266554701446585.


Best distilroBERTa model saved to ./distilroberta_trial_0\model_best

distilroBERTa training completed! Best F1: 0.7267
Trial 0: F1 = 0.7267
Starting distilroBERTa single-task training (emotion)...

üìç Epoch 1/6
  Train Loss: 1.1750, Train Acc: 0.5414
  Val Loss: 0.7865, Val Acc: 0.7207, Val F1: 0.6821
Best distilroBERTa model saved to ./distilroberta_trial_1\model_best

üìç Epoch 2/6
  Train Loss: 0.7221, Train Acc: 0.7403
  Val Loss: 0.7405, Val Acc: 0.7487, Val F1: 0.7120
Best distilroBERTa model saved to ./distilroberta_trial_1\model_best

üìç Epoch 3/6
  Train Loss: 0.5052, Train Acc: 0.8180
  Val Loss: 0.8640, Val Acc: 0.7427, Val F1: 0.7109

üìç Epoch 4/6
  Train Loss: 0.3291, Train Acc: 0.8813
  Val Loss: 0.9431, Val Acc: 0.7307, Val F1: 0.6955

üìç Epoch 5/6
  Train Loss: 0.2068, Train Acc: 0.9303
  Val Loss: 1.1271, Val Acc: 0.7340, Val F1: 0.7002

üìç Epoch 6/6


[I 2025-08-12 13:29:59,188] Trial 1 finished with value: 0.7120221078190857 and parameters: {'learning_rate': 5.262490902114904e-05, 'batch_size': 16, 'num_epochs': 6, 'warmup_ratio': 0.18324426408004219, 'weight_decay': 0.029110519961044856, 'hidden_dropout_prob': 0.1363649934414201, 'classifier_dropout': 0.13668090197068677}. Best is trial 0 with value: 0.7266554701446585.


  Train Loss: 0.1301, Train Acc: 0.9603
  Val Loss: 1.2428, Val Acc: 0.7360, Val F1: 0.7026

distilroBERTa training completed! Best F1: 0.7120
Trial 1: F1 = 0.7120
Starting distilroBERTa single-task training (emotion)...

üìç Epoch 1/4
  Train Loss: 1.1658, Train Acc: 0.5391
  Val Loss: 0.8321, Val Acc: 0.6840, Val F1: 0.6217
Best distilroBERTa model saved to ./distilroberta_trial_2\model_best

üìç Epoch 2/4
  Train Loss: 0.6797, Train Acc: 0.7501
  Val Loss: 0.7412, Val Acc: 0.7413, Val F1: 0.7059
Best distilroBERTa model saved to ./distilroberta_trial_2\model_best

üìç Epoch 3/4
  Train Loss: 0.4999, Train Acc: 0.8190
  Val Loss: 0.7855, Val Acc: 0.7453, Val F1: 0.7118
Best distilroBERTa model saved to ./distilroberta_trial_2\model_best

üìç Epoch 4/4
  Train Loss: 0.3895, Train Acc: 0.8576
  Val Loss: 0.8194, Val Acc: 0.7533, Val F1: 0.7208


[I 2025-08-12 13:33:29,957] Trial 2 finished with value: 0.7208486248547187 and parameters: {'learning_rate': 3.2635193912846855e-05, 'batch_size': 16, 'num_epochs': 4, 'warmup_ratio': 0.16118528947223795, 'weight_decay': 0.022554447458683766, 'hidden_dropout_prob': 0.15842892970704364, 'classifier_dropout': 0.17327236865873835}. Best is trial 0 with value: 0.7266554701446585.


Best distilroBERTa model saved to ./distilroberta_trial_2\model_best

distilroBERTa training completed! Best F1: 0.7208
Trial 2: F1 = 0.7208
Starting distilroBERTa single-task training (emotion)...

üìç Epoch 1/5
  Train Loss: 1.2044, Train Acc: 0.5194
  Val Loss: 0.8866, Val Acc: 0.7167, Val F1: 0.6628
Best distilroBERTa model saved to ./distilroberta_trial_3\model_best

üìç Epoch 2/5
  Train Loss: 0.7387, Train Acc: 0.7306
  Val Loss: 0.8381, Val Acc: 0.7187, Val F1: 0.6791
Best distilroBERTa model saved to ./distilroberta_trial_3\model_best

üìç Epoch 3/5
  Train Loss: 0.5862, Train Acc: 0.7891
  Val Loss: 0.8555, Val Acc: 0.7473, Val F1: 0.7168
Best distilroBERTa model saved to ./distilroberta_trial_3\model_best

üìç Epoch 4/5
  Train Loss: 0.4622, Train Acc: 0.8343
  Val Loss: 0.8654, Val Acc: 0.7460, Val F1: 0.7189
Best distilroBERTa model saved to ./distilroberta_trial_3\model_best

üìç Epoch 5/5


[I 2025-08-12 13:38:31,861] Trial 3 finished with value: 0.7188542488682588 and parameters: {'learning_rate': 4.166863122305896e-05, 'batch_size': 16, 'num_epochs': 5, 'warmup_ratio': 0.15924145688620425, 'weight_decay': 0.014180537144799797, 'hidden_dropout_prob': 0.22150897038028766, 'classifier_dropout': 0.1341048247374583}. Best is trial 0 with value: 0.7266554701446585.


  Train Loss: 0.3814, Train Acc: 0.8656
  Val Loss: 0.8847, Val Acc: 0.7467, Val F1: 0.7156

distilroBERTa training completed! Best F1: 0.7189
Trial 3: F1 = 0.7189
Starting distilroBERTa single-task training (emotion)...

üìç Epoch 1/6
  Train Loss: 1.4828, Train Acc: 0.3951
  Val Loss: 0.8550, Val Acc: 0.6907, Val F1: 0.6360
Best distilroBERTa model saved to ./distilroberta_trial_4\model_best

üìç Epoch 2/6
  Train Loss: 0.7959, Train Acc: 0.6993
  Val Loss: 0.7516, Val Acc: 0.7413, Val F1: 0.7069
Best distilroBERTa model saved to ./distilroberta_trial_4\model_best

üìç Epoch 3/6
  Train Loss: 0.6662, Train Acc: 0.7566
  Val Loss: 0.7764, Val Acc: 0.7453, Val F1: 0.7119
Best distilroBERTa model saved to ./distilroberta_trial_4\model_best

üìç Epoch 4/6
  Train Loss: 0.6001, Train Acc: 0.7833
  Val Loss: 0.7813, Val Acc: 0.7427, Val F1: 0.7103

üìç Epoch 5/6
  Train Loss: 0.5556, Train Acc: 0.7897
  Val Loss: 0.7981, Val Acc: 0.7493, Val F1: 0.7136
Best distilroBERTa model saved t

[I 2025-08-12 13:49:16,537] Trial 4 finished with value: 0.7135875327822228 and parameters: {'learning_rate': 2.2207471217033647e-05, 'batch_size': 32, 'num_epochs': 6, 'warmup_ratio': 0.13046137691733709, 'weight_decay': 0.018790490260574548, 'hidden_dropout_prob': 0.23684660530243137, 'classifier_dropout': 0.18803049874792027}. Best is trial 0 with value: 0.7266554701446585.



üèÜ Optimization completed for emotion!
Best trial: 0
Best F1 score: 0.7267
Best parameters:
  learning_rate: 3.65445235521325e-05
  batch_size: 16
  num_epochs: 5
  warmup_ratio: 0.11560186404424366
  weight_decay: 0.02403950683025824
  hidden_dropout_prob: 0.1116167224336399
  classifier_dropout: 0.273235229154987


In [None]:
# In[24]:

# Cell 14: Hyperparameter Tuning - Multitask
print("\n" + "="*80)
print("üìç PHASE 2: HYPERPARAMETER TUNING - MULTITASK")
print("="*80)

print("\n8Ô∏è‚É£ Hyperparameter Tuning for distilroBERTa Multi-task Model...")
print("="*60)

# Create tuner for multitask
multitask_tuner = distilroBERTaHyperparameterTuner(
    model_type="multitask",
    data_splits=multitask_data,
    n_trials=5,
    model_name=model_name
)
multitask_study = multitask_tuner.tune()

[I 2025-08-12 13:49:16,581] A new study created in memory with name: no-name-b3a828c4-3b0b-4860-b96f-b21cb66fac49



üìç PHASE 2: HYPERPARAMETER TUNING - MULTITASK

8Ô∏è‚É£ Hyperparameter Tuning for distilroBERTa Multi-task Model...
distilroBERTa hyperparameter tuner initialized for multitask
Using Random Search for optimization

üîç Starting hyperparameter optimization for multitask...
üéØ Random Search: 5 trials
Starting distilroBERTa multi-task training...

Epoch 1/5
  Train Loss: 1.2240
  Train Sentiment Acc: 0.6910, Train Emotion Acc: 0.2693
  Val Loss: 1.1068
  Val Sentiment Acc: 0.8027, F1: 0.5604
  Val Emotion Acc: 0.3000, F1: 0.0821
Best distilroBERTa model saved to ./distilroberta_trial_0\model_best

Epoch 2/5
  Train Loss: 1.0856
  Train Sentiment Acc: 0.8267, Train Emotion Acc: 0.2809
  Val Loss: 1.1097
  Val Sentiment Acc: 0.8227, F1: 0.5746
  Val Emotion Acc: 0.3027, F1: 0.0775
Best distilroBERTa model saved to ./distilroberta_trial_0\model_best

Epoch 3/5
  Train Loss: 1.0191
  Train Sentiment Acc: 0.8717, Train Emotion Acc: 0.2913
  Val Loss: 1.0920
  Val Sentiment Acc: 0.8280, F1

[I 2025-08-12 13:53:56,715] Trial 0 finished with value: 0.35907448922484525 and parameters: {'learning_rate': 3.65445235521325e-05, 'batch_size': 16, 'num_epochs': 5, 'warmup_ratio': 0.11560186404424366, 'weight_decay': 0.02403950683025824, 'hidden_dropout_prob': 0.1116167224336399, 'classifier_dropout': 0.273235229154987, 'alpha': 0.5202230023486417}. Best is trial 0 with value: 0.35907448922484525.


Best distilroBERTa model saved to ./distilroberta_trial_0\model_best

distilroBERTa training completed! Best Combined F1: 0.3565
Trial 0: Combined F1 = 0.3591
Starting distilroBERTa multi-task training...

Epoch 1/6
  Train Loss: 1.2373
  Train Sentiment Acc: 0.6843, Train Emotion Acc: 0.2721
  Val Loss: 1.1256
  Val Sentiment Acc: 0.7920, F1: 0.5527
  Val Emotion Acc: 0.2620, F1: 0.1284
Best distilroBERTa model saved to ./distilroberta_trial_1\model_best

Epoch 2/6
  Train Loss: 1.1166
  Train Sentiment Acc: 0.8153, Train Emotion Acc: 0.2841
  Val Loss: 1.0925
  Val Sentiment Acc: 0.8233, F1: 0.5745
  Val Emotion Acc: 0.3013, F1: 0.0805

Epoch 3/6
  Train Loss: 1.0421
  Train Sentiment Acc: 0.8663, Train Emotion Acc: 0.2903
  Val Loss: 1.1254
  Val Sentiment Acc: 0.8160, F1: 0.5700
  Val Emotion Acc: 0.2807, F1: 0.1148
Best distilroBERTa model saved to ./distilroberta_trial_1\model_best

Epoch 4/6
  Train Loss: 0.9859
  Train Sentiment Acc: 0.8891, Train Emotion Acc: 0.3084
  Val Loss

[I 2025-08-12 14:17:42,417] Trial 1 finished with value: 0.3752436806566236 and parameters: {'learning_rate': 6.251028636335231e-05, 'batch_size': 32, 'num_epochs': 6, 'warmup_ratio': 0.12123391106782762, 'weight_decay': 0.02636424704863906, 'hidden_dropout_prob': 0.13668090197068677, 'classifier_dropout': 0.16084844859190756, 'alpha': 0.5049512863264476}. Best is trial 1 with value: 0.3752436806566236.


Best distilroBERTa model saved to ./distilroberta_trial_1\model_best

distilroBERTa training completed! Best Combined F1: 0.3752
Trial 1: Combined F1 = 0.3752
Starting distilroBERTa multi-task training...

Epoch 1/3
  Train Loss: 1.3170
  Train Sentiment Acc: 0.6470, Train Emotion Acc: 0.2750
  Val Loss: 1.2168
  Val Sentiment Acc: 0.7913, F1: 0.5508
  Val Emotion Acc: 0.3027, F1: 0.0774
Best distilroBERTa model saved to ./distilroberta_trial_2\model_best

Epoch 2/3
  Train Loss: 1.2013
  Train Sentiment Acc: 0.8133, Train Emotion Acc: 0.2856
  Val Loss: 1.1789
  Val Sentiment Acc: 0.8060, F1: 0.5626
  Val Emotion Acc: 0.3027, F1: 0.0775
Best distilroBERTa model saved to ./distilroberta_trial_2\model_best

Epoch 3/3
  Train Loss: 1.1588
  Train Sentiment Acc: 0.8410, Train Emotion Acc: 0.2884
  Val Loss: 1.1843
  Val Sentiment Acc: 0.8147, F1: 0.5684
  Val Emotion Acc: 0.3033, F1: 0.0786
Best distilroBERTa model saved to ./distilroberta_trial_2\model_best

distilroBERTa training comple

[I 2025-08-12 14:28:37,670] Trial 2 finished with value: 0.3235255076358019 and parameters: {'learning_rate': 4.008174375308313e-05, 'batch_size': 32, 'num_epochs': 3, 'warmup_ratio': 0.12921446485352184, 'weight_decay': 0.04297256589643226, 'hidden_dropout_prob': 0.19121399684340717, 'classifier_dropout': 0.2570351922786027, 'alpha': 0.43993475643167196}. Best is trial 1 with value: 0.3752436806566236.


Starting distilroBERTa multi-task training...

Epoch 1/5
  Train Loss: 1.2379
  Train Sentiment Acc: 0.6309, Train Emotion Acc: 0.2597
  Val Loss: 1.0503
  Val Sentiment Acc: 0.8027, F1: 0.5601
  Val Emotion Acc: 0.3027, F1: 0.0774
Best distilroBERTa model saved to ./distilroberta_trial_3\model_best

Epoch 2/5
  Train Loss: 1.0998
  Train Sentiment Acc: 0.7839, Train Emotion Acc: 0.2683
  Val Loss: 1.0741
  Val Sentiment Acc: 0.8160, F1: 0.5689
  Val Emotion Acc: 0.2867, F1: 0.1083
Best distilroBERTa model saved to ./distilroberta_trial_3\model_best

Epoch 3/5
  Train Loss: 1.0582
  Train Sentiment Acc: 0.8127, Train Emotion Acc: 0.2789
  Val Loss: 1.0753
  Val Sentiment Acc: 0.8140, F1: 0.5681
  Val Emotion Acc: 0.3027, F1: 0.0785

Epoch 4/5
  Train Loss: 1.0265
  Train Sentiment Acc: 0.8289, Train Emotion Acc: 0.2760
  Val Loss: 1.0614
  Val Sentiment Acc: 0.8260, F1: 0.5763
  Val Emotion Acc: 0.3013, F1: 0.0828

Epoch 5/5


[I 2025-08-12 14:35:46,494] Trial 3 finished with value: 0.3423072426689452 and parameters: {'learning_rate': 4.575772704489176e-05, 'batch_size': 16, 'num_epochs': 5, 'warmup_ratio': 0.11705241236872915, 'weight_decay': 0.015854643368675158, 'hidden_dropout_prob': 0.28977710745066665, 'classifier_dropout': 0.29312640661491185, 'alpha': 0.5616794696232922}. Best is trial 1 with value: 0.3752436806566236.


  Train Loss: 0.9957
  Train Sentiment Acc: 0.8503, Train Emotion Acc: 0.2901
  Val Loss: 1.0726
  Val Sentiment Acc: 0.8247, F1: 0.5754
  Val Emotion Acc: 0.3027, F1: 0.0785

distilroBERTa training completed! Best Combined F1: 0.3386
Trial 3: Combined F1 = 0.3423
Starting distilroBERTa multi-task training...

Epoch 1/4
  Train Loss: 1.2982
  Train Sentiment Acc: 0.6557, Train Emotion Acc: 0.2640
  Val Loss: 1.1826
  Val Sentiment Acc: 0.7967, F1: 0.5563
  Val Emotion Acc: 0.3047, F1: 0.0871
Best distilroBERTa model saved to ./distilroberta_trial_4\model_best

Epoch 2/4
  Train Loss: 1.1656
  Train Sentiment Acc: 0.8226, Train Emotion Acc: 0.2933
  Val Loss: 1.1664
  Val Sentiment Acc: 0.8053, F1: 0.5603
  Val Emotion Acc: 0.3000, F1: 0.0861
Best distilroBERTa model saved to ./distilroberta_trial_4\model_best

Epoch 3/4
  Train Loss: 1.1072
  Train Sentiment Acc: 0.8663, Train Emotion Acc: 0.2997
  Val Loss: 1.1729
  Val Sentiment Acc: 0.8313, F1: 0.5794
  Val Emotion Acc: 0.2987, F1: 

[I 2025-08-12 14:51:57,498] Trial 4 finished with value: 0.34570425707326 and parameters: {'learning_rate': 3.26547139093759e-05, 'batch_size': 32, 'num_epochs': 4, 'warmup_ratio': 0.11220382348447788, 'weight_decay': 0.054565921910014324, 'hidden_dropout_prob': 0.10687770422304368, 'classifier_dropout': 0.2818640804157564, 'alpha': 0.4517559963200034}. Best is trial 1 with value: 0.3752436806566236.


Best distilroBERTa model saved to ./distilroberta_trial_4\model_best

distilroBERTa training completed! Best Combined F1: 0.3457
Trial 4: Combined F1 = 0.3457

üèÜ Optimization completed for multitask!
Best trial: 1
Best F1 score: 0.3752
Best parameters:
  learning_rate: 6.251028636335231e-05
  batch_size: 32
  num_epochs: 6
  warmup_ratio: 0.12123391106782762
  weight_decay: 0.02636424704863906
  hidden_dropout_prob: 0.13668090197068677
  classifier_dropout: 0.16084844859190756
  alpha: 0.5049512863264476


In [None]:
# In[ ]:

print("\n" + "="*80)
print("üìç PHASE 3: FINAL TRAINING - OPTIMIZED SENTIMENT MODEL")
print("="*80)

print("\n9Ô∏è‚É£ Training Final distilroBERTa Sentiment Model with Best Parameters...")
print("="*60)

best_sentiment_params = sentiment_study.best_params
print(f"üéØ Using best hyperparameters:")
for key, value in best_sentiment_params.items():
    print(f"  {key}: {value}")

if 'all_results' not in globals():
    all_results = {}

final_sentiment_config = TrainingConfig(
    model_name=model_name,
    learning_rate=5.262490902114904e-05,  # From best parameters
    batch_size=16,
    num_epochs=5,  # Increased epochs for final training
    warmup_ratio=0.18324426408004219,
    weight_decay=0.029110519961044856,
    hidden_dropout_prob=0.1363649934414201,
    classifier_dropout=0.13668090197068677,
    max_length=128,
    task_type="sentiment",
    output_dir="./final_distilroberta_sentiment_model"
)

# Train final sentiment model
final_sentiment_trainer = distilroBERTaSingleTaskTrainer(
    config=final_sentiment_config,
    num_classes=roberta_model_config.sentiment_num_classes
)
final_sentiment_history = final_sentiment_trainer.train(sentiment_data)

# Evaluate final sentiment model on both general and Reddit datasets
final_sentiment_results = evaluate_distilroberta_model(
    model_path="./final_distilroberta_sentiment_model/model_best",
    model_type="sentiment",
    test_data=sentiment_data['test'],
    model_name=model_name,
    reddit_data=reddit_data['sentiment'] if reddit_data else None
)

# Now store the results
all_results['final_sentiment'] = final_sentiment_results

print(f"\nFinal Sentiment Model Results:")
print(f"  General Dataset:")
print(f"    Accuracy: {final_sentiment_results['general']['accuracy']:.4f}")
print(f"    F1 Macro: {final_sentiment_results['general']['f1_macro']:.4f}")
if final_sentiment_results['reddit']:
    print(f"  Reddit Dataset:")
    print(f"    Accuracy: {final_sentiment_results['reddit']['accuracy']:.4f}")
    print(f"    F1 Macro: {final_sentiment_results['reddit']['f1_macro']:.4f}")

# Clean up memory
aggressive_memory_cleanup()


üìç PHASE 3: FINAL TRAINING - OPTIMIZED SENTIMENT MODEL

9Ô∏è‚É£ Training Final distilroBERTa Sentiment Model with Best Parameters...
üéØ Using best hyperparameters:
  learning_rate: 3.65445235521325e-05
  batch_size: 16
  num_epochs: 5
  warmup_ratio: 0.11560186404424366
  weight_decay: 0.02403950683025824
  hidden_dropout_prob: 0.1116167224336399
  classifier_dropout: 0.273235229154987
Starting distilroBERTa single-task training (sentiment)...

üìç Epoch 1/5
  Train Loss: 0.7376, Train Acc: 0.6953
  Val Loss: 0.5447, Val Acc: 0.8087, Val F1: 0.5640
Best distilroBERTa model saved to ./final_distilroberta_sentiment_model\model_best

üìç Epoch 2/5
  Train Loss: 0.5464, Train Acc: 0.8120
  Val Loss: 0.4997, Val Acc: 0.8227, Val F1: 0.5740
Best distilroBERTa model saved to ./final_distilroberta_sentiment_model\model_best

üìç Epoch 3/5
  Train Loss: 0.4175, Train Acc: 0.8629
  Val Loss: 0.5749, Val Acc: 0.8133, Val F1: 0.5675

üìç Epoch 4/5
  Train Loss: 0.3399, Train Acc: 0.8896
 

In [None]:
# In[ ]:

print("\n" + "="*80)
print("üìç PHASE 3: FINAL TRAINING - OPTIMIZED EMOTION MODEL")
print("="*80)

print("\nüîü Training Final distilroBERTa Emotion Model with Best Parameters...")
print("="*60)

# Initialize results dictionary if not exists
if 'all_results' not in globals():
    all_results = {}

# Get best parameters from emotion tuning
best_emotion_params = emotion_study.best_params
print(f"üéØ Using best hyperparameters:")
for key, value in best_emotion_params.items():
    print(f"  {key}: {value}")

# Create optimized config for final training
final_emotion_config = TrainingConfig(
    model_name=model_name,
    learning_rate=best_emotion_params['learning_rate'],
    batch_size=best_emotion_params['batch_size'],
    num_epochs=5,  # Increase epochs for final training
    warmup_ratio=best_emotion_params['warmup_ratio'],
    weight_decay=best_emotion_params['weight_decay'],
    hidden_dropout_prob=best_emotion_params['hidden_dropout_prob'],
    classifier_dropout=best_emotion_params['classifier_dropout'],
    max_length=best_emotion_params.get('max_length', 128),
    task_type="emotion",
    output_dir="./final_distilroberta_emotion_model"
)

print(f"\nüöÄ Training final emotion model:")
print(f"  Dataset: Full emotion data ({len(emotion_data['train']['texts'])} train samples)")
print(f"  Epochs: {final_emotion_config.num_epochs}")
print(f"  Batch size: {final_emotion_config.batch_size}")
print(f"  Learning rate: {final_emotion_config.learning_rate:.2e}")

# Train final emotion model
final_emotion_trainer = distilroBERTaSingleTaskTrainer(
    config=final_emotion_config,
    num_classes=roberta_model_config.emotion_num_classes
)
final_emotion_history = final_emotion_trainer.train(emotion_data)

# Evaluate final emotion model on both general and Reddit datasets
final_emotion_results = evaluate_distilroberta_model(
    model_path="./final_distilroberta_emotion_model/model_best",
    model_type="emotion",
    test_data=emotion_data['test'],
    model_name=model_name,
    reddit_data=reddit_data['emotion'] if reddit_data else None
)
all_results['final_emotion'] = final_emotion_results

print(f"\n‚úÖ Final Emotion Model Results:")
print(f"  General Dataset:")
print(f"    Accuracy: {final_emotion_results['general']['accuracy']:.4f}")
print(f"    F1 Macro: {final_emotion_results['general']['f1_macro']:.4f}")
if final_emotion_results['reddit']:
    print(f"  Reddit Dataset:")
    print(f"    Accuracy: {final_emotion_results['reddit']['accuracy']:.4f}")
    print(f"    F1 Macro: {final_emotion_results['reddit']['f1_macro']:.4f}")

# Clean up memory
aggressive_memory_cleanup()


üìç PHASE 3: FINAL TRAINING - OPTIMIZED EMOTION MODEL

üîü Training Final distilroBERTa Emotion Model with Best Parameters...
üéØ Using best hyperparameters:
  learning_rate: 3.65445235521325e-05
  batch_size: 16
  num_epochs: 5
  warmup_ratio: 0.11560186404424366
  weight_decay: 0.02403950683025824
  hidden_dropout_prob: 0.1116167224336399
  classifier_dropout: 0.273235229154987

üöÄ Training final emotion model:
  Dataset: Full emotion data (7000 train samples)
  Epochs: 5
  Batch size: 16
  Learning rate: 3.65e-05
Starting distilroBERTa single-task training (emotion)...

üìç Epoch 1/5
  Train Loss: 1.1020, Train Acc: 0.5627
  Val Loss: 0.7840, Val Acc: 0.7140, Val F1: 0.6534
Best distilroBERTa model saved to ./final_distilroberta_emotion_model\model_best

üìç Epoch 2/5
  Train Loss: 0.6616, Train Acc: 0.7573
  Val Loss: 0.7641, Val Acc: 0.7427, Val F1: 0.6980
Best distilroBERTa model saved to ./final_distilroberta_emotion_model\model_best

üìç Epoch 3/5
  Train Loss: 0.4659,

In [None]:
# In[ ]:

print("\n" + "="*80)
print("üìç PHASE 3: FINAL TRAINING - OPTIMIZED MULTITASK MODEL")
print("="*80)

print("\n1Ô∏è‚É£1Ô∏è‚É£ Training Final distilroBERTa Multi-task Model with Best Parameters...")
print("="*60)

# Initialize results dictionary if not exists
if 'all_results' not in globals():
    all_results = {}

# Get best parameters from multitask tuning
best_multitask_params = multitask_study.best_params
print(f"üéØ Using best hyperparameters:")
for key, value in best_multitask_params.items():
    print(f"  {key}: {value}")

# Create optimized config for final training
final_multitask_config = TrainingConfig(
    model_name=model_name,
    learning_rate=best_multitask_params['learning_rate'],
    batch_size=best_multitask_params['batch_size'],
    num_epochs=5,  # Increase epochs for final training
    warmup_ratio=best_multitask_params['warmup_ratio'],
    weight_decay=best_multitask_params['weight_decay'],
    hidden_dropout_prob=best_multitask_params['hidden_dropout_prob'],
    classifier_dropout=best_multitask_params['classifier_dropout'],
    max_length=best_multitask_params.get('max_length', 128),
    alpha=best_multitask_params['alpha'],  # Multitask-specific parameter
    task_type="multitask",
    output_dir="./final_distilroberta_multitask_model"
)

print(f"\nüöÄ Training final multitask model:")
print(f"  Dataset: Full multitask data ({len(multitask_data['train']['texts'])} train samples)")
print(f"  Epochs: {final_multitask_config.num_epochs}")
print(f"  Batch size: {final_multitask_config.batch_size}")
print(f"  Learning rate: {final_multitask_config.learning_rate:.2e}")
print(f"  Alpha (loss weighting): {final_multitask_config.alpha:.3f}")

# Train final multitask model
final_multitask_trainer = distilroBERTaMultiTaskTrainer(config=final_multitask_config)
final_multitask_history = final_multitask_trainer.train(multitask_data)

# Evaluate final multitask model on both general and Reddit datasets
final_multitask_results = evaluate_distilroberta_model(
    model_path="./final_distilroberta_multitask_model/model_best",
    model_type="multitask",
    test_data=multitask_data['test'],
    model_name=model_name,
    reddit_data=reddit_data['multitask'] if reddit_data else None
)
all_results['final_multitask'] = final_multitask_results

print(f"\n‚úÖ Final Multitask Model Results:")
print(f"  General Dataset:")
print(f"    Sentiment - Accuracy: {final_multitask_results['general']['sentiment_accuracy']:.4f}, F1: {final_multitask_results['general']['sentiment_f1_macro']:.4f}")
print(f"    Emotion - Accuracy: {final_multitask_results['general']['emotion_accuracy']:.4f}, F1: {final_multitask_results['general']['emotion_f1_macro']:.4f}")
print(f"    Combined - Accuracy: {final_multitask_results['general']['combined_accuracy']:.4f}, F1: {final_multitask_results['general']['combined_f1_macro']:.4f}")
if final_multitask_results['reddit']:
    print(f"  Reddit Dataset:")
    print(f"    Sentiment - Accuracy: {final_multitask_results['reddit']['sentiment_accuracy']:.4f}, F1: {final_multitask_results['reddit']['sentiment_f1_macro']:.4f}")
    print(f"    Emotion - Accuracy: {final_multitask_results['reddit']['emotion_accuracy']:.4f}, F1: {final_multitask_results['reddit']['emotion_f1_macro']:.4f}")
    print(f"    Combined - Accuracy: {final_multitask_results['reddit']['combined_accuracy']:.4f}, F1: {final_multitask_results['reddit']['combined_f1_macro']:.4f}")

# Clean up memory
aggressive_memory_cleanup()

print("\n" + "="*80)
print("üèÅ FINAL COMPREHENSIVE RESULTS SUMMARY")
print("="*80)

print(f"\nüìä DISTILROBERTA MODEL PERFORMANCE COMPARISON:")
print(f"  {'='*60}")

# Sentiment Model Comparison
print(f"\nüéØ SENTIMENT MODEL:")
print(f"  Initial Model:")
print(f"    General Dataset - Accuracy: {all_results['initial_sentiment']['general']['accuracy']:.4f}, F1: {all_results['initial_sentiment']['general']['f1_macro']:.4f}")
if all_results['initial_sentiment'].get('reddit'):
    print(f"    Reddit Dataset - Accuracy: {all_results['initial_sentiment']['reddit']['accuracy']:.4f}, F1: {all_results['initial_sentiment']['reddit']['f1_macro']:.4f}")

print(f"  Final Optimized Model:")
print(f"    General Dataset - Accuracy: {all_results['final_sentiment']['general']['accuracy']:.4f}, F1: {all_results['final_sentiment']['general']['f1_macro']:.4f}")
if all_results['final_sentiment'].get('reddit'):
    print(f"    Reddit Dataset - Accuracy: {all_results['final_sentiment']['reddit']['accuracy']:.4f}, F1: {all_results['final_sentiment']['reddit']['f1_macro']:.4f}")

# Calculate improvements
sentiment_general_improvement = all_results['final_sentiment']['general']['accuracy'] - all_results['initial_sentiment']['general']['accuracy']
sentiment_f1_improvement = all_results['final_sentiment']['general']['f1_macro'] - all_results['initial_sentiment']['general']['f1_macro']
print(f"  Improvements:")
print(f"    General Accuracy: {sentiment_general_improvement:+.4f}")
print(f"    General F1: {sentiment_f1_improvement:+.4f}")

# Emotion Model Comparison
print(f"\nüé≠ EMOTION MODEL:")
print(f"  Initial Model:")
print(f"    General Dataset - Accuracy: {all_results['initial_emotion']['general']['accuracy']:.4f}, F1: {all_results['initial_emotion']['general']['f1_macro']:.4f}")
if all_results['initial_emotion'].get('reddit'):
    print(f"    Reddit Dataset - Accuracy: {all_results['initial_emotion']['reddit']['accuracy']:.4f}, F1: {all_results['initial_emotion']['reddit']['f1_macro']:.4f}")

print(f"  Final Optimized Model:")
print(f"    General Dataset - Accuracy: {all_results['final_emotion']['general']['accuracy']:.4f}, F1: {all_results['final_emotion']['general']['f1_macro']:.4f}")
if all_results['final_emotion'].get('reddit'):
    print(f"    Reddit Dataset - Accuracy: {all_results['final_emotion']['reddit']['accuracy']:.4f}, F1: {all_results['final_emotion']['reddit']['f1_macro']:.4f}")

# Calculate improvements
emotion_general_improvement = all_results['final_emotion']['general']['accuracy'] - all_results['initial_emotion']['general']['accuracy']
emotion_f1_improvement = all_results['final_emotion']['general']['f1_macro'] - all_results['initial_emotion']['general']['f1_macro']
print(f"  Improvements:")
print(f"    General Accuracy: {emotion_general_improvement:+.4f}")
print(f"    General F1: {emotion_f1_improvement:+.4f}")

# Multitask Model Comparison
print(f"\nüîÑ MULTITASK MODEL:")
print(f"  Initial Model:")
print(f"    General Dataset:")
print(f"      Sentiment - Accuracy: {all_results['initial_multitask']['general']['sentiment_accuracy']:.4f}, F1: {all_results['initial_multitask']['general']['sentiment_f1_macro']:.4f}")
print(f"      Emotion - Accuracy: {all_results['initial_multitask']['general']['emotion_accuracy']:.4f}, F1: {all_results['initial_multitask']['general']['emotion_f1_macro']:.4f}")
print(f"      Combined - Accuracy: {all_results['initial_multitask']['general']['combined_accuracy']:.4f}, F1: {all_results['initial_multitask']['general']['combined_f1_macro']:.4f}")
if all_results['initial_multitask'].get('reddit'):
    print(f"    Reddit Dataset:")
    print(f"      Sentiment - Accuracy: {all_results['initial_multitask']['reddit']['sentiment_accuracy']:.4f}, F1: {all_results['initial_multitask']['reddit']['sentiment_f1_macro']:.4f}")
    print(f"      Emotion - Accuracy: {all_results['initial_multitask']['reddit']['emotion_accuracy']:.4f}, F1: {all_results['initial_multitask']['reddit']['emotion_f1_macro']:.4f}")
    print(f"      Combined - Accuracy: {all_results['initial_multitask']['reddit']['combined_accuracy']:.4f}, F1: {all_results['initial_multitask']['reddit']['combined_f1_macro']:.4f}")

print(f"  Final Optimized Model:")
print(f"    General Dataset:")
print(f"      Sentiment - Accuracy: {all_results['final_multitask']['general']['sentiment_accuracy']:.4f}, F1: {all_results['final_multitask']['general']['sentiment_f1_macro']:.4f}")
print(f"      Emotion - Accuracy: {all_results['final_multitask']['general']['emotion_accuracy']:.4f}, F1: {all_results['final_multitask']['general']['emotion_f1_macro']:.4f}")
print(f"      Combined - Accuracy: {all_results['final_multitask']['general']['combined_accuracy']:.4f}, F1: {all_results['final_multitask']['general']['combined_f1_macro']:.4f}")
if all_results['final_multitask'].get('reddit'):
    print(f"    Reddit Dataset:")
    print(f"      Sentiment - Accuracy: {all_results['final_multitask']['reddit']['sentiment_accuracy']:.4f}, F1: {all_results['final_multitask']['reddit']['sentiment_f1_macro']:.4f}")
    print(f"      Emotion - Accuracy: {all_results['final_multitask']['reddit']['emotion_accuracy']:.4f}, F1: {all_results['final_multitask']['reddit']['emotion_f1_macro']:.4f}")
    print(f"      Combined - Accuracy: {all_results['final_multitask']['reddit']['combined_accuracy']:.4f}, F1: {all_results['final_multitask']['reddit']['combined_f1_macro']:.4f}")

# Calculate improvements
multitask_sentiment_improvement = all_results['final_multitask']['general']['sentiment_accuracy'] - all_results['initial_multitask']['general']['sentiment_accuracy']
multitask_emotion_improvement = all_results['final_multitask']['general']['emotion_accuracy'] - all_results['initial_multitask']['general']['emotion_accuracy']
multitask_combined_improvement = all_results['final_multitask']['general']['combined_accuracy'] - all_results['initial_multitask']['general']['combined_accuracy']
print(f"  Improvements:")
print(f"    Sentiment Accuracy: {multitask_sentiment_improvement:+.4f}")
print(f"    Emotion Accuracy: {multitask_emotion_improvement:+.4f}")
print(f"    Combined Accuracy: {multitask_combined_improvement:+.4f}")

print(f"\nüéâ DISTILROBERTA TRAINING PIPELINE COMPLETED SUCCESSFULLY!")
print(f"   All models trained and evaluated on both general and Reddit datasets")
print(f"   Hyperparameter optimization completed using macro F1 on general datasets")
print(f"   Final models saved and ready for deployment")


üìç PHASE 3: FINAL TRAINING - OPTIMIZED MULTITASK MODEL

1Ô∏è‚É£1Ô∏è‚É£ Training Final distilroBERTa Multi-task Model with Best Parameters...
üéØ Using best hyperparameters:
  learning_rate: 6.251028636335231e-05
  batch_size: 32
  num_epochs: 6
  warmup_ratio: 0.12123391106782762
  weight_decay: 0.02636424704863906
  hidden_dropout_prob: 0.13668090197068677
  classifier_dropout: 0.16084844859190756
  alpha: 0.5049512863264476

üöÄ Training final multitask model:
  Dataset: Full multitask data (7000 train samples)
  Epochs: 5
  Batch size: 32
  Learning rate: 6.25e-05
  Alpha (loss weighting): 0.505
Starting distilroBERTa multi-task training...

Epoch 1/5
  Train Loss: 1.2325
  Train Sentiment Acc: 0.6946, Train Emotion Acc: 0.2700
  Val Loss: 1.1181
  Val Sentiment Acc: 0.7960, F1: 0.5541
  Val Emotion Acc: 0.2860, F1: 0.0977
Best distilroBERTa model saved to ./final_distilroberta_multitask_model\model_best

Epoch 2/5
  Train Loss: 1.1044
  Train Sentiment Acc: 0.8239, Train Emoti

In [None]:
# In[ ]:

print("\n" + "="*80)
print("üìç PHASE 3: FINAL TRAINING - OPTIMIZED MULTITASK MODEL")
print("="*80)

print("\n1Ô∏è‚É£1Ô∏è‚É£ Training Final distilroBERTa Multi-task Model with Best Parameters...")
print("="*60)

# Initialize results dictionary if not exists
if 'all_results' not in globals():
    all_results = {}

# Get best parameters from multitask tuning
best_multitask_params = multitask_study.best_params
print(f"üéØ Using best hyperparameters:")
for key, value in best_multitask_params.items():
    print(f"  {key}: {value}")

# Create optimized config for final training
final_multitask_config = TrainingConfig(
    model_name=model_name,
    learning_rate=best_multitask_params['learning_rate'],
    batch_size=best_multitask_params['batch_size'],
    num_epochs=5,  # Increase epochs for final training
    warmup_ratio=best_multitask_params['warmup_ratio'],
    weight_decay=best_multitask_params['weight_decay'],
    hidden_dropout_prob=best_multitask_params['hidden_dropout_prob'],
    classifier_dropout=best_multitask_params['classifier_dropout'],
    max_length=best_multitask_params.get('max_length', 128),
    alpha=best_multitask_params['alpha'],  # Multitask-specific parameter
    task_type="multitask",
    output_dir="./final_distilroberta_multitask_model"
)

print(f"\nüöÄ Training final multitask model:")
print(f"  Dataset: Full multitask data ({len(multitask_data['train']['texts'])} train samples)")
print(f"  Epochs: {final_multitask_config.num_epochs}")
print(f"  Batch size: {final_multitask_config.batch_size}")
print(f"  Learning rate: {final_multitask_config.learning_rate:.2e}")
print(f"  Alpha (loss weighting): {final_multitask_config.alpha:.3f}")

# Train final multitask model
final_multitask_trainer = distilroBERTaMultiTaskTrainer(config=final_multitask_config)
final_multitask_history = final_multitask_trainer.train(multitask_data)

# Evaluate final multitask model on both general and Reddit datasets
final_multitask_results = evaluate_distilroberta_model(
    model_path="./final_distilroberta_multitask_model/model_best",
    model_type="multitask",
    test_data=multitask_data['test'],
    model_name=model_name,
    reddit_data=reddit_data['multitask'] if reddit_data else None
)
all_results['final_multitask'] = final_multitask_results

print(f"\n‚úÖ Final Multitask Model Results:")
print(f"  General Dataset:")
print(f"    Sentiment - Accuracy: {final_multitask_results['general']['sentiment_accuracy']:.4f}, F1: {final_multitask_results['general']['sentiment_f1_macro']:.4f}")
print(f"    Emotion - Accuracy: {final_multitask_results['general']['emotion_accuracy']:.4f}, F1: {final_multitask_results['general']['emotion_f1_macro']:.4f}")
print(f"    Combined - Accuracy: {final_multitask_results['general']['combined_accuracy']:.4f}, F1: {final_multitask_results['general']['combined_f1_macro']:.4f}")
if final_multitask_results['reddit']:
    print(f"  Reddit Dataset:")
    print(f"    Sentiment - Accuracy: {final_multitask_results['reddit']['sentiment_accuracy']:.4f}, F1: {final_multitask_results['reddit']['sentiment_f1_macro']:.4f}")
    print(f"    Emotion - Accuracy: {final_multitask_results['reddit']['emotion_accuracy']:.4f}, F1: {final_multitask_results['reddit']['emotion_f1_macro']:.4f}")
    print(f"    Combined - Accuracy: {final_multitask_results['reddit']['combined_accuracy']:.4f}, F1: {final_multitask_results['reddit']['combined_f1_macro']:.4f}")

# Clean up memory
aggressive_memory_cleanup()

print("\n" + "="*80)
print("üèÅ FINAL COMPREHENSIVE RESULTS SUMMARY")
print("="*80)

print(f"\nüìä DISTILROBERTA MODEL PERFORMANCE COMPARISON:")
print(f"  {'='*60}")

# Sentiment Model Comparison
print(f"\nüéØ SENTIMENT MODEL:")
print(f"  Initial Model:")
print(f"    General Dataset - Accuracy: {all_results['initial_sentiment']['general']['accuracy']:.4f}, F1: {all_results['initial_sentiment']['general']['f1_macro']:.4f}")
if all_results['initial_sentiment'].get('reddit'):
    print(f"    Reddit Dataset - Accuracy: {all_results['initial_sentiment']['reddit']['accuracy']:.4f}, F1: {all_results['initial_sentiment']['reddit']['f1_macro']:.4f}")

print(f"  Final Optimized Model:")
print(f"    General Dataset - Accuracy: {all_results['final_sentiment']['general']['accuracy']:.4f}, F1: {all_results['final_sentiment']['general']['f1_macro']:.4f}")
if all_results['final_sentiment'].get('reddit'):
    print(f"    Reddit Dataset - Accuracy: {all_results['final_sentiment']['reddit']['accuracy']:.4f}, F1: {all_results['final_sentiment']['reddit']['f1_macro']:.4f}")

# Calculate improvements
sentiment_general_improvement = all_results['final_sentiment']['general']['accuracy'] - all_results['initial_sentiment']['general']['accuracy']
sentiment_f1_improvement = all_results['final_sentiment']['general']['f1_macro'] - all_results['initial_sentiment']['general']['f1_macro']
print(f"  Improvements:")
print(f"    General Accuracy: {sentiment_general_improvement:+.4f}")
print(f"    General F1: {sentiment_f1_improvement:+.4f}")

# Emotion Model Comparison
print(f"\nüé≠ EMOTION MODEL:")
print(f"  Initial Model:")
print(f"    General Dataset - Accuracy: {all_results['initial_emotion']['general']['accuracy']:.4f}, F1: {all_results['initial_emotion']['general']['f1_macro']:.4f}")
if all_results['initial_emotion'].get('reddit'):
    print(f"    Reddit Dataset - Accuracy: {all_results['initial_emotion']['reddit']['accuracy']:.4f}, F1: {all_results['initial_emotion']['reddit']['f1_macro']:.4f}")

print(f"  Final Optimized Model:")
print(f"    General Dataset - Accuracy: {all_results['final_emotion']['general']['accuracy']:.4f}, F1: {all_results['final_emotion']['general']['f1_macro']:.4f}")
if all_results['final_emotion'].get('reddit'):
    print(f"    Reddit Dataset - Accuracy: {all_results['final_emotion']['reddit']['accuracy']:.4f}, F1: {all_results['final_emotion']['reddit']['f1_macro']:.4f}")

# Calculate improvements
emotion_general_improvement = all_results['final_emotion']['general']['accuracy'] - all_results['initial_emotion']['general']['accuracy']
emotion_f1_improvement = all_results['final_emotion']['general']['f1_macro'] - all_results['initial_emotion']['general']['f1_macro']
print(f"  Improvements:")
print(f"    General Accuracy: {emotion_general_improvement:+.4f}")
print(f"    General F1: {emotion_f1_improvement:+.4f}")

# Multitask Model Comparison
print(f"\nüîÑ MULTITASK MODEL:")
print(f"  Initial Model:")
print(f"    General Dataset:")
print(f"      Sentiment - Accuracy: {all_results['initial_multitask']['general']['sentiment_accuracy']:.4f}, F1: {all_results['initial_multitask']['general']['sentiment_f1_macro']:.4f}")
print(f"      Emotion - Accuracy: {all_results['initial_multitask']['general']['emotion_accuracy']:.4f}, F1: {all_results['initial_multitask']['general']['emotion_f1_macro']:.4f}")
print(f"      Combined - Accuracy: {all_results['initial_multitask']['general']['combined_accuracy']:.4f}, F1: {all_results['initial_multitask']['general']['combined_f1_macro']:.4f}")
if all_results['initial_multitask'].get('reddit'):
    print(f"    Reddit Dataset:")
    print(f"      Sentiment - Accuracy: {all_results['initial_multitask']['reddit']['sentiment_accuracy']:.4f}, F1: {all_results['initial_multitask']['reddit']['sentiment_f1_macro']:.4f}")
    print(f"      Emotion - Accuracy: {all_results['initial_multitask']['reddit']['emotion_accuracy']:.4f}, F1: {all_results['initial_multitask']['reddit']['emotion_f1_macro']:.4f}")
    print(f"      Combined - Accuracy: {all_results['initial_multitask']['reddit']['combined_accuracy']:.4f}, F1: {all_results['initial_multitask']['reddit']['combined_f1_macro']:.4f}")

print(f"  Final Optimized Model:")
print(f"    General Dataset:")
print(f"      Sentiment - Accuracy: {all_results['final_multitask']['general']['sentiment_accuracy']:.4f}, F1: {all_results['final_multitask']['general']['sentiment_f1_macro']:.4f}")
print(f"      Emotion - Accuracy: {all_results['final_multitask']['general']['emotion_accuracy']:.4f}, F1: {all_results['final_multitask']['general']['emotion_f1_macro']:.4f}")
print(f"      Combined - Accuracy: {all_results['final_multitask']['general']['combined_accuracy']:.4f}, F1: {all_results['final_multitask']['general']['combined_f1_macro']:.4f}")
if all_results['final_multitask'].get('reddit'):
    print(f"    Reddit Dataset:")
    print(f"      Sentiment - Accuracy: {all_results['final_multitask']['reddit']['sentiment_accuracy']:.4f}, F1: {all_results['final_multitask']['reddit']['sentiment_f1_macro']:.4f}")
    print(f"      Emotion - Accuracy: {all_results['final_multitask']['reddit']['emotion_accuracy']:.4f}, F1: {all_results['final_multitask']['reddit']['emotion_f1_macro']:.4f}")
    print(f"      Combined - Accuracy: {all_results['final_multitask']['reddit']['combined_accuracy']:.4f}, F1: {all_results['final_multitask']['reddit']['combined_f1_macro']:.4f}")

# Calculate improvements
multitask_sentiment_improvement = all_results['final_multitask']['general']['sentiment_accuracy'] - all_results['initial_multitask']['general']['sentiment_accuracy']
multitask_emotion_improvement = all_results['final_multitask']['general']['emotion_accuracy'] - all_results['initial_multitask']['general']['emotion_accuracy']
multitask_combined_improvement = all_results['final_multitask']['general']['combined_accuracy'] - all_results['initial_multitask']['general']['combined_accuracy']
print(f"  Improvements:")
print(f"    Sentiment Accuracy: {multitask_sentiment_improvement:+.4f}")
print(f"    Emotion Accuracy: {multitask_emotion_improvement:+.4f}")
print(f"    Combined Accuracy: {multitask_combined_improvement:+.4f}")

print(f"\nüéâ DISTILROBERTA TRAINING PIPELINE COMPLETED SUCCESSFULLY!")
print(f"   All models trained and evaluated on both general and Reddit datasets")
print(f"   Hyperparameter optimization completed using macro F1 on general datasets")
print(f"   Final models saved and ready for deployment")


üìç PHASE 3: FINAL TRAINING - OPTIMIZED MULTITASK MODEL

1Ô∏è‚É£1Ô∏è‚É£ Training Final distilroBERTa Multi-task Model with Best Parameters...
üéØ Using best hyperparameters:
  learning_rate: 6.251028636335231e-05
  batch_size: 32
  num_epochs: 6
  warmup_ratio: 0.12123391106782762
  weight_decay: 0.02636424704863906
  hidden_dropout_prob: 0.13668090197068677
  classifier_dropout: 0.16084844859190756
  alpha: 0.5049512863264476

üöÄ Training final multitask model:
  Dataset: Full multitask data (7000 train samples)
  Epochs: 5
  Batch size: 32
  Learning rate: 6.25e-05
  Alpha (loss weighting): 0.505
Starting distilroBERTa multi-task training...

Epoch 1/5
  Train Loss: 1.2560
  Train Sentiment Acc: 0.6641, Train Emotion Acc: 0.2711
  Val Loss: 1.1479
  Val Sentiment Acc: 0.8013, F1: 0.5583
  Val Emotion Acc: 0.2733, F1: 0.0959
Best distilroBERTa model saved to ./final_distilroberta_multitask_model\model_best

Epoch 2/5
  Train Loss: 1.1146
  Train Sentiment Acc: 0.8189, Train Emoti