In [1]:
# ============================================================================
# IMPORTS
# ============================================================================

import os
import gc
import json
import warnings
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    precision_score,
    recall_score,
    classification_report,
    hamming_loss
)

from torch.optim import AdamW

from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    get_linear_schedule_with_warmup,
    LongformerTokenizer,
    LongformerForSequenceClassification,
    BigBirdTokenizer,
    BigBirdForSequenceClassification,
    BartTokenizer,
    BartForSequenceClassification,
    T5Tokenizer,
    T5ForConditionalGeneration,
    AutoModel
)


from sentence_transformers import SentenceTransformer
from tqdm.notebook import tqdm
from typing import Dict, List, Tuple, Optional
from dataclasses import dataclass

warnings.filterwarnings('ignore')

2025-10-04 15:54:39.586697: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1759593279.775259      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1759593279.826158      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
# ============================================================================
# EXPERIMENTAL SETUP AND HYPERPARAMETERS
# ============================================================================

@dataclass
class ExperimentConfig:
    """Configuration for experimental setup.

    All fields can be overridden through the constructor. The two label
    lists default to ``None`` and are filled with the project label sets in
    ``__post_init__`` only when the caller did not supply them, so each
    instance gets its own list object and custom label sets are respected.
    """
    
    # Paths
    DATASET_PATH: str = "/kaggle/input/eacl-26-dataset-2/EACL_26-FinancialDiscussionsDataset.json"
    OUTPUT_DIR: str = "/kaggle/working"
    
    # Reproducibility
    RANDOM_SEED: int = 42
    
    # Data splits
    TRAIN_SIZE: float = 0.6
    VAL_SIZE: float = 0.2
    TEST_SIZE: float = 0.2
    
    # Training hyperparameters (following M-HELP)
    BATCH_SIZE: int = 16
    LEARNING_RATE: float = 5e-5
    NUM_EPOCHS: int = 10
    WEIGHT_DECAY: float = 1e-2
    WARMUP_STEPS: int = 500
    MAX_GRAD_NORM: float = 1.0
    
    # Early stopping
    EARLY_STOPPING_PATIENCE: int = 3
    
    # Optimizer parameters
    ADAM_BETA1: float = 0.9
    ADAM_BETA2: float = 0.999
    ADAM_EPSILON: float = 1e-8
    
    # Model-specific parameters
    DROPOUT: float = 0.2
    
    # Max sequence lengths per model type
    MAX_SEQ_LENGTH_STANDARD: int = 512
    MAX_SEQ_LENGTH_LONG: int = 1024
    
    # Label definitions (None means "use the project defaults below")
    INTENT_LABELS: Optional[List[str]] = None
    IMPACT_LABELS: Optional[List[str]] = None
    
    def __post_init__(self):
        """Fill in the default label lists when none were supplied."""
        # PROJECT 2 LABELS
        # Only populate defaults when the caller left the field unset;
        # overwriting unconditionally would discard constructor arguments.
        if self.INTENT_LABELS is None:
            self.INTENT_LABELS = [
                "Advocacy & Warning",
                "Community Building & Solidarity Seeking",
                "Emotional & Esteem Support-Seeking",
                "Informational Help-Seeking",
                "Instrumental Help-Seeking",
                "Sense-Making & Narrative Construction",
                "Venting / Catharsis"
            ]
        
        if self.IMPACT_LABELS is None:
            self.IMPACT_LABELS = [
                "Behavioral Impact (Maladaptive Coping)",
                "Cognitive Impact",
                "Economic & Material Impact",
                "Physical Health Impact",
                "Psychological & Emotional Impact",
                "Relational & Social Impact"
            ]

# Module-level default configuration instance.
CONFIG = ExperimentConfig()

In [3]:
# ============================================================================
# UTILITY FUNCTIONS
# ============================================================================

def set_seed(seed: int) -> None:
    """Set random seeds for reproducibility.

    Seeds Python's ``random`` module, NumPy's global RNG and PyTorch
    (CPU and all CUDA devices), and switches cuDNN to deterministic mode
    with autotuning disabled.

    Args:
        seed: Seed value applied to every random number generator.
    """
    # Local import: the notebook's import cell does not bring in `random`.
    import random

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

def clear_memory() -> None:
    """Run the Python garbage collector, then empty PyTorch's CUDA cache."""
    # Collect first so that tensors released by the GC can be returned
    # to the driver by the subsequent cache flush.
    _ = gc.collect()
    torch.cuda.empty_cache()
    return None

def validate_dataset_structure(data: List[Dict]) -> None:
    """Validate dataset structure and raise errors for malformed data.

    Each sample must be a dict with the keys ``sample_id``, ``text`` and
    ``labels``; ``labels`` must itself be a dict holding list-valued
    ``intent`` and ``impact`` entries.

    Args:
        data: Parsed dataset, expected to be a non-empty list of samples.

    Raises:
        ValueError: If the container or any sample violates the structure
            described above. The message names the offending sample index.
    """
    if not isinstance(data, list):
        raise ValueError(f"Dataset must be a list, got {type(data)}")
    
    if len(data) == 0:
        raise ValueError("Dataset is empty")
    
    required_keys = {'sample_id', 'text', 'labels'}
    for idx, sample in enumerate(data):
        # Guard first: a non-dict sample has no .keys() and would otherwise
        # surface as an AttributeError instead of a validation error.
        if not isinstance(sample, dict):
            raise ValueError(f"Sample {idx} must be a dict, got {type(sample)}")
        
        missing_keys = required_keys - set(sample.keys())
        if missing_keys:
            raise ValueError(f"Sample {idx} missing keys: {missing_keys}")
        
        labels = sample['labels']
        if not isinstance(labels, dict):
            raise ValueError(f"Sample {idx}: 'labels' must be dict, got {type(labels)}")
        
        if 'intent' not in labels or 'impact' not in labels:
            raise ValueError(f"Sample {idx}: labels must have 'intent' and 'impact' keys")
        
        if not isinstance(labels['intent'], list):
            raise ValueError(f"Sample {idx}: intent must be list")
        
        if not isinstance(labels['impact'], list):
            raise ValueError(f"Sample {idx}: impact must be list")


In [4]:
# ============================================================================
# DATA LOADING AND PREPROCESSING
# ============================================================================

class DataProcessor:
    """Handle data loading, validation, and preprocessing."""
    
    def __init__(self, config: ExperimentConfig):
        """Store the config and create one binarizer per label family.

        Args:
            config: Experiment configuration supplying the dataset path,
                split fractions, seed and the intent/impact label lists.
        """
        self.config = config
        self.intent_mlb = MultiLabelBinarizer(classes=config.INTENT_LABELS)
        self.impact_mlb = MultiLabelBinarizer(classes=config.IMPACT_LABELS)
        
    def load_data(self) -> List[Dict]:
        """Load and validate dataset.

        Returns:
            The parsed JSON content of ``config.DATASET_PATH``.

        Raises:
            FileNotFoundError: If the dataset file does not exist.
            ValueError: If the file is not valid JSON.
            RuntimeError: For any other failure, including a structure
                validation error raised by ``validate_dataset_structure``.
        """
        try:
            with open(self.config.DATASET_PATH, 'r', encoding='utf-8') as f:
                data = json.load(f)
            print(f"Successfully loaded {len(data)} samples from {self.config.DATASET_PATH}")
            
            validate_dataset_structure(data)
            print("Dataset structure validation passed")
            
            return data
            
        # `from e` keeps the original exception attached as the explicit cause.
        except FileNotFoundError as e:
            raise FileNotFoundError(f"Dataset not found at {self.config.DATASET_PATH}") from e
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid JSON format: {e}") from e
        except Exception as e:
            raise RuntimeError(f"Error loading dataset: {e}") from e
    
    @staticmethod
    def _report_unknown_labels(task: str, label_lists: List[List[str]], known: List[str]) -> None:
        """Print any labels that are absent from the configured label set."""
        known_set = set(known)
        unknown = sorted(
            {label for labels in label_lists for label in labels if label not in known_set},
            key=str
        )
        if unknown:
            print(
                f"WARNING: {task} labels not in the configured label set "
                f"will be ignored: {unknown}"
            )
    
    def prepare_labels(self, data: List[Dict]) -> Tuple[np.ndarray, np.ndarray]:
        """Prepare multi-label encoded arrays.

        Args:
            data: Validated samples with ``labels['intent']`` and
                ``labels['impact']`` lists.

        Returns:
            ``(intent_encoded, impact_encoded)`` binary indicator arrays
            whose columns follow the configured label order.
        """
        intent_labels = [sample['labels']['intent'] for sample in data]
        impact_labels = [sample['labels']['impact'] for sample in data]
        
        # The binarizers are built with a fixed class list; labels outside
        # that list are dropped during encoding, so surface them explicitly
        # (the notebook suppresses library warnings globally).
        self._report_unknown_labels("intent", intent_labels, self.config.INTENT_LABELS)
        self._report_unknown_labels("impact", impact_labels, self.config.IMPACT_LABELS)
        
        intent_encoded = self.intent_mlb.fit_transform(intent_labels)
        impact_encoded = self.impact_mlb.fit_transform(impact_labels)
        
        print(f"Intent labels shape: {intent_encoded.shape}")
        print(f"Impact labels shape: {impact_encoded.shape}")
        
        return intent_encoded, impact_encoded
    
    def split_data(
        self, 
        data: List[Dict], 
        intent_labels: np.ndarray, 
        impact_labels: np.ndarray
    ) -> Dict[str, Tuple[List[Dict], np.ndarray, np.ndarray]]:
        """Split data into train/val/test sets.

        The split is done in two seeded, shuffled passes: first train vs.
        the combined val+test remainder, then that remainder into val and
        test according to the relative ``VAL_SIZE`` / ``TEST_SIZE`` ratio.

        Args:
            data: Samples to split.
            intent_labels: Encoded intent labels aligned with ``data``.
            impact_labels: Encoded impact labels aligned with ``data``.

        Returns:
            Dict with keys ``'train'``, ``'val'`` and ``'test'``, each
            mapping to ``(samples, intent_labels, impact_labels)``.
        """
        
        # First split: train + temp
        train_data, temp_data, train_intent, temp_intent, train_impact, temp_impact = train_test_split(
            data,
            intent_labels,
            impact_labels,
            test_size=(self.config.VAL_SIZE + self.config.TEST_SIZE),
            random_state=self.config.RANDOM_SEED,
            shuffle=True
        )
        
        # Second split: val + test
        # Fraction of the held-out remainder that belongs to validation.
        val_size_adjusted = self.config.VAL_SIZE / (self.config.VAL_SIZE + self.config.TEST_SIZE)
        val_data, test_data, val_intent, test_intent, val_impact, test_impact = train_test_split(
            temp_data,
            temp_intent,
            temp_impact,
            test_size=(1 - val_size_adjusted),
            random_state=self.config.RANDOM_SEED,
            shuffle=True
        )
        
        splits = {
            'train': (train_data, train_intent, train_impact),
            'val': (val_data, val_intent, val_impact),
            'test': (test_data, test_intent, test_impact)
        }
        
        print(f"Train: {len(train_data)} samples")
        print(f"Val: {len(val_data)} samples")
        print(f"Test: {len(test_data)} samples")
        
        return splits

In [5]:
# ============================================================================
# DATASET CLASS
# ============================================================================

class MultiLabelTextDataset(Dataset):
    """Map-style dataset that tokenizes one text per item on demand.

    Each item is a dict with flattened ``input_ids`` and
    ``attention_mask`` tensors plus a float ``labels`` tensor built from
    the matching row of the label array.
    """
    
    def __init__(
        self, 
        texts: List[str], 
        labels: np.ndarray, 
        tokenizer, 
        max_length: int
    ):
        self.texts, self.labels = texts, labels
        self.tokenizer, self.max_length = tokenizer, max_length
        
    def __len__(self) -> int:
        # Dataset size is driven by the number of texts.
        return len(self.texts)
    
    def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]:
        sample_text = self.texts[idx]
        sample_label = self.labels[idx]
        
        # Every sample is padded/truncated to exactly `max_length` tokens.
        tokenized = self.tokenizer(
            sample_text,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )
        
        # The tokenizer returns tensors with a leading batch axis of
        # size one; flatten drops it.
        item = {
            field: tokenized[field].flatten()
            for field in ('input_ids', 'attention_mask')
        }
        item['labels'] = torch.FloatTensor(sample_label)
        return item

In [6]:
# ============================================================================
# MODEL WRAPPER FOR SENTENCE TRANSFORMERS
# ============================================================================

class SentenceTransformerClassifier(nn.Module):
    """Wrapper for Sentence Transformer models with classification head.

    The encoder produces one pooled embedding per input; dropout and a
    single linear layer turn that embedding into per-label logits.
    """
    
    def __init__(self, model_name: str, num_labels: int, dropout: float = 0.2):
        """Build the encoder and the classification head.

        Args:
            model_name: Sentence-transformers model identifier.
            num_labels: Number of output logits.
            dropout: Dropout probability applied before the linear head.
        """
        super().__init__()
        self.encoder = SentenceTransformer(model_name)
        embedding_dim = self.encoder.get_sentence_embedding_dimension()
        self.dropout = nn.Dropout(dropout)
        self.classifier = nn.Linear(embedding_dim, num_labels)
        
    def forward(self, input_ids, attention_mask):
        """Encode pre-tokenized inputs and return classification logits.

        Previously this method was a stub that returned ``None``.

        Args:
            input_ids: Token ids; they must come from the encoder's own
                tokenizer (``self.encoder.tokenizer``).
            attention_mask: Mask matching ``input_ids``.

        Returns:
            A ``SequenceClassifierOutput`` whose ``logits`` field holds the
            head's output, matching the other wrappers in this notebook.
        """
        # A SentenceTransformer is itself an nn.Module pipeline: called with
        # a feature dict, its pooling stage adds 'sentence_embedding'.
        features = {'input_ids': input_ids, 'attention_mask': attention_mask}
        embeddings = self.encoder(features)['sentence_embedding']
        logits = self.classifier(self.dropout(embeddings))
        
        # Return in same format as other models
        from transformers.modeling_outputs import SequenceClassifierOutput
        return SequenceClassifierOutput(logits=logits)

In [7]:
# ============================================================================
# EARLY STOPPING
# ============================================================================

class EarlyStopping:
    """Early stopping to prevent overfitting.

    Tracks the best validation loss seen so far and signals a stop once
    ``patience`` consecutive calls fail to improve on it by at least
    ``min_delta``. A NaN loss always counts as a non-improving call.
    """
    
    def __init__(self, patience: int = 3, min_delta: float = 0.0):
        """
        Args:
            patience: Number of consecutive non-improving calls tolerated
                before ``early_stop`` is set.
            min_delta: Minimum decrease of the loss relative to the best
                value for a call to count as an improvement.
        """
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_loss = None
        self.early_stop = False
        
    def __call__(self, val_loss: float) -> bool:
        """Record one validation loss and report whether to stop.

        Args:
            val_loss: Validation loss of the epoch just finished.

        Returns:
            ``True`` once the patience budget is exhausted, else ``False``.
        """
        # NaN is the only value that is not equal to itself. Every ordering
        # comparison with NaN is False, so without this check a diverged
        # loss would be stored as the new "best" and disable stopping.
        is_nan = val_loss != val_loss
        
        if is_nan:
            improved = False
        elif self.best_loss is None:
            # First valid observation only establishes the baseline.
            self.best_loss = val_loss
            return self.early_stop
        else:
            improved = val_loss <= self.best_loss - self.min_delta
        
        if improved:
            self.best_loss = val_loss
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
            
        return self.early_stop

In [8]:
# ============================================================================
# METRICS CALCULATOR
# ============================================================================

class MetricsCalculator:
    """Compute aggregate and per-label scores for multi-label predictions."""
    
    @staticmethod
    def calculate_metrics(
        y_true: np.ndarray, 
        y_pred: np.ndarray, 
        label_names: List[str]
    ) -> Dict[str, float]:
        """Return a flat dict of evaluation scores.

        The dict holds five aggregate entries (subset accuracy, macro /
        weighted / micro F1, Hamming loss) followed by ``<label>_f1``,
        ``<label>_precision`` and ``<label>_recall`` for every name in
        ``label_names``, matched to the score arrays by position.
        """
        
        def averaged_f1(mode: str):
            # Undefined scores (no positives) are reported as 0.
            return f1_score(y_true, y_pred, average=mode, zero_division=0)
        
        metrics = {}
        metrics['subset_accuracy'] = accuracy_score(y_true, y_pred)
        metrics['macro_f1'] = averaged_f1('macro')
        metrics['weighted_f1'] = averaged_f1('weighted')
        metrics['micro_f1'] = averaged_f1('micro')
        metrics['hamming_loss'] = hamming_loss(y_true, y_pred)
        
        # average=None yields one score per label column.
        f1_by_label = f1_score(y_true, y_pred, average=None, zero_division=0)
        precision_by_label = precision_score(y_true, y_pred, average=None, zero_division=0)
        recall_by_label = recall_score(y_true, y_pred, average=None, zero_division=0)
        
        label_scores = {}
        for position in range(len(label_names)):
            label = label_names[position]
            label_scores.update({
                f'{label}_f1': f1_by_label[position],
                f'{label}_precision': precision_by_label[position],
                f'{label}_recall': recall_by_label[position],
            })
        
        metrics.update(label_scores)
        return metrics

In [9]:
# ============================================================================
# MODEL TRAINER
# ============================================================================

class ModelTrainer:
    """Handle model training and evaluation."""
    
    def __init__(self, config: ExperimentConfig):
        """Select the compute device and decide whether to use DataParallel.

        Args:
            config: Experiment configuration used by all training methods.
        """
        self.config = config
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        # More than one visible GPU enables nn.DataParallel wrapping later on.
        self.use_data_parallel = torch.cuda.device_count() > 1
        print(f"Using device: {self.device}")
        if self.use_data_parallel:
            print(f"DataParallel enabled with {torch.cuda.device_count()} GPUs")
        elif self.device.type == 'cuda':
            print("Single GPU mode")
        else:
            # Previously reported as "Single GPU mode" even without a GPU.
            print("CPU mode (no GPU available)")
        
    def get_model_and_tokenizer(
        self, 
        model_name: str, 
        num_labels: int
    ) -> Tuple[Optional[nn.Module], Optional[object], int]:
        """Load model and tokenizer with appropriate configuration.

        The architecture is chosen from substrings of ``model_name``.
        Sentence-transformers checkpoints are not loaded here; for them the
        method returns ``(None, None, MAX_SEQ_LENGTH_STANDARD)`` so the
        caller can use the embedding-based pipeline instead.

        Args:
            model_name: Hugging Face model identifier.
            num_labels: Number of output labels for the classification head.

        Returns:
            ``(model, tokenizer, max_length)``. The model is moved to
            ``self.device`` and wrapped in ``nn.DataParallel`` when more
            than one GPU is in use.

        Raises:
            RuntimeError: If loading fails for any reason; the original
                exception is attached as the cause.
        """
        name = model_name.lower()
        
        try:
            # Sentence transformers
            # Checked first: identifiers such as ".../sentence-t5-base"
            # would otherwise be captured by the architecture substrings
            # ('t5', 'led', 'bart') below.
            if 'sentence-transformers' in model_name:
                # Special handling for sentence transformers
                return None, None, self.config.MAX_SEQ_LENGTH_STANDARD
            
            # Long-context models
            if 'longformer' in name:
                tokenizer = LongformerTokenizer.from_pretrained(model_name)
                model = LongformerForSequenceClassification.from_pretrained(
                    model_name,
                    num_labels=num_labels,
                    problem_type="multi_label_classification",
                    ignore_mismatched_sizes=True
                )
                max_length = self.config.MAX_SEQ_LENGTH_LONG
                
            elif 'bigbird' in name:
                tokenizer = BigBirdTokenizer.from_pretrained(model_name)
                model = BigBirdForSequenceClassification.from_pretrained(
                    model_name,
                    num_labels=num_labels,
                    problem_type="multi_label_classification",
                    ignore_mismatched_sizes=True
                )
                max_length = self.config.MAX_SEQ_LENGTH_LONG
                
            # Encoder-decoder models
            elif 'bart' in name:
                tokenizer = BartTokenizer.from_pretrained(model_name)
                model = BartForSequenceClassification.from_pretrained(
                    model_name,
                    num_labels=num_labels,
                    problem_type="multi_label_classification",
                    ignore_mismatched_sizes=True
                )
                max_length = self.config.MAX_SEQ_LENGTH_STANDARD

            # LED (Longformer Encoder-Decoder)
            elif 'led' in name:
                from transformers import LEDTokenizer, LEDForSequenceClassification
                tokenizer = LEDTokenizer.from_pretrained(model_name)
                
                # LED can be used directly for classification
                try:
                    model = LEDForSequenceClassification.from_pretrained(
                        model_name,
                        num_labels=num_labels,
                        problem_type="multi_label_classification"
                    )
                except Exception:
                    # `except Exception` (not a bare except) so that
                    # KeyboardInterrupt / SystemExit still propagate.
                    # If classification head doesn't exist, use custom wrapper
                    from transformers import LEDModel
                    base_model = LEDModel.from_pretrained(model_name)
                    
                    class LEDClassifierWrapper(nn.Module):
                        """LED encoder plus a linear head on the first token."""
                        
                        def __init__(self, led_model, num_labels):
                            super().__init__()
                            self.led = led_model
                            self.classifier = nn.Linear(led_model.config.d_model, num_labels)
                            
                        def forward(self, input_ids, attention_mask, **kwargs):
                            # Set global attention on first token
                            global_attention_mask = torch.zeros_like(input_ids)
                            global_attention_mask[:, 0] = 1
                            
                            encoder_outputs = self.led.encoder(
                                input_ids=input_ids,
                                attention_mask=attention_mask,
                                global_attention_mask=global_attention_mask
                            )
                            pooled = encoder_outputs.last_hidden_state[:, 0, :]
                            logits = self.classifier(pooled)
                            
                            from transformers.modeling_outputs import SequenceClassifierOutput
                            return SequenceClassifierOutput(logits=logits)
                    
                    model = LEDClassifierWrapper(base_model, num_labels)
                
                max_length = self.config.MAX_SEQ_LENGTH_LONG  # 1024 for LED
                
            elif 't5' in name or 'flan' in name or 'pegasus' in name:
                tokenizer = AutoTokenizer.from_pretrained(model_name)
                # Use AutoModelForSeq2SeqLM wrapper for classification
                from transformers import AutoModelForSeq2SeqLM
                base_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
                
                # Create classification wrapper
                class T5ClassifierWrapper(nn.Module):
                    """Seq2seq encoder plus a linear head on mean-pooled states."""
                    
                    def __init__(self, t5_model, num_labels):
                        super().__init__()
                        self.t5 = t5_model
                        self.classifier = nn.Linear(t5_model.config.d_model, num_labels)
                        
                    def forward(self, input_ids, attention_mask, **kwargs):
                        # get_encoder() is the generic accessor; not every
                        # seq2seq LM class (e.g. Pegasus) exposes a
                        # top-level `.encoder` attribute.
                        encoder_outputs = self.t5.get_encoder()(
                            input_ids=input_ids,
                            attention_mask=attention_mask
                        )
                        hidden = encoder_outputs.last_hidden_state
                        
                        # Mask-aware mean pooling: inputs are padded to
                        # max_length, so a plain mean would be dominated by
                        # padding positions.
                        mask = attention_mask.unsqueeze(-1).to(hidden.dtype)
                        pooled = (hidden * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1.0)
                        logits = self.classifier(pooled)
                        
                        # Return compatible output
                        from transformers.modeling_outputs import SequenceClassifierOutput
                        return SequenceClassifierOutput(logits=logits)
                
                model = T5ClassifierWrapper(base_model, num_labels)
                max_length = self.config.MAX_SEQ_LENGTH_STANDARD
                
            # Standard transformers
            else:
                tokenizer = AutoTokenizer.from_pretrained(model_name)
                model = AutoModelForSequenceClassification.from_pretrained(
                    model_name,
                    num_labels=num_labels,
                    problem_type="multi_label_classification",
                    ignore_mismatched_sizes=True
                )
                max_length = self.config.MAX_SEQ_LENGTH_STANDARD
            
            model.to(self.device)
            print(f"Successfully loaded {model_name}")
            print(f"Max sequence length: {max_length}")

            if self.use_data_parallel:
                # Use every visible GPU rather than a hard-coded pair.
                model = nn.DataParallel(
                    model,
                    device_ids=list(range(torch.cuda.device_count()))
                )
                print(f"Model wrapped in DataParallel")
            
            return model, tokenizer, max_length
            
        except Exception as e:
            raise RuntimeError(f"Failed to load model {model_name}: {str(e)}") from e


    def train_sentence_transformer_model(
        self,
        model_name: str,
        train_texts: List[str],
        train_labels: np.ndarray,
        val_texts: List[str],
        val_labels: np.ndarray,
        task_name: str
    ) -> Dict[str, float]:
        """
        Special training pipeline for sentence transformer models.
        Uses pre-computed embeddings + classification head.

        The encoder is used only to embed the texts once; only the
        dropout + linear head is trained. Metrics are computed from the
        predictions of the last epoch that was evaluated.

        Args:
            model_name: Sentence-transformers model identifier.
            train_texts: Training texts.
            train_labels: Binary label matrix aligned with ``train_texts``.
            val_texts: Validation texts.
            val_labels: Binary label matrix aligned with ``val_texts``.
            task_name: Task label; if it contains "intent" (case-insensitive)
                the intent label names are used for reporting, otherwise the
                impact label names.

        Returns:
            Metrics dict from ``MetricsCalculator.calculate_metrics``, or
            ``None`` if anything in the pipeline raised.
        """
        print(f"\n{'='*80}")
        print(f"Training {model_name} for {task_name}")
        print(f"{'='*80}")
        
        try:
            # Load sentence transformer
            print(f"Loading sentence transformer: {model_name}")
            encoder = SentenceTransformer(model_name)
            embedding_dim = encoder.get_sentence_embedding_dimension()
            print(f"Embedding dimension: {embedding_dim}")
            
            # Create classification head
            classifier = nn.Sequential(
                nn.Dropout(self.config.DROPOUT),
                nn.Linear(embedding_dim, train_labels.shape[1])
            )
            
            # Wrap in DataParallel if available
            if self.use_data_parallel:
                # Use every visible GPU rather than a hard-coded pair.
                classifier = nn.DataParallel(
                    classifier,
                    device_ids=list(range(torch.cuda.device_count()))
                )
                print("Classifier wrapped in DataParallel")
            
            classifier = classifier.to(self.device)
            
            # Encode all texts (batch processing)
            # Encode on the trainer's device; a hard-coded 'cuda' would fail
            # on CPU-only hosts.
            encode_device = str(self.device)
            print("Encoding texts...")
            train_embeddings = encoder.encode(
                train_texts, 
                batch_size=32,
                show_progress_bar=True, 
                convert_to_tensor=True,
                device=encode_device
            )
            val_embeddings = encoder.encode(
                val_texts, 
                batch_size=32,
                show_progress_bar=True, 
                convert_to_tensor=True,
                device=encode_device
            )
            
            # Move to device
            train_embeddings = train_embeddings.to(self.device)
            val_embeddings = val_embeddings.to(self.device)
            train_labels_torch = torch.FloatTensor(train_labels).to(self.device)
            val_labels_torch = torch.FloatTensor(val_labels).to(self.device)
            
            # Setup training
            optimizer = AdamW(
                classifier.parameters(),
                lr=self.config.LEARNING_RATE,
                betas=(self.config.ADAM_BETA1, self.config.ADAM_BETA2),
                eps=self.config.ADAM_EPSILON,
                weight_decay=self.config.WEIGHT_DECAY
            )
            
            # Ceiling division: the loop below also runs the final partial
            # batch, so the scheduler must be sized for it as well.
            batch_size = self.config.BATCH_SIZE
            steps_per_epoch = (len(train_embeddings) + batch_size - 1) // batch_size
            total_steps = steps_per_epoch * self.config.NUM_EPOCHS
            scheduler = get_linear_schedule_with_warmup(
                optimizer,
                num_warmup_steps=self.config.WARMUP_STEPS,
                num_training_steps=total_steps
            )
            
            criterion = nn.BCEWithLogitsLoss()
            early_stopping = EarlyStopping(patience=self.config.EARLY_STOPPING_PATIENCE)
            
            best_val_loss = float('inf')
            
            # Training loop
            for epoch in range(self.config.NUM_EPOCHS):
                print(f"\nEpoch {epoch + 1}/{self.config.NUM_EPOCHS}")
                
                # Train
                classifier.train()
                total_loss = 0
                num_batches = 0
                
                progress_bar = tqdm(
                    range(0, len(train_embeddings), batch_size),
                    desc="Training",
                    leave=False
                )
                
                for i in progress_bar:
                    batch_embeddings = train_embeddings[i:i+batch_size]
                    batch_labels = train_labels_torch[i:i+batch_size]
                    
                    optimizer.zero_grad()
                    logits = classifier(batch_embeddings)
                    loss = criterion(logits, batch_labels)
                    loss.backward()
                    
                    torch.nn.utils.clip_grad_norm_(classifier.parameters(), self.config.MAX_GRAD_NORM)
                    optimizer.step()
                    scheduler.step()
                    
                    total_loss += loss.item()
                    num_batches += 1
                    progress_bar.set_postfix({'loss': loss.item()})
                
                train_loss = total_loss / num_batches
                
                # Validate
                # The whole validation set is scored in a single forward pass.
                classifier.eval()
                with torch.no_grad():
                    val_logits = classifier(val_embeddings)
                    val_loss = criterion(val_logits, val_labels_torch).item()
                    val_preds = (torch.sigmoid(val_logits) > 0.5).cpu().numpy()
                
                print(f"Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}")
                
                if early_stopping(val_loss):
                    print(f"Early stopping triggered at epoch {epoch + 1}")
                    break
                
                if val_loss < best_val_loss:
                    best_val_loss = val_loss
            
            print(f"\nTraining completed for {model_name}")
            print(f"Best validation loss: {best_val_loss:.4f}")
            
            # Calculate metrics
            metrics = MetricsCalculator.calculate_metrics(
                val_labels,
                val_preds,
                self.config.INTENT_LABELS if 'intent' in task_name.lower() 
                else self.config.IMPACT_LABELS
            )
            
            # Clean up
            del encoder
            del classifier
            del train_embeddings
            del val_embeddings
            clear_memory()
            
            return metrics
            
        except Exception as e:
            print(f"\n{'!'*80}")
            print(f"TRAINING FAILED for {model_name}")
            print(f"{'!'*80}")
            print(f"Error: {str(e)}")
            clear_memory()
            return None
    
    def train_epoch(
        self, 
        model: nn.Module, 
        dataloader: DataLoader, 
        optimizer: torch.optim.Optimizer,
        scheduler,
        criterion: nn.Module
    ) -> float:
        """Run one optimisation pass over ``dataloader``.

        Each batch is moved to ``self.device``, scored with ``criterion``,
        back-propagated with gradient clipping, and followed by one
        optimizer and one scheduler step.

        Returns:
            Sum of the batch losses divided by the number of batches.

        Raises:
            MemoryError: If a RuntimeError mentioning "out of memory" occurs.
            RuntimeError: For any other RuntimeError raised inside a step.
        """
        model.train()
        running_loss = 0
        
        batches = tqdm(dataloader, desc="Training", leave=False)
        
        for batch in batches:
            try:
                token_ids = batch['input_ids'].to(self.device)
                mask = batch['attention_mask'].to(self.device)
                targets = batch['labels'].to(self.device)
                
                optimizer.zero_grad()
                
                outputs = model(input_ids=token_ids, attention_mask=mask)
                
                # Model outputs either expose `.logits` or are indexable
                # with the logits in first position.
                if hasattr(outputs, 'logits'):
                    logits = outputs.logits
                else:
                    logits = outputs[0]
                
                loss = criterion(logits, targets)
                loss.backward()
                
                torch.nn.utils.clip_grad_norm_(model.parameters(), self.config.MAX_GRAD_NORM)
                optimizer.step()
                scheduler.step()
                
                step_loss = loss.item()
                running_loss += step_loss
                batches.set_postfix({'loss': step_loss})
                
            except RuntimeError as e:
                message = str(e)
                if "out of memory" not in message:
                    raise RuntimeError(f"Training error: {message}")
                raise MemoryError(f"OOM Error during training: {message}")
        
        return running_loss / len(dataloader)
    
    def evaluate(
        self, 
        model: nn.Module, 
        dataloader: DataLoader, 
        criterion: nn.Module
    ) -> Tuple[float, np.ndarray, np.ndarray]:
        """Score ``model`` on ``dataloader`` without gradient tracking.

        Returns:
            ``(mean_loss, predictions, labels)``, where predictions are the
            boolean result of thresholding the sigmoid of the logits at 0.5
            and both arrays are the per-batch results stacked row-wise.

        Raises:
            MemoryError: If a RuntimeError mentioning "out of memory" occurs.
            RuntimeError: For any other RuntimeError raised inside a step.
        """
        model.eval()
        running_loss = 0
        pred_chunks = []
        label_chunks = []
        
        batches = tqdm(dataloader, desc="Evaluating", leave=False)
        
        with torch.no_grad():
            for batch in batches:
                try:
                    token_ids = batch['input_ids'].to(self.device)
                    mask = batch['attention_mask'].to(self.device)
                    targets = batch['labels'].to(self.device)
                    
                    outputs = model(input_ids=token_ids, attention_mask=mask)
                    
                    # Model outputs either expose `.logits` or are indexable
                    # with the logits in first position.
                    if hasattr(outputs, 'logits'):
                        logits = outputs.logits
                    else:
                        logits = outputs[0]
                    
                    running_loss += criterion(logits, targets).item()
                    
                    # Convert logits to predictions (threshold at 0.5)
                    decisions = torch.sigmoid(logits) > 0.5
                    
                    pred_chunks.append(decisions.cpu().numpy())
                    label_chunks.append(targets.cpu().numpy())
                    
                except RuntimeError as e:
                    message = str(e)
                    if "out of memory" not in message:
                        raise RuntimeError(f"Evaluation error: {message}")
                    raise MemoryError(f"OOM Error during evaluation: {message}")
        
        stacked_preds = np.vstack(pred_chunks)
        stacked_labels = np.vstack(label_chunks)
        
        return running_loss / len(dataloader), stacked_preds, stacked_labels
    
    def train_model(
        self,
        model_name: str,
        train_texts: List[str],
        train_labels: np.ndarray,
        val_texts: List[str],
        val_labels: np.ndarray,
        task_name: str
    ) -> Dict[str, float]:
        """Complete training pipeline for a single model.

        Loads the model, trains it with AdamW and a linear warmup schedule,
        evaluates after every epoch and applies early stopping on the
        validation loss. Metrics are computed from the predictions of the
        last epoch that was evaluated.

        Sentence-transformers checkpoints are forwarded to
        ``train_sentence_transformer_model``, since the model loader does
        not return a token-level model or tokenizer for them.

        Args:
            model_name: Hugging Face model identifier.
            train_texts: Training texts.
            train_labels: Binary label matrix aligned with ``train_texts``.
            val_texts: Validation texts.
            val_labels: Binary label matrix aligned with ``val_texts``.
            task_name: Task label; if it contains "intent" (case-insensitive)
                the intent label names are used for reporting, otherwise the
                impact label names.

        Returns:
            Metrics dict from ``MetricsCalculator.calculate_metrics``, or
            ``None`` if training failed (the error is printed).
        """
        
        # Dispatch before printing the banner so it is not shown twice.
        if 'sentence-transformers' in model_name:
            return self.train_sentence_transformer_model(
                model_name,
                train_texts,
                train_labels,
                val_texts,
                val_labels,
                task_name
            )
        
        print(f"\n{'='*80}")
        print(f"Training {model_name} for {task_name}")
        print(f"{'='*80}")
        
        try:
            # Get model and tokenizer
            model, tokenizer, max_length = self.get_model_and_tokenizer(
                model_name, 
                train_labels.shape[1]
            )
            
            # Fail with a clear message instead of a 'NoneType' error deep
            # inside the data loader.
            if model is None or tokenizer is None:
                raise RuntimeError(
                    f"No token-level model/tokenizer available for {model_name}"
                )
            
            # Create datasets
            train_dataset = MultiLabelTextDataset(
                train_texts, train_labels, tokenizer, max_length
            )
            val_dataset = MultiLabelTextDataset(
                val_texts, val_labels, tokenizer, max_length
            )
            
            train_loader = DataLoader(
                train_dataset, 
                batch_size=self.config.BATCH_SIZE, 
                shuffle=True
            )
            val_loader = DataLoader(
                val_dataset, 
                batch_size=self.config.BATCH_SIZE, 
                shuffle=False
            )
            
            # Setup optimizer and scheduler
            optimizer = AdamW(
                model.parameters(),
                lr=self.config.LEARNING_RATE,
                betas=(self.config.ADAM_BETA1, self.config.ADAM_BETA2),
                eps=self.config.ADAM_EPSILON,
                weight_decay=self.config.WEIGHT_DECAY
            )
            
            # One scheduler step is taken per training batch.
            total_steps = len(train_loader) * self.config.NUM_EPOCHS
            scheduler = get_linear_schedule_with_warmup(
                optimizer,
                num_warmup_steps=self.config.WARMUP_STEPS,
                num_training_steps=total_steps
            )
            
            criterion = nn.BCEWithLogitsLoss()
            early_stopping = EarlyStopping(patience=self.config.EARLY_STOPPING_PATIENCE)
            
            # Training loop
            best_val_loss = float('inf')
            
            for epoch in range(self.config.NUM_EPOCHS):
                print(f"\nEpoch {epoch + 1}/{self.config.NUM_EPOCHS}")
                
                train_loss = self.train_epoch(
                    model, train_loader, optimizer, scheduler, criterion
                )
                val_loss, val_preds, val_labels_actual = self.evaluate(
                    model, val_loader, criterion
                )
                
                print(f"Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}")
                
                if early_stopping(val_loss):
                    print(f"Early stopping triggered at epoch {epoch + 1}")
                    break
                
                if val_loss < best_val_loss:
                    best_val_loss = val_loss
            
            print(f"\nTraining completed for {model_name}")
            print(f"Best validation loss: {best_val_loss:.4f}")
            
            # Final metrics (return validation metrics)
            metrics = MetricsCalculator.calculate_metrics(
                val_labels_actual,
                val_preds,
                self.config.INTENT_LABELS if 'intent' in task_name.lower() 
                else self.config.IMPACT_LABELS
            )
            
            # Clean up
            del model
            del tokenizer
            del optimizer
            del scheduler
            clear_memory()
            
            return metrics
            
        except MemoryError as e:
            print(f"\n{'!'*80}")
            print(f"OUT OF MEMORY ERROR for {model_name}")
            print(f"{'!'*80}")
            print(f"Error: {str(e)}")
            print("Consider:")
            print("- Reducing batch size")
            print("- Using a smaller model variant")
            print("- Using gradient accumulation")
            clear_memory()
            return None
            
        except Exception as e:
            print(f"\n{'!'*80}")
            print(f"TRAINING FAILED for {model_name}")
            print(f"{'!'*80}")
            print(f"Error: {str(e)}")
            clear_memory()
            return None

In [10]:
# ============================================================================
# T5 CLASSIFICATION WRAPPER
# ============================================================================

class T5ClassificationWrapper(nn.Module):
    """Wrap a T5 model so its encoder can be used as a sequence classifier.

    Only ``t5_model.encoder`` is run; the decoder is never called. The encoder
    output is pooled to one vector per sequence and passed through a linear
    head of size ``num_labels``.

    Args:
        t5_model: Model exposing ``.encoder`` and ``.config.d_model``.
        num_labels: Number of output logits.
        pooling: ``"mean"`` (default) averages the encoder states of the
            non-padded positions; ``"first"`` takes the state at position 0.
            T5 inputs carry no leading [CLS]-style token, so position 0 is just
            the first word piece -- ``"first"`` is kept only to reproduce the
            earlier behaviour of this wrapper.

    Raises:
        ValueError: If ``pooling`` is not ``"mean"`` or ``"first"``.
    """

    def __init__(self, t5_model, num_labels: int, pooling: str = "mean"):
        super().__init__()
        if pooling not in ("mean", "first"):
            raise ValueError(
                f"pooling must be 'mean' or 'first', got {pooling!r}"
            )
        self.t5 = t5_model
        self.classifier = nn.Linear(t5_model.config.d_model, num_labels)
        self.pooling = pooling

    def forward(self, input_ids, attention_mask=None):
        """Return a ``SequenceClassifierOutput`` whose ``logits`` come from the pooled encoder states.

        Args:
            input_ids: Token ids forwarded to the T5 encoder.
            attention_mask: Optional mask forwarded to the encoder; for mean
                pooling it is also used to exclude padded positions
                (non-zero = keep). When omitted, every position is averaged.
        """
        # Local import kept from the original so the class does not depend on
        # an extra module-level import elsewhere in the notebook.
        from transformers.modeling_outputs import SequenceClassifierOutput

        encoder_outputs = self.t5.encoder(
            input_ids=input_ids,
            attention_mask=attention_mask
        )
        hidden_states = encoder_outputs.last_hidden_state

        if self.pooling == "first":
            pooled = hidden_states[:, 0, :]
        elif attention_mask is None:
            pooled = hidden_states.mean(dim=1)
        else:
            # Masked mean: zero out padded positions, then divide by the number
            # of kept tokens (clamped so an all-zero mask cannot divide by 0).
            mask = attention_mask.unsqueeze(-1).to(hidden_states.dtype)
            pooled = (hidden_states * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1.0)

        logits = self.classifier(pooled)

        # Return in same format as other models
        return SequenceClassifierOutput(logits=logits)

In [11]:
# ============================================================================
# EXPERIMENT RUNNER
# ============================================================================

class ExperimentRunner:
    """Orchestrate the complete experiment.

    Loads and splits the data through ``DataProcessor``, trains every model
    from :meth:`get_model_list` on the intent and impact tasks through
    ``ModelTrainer``, collects the returned metric dicts in ``self.results``
    and writes them to CSV files under ``config.OUTPUT_DIR``.
    """

    def __init__(self, config: "ExperimentConfig"):
        self.config = config
        self.processor = DataProcessor(config)
        self.trainer = ModelTrainer(config)
        # One list of metric dicts per task; filled by run_experiments().
        self.results = {
            'intent': [],
            'impact': []
        }

    def get_model_list(self) -> List[Tuple[str, str, str]]:
        """Return the models to benchmark as ``(display name, model type, Hugging Face id)`` tuples."""
        return [
            ("BERT-base", "Encoder", "bert-base-uncased"),
            ("FinBERT", "Encoder (Domain)", "ProsusAI/finbert"),
            ("RoBERTa-base", "Encoder", "roberta-base"),
            ("FinBERT-Tone", "Encoder (Domain)", "yiyanghkust/finbert-tone"),
            ("FinBERT-ESG", "Encoder (Domain)", "yiyanghkust/finbert-esg"),
            ("SecBERT", "Encoder (Domain)", "nlpaueb/sec-bert-base"),
            ("DistilRoBERTa", "Encoder (Light)", "distilroberta-base"),
            ("DistilBERT", "Encoder (Light)", "distilbert-base-uncased"),
            ("DeBERTa-v3-small", "Advanced Encoder", "microsoft/deberta-v3-small"),
            ("ELECTRA-small", "Advanced Encoder", "google/electra-small-discriminator"),
            ("ELECTRA-base", "Advanced Encoder", "google/electra-base-discriminator"),
            ("ALBERT-base-v2", "Advanced Encoder", "albert-base-v2"),
            ("XLNet-base", "Advanced Encoder", "xlnet-base-cased"),
            ("BART-base", "Encoder-Decoder", "facebook/bart-base"),
            ("DistilBART-cnn", "Encoder-Decoder (Light)", "sshleifer/distilbart-cnn-6-6"),
        ]

    @staticmethod
    def _build_label_distribution(label_names, train_labels, val_labels, test_labels):
        """Count positives per label column in each split and return them as a DataFrame."""
        rows = []
        for i, label in enumerate(label_names):
            n_train = train_labels[:, i].sum()
            n_val = val_labels[:, i].sum()
            n_test = test_labels[:, i].sum()
            rows.append({
                'Label': label,
                'Train': n_train,
                'Val': n_val,
                'Test': n_test,
                'Total': n_train + n_val + n_test
            })
        return pd.DataFrame(rows)

    def save_label_distribution(self, splits):
        """Save label distribution statistics for train/val/test splits.

        Args:
            splits: Mapping with keys ``'train'``, ``'val'`` and ``'test'``;
                each value is a ``(samples, intent_labels, impact_labels)``
                triple whose label entries are indexed as ``labels[:, i]``.

        Writes one CSV per task into ``config.OUTPUT_DIR`` and prints both tables.
        """
        # Only the label matrices are needed here; the samples are ignored.
        _, train_intent, train_impact = splits['train']
        _, val_intent, val_impact = splits['val']
        _, test_intent, test_impact = splits['test']

        intent_dist_df = self._build_label_distribution(
            self.config.INTENT_LABELS, train_intent, val_intent, test_intent
        )
        impact_dist_df = self._build_label_distribution(
            self.config.IMPACT_LABELS, train_impact, val_impact, test_impact
        )

        # Save to CSV
        os.makedirs(self.config.OUTPUT_DIR, exist_ok=True)

        intent_dist_path = os.path.join(self.config.OUTPUT_DIR, 'project2_intent_label_distribution.csv')
        impact_dist_path = os.path.join(self.config.OUTPUT_DIR, 'project2_impact_label_distribution.csv')

        intent_dist_df.to_csv(intent_dist_path, index=False)
        impact_dist_df.to_csv(impact_dist_path, index=False)

        print(f"Saved: {intent_dist_path}")
        print(f"Saved: {impact_dist_path}")

        # Print summary
        print("\n" + "="*80)
        print("INTENT LABEL DISTRIBUTION")
        print("="*80)
        print(intent_dist_df.to_string(index=False))

        print("\n" + "="*80)
        print("IMPACT LABEL DISTRIBUTION")
        print("="*80)
        print(impact_dist_df.to_string(index=False))

    def _train_and_record(self, model_name, model_type, hf_id,
                          train_texts, val_texts, task_specs):
        """Train one model on every task in ``task_specs`` and store the metrics.

        Args:
            model_name: Display name stored with the metrics.
            model_type: Model family label stored with the metrics.
            hf_id: Hugging Face id handed to the trainer.
            train_texts: Training texts.
            val_texts: Validation texts.
            task_specs: Iterable of ``(results key, task title, train labels,
                val labels)`` tuples, processed in order.

        Ids containing ``'sentence-transformers'`` are routed to
        ``trainer.train_sentence_transformer_model``; every other id goes to
        ``trainer.train_model``. A ``None`` return from the trainer is skipped
        so one failed run does not abort the remaining ones.
        """
        if 'sentence-transformers' in hf_id:
            train_fn = self.trainer.train_sentence_transformer_model
        else:
            train_fn = self.trainer.train_model

        for task_key, task_title, train_labels, val_labels in task_specs:
            metrics = train_fn(
                hf_id,
                train_texts,
                train_labels,
                val_texts,
                val_labels,
                task_title
            )
            if metrics is None:
                continue
            metrics['model_name'] = model_name
            metrics['model_type'] = model_type
            metrics['hf_id'] = hf_id
            self.results[task_key].append(metrics)

    def run_experiments(self):
        """Run all experiments.

        Loads the data, prepares and splits the labels, saves the label
        distribution, trains every listed model on both tasks and finally
        writes the collected results to CSV.
        """

        print("\n" + "="*80)
        print("STARTING EXPERIMENT: Project 2 - Financial Distress Classification")
        print("="*80)

        # Load and prepare data
        print("\n[1/5] Loading dataset...")
        data = self.processor.load_data()

        print("\n[2/5] Preparing labels...")
        intent_labels, impact_labels = self.processor.prepare_labels(data)

        print("\n[3/5] Splitting data...")
        splits = self.processor.split_data(data, intent_labels, impact_labels)

        # Save label distribution
        self.save_label_distribution(splits)

        # NOTE: the 'test' split is not used below -- the trainer only receives
        # the train and validation splits, so it stays held out.
        train_data, train_intent, train_impact = splits['train']
        val_data, val_intent, val_impact = splits['val']

        train_texts = [s['text'] for s in train_data]
        val_texts = [s['text'] for s in val_data]

        task_specs = [
            ('intent', "Intent Classification", train_intent, val_intent),
            ('impact', "Impact Classification", train_impact, val_impact),
        ]

        # Get model list
        models = self.get_model_list()

        print(f"\n[4/5] Training {len(models)} models on 2 tasks...")
        print(f"Total training runs: {len(models) * 2}")

        # Train all models
        for idx, (model_name, model_type, hf_id) in enumerate(models, 1):
            print(f"\n{'#'*80}")
            print(f"MODEL {idx}/{len(models)}: {model_name} ({model_type})")
            print(f"{'#'*80}")

            self._train_and_record(
                model_name, model_type, hf_id,
                train_texts, val_texts, task_specs
            )

        print("\n[5/5] Saving results...")
        self.save_results()

        print("\n" + "="*80)
        print("EXPERIMENT COMPLETED")
        print("="*80)

    def save_results(self):
        """Save all results to CSV files.

        For each task two files are written into ``config.OUTPUT_DIR``: the
        summary metrics (``project2_<task>_results.csv``) and the per-label
        columns (``project2_<task>_per_label.csv``). A task with no successful
        runs produces header-only files instead of raising ``KeyError``, so the
        other task's results are still saved.
        """

        os.makedirs(self.config.OUTPUT_DIR, exist_ok=True)

        # Summary columns, in output order.
        main_cols = ['model_name', 'model_type', 'subset_accuracy', 'macro_f1',
                     'weighted_f1', 'micro_f1', 'hamming_loss']

        task_labels = [
            ('intent', self.config.INTENT_LABELS),
            ('impact', self.config.IMPACT_LABELS),
        ]

        frames = {}
        for task, _ in task_labels:
            frame = pd.DataFrame(self.results[task])
            if frame.empty:
                # Every run for this task failed (the trainer returned None).
                # Fall back to an empty frame that still has the expected columns.
                print(f"Warning: no successful runs for '{task}'; writing header-only result files")
                frame = pd.DataFrame(columns=main_cols)
            frames[task] = frame

        # Main results
        for task, _ in task_labels:
            main_path = os.path.join(self.config.OUTPUT_DIR, f'project2_{task}_results.csv')
            frames[task][main_cols].to_csv(main_path, index=False)
            print(f"Saved: {main_path}")

        # Per-label results: every column whose name contains one of the task's labels.
        for task, labels in task_labels:
            frame = frames[task]
            label_cols = ['model_name', 'model_type'] + \
                         [col for col in frame.columns if any(label in col for label in labels)]
            per_label_path = os.path.join(self.config.OUTPUT_DIR, f'project2_{task}_per_label.csv')
            frame[label_cols].to_csv(per_label_path, index=False)
            print(f"Saved: {per_label_path}")

In [12]:
# ============================================================================
# MAIN EXECUTION
# ============================================================================
    
# Fix every RNG seed up front so repeated runs are comparable.
set_seed(CONFIG.RANDOM_SEED)

# Build the runner and execute the full pipeline end to end.
runner = ExperimentRunner(CONFIG)
runner.run_experiments()

# Closing banner: blank line, rule, output location, rule.
print()
print("=" * 80)
print("All results saved to:", CONFIG.OUTPUT_DIR)
print("=" * 80)

Using device: cuda
DataParallel enabled with 2 GPUs

STARTING EXPERIMENT: Project 2 - Financial Distress Classification

[1/5] Loading dataset...
Successfully loaded 2408 samples from /kaggle/input/eacl-26-dataset-2/EACL_26-FinancialDiscussionsDataset.json
Dataset structure validation passed

[2/5] Preparing labels...
Intent labels shape: (2408, 7)
Impact labels shape: (2408, 6)

[3/5] Splitting data...
Train: 1444 samples
Val: 482 samples
Test: 482 samples
Saved: /kaggle/working/project2_intent_label_distribution.csv
Saved: /kaggle/working/project2_impact_label_distribution.csv

INTENT LABEL DISTRIBUTION
                                  Label  Train  Val  Test  Total
Community Building & Solidarity Seeking    385  136   122    643
     Emotional & Esteem Support-Seeking    459  146   162    767
             Informational Help-Seeking    488  148   160    796
              Instrumental Help-Seeking    302  115    98    515
  Sense-Making & Narrative Construction    469  155   165    7

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Successfully loaded bert-base-uncased
Max sequence length: 512
Model wrapped in DataParallel

Epoch 1/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.6615 | Val Loss: 0.6071

Epoch 2/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.5864 | Val Loss: 0.5505

Epoch 3/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.5172 | Val Loss: 0.5124

Epoch 4/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.4401 | Val Loss: 0.4460

Epoch 5/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3681 | Val Loss: 0.4382

Epoch 6/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3026 | Val Loss: 0.4611

Epoch 7/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2290 | Val Loss: 0.4398

Epoch 8/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.1626 | Val Loss: 0.4534
Early stopping triggered at epoch 8

Training completed for bert-base-uncased
Best validation loss: 0.4382

Training bert-base-uncased for Impact Classification


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Successfully loaded bert-base-uncased
Max sequence length: 512
Model wrapped in DataParallel

Epoch 1/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.6090 | Val Loss: 0.5586

Epoch 2/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.5258 | Val Loss: 0.4508

Epoch 3/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.4038 | Val Loss: 0.3698

Epoch 4/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3257 | Val Loss: 0.3543

Epoch 5/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2475 | Val Loss: 0.3500

Epoch 6/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.1884 | Val Loss: 0.3476

Epoch 7/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.1339 | Val Loss: 0.3445

Epoch 8/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.0850 | Val Loss: 0.3537

Epoch 9/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.0553 | Val Loss: 0.3728

Epoch 10/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.0395 | Val Loss: 0.3706
Early stopping triggered at epoch 10

Training completed for bert-base-uncased
Best validation loss: 0.3445

################################################################################
MODEL 2/15: FinBERT (Encoder (Domain))
################################################################################

Training ProsusAI/finbert for Intent Classification


tokenizer_config.json:   0%|          | 0.00/252 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/758 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at ProsusAI/finbert and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([3, 768]) in the checkpoint and torch.Size([7, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([3]) in the checkpoint and torch.Size([7]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Successfully loaded ProsusAI/finbert
Max sequence length: 512
Model wrapped in DataParallel

Epoch 1/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.6427 | Val Loss: 0.6022

Epoch 2/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.5821 | Val Loss: 0.5526

Epoch 3/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.5071 | Val Loss: 0.4722

Epoch 4/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.4318 | Val Loss: 0.4431

Epoch 5/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3607 | Val Loss: 0.4377

Epoch 6/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2925 | Val Loss: 0.4473

Epoch 7/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2236 | Val Loss: 0.4606

Epoch 8/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.1597 | Val Loss: 0.4409
Early stopping triggered at epoch 8

Training completed for ProsusAI/finbert
Best validation loss: 0.4377

Training ProsusAI/finbert for Impact Classification


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at ProsusAI/finbert and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([3, 768]) in the checkpoint and torch.Size([6, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([3]) in the checkpoint and torch.Size([6]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Successfully loaded ProsusAI/finbert
Max sequence length: 512
Model wrapped in DataParallel

Epoch 1/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.6060 | Val Loss: 0.5384

Epoch 2/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.5072 | Val Loss: 0.4531

Epoch 3/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.4226 | Val Loss: 0.3887

Epoch 4/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3435 | Val Loss: 0.3526

Epoch 5/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2752 | Val Loss: 0.3605

Epoch 6/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2083 | Val Loss: 0.3497

Epoch 7/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.1472 | Val Loss: 0.3995

Epoch 8/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.0934 | Val Loss: 0.3722

Epoch 9/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.0587 | Val Loss: 0.3775
Early stopping triggered at epoch 9

Training completed for ProsusAI/finbert
Best validation loss: 0.3497

################################################################################
MODEL 3/15: RoBERTa-base (Encoder)
################################################################################

Training roberta-base for Intent Classification


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Successfully loaded roberta-base
Max sequence length: 512
Model wrapped in DataParallel

Epoch 1/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.6455 | Val Loss: 0.6031

Epoch 2/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.5337 | Val Loss: 0.4522

Epoch 3/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.4392 | Val Loss: 0.4241

Epoch 4/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3871 | Val Loss: 0.4403

Epoch 5/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3414 | Val Loss: 0.4186

Epoch 6/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2918 | Val Loss: 0.4246

Epoch 7/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2159 | Val Loss: 0.4244

Epoch 8/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.1526 | Val Loss: 0.4394
Early stopping triggered at epoch 8

Training completed for roberta-base
Best validation loss: 0.4186

Training roberta-base for Impact Classification


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Successfully loaded roberta-base
Max sequence length: 512
Model wrapped in DataParallel

Epoch 1/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.6410 | Val Loss: 0.5495

Epoch 2/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.4801 | Val Loss: 0.3724

Epoch 3/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3543 | Val Loss: 0.3452

Epoch 4/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2889 | Val Loss: 0.3179

Epoch 5/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2363 | Val Loss: 0.3277

Epoch 6/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.1966 | Val Loss: 0.3073

Epoch 7/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.1445 | Val Loss: 0.3164

Epoch 8/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.0990 | Val Loss: 0.3442

Epoch 9/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.0678 | Val Loss: 0.3334
Early stopping triggered at epoch 9

Training completed for roberta-base
Best validation loss: 0.3073

################################################################################
MODEL 4/15: FinBERT-Tone (Encoder (Domain))
################################################################################

Training yiyanghkust/finbert-tone for Intent Classification


config.json:   0%|          | 0.00/533 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/439M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/439M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at yiyanghkust/finbert-tone and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([3, 768]) in the checkpoint and torch.Size([7, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([3]) in the checkpoint and torch.Size([7]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Successfully loaded yiyanghkust/finbert-tone
Max sequence length: 512
Model wrapped in DataParallel

Epoch 1/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.6396 | Val Loss: 0.6013

Epoch 2/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.5809 | Val Loss: 0.5552

Epoch 3/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.5206 | Val Loss: 0.5269

Epoch 4/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.4674 | Val Loss: 0.4774

Epoch 5/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.4095 | Val Loss: 0.4757

Epoch 6/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3328 | Val Loss: 0.4810

Epoch 7/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2513 | Val Loss: 0.5275

Epoch 8/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.1808 | Val Loss: 0.5249
Early stopping triggered at epoch 8

Training completed for yiyanghkust/finbert-tone
Best validation loss: 0.4757

Training yiyanghkust/finbert-tone for Impact Classification


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at yiyanghkust/finbert-tone and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([3, 768]) in the checkpoint and torch.Size([6, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([3]) in the checkpoint and torch.Size([6]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Successfully loaded yiyanghkust/finbert-tone
Max sequence length: 512
Model wrapped in DataParallel

Epoch 1/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.6026 | Val Loss: 0.5334

Epoch 2/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.5062 | Val Loss: 0.4599

Epoch 3/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.4490 | Val Loss: 0.4354

Epoch 4/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3929 | Val Loss: 0.3976

Epoch 5/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3349 | Val Loss: 0.4110

Epoch 6/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2904 | Val Loss: 0.4150

Epoch 7/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2175 | Val Loss: 0.4147
Early stopping triggered at epoch 7

Training completed for yiyanghkust/finbert-tone
Best validation loss: 0.3976

################################################################################
MODEL 5/15: FinBERT-ESG (Encoder (Domain))
################################################################################

Training yiyanghkust/finbert-esg for Intent Classification


config.json:   0%|          | 0.00/781 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/439M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at yiyanghkust/finbert-esg and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([4, 768]) in the checkpoint and torch.Size([7, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([4]) in the checkpoint and torch.Size([7]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Successfully loaded yiyanghkust/finbert-esg
Max sequence length: 512
Model wrapped in DataParallel

Epoch 1/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

model.safetensors:   0%|          | 0.00/439M [00:00<?, ?B/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.6426 | Val Loss: 0.6006

Epoch 2/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.5811 | Val Loss: 0.5613

Epoch 3/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.5443 | Val Loss: 0.5159

Epoch 4/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.4918 | Val Loss: 0.5100

Epoch 5/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.4294 | Val Loss: 0.4992

Epoch 6/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3705 | Val Loss: 0.4895

Epoch 7/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2948 | Val Loss: 0.5100

Epoch 8/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2226 | Val Loss: 0.5136

Epoch 9/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.1587 | Val Loss: 0.5372
Early stopping triggered at epoch 9

Training completed for yiyanghkust/finbert-esg
Best validation loss: 0.4895

Training yiyanghkust/finbert-esg for Impact Classification


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at yiyanghkust/finbert-esg and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([4, 768]) in the checkpoint and torch.Size([6, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([4]) in the checkpoint and torch.Size([6]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Successfully loaded yiyanghkust/finbert-esg
Max sequence length: 512
Model wrapped in DataParallel

Epoch 1/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.6132 | Val Loss: 0.5230

Epoch 2/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.5062 | Val Loss: 0.4605

Epoch 3/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.4594 | Val Loss: 0.4398

Epoch 4/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.4060 | Val Loss: 0.4163

Epoch 5/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3609 | Val Loss: 0.4012

Epoch 6/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3088 | Val Loss: 0.4117

Epoch 7/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2386 | Val Loss: 0.4224

Epoch 8/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.1694 | Val Loss: 0.4176
Early stopping triggered at epoch 8

Training completed for yiyanghkust/finbert-esg
Best validation loss: 0.4012

################################################################################
MODEL 6/15: SecBERT (Encoder (Domain))
################################################################################

Training nlpaueb/sec-bert-base for Intent Classification


tokenizer_config.json:   0%|          | 0.00/263 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/568 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/439M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/439M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at nlpaueb/sec-bert-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Successfully loaded nlpaueb/sec-bert-base
Max sequence length: 512
Model wrapped in DataParallel

Epoch 1/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.6348 | Val Loss: 0.6027

Epoch 2/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.5767 | Val Loss: 0.5665

Epoch 3/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.5302 | Val Loss: 0.5093

Epoch 4/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.4613 | Val Loss: 0.4775

Epoch 5/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.4038 | Val Loss: 0.4809

Epoch 6/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3363 | Val Loss: 0.4785

Epoch 7/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2552 | Val Loss: 0.4934
Early stopping triggered at epoch 7

Training completed for nlpaueb/sec-bert-base
Best validation loss: 0.4775

Training nlpaueb/sec-bert-base for Impact Classification


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at nlpaueb/sec-bert-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Successfully loaded nlpaueb/sec-bert-base
Max sequence length: 512
Model wrapped in DataParallel

Epoch 1/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.5863 | Val Loss: 0.5318

Epoch 2/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.4901 | Val Loss: 0.4441

Epoch 3/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.4156 | Val Loss: 0.4052

Epoch 4/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3646 | Val Loss: 0.4112

Epoch 5/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3135 | Val Loss: 0.4238

Epoch 6/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2518 | Val Loss: 0.3743

Epoch 7/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.1827 | Val Loss: 0.3725

Epoch 8/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.1170 | Val Loss: 0.3909

Epoch 9/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.0743 | Val Loss: 0.3997

Epoch 10/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.0527 | Val Loss: 0.3984
Early stopping triggered at epoch 10

Training completed for nlpaueb/sec-bert-base
Best validation loss: 0.3725

################################################################################
MODEL 7/15: DistilRoBERTa (Encoder (Light))
################################################################################

Training distilroberta-base for Intent Classification


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/480 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/331M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Successfully loaded distilroberta-base
Max sequence length: 512
Model wrapped in DataParallel

Epoch 1/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.6493 | Val Loss: 0.6068

Epoch 2/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.5670 | Val Loss: 0.5299

Epoch 3/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.4701 | Val Loss: 0.4254

Epoch 4/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.4059 | Val Loss: 0.4225

Epoch 5/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3576 | Val Loss: 0.4180

Epoch 6/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3138 | Val Loss: 0.4232

Epoch 7/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2494 | Val Loss: 0.4409

Epoch 8/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.1831 | Val Loss: 0.4451
Early stopping triggered at epoch 8

Training completed for distilroberta-base
Best validation loss: 0.4180

Training distilroberta-base for Impact Classification


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Successfully loaded distilroberta-base
Max sequence length: 512
Model wrapped in DataParallel

Epoch 1/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.6250 | Val Loss: 0.5490

Epoch 2/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.4877 | Val Loss: 0.4084

Epoch 3/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3717 | Val Loss: 0.3362

Epoch 4/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3150 | Val Loss: 0.3207

Epoch 5/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2745 | Val Loss: 0.3290

Epoch 6/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2270 | Val Loss: 0.3220

Epoch 7/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.1746 | Val Loss: 0.3583
Early stopping triggered at epoch 7

Training completed for distilroberta-base
Best validation loss: 0.3207

################################################################################
MODEL 8/15: DistilBERT (Encoder (Light))
################################################################################

Training distilbert-base-uncased for Intent Classification


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Successfully loaded distilbert-base-uncased
Max sequence length: 512
Model wrapped in DataParallel

Epoch 1/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.6459 | Val Loss: 0.6085

Epoch 2/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.5802 | Val Loss: 0.5431

Epoch 3/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.5083 | Val Loss: 0.4683

Epoch 4/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.4343 | Val Loss: 0.4332

Epoch 5/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3679 | Val Loss: 0.4358

Epoch 6/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2917 | Val Loss: 0.4472

Epoch 7/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2132 | Val Loss: 0.4689
Early stopping triggered at epoch 7

Training completed for distilbert-base-uncased
Best validation loss: 0.4332

Training distilbert-base-uncased for Impact Classification


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Successfully loaded distilbert-base-uncased
Max sequence length: 512
Model wrapped in DataParallel

Epoch 1/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.6318 | Val Loss: 0.5513

Epoch 2/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.5254 | Val Loss: 0.4585

Epoch 3/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.4279 | Val Loss: 0.4078

Epoch 4/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3606 | Val Loss: 0.3817

Epoch 5/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2934 | Val Loss: 0.3764

Epoch 6/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2268 | Val Loss: 0.3451

Epoch 7/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.1620 | Val Loss: 0.3508

Epoch 8/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.1070 | Val Loss: 0.3662

Epoch 9/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.0704 | Val Loss: 0.3813
Early stopping triggered at epoch 9

Training completed for distilbert-base-uncased
Best validation loss: 0.3451

################################################################################
MODEL 9/15: DeBERTa-v3-small (Advanced Encoder)
################################################################################

Training microsoft/deberta-v3-small for Intent Classification


tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/578 [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/286M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/286M [00:00<?, ?B/s]

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-small and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Successfully loaded microsoft/deberta-v3-small
Max sequence length: 512
Model wrapped in DataParallel

Epoch 1/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.6877 | Val Loss: 0.6111

Epoch 2/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.5957 | Val Loss: 0.5590

Epoch 3/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.5360 | Val Loss: 0.5355

Epoch 4/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.4894 | Val Loss: 0.4759

Epoch 5/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.4405 | Val Loss: 0.4530

Epoch 6/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3967 | Val Loss: 0.4643

Epoch 7/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3372 | Val Loss: 0.4649

Epoch 8/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2837 | Val Loss: 0.4919
Early stopping triggered at epoch 8

Training completed for microsoft/deberta-v3-small
Best validation loss: 0.4530

Training microsoft/deberta-v3-small for Impact Classification


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-small and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Successfully loaded microsoft/deberta-v3-small
Max sequence length: 512
Model wrapped in DataParallel

Epoch 1/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.6506 | Val Loss: 0.5554

Epoch 2/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.5328 | Val Loss: 0.4809

Epoch 3/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.4498 | Val Loss: 0.3976

Epoch 4/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3829 | Val Loss: 0.3647

Epoch 5/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3295 | Val Loss: 0.3700

Epoch 6/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2854 | Val Loss: 0.3790

Epoch 7/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2400 | Val Loss: 0.3567

Epoch 8/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.1900 | Val Loss: 0.3513

Epoch 9/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.1517 | Val Loss: 0.3733

Epoch 10/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.1262 | Val Loss: 0.3567

Training completed for microsoft/deberta-v3-small
Best validation loss: 0.3513

################################################################################
MODEL 10/15: ELECTRA-small (Advanced Encoder)
################################################################################

Training google/electra-small-discriminator for Intent Classification


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/54.2M [00:00<?, ?B/s]

Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Successfully loaded google/electra-small-discriminator
Max sequence length: 512
Model wrapped in DataParallel

Epoch 1/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

model.safetensors:   0%|          | 0.00/54.2M [00:00<?, ?B/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.6848 | Val Loss: 0.6680

Epoch 2/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.6477 | Val Loss: 0.6268

Epoch 3/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.6124 | Val Loss: 0.5993

Epoch 4/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.5876 | Val Loss: 0.5819

Epoch 5/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.5582 | Val Loss: 0.5455

Epoch 6/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.5300 | Val Loss: 0.5174

Epoch 7/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.4926 | Val Loss: 0.5007

Epoch 8/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.4602 | Val Loss: 0.4877

Epoch 9/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.4370 | Val Loss: 0.4768

Epoch 10/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.4200 | Val Loss: 0.4751

Training completed for google/electra-small-discriminator
Best validation loss: 0.4751

Training google/electra-small-discriminator for Impact Classification


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Successfully loaded google/electra-small-discriminator
Max sequence length: 512
Model wrapped in DataParallel

Epoch 1/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.6873 | Val Loss: 0.6617

Epoch 2/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.6356 | Val Loss: 0.5961

Epoch 3/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.5823 | Val Loss: 0.5465

Epoch 4/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.5272 | Val Loss: 0.5013

Epoch 5/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.4730 | Val Loss: 0.4779

Epoch 6/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.4291 | Val Loss: 0.4362

Epoch 7/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3871 | Val Loss: 0.3987

Epoch 8/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3563 | Val Loss: 0.3987

Epoch 9/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3284 | Val Loss: 0.3849

Epoch 10/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3148 | Val Loss: 0.3833

Training completed for google/electra-small-discriminator
Best validation loss: 0.3833

################################################################################
MODEL 11/15: ELECTRA-base (Advanced Encoder)
################################################################################

Training google/electra-base-discriminator for Intent Classification


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/666 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Successfully loaded google/electra-base-discriminator
Max sequence length: 512
Model wrapped in DataParallel

Epoch 1/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.6570 | Val Loss: 0.6131

Epoch 2/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.5888 | Val Loss: 0.5399

Epoch 3/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.5132 | Val Loss: 0.4733

Epoch 4/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.4441 | Val Loss: 0.4520

Epoch 5/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3832 | Val Loss: 0.4337

Epoch 6/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3260 | Val Loss: 0.4347

Epoch 7/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2573 | Val Loss: 0.4229

Epoch 8/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.1910 | Val Loss: 0.4305

Epoch 9/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.1362 | Val Loss: 0.4535

Epoch 10/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.1005 | Val Loss: 0.4500
Early stopping triggered at epoch 10

Training completed for google/electra-base-discriminator
Best validation loss: 0.4229

Training google/electra-base-discriminator for Impact Classification


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Successfully loaded google/electra-base-discriminator
Max sequence length: 512
Model wrapped in DataParallel

Epoch 1/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.6460 | Val Loss: 0.5608

Epoch 2/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.5470 | Val Loss: 0.4932

Epoch 3/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.4617 | Val Loss: 0.3939

Epoch 4/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3865 | Val Loss: 0.3871

Epoch 5/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3272 | Val Loss: 0.3272

Epoch 6/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2656 | Val Loss: 0.3453

Epoch 7/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2070 | Val Loss: 0.3494

Epoch 8/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.1538 | Val Loss: 0.3257

Epoch 9/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.1138 | Val Loss: 0.3371

Epoch 10/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.0857 | Val Loss: 0.3372

Training completed for google/electra-base-discriminator
Best validation loss: 0.3257

################################################################################
MODEL 12/15: ALBERT-base-v2 (Advanced Encoder)
################################################################################

Training albert-base-v2 for Intent Classification


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/684 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/760k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.31M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/47.4M [00:00<?, ?B/s]

Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert-base-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Successfully loaded albert-base-v2
Max sequence length: 512
Model wrapped in DataParallel

Epoch 1/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.6197 | Val Loss: 0.5795

Epoch 2/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.5388 | Val Loss: 0.5022

Epoch 3/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.4698 | Val Loss: 0.4588

Epoch 4/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.4193 | Val Loss: 0.4229

Epoch 5/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3638 | Val Loss: 0.4344

Epoch 6/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3072 | Val Loss: 0.4117

Epoch 7/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2422 | Val Loss: 0.4381

Epoch 8/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.1788 | Val Loss: 0.4276

Epoch 9/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.1221 | Val Loss: 0.4336
Early stopping triggered at epoch 9

Training completed for albert-base-v2
Best validation loss: 0.4117

Training albert-base-v2 for Impact Classification


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert-base-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Successfully loaded albert-base-v2
Max sequence length: 512
Model wrapped in DataParallel

Epoch 1/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.6091 | Val Loss: 0.5561

Epoch 2/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.5010 | Val Loss: 0.4414

Epoch 3/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.4120 | Val Loss: 0.3653

Epoch 4/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3372 | Val Loss: 0.3613

Epoch 5/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2914 | Val Loss: 0.3632

Epoch 6/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2477 | Val Loss: 0.3378

Epoch 7/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.1963 | Val Loss: 0.3300

Epoch 8/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.1674 | Val Loss: 0.3384

Epoch 9/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.1047 | Val Loss: 0.3272

Epoch 10/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.0659 | Val Loss: 0.3241

Training completed for albert-base-v2
Best validation loss: 0.3241

################################################################################
MODEL 13/15: XLNet-base (Advanced Encoder)
################################################################################

Training xlnet-base-cased for Intent Classification


config.json:   0%|          | 0.00/760 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/798k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.38M [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/467M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/467M [00:00<?, ?B/s]

Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Successfully loaded xlnet-base-cased
Max sequence length: 512
Model wrapped in DataParallel

Epoch 1/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.6340 | Val Loss: 0.5691

Epoch 2/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.5181 | Val Loss: 0.4424

Epoch 3/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.4316 | Val Loss: 0.4283

Epoch 4/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3795 | Val Loss: 0.4033

Epoch 5/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3126 | Val Loss: 0.4227

Epoch 6/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2518 | Val Loss: 0.4614

Epoch 7/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.1657 | Val Loss: 0.4842
Early stopping triggered at epoch 7

Training completed for xlnet-base-cased
Best validation loss: 0.4033

Training xlnet-base-cased for Impact Classification


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Successfully loaded xlnet-base-cased
Max sequence length: 512
Model wrapped in DataParallel

Epoch 1/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.6138 | Val Loss: 0.5313

Epoch 2/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.4702 | Val Loss: 0.3730

Epoch 3/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3551 | Val Loss: 0.3296

Epoch 4/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2958 | Val Loss: 0.3121

Epoch 5/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2429 | Val Loss: 0.3324

Epoch 6/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.1885 | Val Loss: 0.3408

Epoch 7/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.1207 | Val Loss: 0.3445
Early stopping triggered at epoch 7

Training completed for xlnet-base-cased
Best validation loss: 0.3121

################################################################################
MODEL 14/15: BART-base (Encoder-Decoder)
################################################################################

Training facebook/bart-base for Intent Classification


vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/558M [00:00<?, ?B/s]

Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Successfully loaded facebook/bart-base
Max sequence length: 512
Model wrapped in DataParallel

Epoch 1/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.58.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.6313 | Val Loss: 0.6060

Epoch 2/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.5496 | Val Loss: 0.4718

Epoch 3/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.4390 | Val Loss: 0.4069

Epoch 4/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3918 | Val Loss: 0.4315

Epoch 5/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3586 | Val Loss: 0.4251

Epoch 6/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3024 | Val Loss: 0.4217
Early stopping triggered at epoch 6

Training completed for facebook/bart-base
Best validation loss: 0.4069

Training facebook/bart-base for Impact Classification


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Successfully loaded facebook/bart-base
Max sequence length: 512
Model wrapped in DataParallel

Epoch 1/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.5930 | Val Loss: 0.5416

Epoch 2/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.4585 | Val Loss: 0.3698

Epoch 3/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3493 | Val Loss: 0.3240

Epoch 4/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2989 | Val Loss: 0.3094

Epoch 5/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2557 | Val Loss: 0.3596

Epoch 6/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2150 | Val Loss: 0.3161

Epoch 7/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.1610 | Val Loss: 0.3276
Early stopping triggered at epoch 7

Training completed for facebook/bart-base
Best validation loss: 0.3094

################################################################################
MODEL 15/15: DistilBART-cnn (Encoder-Decoder (Light))
################################################################################

Training sshleifer/distilbart-cnn-6-6 for Intent Classification


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/460M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/460M [00:00<?, ?B/s]

Some weights of BartForSequenceClassification were not initialized from the model checkpoint at sshleifer/distilbart-cnn-6-6 and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Successfully loaded sshleifer/distilbart-cnn-6-6
Max sequence length: 512
Model wrapped in DataParallel

Epoch 1/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.6319 | Val Loss: 0.5748

Epoch 2/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.4729 | Val Loss: 0.4172

Epoch 3/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3859 | Val Loss: 0.4101

Epoch 4/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3173 | Val Loss: 0.4154

Epoch 5/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2626 | Val Loss: 0.4363

Epoch 6/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2016 | Val Loss: 0.4736
Early stopping triggered at epoch 6

Training completed for sshleifer/distilbart-cnn-6-6
Best validation loss: 0.4101

Training sshleifer/distilbart-cnn-6-6 for Impact Classification


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at sshleifer/distilbart-cnn-6-6 and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Successfully loaded sshleifer/distilbart-cnn-6-6
Max sequence length: 512
Model wrapped in DataParallel

Epoch 1/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.6002 | Val Loss: 0.4849

Epoch 2/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3971 | Val Loss: 0.3467

Epoch 3/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.3015 | Val Loss: 0.3135

Epoch 4/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.2423 | Val Loss: 0.3313

Epoch 5/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.1840 | Val Loss: 0.3444

Epoch 6/10


Training:   0%|          | 0/91 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/31 [00:00<?, ?it/s]

Train Loss: 0.1360 | Val Loss: 0.3813
Early stopping triggered at epoch 6

Training completed for sshleifer/distilbart-cnn-6-6
Best validation loss: 0.3135

[5/5] Saving results...
Saved: /kaggle/working/project2_intent_results.csv
Saved: /kaggle/working/project2_impact_results.csv
Saved: /kaggle/working/project2_intent_per_label.csv
Saved: /kaggle/working/project2_impact_per_label.csv

EXPERIMENT COMPLETED

All results saved to: /kaggle/working
