# Arabic Sentiment Analysis with Ensemble Learning

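HOW TO USE:
1. Runtime > Change runtime type > GPU (T4 or better)
2. Run the setup cell, then upload `balanced-reviews.csv` to the `data/` folder it creates in Google Drive
3. Run the remaining cells in order
4. Results will be saved to Google Drive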


## 1 - Setup and Installation

In [52]:
print("="*70)
print("ARABIC SENTIMENT ANALYSIS - GOOGLE COLAB SETUP")
print("="*70)

# Install required packages
print("\n📦 Installing required packages...")
!pip install -q transformers datasets torch scikit-learn pandas numpy tqdm matplotlib seaborn

# Mount Google Drive
print("\n💾 Mounting Google Drive...")
from google.colab import drive
drive.mount('/content/drive')

# Create project directories in Google Drive
import os
project_path = '/content/drive/MyDrive/arabic_sentiment_analysis'
os.makedirs(f'{project_path}/data', exist_ok=True)
os.makedirs(f'{project_path}/saved_models', exist_ok=True)
os.makedirs(f'{project_path}/results', exist_ok=True)
os.makedirs(f'{project_path}/figures', exist_ok=True)

print(f"✓ Project directory created at: {project_path}")
print("\n⚠️  IMPORTANT: Upload 'balanced-reviews.csv' to:")
print(f"   {project_path}/data/balanced-reviews.csv")
print("\nYou can download the dataset from:")
print("   https://github.com/elnagara/HARD-Arabic-Dataset")

# Check GPU
import torch
if torch.cuda.is_available():
    print(f"\n✓ GPU Available: {torch.cuda.get_device_name(0)}")
    print(f"   Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
else:
    print("\n⚠️  WARNING: No GPU detected. Training will be very slow!")
    print("   Go to: Runtime > Change runtime type > GPU")

print("\n" + "="*70)
print("Setup complete! You can now run the next cells.")
print("="*70)

ARABIC SENTIMENT ANALYSIS - GOOGLE COLAB SETUP

📦 Installing required packages...
[31mERROR: Operation cancelled by user[0m[31m
[0m
💾 Mounting Google Drive...
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
✓ Project directory created at: /content/drive/MyDrive/arabic_sentiment_analysis

⚠️  IMPORTANT: Upload 'balanced-reviews.csv' to:
   /content/drive/MyDrive/arabic_sentiment_analysis/data/balanced-reviews.csv

You can download the dataset from:
   https://github.com/elnagara/HARD-Arabic-Dataset

✓ GPU Available: NVIDIA A100-SXM4-80GB
   Memory: 85.17 GB

Setup complete! You can now run the next cells.


## 2 - Configuration

In [45]:
class Config:
    """Central configuration for all experiments.

    Every value is a class attribute, so it can be read either from the class or
    from the ``config`` instance created below. ``DEVICE`` is evaluated once,
    when this cell runs, and needs ``torch`` to have been imported already (the
    setup cell does that).
    """

    # Paths (Google Drive)
    PROJECT_PATH = '/content/drive/MyDrive/arabic_sentiment_analysis'
    DATA_DIR = f'{PROJECT_PATH}/data'
    MODEL_DIR = f'{PROJECT_PATH}/saved_models'
    RESULTS_DIR = f'{PROJECT_PATH}/results'
    FIGURES_DIR = f'{PROJECT_PATH}/figures'

    # Dataset settings: file name inside DATA_DIR and the train/val/test fractions
    DATASET_FILE = 'balanced-reviews.csv'
    TRAIN_RATIO = 0.8
    VAL_RATIO = 0.1
    TEST_RATIO = 0.1

    # Model configurations: short key used in file names/results -> checkpoint identifier
    MODELS = {
        'arabert': 'aubmindlab/bert-base-arabert',
        'marbert': 'UBC-NLP/MARBERT',
        'xlm-roberta': 'xlm-roberta-base',
        'camelbert': 'CAMeL-Lab/bert-base-arabic-camelbert-msa'
    }

    # Training hyperparameters
    LEARNING_RATE = 2e-5
    BATCH_SIZE = 16  # Reduced for Colab GPU
    NUM_EPOCHS = 3
    MAX_LENGTH = 512  # every example is padded/truncated to this many tokens
    WARMUP_RATIO = 0.1  # fraction of all optimizer steps used for LR warm-up
    WEIGHT_DECAY = 0.01

    # Early stopping: number of consecutive non-improving epochs tolerated.
    # NOTE(review): with NUM_EPOCHS = 3 a patience of 3 cannot end training
    # before the final epoch, so early stopping is effectively disabled.
    PATIENCE = 3

    # Reproducibility: the whole experiment is repeated once per seed listed here.
    # Shorten this list for a faster Colab run.
    RANDOM_SEEDS = [42, 123, 456, 789, 2024]  # five seeds -> five full runs

    # Device
    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Number of labels
    NUM_LABELS = 2

# Shared instance read by the classes and functions in the cells below.
config = Config()
print("✓ Configuration loaded")
print(f"  Device: {config.DEVICE}")
print(f"  Batch Size: {config.BATCH_SIZE}")
print(f"  Random Seeds: {config.RANDOM_SEEDS}")

✓ Configuration loaded
  Device: cuda
  Batch Size: 16
  Random Seeds: [123]


## 3 - Data Preprocessing

In [46]:
import pandas as pd
import numpy as np
import re
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer
from torch.utils.data import Dataset, DataLoader
import torch

class ArabicPreprocessor:
    """Arabic text preprocessing utilities.

    All methods are static and return ``""`` for any non-string input (for
    example NaN cells coming from pandas), so they are safe to use with
    ``Series.apply``.
    """

    # One-pass character table, built once instead of recompiling a regex per call.
    # Code points are written explicitly because the combining marks are
    # invisible in most editors and easy to corrupt.
    _CHAR_TABLE = str.maketrans({
        '\u0625': '\u0627',  # alef with hamza below -> bare alef
        '\u0623': '\u0627',  # alef with hamza above -> bare alef
        '\u0622': '\u0627',  # alef with madda       -> bare alef
        '\u0649': '\u064A',  # alef maqsura          -> yeh
        '\u0629': '\u0647',  # teh marbuta           -> heh
        '\u0651': None,      # Tashdid (shadda)
        '\u064E': None,      # Fatha
        '\u064B': None,      # Tanwin Fath
        '\u064F': None,      # Damma
        '\u064C': None,      # Tanwin Damm
        '\u0650': None,      # Kasra
        '\u064D': None,      # Tanwin Kasr
        '\u0652': None,      # Sukun
        '\u0640': None,      # Tatwil/Kashida
    })

    # 'http\S+' already covers 'https...', so no separate https alternative is needed.
    _URL_RE = re.compile(r'http\S+|www\S+')
    _EMAIL_RE = re.compile(r'\S+@\S+')
    _WHITESPACE_RE = re.compile(r'\s+')

    @staticmethod
    def normalize_arabic(text):
        """Unify Alef/Yeh/Heh variants and strip diacritics and tatweel.

        Args:
            text: Raw text; anything that is not a ``str`` yields ``""``.

        Returns:
            The normalized string.
        """
        if not isinstance(text, str):
            return ""
        # No mapping output is itself a mapping input, so a single translate
        # pass gives the same result as applying the substitutions in sequence.
        return text.translate(ArabicPreprocessor._CHAR_TABLE)

    @staticmethod
    def clean_text(text):
        """Remove URLs and e-mail addresses, then collapse whitespace.

        Args:
            text: Text to clean; anything that is not a ``str`` yields ``""``.

        Returns:
            The cleaned string with single spaces and no leading/trailing space.
        """
        if not isinstance(text, str):
            return ""

        text = ArabicPreprocessor._URL_RE.sub('', text)
        text = ArabicPreprocessor._EMAIL_RE.sub('', text)
        return ArabicPreprocessor._WHITESPACE_RE.sub(' ', text).strip()

    @staticmethod
    def preprocess(text):
        """Complete pipeline: character normalization followed by cleaning."""
        text = ArabicPreprocessor.normalize_arabic(text)
        return ArabicPreprocessor.clean_text(text)


class HARDDataset(Dataset):
    """Map-style PyTorch dataset that tokenizes HARD reviews on access.

    Each item is a dict with ``input_ids``, ``attention_mask`` (both flattened
    to one dimension and padded/truncated to ``max_length``) and ``labels``
    (a scalar ``torch.long`` tensor).
    """

    def __init__(self, texts, labels, tokenizer, max_length=512):
        """Keep references to the parallel text/label sequences and the tokenizer."""
        self.texts, self.labels = texts, labels
        self.tokenizer, self.max_length = tokenizer, max_length

    def __len__(self):
        """Number of reviews in the dataset."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenize review ``idx`` and return it together with its label."""
        review = str(self.texts[idx])
        target = self.labels[idx]

        tokenizer_options = dict(
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt',
        )
        encoded = self.tokenizer(review, **tokenizer_options)

        # The tokenizer output carries a leading batch axis of size 1; flatten drops it.
        sample = {
            field: encoded[field].flatten()
            for field in ('input_ids', 'attention_mask')
        }
        sample['labels'] = torch.tensor(target, dtype=torch.long)
        return sample


class DataManager:
    """Manage dataset loading, preprocessing, and splitting."""

    # Star rating -> binary sentiment: 1-2 negative (0), 4-5 positive (1).
    # Any other rating (e.g. 3) has no label and the row is dropped.
    RATING_TO_LABEL = {1: 0, 2: 0, 4: 1, 5: 1}

    def __init__(self, config):
        """Store the experiment config and create the text preprocessor.

        Args:
            config: Object providing TRAIN_RATIO, VAL_RATIO, TEST_RATIO and MAX_LENGTH.
        """
        self.config = config
        self.preprocessor = ArabicPreprocessor()

    @staticmethod
    def _read_table(file_path):
        """Read the file as comma- and then tab-separated UTF-8 text.

        The first parse that exposes a ``review`` or ``text`` column wins. If
        neither does, the first successfully parsed frame is returned so the
        caller can report the missing columns. Returns ``None`` (after printing
        the error) when the file cannot be read at all.
        """
        first_parsed = None
        last_error = None
        for separator in (',', '\t'):
            try:
                frame = pd.read_csv(file_path, sep=separator, encoding='utf-8')
            except (OSError, ValueError) as exc:
                # ValueError covers pandas ParserError/EmptyDataError and
                # UnicodeDecodeError; OSError covers missing/unreadable files.
                last_error = exc
                continue
            if {'review', 'text'} & set(frame.columns):
                return frame
            if first_parsed is None:
                first_parsed = frame

        if first_parsed is None:
            print(f"Error loading dataset: {last_error}")
            print("Please ensure the file is in the correct format.")
        return first_parsed

    def load_hard_dataset(self, file_path):
        """Load HARD dataset from CSV/TSV and return a cleaned frame.

        Steps: read the file, rename ``review``/``rating`` to ``text``/``label``,
        preprocess every text, drop rows whose text ends up empty, convert
        ratings to 0/1 labels and drop rows whose rating has no label.

        Args:
            file_path: Path of the comma- or tab-separated UTF-8 file.

        Returns:
            DataFrame with a ``text`` column and an integer 0/1 ``label`` column,
            or ``None`` if the file cannot be read or lacks the required columns.
        """
        df = self._read_table(file_path)
        if df is None:
            return None

        # Standardize column names (keys that are absent are ignored by rename)
        df = df.rename(columns={'review': 'text', 'rating': 'label'})
        missing = [col for col in ('text', 'label') if col not in df.columns]
        if missing:
            print(f"Error loading dataset: missing column(s) {missing}")
            print("Please ensure the file is in the correct format.")
            return None

        print("Preprocessing texts...")
        df['text'] = df['text'].apply(self.preprocessor.preprocess)

        # Remove empty texts
        df = df[df['text'].str.len() > 0]

        # Ensure labels are 0/1. A column that already holds only 0/1 (and
        # actually contains a 0, which no star rating does) is kept as is;
        # pushing it through the rating map would turn 0 into NaN and 1 into 0.
        observed = set(df['label'].dropna().unique())
        already_binary = 0 in observed and observed <= {0, 1}
        if not already_binary:
            df = df.assign(label=df['label'].map(self.RATING_TO_LABEL))

        # Unmapped ratings become NaN; they cannot be stratified on or turned
        # into integer tensors, so drop them explicitly.
        unmapped = int(df['label'].isna().sum())
        if unmapped:
            print(f"Dropping {unmapped} reviews whose rating has no sentiment label")
            df = df.dropna(subset=['label'])

        return df.assign(label=df['label'].astype(int))

    def split_data(self, df, seed=123):
        """Split data into stratified train/val/test frames.

        Args:
            df: Frame with a ``label`` column used for stratification.
            seed: Random state passed to both splits.

        Returns:
            Tuple ``(train, val, test)`` of DataFrames.
        """
        train_val, test = train_test_split(
            df,
            test_size=self.config.TEST_RATIO,
            random_state=seed,
            stratify=df['label']
        )

        # VAL_RATIO is a fraction of the full data; rescale it to the part
        # that remains after the test split.
        val_ratio_adjusted = self.config.VAL_RATIO / (self.config.TRAIN_RATIO + self.config.VAL_RATIO)
        train, val = train_test_split(
            train_val,
            test_size=val_ratio_adjusted,
            random_state=seed,
            stratify=train_val['label']
        )

        print(f"Data split (seed={seed}):")
        print(f"  Train: {len(train)} samples")
        print(f"  Val:   {len(val)} samples")
        print(f"  Test:  {len(test)} samples")

        return train, val, test

    def _build_loader(self, frame, tokenizer, batch_size, shuffle):
        """Wrap one split in a HARDDataset and a DataLoader."""
        dataset = HARDDataset(
            frame['text'].values,
            frame['label'].values,
            tokenizer,
            self.config.MAX_LENGTH
        )
        return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

    def create_dataloaders(self, train_df, val_df, test_df, tokenizer, batch_size):
        """Create PyTorch DataLoaders for the three splits.

        Only the training loader shuffles; validation and test keep row order
        so predictions from different models line up sample by sample.

        Returns:
            Tuple ``(train_loader, val_loader, test_loader)``.
        """
        train_loader = self._build_loader(train_df, tokenizer, batch_size, shuffle=True)
        val_loader = self._build_loader(val_df, tokenizer, batch_size, shuffle=False)
        test_loader = self._build_loader(test_df, tokenizer, batch_size, shuffle=False)

        return train_loader, val_loader, test_loader

print("✓ Data preprocessing classes defined")

✓ Data preprocessing classes defined


## 4 - Model Training

In [47]:
import torch.nn as nn
import torch
from transformers.optimization import get_linear_schedule_with_warmup
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    get_linear_schedule_with_warmup
)
from tqdm.auto import tqdm
import time

class SentimentClassifier:
    """Fine-tuning wrapper around one pretrained sequence-classification model.

    Holds the tokenizer and model loaded from ``model_name`` and implements the
    training loop (early stopping on validation accuracy, best weights
    checkpointed to disk), evaluation and prediction.
    """

    def __init__(self, model_name, config, device):
        """Load tokenizer and model, then move the model to ``device``.

        Args:
            model_name: Identifier handed to ``from_pretrained``.
            config: Provides NUM_LABELS, LEARNING_RATE, WEIGHT_DECAY,
                NUM_EPOCHS, WARMUP_RATIO and PATIENCE.
            device: Device the model and every batch are moved to.
        """
        self.config = config
        self.device = device
        self.model_name = model_name

        print(f"Loading {model_name}...")
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(
            model_name, num_labels=config.NUM_LABELS
        )
        self.model.to(device)

        # Early-stopping bookkeeping, updated by train().
        self.best_val_acc = 0
        self.patience_counter = 0

    def _forward(self, batch):
        """Move one batch to the device and run the model with labels attached.

        Returns:
            Tuple ``(outputs, labels)``; ``outputs`` exposes ``loss`` and ``logits``.
        """
        input_ids = batch['input_ids'].to(self.device)
        attention_mask = batch['attention_mask'].to(self.device)
        labels = batch['labels'].to(self.device)
        outputs = self.model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            labels=labels
        )
        return outputs, labels

    def train_epoch(self, train_loader, optimizer, scheduler):
        """Run one optimisation pass over ``train_loader``.

        Returns:
            Tuple ``(mean batch loss, accuracy over all seen samples)``.
        """
        self.model.train()
        loss_sum, n_correct, n_seen = 0, 0, 0

        progress = tqdm(train_loader, desc='Training')
        for batch in progress:
            optimizer.zero_grad()
            outputs, labels = self._forward(batch)

            loss = outputs.loss
            loss.backward()
            # Clip the global gradient norm to 1.0 before the update.
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)

            optimizer.step()
            scheduler.step()  # the schedule advances per batch, not per epoch

            batch_loss = loss.item()
            loss_sum += batch_loss
            predicted = outputs.logits.argmax(dim=1)
            n_correct += (predicted == labels).sum().item()
            n_seen += labels.size(0)

            progress.set_postfix({
                'loss': f'{batch_loss:.4f}',
                'acc': f'{n_correct/n_seen:.4f}'
            })

        return loss_sum / len(train_loader), n_correct / n_seen

    def evaluate(self, val_loader):
        """Evaluate on a validation/test loader without gradient tracking.

        Returns:
            Tuple ``(mean batch loss, accuracy, predictions, labels, probabilities)``
            where the last three are per-sample lists in loader order.
        """
        self.model.eval()
        loss_sum, n_correct, n_seen = 0, 0, 0
        all_preds, all_labels, all_probs = [], [], []

        with torch.no_grad():
            for batch in tqdm(val_loader, desc='Evaluating'):
                outputs, labels = self._forward(batch)
                loss_sum += outputs.loss.item()

                logits = outputs.logits
                class_probs = logits.softmax(dim=1)
                predicted = logits.argmax(dim=1)

                n_correct += (predicted == labels).sum().item()
                n_seen += labels.size(0)

                all_preds.extend(predicted.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())
                all_probs.extend(class_probs.cpu().numpy())

        mean_loss = loss_sum / len(val_loader)
        return mean_loss, n_correct / n_seen, all_preds, all_labels, all_probs

    def train(self, train_loader, val_loader, save_path):
        """Complete training loop with early stopping.

        After every epoch the model is validated. A new best validation accuracy
        writes the weights to ``save_path``; otherwise the patience counter
        grows and training stops once it reaches ``config.PATIENCE``. The
        weights stored at ``save_path`` are loaded back before returning.

        Returns:
            Tuple ``(best validation accuracy, training time in seconds)``.
        """
        banner = '=' * 60
        print(f"\n{banner}")
        print(f"Training {self.model_name}")
        print(f"{banner}")

        cfg = self.config
        optimizer = torch.optim.AdamW(
            self.model.parameters(),
            lr=cfg.LEARNING_RATE,
            weight_decay=cfg.WEIGHT_DECAY
        )

        # Linear warm-up followed by linear decay over all planned steps.
        total_steps = len(train_loader) * cfg.NUM_EPOCHS
        warmup_steps = int(total_steps * cfg.WARMUP_RATIO)
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=warmup_steps,
            num_training_steps=total_steps
        )

        started = time.time()

        for epoch in range(cfg.NUM_EPOCHS):
            print(f"\nEpoch {epoch + 1}/{cfg.NUM_EPOCHS}")

            train_loss, train_acc = self.train_epoch(train_loader, optimizer, scheduler)
            val_loss, val_acc, _, _, _ = self.evaluate(val_loader)

            print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}")
            print(f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}")

            improved = val_acc > self.best_val_acc
            if improved:
                self.best_val_acc = val_acc
                self.patience_counter = 0
                torch.save(self.model.state_dict(), save_path)
                print(f"✓ Best model saved! Val Acc: {val_acc:.4f}")
                continue

            self.patience_counter += 1
            print(f"No improvement ({self.patience_counter}/{cfg.PATIENCE})")
            if self.patience_counter >= cfg.PATIENCE:
                print("Early stopping triggered!")
                break

        training_time = time.time() - started
        print(f"\nTraining completed in {training_time/3600:.2f} hours")
        print(f"Best validation accuracy: {self.best_val_acc:.4f}")

        # Continue with the checkpointed weights rather than the last epoch's.
        self.model.load_state_dict(torch.load(save_path))

        return self.best_val_acc, training_time

    def predict(self, test_loader):
        """Evaluate on ``test_loader`` and drop the loss.

        Returns:
            Tuple ``(accuracy, predictions, labels, probabilities)``.
        """
        _loss, accuracy, preds, labels, probs = self.evaluate(test_loader)
        return accuracy, preds, labels, probs

print("✓ Model training classes defined")

✓ Model training classes defined


## 5 - Ensemble Methods

In [48]:
from sklearn.linear_model import LogisticRegression

class EnsembleMethods:
    """Strategies for combining the outputs of several base classifiers.

    Inputs are sequences with one entry per model; each entry covers the same
    samples in the same order.
    """

    @staticmethod
    def soft_voting(predictions_probs):
        """Average the class probabilities of all models.

        Returns:
            Tuple ``(predicted classes, averaged probabilities)``.
        """
        mean_probs = np.asarray(predictions_probs).mean(axis=0)
        return mean_probs.argmax(axis=1), mean_probs

    @staticmethod
    def hard_voting(predictions):
        """Majority vote over the predicted classes.

        Ties resolve to the lowest class index, because ``argmax`` returns the
        first maximum of the vote counts.
        """
        votes_per_sample = np.column_stack(predictions)
        winners = [np.argmax(np.bincount(votes)) for votes in votes_per_sample]
        return np.array(winners)

    @staticmethod
    def weighted_voting(predictions_probs, weights):
        """Average probabilities with per-model weights (e.g. validation accuracy).

        Weights are normalized to sum to one before use.

        Returns:
            Tuple ``(predicted classes, weighted probabilities)``.
        """
        norm_weights = np.array(weights, dtype=np.float64)
        norm_weights = norm_weights / norm_weights.sum()

        blended = np.zeros_like(predictions_probs[0], dtype=np.float64)
        for model_idx, model_probs in enumerate(predictions_probs):
            contribution = np.array(model_probs, dtype=np.float64)
            blended += norm_weights[model_idx] * contribution

        return blended.argmax(axis=1), blended

    @staticmethod
    def stacking(train_probs, train_labels, test_probs):
        """Fit a logistic-regression meta-classifier on base-model probabilities.

        The probability arrays of all models are concatenated column-wise to
        form the meta-features for the fitting set and for the test set.

        Returns:
            Tuple ``(test predictions, test probabilities, fitted meta-classifier)``.
        """
        meta_train = np.hstack(list(train_probs))
        meta_test = np.hstack(list(test_probs))

        meta_clf = LogisticRegression(max_iter=1000, random_state=42)
        meta_clf.fit(meta_train, train_labels)

        return meta_clf.predict(meta_test), meta_clf.predict_proba(meta_test), meta_clf

print("✓ Ensemble methods defined")

✓ Ensemble methods defined


## 6 - Evaluation Utilities

In [49]:
from sklearn.metrics import (
    accuracy_score,
    precision_recall_fscore_support,
    confusion_matrix
)
import json

class Evaluator:
    """Metric computation, console reporting and JSON persistence."""

    @staticmethod
    def calculate_metrics(y_true, y_pred):
        """Compute accuracy, macro precision/recall/F1 and the confusion matrix.

        Returns:
            Dict with keys ``accuracy``, ``precision``, ``recall``, ``f1`` and
            ``confusion_matrix`` (nested list, so the dict can be dumped as JSON).
        """
        acc = accuracy_score(y_true, y_pred)
        macro_precision, macro_recall, macro_f1, _support = precision_recall_fscore_support(
            y_true, y_pred, average='macro'
        )
        matrix = confusion_matrix(y_true, y_pred)

        report = dict(
            accuracy=acc,
            precision=macro_precision,
            recall=macro_recall,
            f1=macro_f1,
        )
        report['confusion_matrix'] = matrix.tolist()
        return report

    @staticmethod
    def print_results(name, metrics):
        """Print a framed summary of ``metrics`` under the heading ``name``."""
        rule = '=' * 60
        print(f"\n{rule}")
        print(f"{name} Results")
        print(rule)

        rows = (
            ("Accuracy:  ", 'accuracy'),
            ("Precision: ", 'precision'),
            ("Recall:    ", 'recall'),
            ("F1-Score:  ", 'f1'),
        )
        for caption, key in rows:
            print(f"{caption}{metrics[key]:.4f}")

        print("\nConfusion Matrix:")
        print(np.array(metrics['confusion_matrix']))

    @staticmethod
    def save_results(results, filepath):
        """Write ``results`` to ``filepath`` as indented UTF-8 JSON."""
        with open(filepath, 'w', encoding='utf-8') as handle:
            # ensure_ascii=False keeps non-ASCII text readable in the file.
            json.dump(results, handle, indent=2, ensure_ascii=False)
        print(f"Results saved to {filepath}")

print("✓ Evaluation utilities defined")

✓ Evaluation utilities defined


## 7 - Main Execution

In [50]:
def main():
    """Run the complete experiment: train, ensemble, aggregate and analyse.

    For every seed in ``config.RANDOM_SEEDS`` the data is re-split, every model
    in ``config.MODELS`` is fine-tuned and scored on the test split, and four
    ensembles (soft, hard, weighted voting and stacking) are built from the
    base-model outputs. Per-seed results are written to ``config.RESULTS_DIR``
    as JSON. Afterwards the results are averaged across seeds and the runtime,
    statistical and error analyses are produced.

    Relies on names defined in other notebook cells: ``config``,
    ``DataManager``, ``SentimentClassifier``, ``EnsembleMethods``,
    ``Evaluator``, ``runtime_tracker``, ``StatisticalAnalyzer`` and
    ``ErrorAnalyzer``.

    Returns:
        None. Returns early (after printing a message) if the dataset file is
        missing or cannot be loaded.
    """
    # Imported here so the function does not depend on a module-level
    # `import gc` being present in another cell.
    import gc

    # Initialize runtime tracking
    runtime_tracker.start_experiment()

    print(f"{'='*70}")
    print("ARABIC SENTIMENT ANALYSIS - STARTING EXPERIMENTS")
    print(f"{'='*70}\n")

    # Load dataset
    print("Step 1: Loading dataset...")
    data_manager = DataManager(config)

    dataset_path = os.path.join(config.DATA_DIR, config.DATASET_FILE)

    if not os.path.exists(dataset_path):
        print(f"❌ ERROR: Dataset not found at {dataset_path}")
        print("\nPlease upload 'balanced-reviews.csv' to:")
        print(f"   {config.DATA_DIR}/")
        print("\nDownload from: https://github.com/elnagara/HARD-Arabic-Dataset")
        return

    df = data_manager.load_hard_dataset(dataset_path)
    if df is None:
        return

    print(f"✓ Loaded {len(df)} reviews")

    # Store results
    all_results = {}

    # Store test data for error analysis
    stored_test_data = {}

    # Run experiments
    for seed_idx, seed in enumerate(config.RANDOM_SEEDS):
        print(f"\n{'#'*70}")
        print(f"EXPERIMENT {seed_idx + 1}/{len(config.RANDOM_SEEDS)} - Seed: {seed}")
        print(f"{'#'*70}\n")

        torch.manual_seed(seed)
        np.random.seed(seed)

        # Track runtime for this seed
        runtime_tracker.start_seed(seed)

        # Split data
        train_df, val_df, test_df = data_manager.split_data(df, seed=seed)

        # Per-model outputs, keyed by model key
        model_predictions = {}
        model_probs = {}
        model_val_accuracies = {}
        model_val_probs = {}

        # The validation and test loaders are not shuffled, so every model
        # sees the same label sequence; the copy from the last model is kept.
        true_labels = None
        val_labels = None

        # Train individual models
        for model_key, model_name in config.MODELS.items():
            print(f"\n{'='*70}")
            print(f"Training {model_key}: {model_name}")
            print(f"{'='*70}")

            # Initialize model
            classifier = SentimentClassifier(model_name, config, config.DEVICE)

            # Create dataloaders
            train_loader, val_loader, test_loader = data_manager.create_dataloaders(
                train_df, val_df, test_df,
                classifier.tokenizer,
                config.BATCH_SIZE
            )

            # Train
            model_path = os.path.join(config.MODEL_DIR, f'{model_key}_seed{seed}.pt')
            val_acc, train_time = classifier.train(train_loader, val_loader, model_path)

            # Log training time
            runtime_tracker.log_runtime(seed, model_key, train_time)

            # Evaluate
            _, preds, labels, probs = classifier.predict(test_loader)
            metrics = Evaluator.calculate_metrics(labels, preds)
            metrics['training_time'] = train_time
            metrics['val_accuracy'] = val_acc

            # Validation probabilities feed the stacking meta-classifier.
            # Collecting them now, while the best checkpoint is still loaded,
            # avoids re-instantiating and reloading every model later.
            _, _, val_labels, val_probs = classifier.predict(val_loader)

            # Store
            model_predictions[model_key] = preds
            model_probs[model_key] = probs
            model_val_accuracies[model_key] = val_acc
            model_val_probs[model_key] = val_probs
            true_labels = labels

            Evaluator.print_results(f"{model_key} (Test Set)", metrics)

            # Save individual results for this seed
            result_path = os.path.join(config.RESULTS_DIR, f'{model_key}_results_seed{seed}.json')
            Evaluator.save_results(metrics, result_path)

            # Free memory: collect the dropped model first so that its GPU
            # blocks are actually released by empty_cache().
            del classifier
            gc.collect()
            torch.cuda.empty_cache()

        # Apply ensemble methods
        print(f"\n{'='*70}")
        print("ENSEMBLE METHODS")
        print(f"{'='*70}")

        ensemble_results = {}

        all_preds = [model_predictions[key] for key in config.MODELS.keys()]
        all_probs = [model_probs[key] for key in config.MODELS.keys()]

        # Soft Voting
        soft_preds, soft_probs = EnsembleMethods.soft_voting(all_probs)
        soft_metrics = Evaluator.calculate_metrics(true_labels, soft_preds)
        Evaluator.print_results("Soft Voting Ensemble", soft_metrics)
        ensemble_results['soft_voting'] = soft_metrics

        # Hard Voting
        hard_preds = EnsembleMethods.hard_voting(all_preds)
        hard_metrics = Evaluator.calculate_metrics(true_labels, hard_preds)
        Evaluator.print_results("Hard Voting Ensemble", hard_metrics)
        ensemble_results['hard_voting'] = hard_metrics

        # Weighted Voting (weights = validation accuracy of each model)
        weights = [model_val_accuracies[key] for key in config.MODELS.keys()]
        weighted_preds, weighted_probs = EnsembleMethods.weighted_voting(all_probs, weights)
        weighted_metrics = Evaluator.calculate_metrics(true_labels, weighted_preds)
        Evaluator.print_results("Weighted Voting Ensemble", weighted_metrics)
        ensemble_results['weighted_voting'] = weighted_metrics

        # Stacking: meta-classifier is fitted on validation-set probabilities
        # and applied to test-set probabilities.
        val_probs_all = [model_val_probs[key] for key in config.MODELS.keys()]
        val_labels = np.array(val_labels)
        stacking_preds, stacking_probs, _ = EnsembleMethods.stacking(
            val_probs_all, val_labels, all_probs
        )
        stacking_metrics = Evaluator.calculate_metrics(true_labels, stacking_preds)
        Evaluator.print_results("Stacking Ensemble", stacking_metrics)
        ensemble_results['stacking'] = stacking_metrics

        # Test metrics of every base model, computed once and reused below.
        individual_metrics = {
            k: Evaluator.calculate_metrics(true_labels, v)
            for k, v in model_predictions.items()
        }

        # Store seed results
        all_results[f'seed_{seed}'] = {
            'individual_models': individual_metrics,
            'ensembles': ensemble_results
        }

        # Save complete results for this seed
        seed_result_path = os.path.join(config.RESULTS_DIR, f'results_seed_{seed}.json')
        complete_seed_results = {**individual_metrics, **ensemble_results}
        Evaluator.save_results(complete_seed_results, seed_result_path)
        print(f"\n✓ Saved complete results for seed {seed} to {seed_result_path}")

        # Store test data for error analysis (only first seed)
        if seed_idx == 0:
            stored_test_data = {
                'test_df': test_df,
                'true_labels': true_labels,
                'stacking_preds': stacking_preds,
                'stacking_probs': stacking_probs
            }

    # Aggregate results
    print(f"\n{'='*70}")
    print("FINAL RESULTS (Averaged across all seeds)")
    print(f"{'='*70}\n")

    def aggregate_results(results_dict):
        """Mean and std of each scalar metric across seeds, per model/ensemble.

        ``results_dict`` maps ``'seed_<n>'`` to ``{name: metrics}``; the names
        are taken from the first seed's entry.
        """
        metrics = ['accuracy', 'precision', 'recall', 'f1']
        aggregated = {}

        for model_name in results_dict[f'seed_{config.RANDOM_SEEDS[0]}'].keys():
            aggregated[model_name] = {}
            for metric in metrics:
                values = [
                    results_dict[f'seed_{seed}'][model_name][metric]
                    for seed in config.RANDOM_SEEDS
                    if model_name in results_dict[f'seed_{seed}']
                ]
                aggregated[model_name][metric] = {
                    'mean': np.mean(values),
                    'std': np.std(values)
                }
        return aggregated

    individual_aggregated = aggregate_results({
        f'seed_{seed}': all_results[f'seed_{seed}']['individual_models']
        for seed in config.RANDOM_SEEDS
    })

    print("Individual Models:")
    for model_name, metrics in individual_aggregated.items():
        print(f"\n{model_name}:")
        print(f"  Accuracy:  {metrics['accuracy']['mean']:.4f} ± {metrics['accuracy']['std']:.4f}")
        print(f"  F1-Score:  {metrics['f1']['mean']:.4f} ± {metrics['f1']['std']:.4f}")

    ensemble_aggregated = aggregate_results({
        f'seed_{seed}': all_results[f'seed_{seed}']['ensembles']
        for seed in config.RANDOM_SEEDS
    })

    print("\nEnsemble Methods:")
    for ensemble_name, metrics in ensemble_aggregated.items():
        print(f"\n{ensemble_name}:")
        print(f"  Accuracy:  {metrics['accuracy']['mean']:.4f} ± {metrics['accuracy']['std']:.4f}")
        print(f"  F1-Score:  {metrics['f1']['mean']:.4f} ± {metrics['f1']['std']:.4f}")

    # Generate runtime analysis
    print(f"\n{'='*70}")
    print("RUNTIME ANALYSIS")
    print(f"{'='*70}")
    runtime_tracker.generate_runtime_table()

    runtime_file = os.path.join(config.RESULTS_DIR, 'runtime_analysis.txt')
    runtime_tracker.export_runtime_analysis(runtime_file)

    # Statistical Analysis
    print(f"\n{'='*70}")
    print("STATISTICAL ANALYSIS")
    print(f"{'='*70}")

    analyzer = StatisticalAnalyzer(config.RESULTS_DIR, config.RANDOM_SEEDS)

    if len(analyzer.all_results) > 0:
        summary = analyzer.generate_summary_table()

        print("\nIndividual Models (Mean ± Std):")
        for model in config.MODELS.keys():
            if model in summary and 'f1' in summary[model]:
                print(f"  {model:15s}: F1 = {summary[model]['f1']['formatted']}%")

        print("\nEnsemble Methods (Mean ± Std):")
        for model in ['soft_voting', 'hard_voting', 'weighted_voting', 'stacking']:
            if model in summary and 'f1' in summary[model]:
                print(f"  {model:15s}: F1 = {summary[model]['f1']['formatted']}%")

        # Return value is not used here; the call is kept as in the original pipeline.
        analyzer.perform_significance_tests()

        stat_file = os.path.join(config.RESULTS_DIR, 'statistical_analysis.txt')
        analyzer.export_for_paper(stat_file)

        plot_path = os.path.join(config.FIGURES_DIR, 'performance_boxplots.png')
        analyzer.plot_performance(plot_path)

        print(f"\n✓ Statistical analysis saved to: {stat_file}")

    # Error Analysis (uses the stacking ensemble of the first seed)
    if stored_test_data:
        print(f"\n{'='*70}")
        print("ERROR ANALYSIS")
        print(f"{'='*70}")

        error_analyzer = ErrorAnalyzer(
            stored_test_data['test_df'],
            stored_test_data['true_labels'],
            stored_test_data['stacking_preds'],
            stored_test_data['stacking_probs'],
            "Stacking Ensemble"
        )

        error_summary = error_analyzer.get_error_summary()
        print(f"\nFalse Positives: {error_summary['false_positives']} ({error_summary['fp_rate']*100:.2f}%)")
        print(f"False Negatives: {error_summary['false_negatives']} ({error_summary['fn_rate']*100:.2f}%)")

        # Return value is not used here; the call is kept as in the original pipeline.
        error_analyzer.analyze_label_quality(sample_size=20)

        error_file = os.path.join(config.RESULTS_DIR, 'error_analysis_report.txt')
        error_analyzer.generate_error_report(error_file)
        print(f"\n✓ Error analysis saved to: {error_file}")

    print(f"\n{'='*70}")
    print("✓ ALL EXPERIMENTS COMPLETE!")
    print(f"{'='*70}")
    print(f"\nResults directory: {config.RESULTS_DIR}")
    print("\nGenerated files:")
    print("  - results_seed_*.json")
    print("  - statistical_analysis.txt")
    print("  - runtime_analysis.txt")
    print("  - error_analysis_report.txt")
    print("  - performance_boxplots.png")

print("✓ Enhanced main() function defined")

In [None]:
# ========================================
# RUN EVERYTHING
# ========================================
# This cell executes the complete pipeline:
#   - Training (all models, all seeds)
#   - Statistical analysis
#   - Error analysis
#   - Runtime tracking
#   - Generate all tables and figures
#
# NOTE(review): main() uses runtime_tracker, StatisticalAnalyzer and
# ErrorAnalyzer. None of them is defined in the cells shown above this one, so
# the cells that define them must have been run first; otherwise this call
# stops with a NameError.

main()

## Run Experiment

In [None]:
# In a notebook __name__ is always '__main__', so an unguarded call here would
# repeat the whole training pipeline that the "RUN EVERYTHING" cell above has
# already launched. Set the flag to True only if you want to start the
# pipeline from this cell instead.
RUN_PIPELINE_FROM_THIS_CELL = False

if __name__ == '__main__' and RUN_PIPELINE_FROM_THIS_CELL:
    main()

## 10 - Main Functions

**Two main functions:**
1. `run_single_seed(seed)` - Run one seed at a time
2. `run_all_analyses()` - Analyze all completed seeds

In [None]:
def run_single_seed(seed):
    """
    Run the complete training and ensembling pipeline for a single seed.

    Steps:
    1. Seed the Python, NumPy and PyTorch random number generators.
    2. Load the dataset and split it into train / validation / test sets.
    3. Train every model in ``config.MODELS``, evaluate it on the test set and
       save its metrics to ``<model_key>_results_seed<seed>.json``.
    4. Evaluate the soft, hard and weighted voting ensembles, then fit a
       stacking ensemble on validation-set probabilities.
    5. Save the combined metrics to ``results_seed_<seed>.json`` and log the
       runtime through the module-level ``runtime_tracker``.

    Args:
        seed (int): Random seed for reproducibility (e.g., 42, 123, 456, 789, 2024)

    Returns:
        dict | None: ``None`` when the dataset is missing, fails to load, or
        ``config.MODELS`` is empty. Otherwise a dict with the keys ``'seed'``,
        ``'individual_models'`` (metrics per model), ``'ensembles'`` (metrics
        per ensemble method) and ``'test_data'`` (the test frame, true labels
        and stacking predictions/probabilities for later error analysis).

    Example:
        results_42 = run_single_seed(42)
    """
    # Local import so this block does not depend on the notebook's import cell.
    import random

    print(f"\n{'#'*70}")
    print(f"RUNNING SEED: {seed}")
    print(f"{'#'*70}\n")

    # Seed every RNG the pipeline may touch; the original skipped Python's own
    # `random` module.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    # Track runtime for this seed
    runtime_tracker.start_seed(seed)

    # Load dataset
    print("Loading dataset...")
    data_manager = DataManager(config)
    dataset_path = os.path.join(config.DATA_DIR, config.DATASET_FILE)

    if not os.path.exists(dataset_path):
        print(f"❌ ERROR: Dataset not found at {dataset_path}")
        print("\nPlease upload 'balanced-reviews.csv' to:")
        print(f"   {config.DATA_DIR}/")
        return None

    df = data_manager.load_hard_dataset(dataset_path)
    if df is None:
        return None

    print(f"✓ Loaded {len(df)} reviews")

    # Split data
    train_df, val_df, test_df = data_manager.split_data(df, seed=seed)

    # Per-model outputs collected for the ensemble stage
    model_predictions = {}
    model_probs = {}
    model_val_accuracies = {}
    true_labels = None

    # Train individual models
    for model_key, model_name in config.MODELS.items():
        print(f"\n{'='*70}")
        print(f"Training {model_key}: {model_name}")
        print(f"{'='*70}")

        # Initialize model
        classifier = SentimentClassifier(model_name, config, config.DEVICE)

        # Create dataloaders (each model brings its own tokenizer)
        train_loader, val_loader, test_loader = data_manager.create_dataloaders(
            train_df, val_df, test_df,
            classifier.tokenizer,
            config.BATCH_SIZE
        )

        # Train
        model_path = os.path.join(config.MODEL_DIR, f'{model_key}_seed{seed}.pt')
        val_acc, train_time = classifier.train(train_loader, val_loader, model_path)

        # Log training time
        runtime_tracker.log_runtime(seed, model_key, train_time)

        # Evaluate
        test_acc, preds, labels, probs = classifier.predict(test_loader)
        metrics = Evaluator.calculate_metrics(labels, preds)
        metrics['training_time'] = train_time
        metrics['val_accuracy'] = val_acc

        # Store
        model_predictions[model_key] = preds
        model_probs[model_key] = probs
        model_val_accuracies[model_key] = val_acc
        # Keep the test labels explicitly rather than relying on the loop
        # variable leaking out of the loop.
        # NOTE(review): assumes every model's test loader yields the labels in
        # the same order, so the last model's labels serve for all - confirm.
        true_labels = labels

        Evaluator.print_results(f"{model_key} (Test Set)", metrics)

        # Save individual results for this seed
        result_path = os.path.join(config.RESULTS_DIR, f'{model_key}_results_seed{seed}.json')
        Evaluator.save_results(metrics, result_path)

        # Free memory
        del classifier
        torch.cuda.empty_cache()
        gc.collect()

    # Without at least one trained model there is nothing to ensemble.
    if not model_predictions:
        print("❌ ERROR: config.MODELS is empty - no models were trained")
        return None

    # Apply ensemble methods
    print(f"\n{'='*70}")
    print("ENSEMBLE METHODS")
    print(f"{'='*70}")

    ensemble_results = {}

    all_preds = [model_predictions[key] for key in config.MODELS.keys()]
    all_probs = [model_probs[key] for key in config.MODELS.keys()]

    # Soft Voting
    soft_preds, soft_probs = EnsembleMethods.soft_voting(all_probs)
    soft_metrics = Evaluator.calculate_metrics(true_labels, soft_preds)
    Evaluator.print_results("Soft Voting Ensemble", soft_metrics)
    ensemble_results['soft_voting'] = soft_metrics

    # Hard Voting
    hard_preds = EnsembleMethods.hard_voting(all_preds)
    hard_metrics = Evaluator.calculate_metrics(true_labels, hard_preds)
    Evaluator.print_results("Hard Voting Ensemble", hard_metrics)
    ensemble_results['hard_voting'] = hard_metrics

    # Weighted Voting (weights are the validation accuracies)
    weights = [model_val_accuracies[key] for key in config.MODELS.keys()]
    weighted_preds, weighted_probs = EnsembleMethods.weighted_voting(all_probs, weights)
    weighted_metrics = Evaluator.calculate_metrics(true_labels, weighted_preds)
    Evaluator.print_results("Weighted Voting Ensemble", weighted_metrics)
    ensemble_results['weighted_voting'] = weighted_metrics

    # Stacking: reload each saved checkpoint and collect validation-set
    # probabilities for the meta-classifier.
    val_probs_all = []
    for model_key, model_name in config.MODELS.items():
        classifier = SentimentClassifier(model_name, config, config.DEVICE)
        model_path = os.path.join(config.MODEL_DIR, f'{model_key}_seed{seed}.pt')
        # map_location lets a checkpoint written on a GPU load on a CPU-only
        # runtime instead of raising a CUDA deserialization error.
        state_dict = torch.load(model_path, map_location=config.DEVICE)
        classifier.model.load_state_dict(state_dict)

        _, val_loader, _ = data_manager.create_dataloaders(
            train_df, val_df, test_df,
            classifier.tokenizer,
            config.BATCH_SIZE
        )

        _, _, val_labels, val_probs = classifier.predict(val_loader)
        val_probs_all.append(val_probs)

        del classifier
        torch.cuda.empty_cache()
        gc.collect()

    val_labels = np.array(val_labels)
    stacking_preds, stacking_probs, meta_clf = EnsembleMethods.stacking(
        val_probs_all, val_labels, all_probs
    )
    stacking_metrics = Evaluator.calculate_metrics(true_labels, stacking_preds)
    Evaluator.print_results("Stacking Ensemble", stacking_metrics)
    ensemble_results['stacking'] = stacking_metrics

    # Per-model test metrics, computed once and reused for both the returned
    # dict and the JSON file (the original computed them twice).
    individual_metrics = {
        key: Evaluator.calculate_metrics(true_labels, preds)
        for key, preds in model_predictions.items()
    }

    # Prepare complete results
    seed_results = {
        'seed': seed,
        'individual_models': individual_metrics,
        'ensembles': ensemble_results,
        'test_data': {
            'test_df': test_df,
            'true_labels': true_labels,
            'stacking_preds': stacking_preds,
            'stacking_probs': stacking_probs
        }
    }

    # Save complete results for this seed (flat mapping: model/ensemble -> metrics)
    seed_result_path = os.path.join(config.RESULTS_DIR, f'results_seed_{seed}.json')
    complete_seed_results = {**individual_metrics, **ensemble_results}
    Evaluator.save_results(complete_seed_results, seed_result_path)

    print(f"\n{'='*70}")
    print(f"✓ SEED {seed} COMPLETE!")
    print(f"{'='*70}")
    print(f"✓ Results saved to: {seed_result_path}")

    # Show runtime for this seed
    # NOTE(review): the ':.2f' format assumes format_hours() returns a number,
    # not a pre-formatted string - confirm against RuntimeTracker.
    seed_time = runtime_tracker.get_seed_total(seed)
    print(f"✓ Seed runtime: {runtime_tracker.format_hours(seed_time):.2f} hours")

    return seed_results

def run_all_analyses():
    """
    Run the cross-seed analysis for every seed that has saved results.

    This function:
    1. Finds the seeds in ``config.RANDOM_SEEDS`` that have a
       ``results_seed_<seed>.json`` file in ``config.RESULTS_DIR``
    2. Prints and exports the runtime analysis from ``runtime_tracker``
    3. Performs statistical analysis (mean ± std, significance tests) and
       exports the summary text file and the performance box plots
    4. Prints instructions for error analysis. No error analysis is computed
       here, because the test set and predictions are not stored on disk.

    Call this AFTER running one or more seeds. Returns ``None``; it returns
    early when no completed seed is found.
    """

    print(f"\n{'='*70}")
    print("RUNNING COMPREHENSIVE ANALYSIS")
    print(f"{'='*70}\n")

    # Find which seeds have been completed
    completed_seeds = [
        seed for seed in config.RANDOM_SEEDS
        if os.path.exists(os.path.join(config.RESULTS_DIR, f'results_seed_{seed}.json'))
    ]

    if not completed_seeds:
        print("❌ No completed seeds found!")
        print("   Please run at least one seed first.")
        return

    print(f"✓ Found {len(completed_seeds)} completed seeds: {completed_seeds}\n")

    # Runtime Analysis
    print(f"{'='*70}")
    print("RUNTIME ANALYSIS")
    print(f"{'='*70}")

    runtime_tracker.generate_runtime_table()
    runtime_file = os.path.join(config.RESULTS_DIR, 'runtime_analysis.txt')
    runtime_tracker.export_runtime_analysis(runtime_file)
    print(f"\n✓ Runtime analysis saved to: {runtime_file}")

    # Statistical Analysis
    print(f"\n{'='*70}")
    print("STATISTICAL ANALYSIS")
    print(f"{'='*70}\n")

    analyzer = StatisticalAnalyzer(config.RESULTS_DIR, completed_seeds)

    if len(analyzer.all_results) > 0:
        summary = analyzer.generate_summary_table()

        print("Individual Models (Mean ± Std):")
        for model in config.MODELS.keys():
            if model in summary and 'f1' in summary[model]:
                print(f"  {model:15s}: F1 = {summary[model]['f1']['formatted']}%")

        print("\nEnsemble Methods (Mean ± Std):")
        for model in ['soft_voting', 'hard_voting', 'weighted_voting', 'stacking']:
            if model in summary and 'f1' in summary[model]:
                print(f"  {model:15s}: F1 = {summary[model]['f1']['formatted']}%")

        # Only the names of the best ensemble / individual model are reported.
        _sig_results, best_ens, best_ind = analyzer.perform_significance_tests()
        print(f"\n✓ Best Ensemble: {best_ens}")
        print(f"✓ Best Individual: {best_ind}")

        stat_file = os.path.join(config.RESULTS_DIR, 'statistical_analysis.txt')
        analyzer.export_for_paper(stat_file)

        plot_path = os.path.join(config.FIGURES_DIR, 'performance_boxplots.png')
        analyzer.plot_performance(plot_path)

        print(f"\n✓ Statistical analysis saved to: {stat_file}")
        print(f"✓ Box plots saved to: {plot_path}")

    # Error Analysis (placeholder): the JSON results hold metrics only, so the
    # test frame and predictions needed by an error analysis are not available
    # here. Only instructions are printed.
    first_seed = completed_seeds[0]

    print(f"\n{'='*70}")
    print("ERROR ANALYSIS")
    print(f"{'='*70}\n")
    print(f"Note: Using data from seed {first_seed} for error analysis")

    print("\nTo run complete error analysis:")
    print("1. The test_df and predictions are saved during seed execution")
    print("2. Error analysis will be performed on the first seed's data")
    print("\nError analysis requires re-running or storing test data.")
    print("See error_analysis_manual.txt for instructions on manual analysis.")

    print(f"\n{'='*70}")
    print("✓ ALL ANALYSES COMPLETE!")
    print(f"{'='*70}")
    print(f"\nResults saved to: {config.RESULTS_DIR}")
    print("\nGenerated files:")
    print("  - results_seed_*.json (for each completed seed)")
    print("  - statistical_analysis.txt (mean±std, t-tests, LaTeX)")
    print("  - runtime_analysis.txt (training times, LaTeX)")
    print("  - performance_boxplots.png (visualizations)")

# Initialize runtime tracker (once; the cell previously repeated the function
# definition and this call, so start_experiment() ran twice).
runtime_tracker.start_experiment()

print("✓ run_single_seed() function defined")
print("✓ run_all_analyses() function defined")
print("✓ Runtime tracker initialized")
print("\nYou can now run seeds individually:")
print("  results_42 = run_single_seed(42)")
print("  results_123 = run_single_seed(123)")
print("  ... etc")
print("\nAfter running all seeds, call:")
print("  run_all_analyses()")

## 11 - Run Seeds Individually

**Instructions:**
- Run cells 1-10 first (setup)
- Then run seeds one at a time below
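- Each seed takes 3-4 hours on A100 (7-8 hours on T4)
- Results are saved to Google Drive automatically after each seed
- Safe to disconnect between seeds; after reconnecting, re-run cells 1-10 before starting the next seed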

In [None]:
# =====================================
# RUN SEED 42
# =====================================
# Estimated time: 3-4 hours on A100, 7-8 hours on T4
# Results will be saved to Google Drive automatically
# run_single_seed() returns None if the dataset file is missing or fails to load.

results_seed_42 = run_single_seed(42)

In [None]:
# =====================================
# RUN SEED 123
# =====================================
# Estimated time: 3-4 hours on A100, 7-8 hours on T4
# Results will be saved to Google Drive automatically
# run_single_seed() returns None if the dataset file is missing or fails to load.

results_seed_123 = run_single_seed(123)

In [None]:
# =====================================
# RUN SEED 456
# =====================================
# Estimated time: 3-4 hours on A100, 7-8 hours on T4
# Results will be saved to Google Drive automatically
# run_single_seed() returns None if the dataset file is missing or fails to load.

results_seed_456 = run_single_seed(456)

In [None]:
# =====================================
# RUN SEED 789
# =====================================
# Estimated time: 3-4 hours on A100, 7-8 hours on T4
# Results will be saved to Google Drive automatically
# run_single_seed() returns None if the dataset file is missing or fails to load.

results_seed_789 = run_single_seed(789)

In [None]:
# =====================================
# RUN SEED 2024
# =====================================
# Estimated time: 3-4 hours on A100, 7-8 hours on T4
# Results will be saved to Google Drive automatically
# run_single_seed() returns None if the dataset file is missing or fails to load.

results_seed_2024 = run_single_seed(2024)

## 12 - Run Final Analysis

**Run this AFTER completing all (or some) seeds**

This generates:
- statistical_analysis.txt (mean±std, t-tests, LaTeX)
- runtime_analysis.txt (timing tables, LaTeX)
- performance_boxplots.png (visualizations)

Works with any number of completed seeds (at least 2 recommended, since standard deviations and t-tests need more than one run).

In [None]:
# =====================================
# RUN COMPREHENSIVE ANALYSIS
# =====================================
# Run this AFTER completing seeds
# Works with partial results (e.g., 3 out of 5 seeds)
# Completed seeds are detected by looking for results_seed_<seed>.json in
# config.RESULTS_DIR for every seed listed in config.RANDOM_SEEDS.

run_all_analyses()

## 13 - Check Progress (Optional)

Run this cell anytime to see which seeds have been completed.

In [None]:
# =====================================
# CHECK PROGRESS
# =====================================
# Run this anytime to see which seeds are done.
# A seed counts as completed when its results_seed_<seed>.json file exists in
# config.RESULTS_DIR.

import os
from datetime import datetime

# Use the same seed list that run_all_analyses() scans, so the progress report
# and the analysis always agree on which seeds are expected.
seeds_to_check = list(config.RANDOM_SEEDS)
total = len(seeds_to_check)

print("Checking completed seeds...\n")

completed = 0
for seed in seeds_to_check:
    result_file = os.path.join(config.RESULTS_DIR, f'results_seed_{seed}.json')
    if os.path.exists(result_file):
        completed += 1
        size_kb = os.path.getsize(result_file) / 1024
        mod_time = datetime.fromtimestamp(os.path.getmtime(result_file))
        print(f"✓ Seed {seed} - COMPLETED")
        print(f"  Size: {size_kb:.1f} KB")
        print(f"  Completed: {mod_time.strftime('%Y-%m-%d %H:%M:%S')}")
    else:
        print(f"✗ Seed {seed} - NOT STARTED")
    print()

# Integer percentage; guarded so an empty seed list cannot divide by zero.
percent = completed * 100 // total if total else 0
print(f"\nProgress: {completed}/{total} seeds completed ({percent}%)")

if total and completed == total:
    print("\n🎉 All seeds complete! Run the analysis cell above.")
elif completed >= 3:
    print(f"\n✓ {completed} seeds complete. You can run analysis now or wait for more.")
elif completed > 0:
    print(f"\n⏳ {completed} seed(s) complete. Continue with remaining seeds.")
else:
    print("\n⚠️  No seeds completed yet. Start with Seed 42 above.")