# op

In [2]:
import torch
import torch.nn as nn
import torch_pruning as tp
import matplotlib.pyplot as plt
from torch import optim
import os
import numpy as np
import copy
import json
import pandas as pd
from torch.optim.lr_scheduler import CosineAnnealingLR, ReduceLROnPlateau

import torchvision
import torchvision.models as models
from torchvision import transforms
from torch.utils.data import DataLoader

# Configuration
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
MODEL_BASE_NAME = "mobilenet_v2"
print(f"Using device: {DEVICE}")

def get_data_loaders(data_dir_path='./data', batch_size=128, val_split=0.1, seed=42):
    """Load CIFAR-10 dataset with train/val/test splits and data augmentation"""
    abs_data_dir = os.path.abspath(data_dir_path)
    print(f"Loading CIFAR-10 from: {abs_data_dir}")

    # Data augmentation for training
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
    ])

    # No augmentation for val/test
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
    ])

    # Load datasets (assuming pre-downloaded)
    full_train_dataset = torchvision.datasets.CIFAR10(
        root=abs_data_dir, train=True, download=False, transform=transform_train
    )

    # Create a version with test transforms for validation
    full_train_dataset_val = torchvision.datasets.CIFAR10(
        root=abs_data_dir, train=True, download=False, transform=transform_test
    )

    test_dataset = torchvision.datasets.CIFAR10(
        root=abs_data_dir, train=False, download=False, transform=transform_test
    )

    # Create train/validation split
    val_size = int(len(full_train_dataset) * val_split)
    train_size = len(full_train_dataset) - val_size
    generator = torch.Generator().manual_seed(seed)

    train_indices, val_indices = torch.utils.data.random_split(
        range(len(full_train_dataset)), [train_size, val_size], generator=generator
    )

    # Create subset datasets
    train_dataset = torch.utils.data.Subset(full_train_dataset, train_indices.indices)
    val_dataset = torch.utils.data.Subset(full_train_dataset_val, val_indices.indices)

    # Create data loaders
    num_workers = min(4, os.cpu_count() or 2)
    pin_memory = True if DEVICE.type == 'cuda' else False

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                             num_workers=num_workers, pin_memory=pin_memory)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False,
                           num_workers=num_workers, pin_memory=pin_memory)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False,
                            num_workers=num_workers, pin_memory=pin_memory)

    print(f"DataLoaders created - Train: {len(train_dataset)}, Val: {len(val_dataset)}, Test: {len(test_dataset)}")
    return train_loader, val_loader, test_loader

def get_mobilenetv2_model(num_classes=10, use_pretrained=True, width_mult=1.0):
    """Get MobileNetV2 model adapted for CIFAR-10 with proper initialization"""
    if use_pretrained and os.path.exists('./mobilenet_v2-b0353104.pth'):
        # Load pre-trained model
        model = models.mobilenet_v2(weights=None)
        pretrained_state = torch.load('./mobilenet_v2-b0353104.pth', map_location=DEVICE)

        # Load all weights except the classifier
        model_dict = model.state_dict()
        pretrained_dict = {k: v for k, v in pretrained_state.items()
                          if k in model_dict and 'classifier' not in k}
        model_dict.update(pretrained_dict)
        model.load_state_dict(model_dict)
        print("✅ Loaded MobileNetV2 with ImageNet pretrained features (excluding classifier)")
    else:
        model = models.mobilenet_v2(weights=None)
        print("✅ Created MobileNetV2 without pretrained weights")

    # Replace classifier with a more suitable one for CIFAR-10
    in_features = model.classifier[1].in_features
    model.classifier = nn.Sequential(
        nn.Dropout(0.2),
        nn.Linear(in_features, num_classes)
    )

    # Initialize the new classifier properly
    nn.init.xavier_uniform_(model.classifier[1].weight)
    nn.init.zeros_(model.classifier[1].bias)

    print(f"✅ Adapted classifier for {num_classes} classes")
    return model

def mixup_data(x, y, alpha=0.2):
    """Apply mixup augmentation"""
    if alpha > 0:
        lam = np.random.beta(alpha, alpha)
    else:
        lam = 1

    batch_size = x.size()[0]
    index = torch.randperm(batch_size).to(x.device)

    mixed_x = lam * x + (1 - lam) * x[index, :]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam

def mixup_criterion(criterion, pred, y_a, y_b, lam):
    """Mixup loss"""
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)

def get_ignored_layers(model):
    """Get layers to ignore during pruning (typically final classifier)"""
    ignored_layers = []
    if hasattr(model, 'classifier'):
        if isinstance(model.classifier, nn.Sequential):
            for layer in model.classifier:
                if isinstance(layer, nn.Linear):
                    ignored_layers.append(layer)
        elif isinstance(model.classifier, nn.Linear):
            ignored_layers.append(model.classifier)
    return ignored_layers

def calculate_macs_params(model, example_input):
    """Calculate MACs and parameters using torch_pruning"""
    model.eval()
    target_device = example_input.device
    model_on_device = model.to(target_device)

    with torch.no_grad():
        macs, params = tp.utils.count_ops_and_params(model_on_device, example_input)

    return macs, params

def save_model(model, save_path, example_input_cpu=None):
    """Save model state dict and optionally ONNX"""
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    torch.save(model.state_dict(), save_path)
    print(f"✅ Model saved to {save_path}")

    if example_input_cpu is not None:
        onnx_path = save_path.replace('.pth', '.onnx')
        try:
            model_cpu = model.to('cpu')
            torch.onnx.export(
                model_cpu, example_input_cpu, onnx_path,
                export_params=True, opset_version=13,
                input_names=['input'], output_names=['output'],
                dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}}
            )
            print(f"✅ ONNX model saved to {onnx_path}")
        except Exception as e:
            print(f"Warning: ONNX export failed: {e}")

def evaluate_model(model, data_loader, example_input, criterion, device):
    """Evaluate model and return comprehensive metrics"""
    model.eval()
    model.to(device)

    # Calculate efficiency metrics
    macs, params = calculate_macs_params(model, example_input.to(device))
    model_size_mb = params * 4 / (1024 * 1024)  # Assuming float32

    # Calculate accuracy and loss
    correct = 0
    total = 0
    total_loss = 0.0

    with torch.no_grad():
        for data, target in data_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            loss = criterion(output, target)

            total_loss += loss.item() * data.size(0)
            _, predicted = torch.max(output.data, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()

    avg_loss = total_loss / total if total > 0 else float('nan')
    accuracy = 100.0 * correct / total if total > 0 else 0.0

    return {
        'accuracy': accuracy,
        'loss': avg_loss,
        'macs': macs,
        'params': params,
        'size_mb': model_size_mb
    }

def prune_model(model, strategy_config, sparsity_ratio, example_input, ignored_layers=None):
    """Apply structured pruning to model"""
    if sparsity_ratio == 0.0:
        print("No pruning needed (sparsity = 0.0)")
        return model

    model.eval()
    pruned_model = copy.deepcopy(model)
    pruned_model.to(example_input.device)

    # Calculate initial MACs
    initial_macs, _ = calculate_macs_params(pruned_model, example_input)
    print(f"Initial MACs: {initial_macs / 1e6:.2f}M")

    ignored_layers = ignored_layers or []

    # Create pruner based on strategy
    pruner = strategy_config['pruner'](
        pruned_model,
        example_input,
        importance=strategy_config['importance'],
        iterative_steps=5,
        ch_sparsity=sparsity_ratio,
        root_module_types=[nn.Conv2d],
        ignored_layers=ignored_layers
    )

    print(f"Applying {strategy_config['importance'].__class__.__name__} pruning at {sparsity_ratio:.1%} sparsity...")

    # Apply pruning
    pruner.step()

    # Calculate final MACs
    final_macs, _ = calculate_macs_params(pruned_model, example_input)
    reduction = (initial_macs - final_macs) / initial_macs * 100 if initial_macs > 0 else 0
    print(f"Final MACs: {final_macs / 1e6:.2f}M (Reduction: {reduction:.1f}%)")

    return pruned_model

def train_model(model, train_loader, criterion, optimizer, device, num_epochs,
                val_loader=None, patience=7, log_prefix="", scheduler=None,
                use_mixup=False, mixup_alpha=0.2):
    """Train model with early stopping and advanced techniques"""
    model.to(device)

    best_val_acc = 0.0
    best_val_loss = float('inf')
    epochs_no_improve = 0
    best_model_state = None

    history = {
        'train_loss': [],
        'train_acc': [],
        'val_loss': [],
        'val_acc': []
    }

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        train_loss = 0.0
        train_correct = 0
        train_total = 0

        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)

            # Apply mixup if enabled
            if use_mixup and epoch < num_epochs - 5:  # Disable mixup for last 5 epochs
                data, target_a, target_b, lam = mixup_data(data, target, mixup_alpha)

                optimizer.zero_grad()
                output = model(data)
                loss = mixup_criterion(criterion, output, target_a, target_b, lam)
            else:
                optimizer.zero_grad()
                output = model(data)
                loss = criterion(output, target)

            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            _, predicted = torch.max(output.data, 1)
            train_total += target.size(0)
            if use_mixup and epoch < num_epochs - 5:
                train_correct += (lam * predicted.eq(target_a).sum().item() +
                                (1 - lam) * predicted.eq(target_b).sum().item())
            else:
                train_correct += (predicted == target).sum().item()

        avg_train_loss = train_loss / len(train_loader)
        train_acc = 100.0 * train_correct / train_total

        history['train_loss'].append(avg_train_loss)
        history['train_acc'].append(train_acc)

        log_msg = f"Epoch {epoch+1}/{num_epochs} ({log_prefix}): Train Loss: {avg_train_loss:.4f}, Train Acc: {train_acc:.2f}%"

        # Validation phase
        if val_loader:
            model.eval()
            val_loss = 0.0
            val_correct = 0
            val_total = 0

            with torch.no_grad():
                for data, target in val_loader:
                    data, target = data.to(device), target.to(device)
                    output = model(data)
                    loss = criterion(output, target)

                    val_loss += loss.item()
                    _, predicted = torch.max(output.data, 1)
                    val_total += target.size(0)
                    val_correct += (predicted == target).sum().item()

            avg_val_loss = val_loss / len(val_loader)
            val_acc = 100.0 * val_correct / val_total

            history['val_loss'].append(avg_val_loss)
            history['val_acc'].append(val_acc)

            log_msg += f", Val Loss: {avg_val_loss:.4f}, Val Acc: {val_acc:.2f}%"

            # Learning rate scheduling
            if scheduler:
                if isinstance(scheduler, ReduceLROnPlateau):
                    scheduler.step(avg_val_loss)
                else:
                    scheduler.step()

            # Early stopping based on validation accuracy
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                best_val_loss = avg_val_loss
                epochs_no_improve = 0
                best_model_state = copy.deepcopy(model.state_dict())
                log_msg += " (Best)"
            else:
                epochs_no_improve += 1

            if epochs_no_improve >= patience:
                print(f"{log_msg}")
                print(f"Early stopping triggered after {epoch+1} epochs")
                break
        else:
            history['val_loss'].append(None)
            history['val_acc'].append(None)
            if scheduler and not isinstance(scheduler, ReduceLROnPlateau):
                scheduler.step()

        print(log_msg)

    # Load best model state if available
    if best_model_state is not None:
        model.load_state_dict(best_model_state)
        print(f"Loaded best model state with val accuracy: {best_val_acc:.2f}%")

    return model, history

def save_results_to_files(all_results, output_dir):
    """Save experimental results to JSON and CSV files"""
    os.makedirs(output_dir, exist_ok=True)

    # Save complete results as JSON
    results_json_path = os.path.join(output_dir, 'complete_results.json')
    with open(results_json_path, 'w') as f:
        json.dump(all_results, f, indent=2, default=str)
    print(f"✅ Complete results saved to {results_json_path}")

    # Create summary DataFrame
    summary_data = []
    for strategy, strategy_results in all_results.items():
        for sparsity, metrics in strategy_results.items():
            row = {
                'strategy': strategy,
                'sparsity_ratio': sparsity,
                'accuracy': metrics['accuracy'],
                'loss': metrics['loss'],
                'macs_millions': metrics['macs'] / 1e6,
                'params_millions': metrics['params'] / 1e6,
                'size_mb': metrics['size_mb']
            }
            summary_data.append(row)

    # Save summary as CSV
    summary_df = pd.DataFrame(summary_data)
    summary_csv_path = os.path.join(output_dir, 'summary_results.csv')
    summary_df.to_csv(summary_csv_path, index=False)
    print(f"✅ Summary results saved to {summary_csv_path}")

    return summary_df

def create_results_plots(summary_df, output_dir):
    """Create visualization plots"""
    os.makedirs(output_dir, exist_ok=True)

    strategies = summary_df['strategy'].unique()
    sparsity_levels = sorted(summary_df['sparsity_ratio'].unique())

    # Plot 1: Accuracy vs Sparsity
    plt.figure(figsize=(10, 6))
    for strategy in strategies:
        strategy_data = summary_df[summary_df['strategy'] == strategy].sort_values('sparsity_ratio')
        plt.plot(strategy_data['sparsity_ratio'] * 100, strategy_data['accuracy'],
                'o-', linewidth=2, markersize=8, label=strategy)

    plt.xlabel('Sparsity (%)', fontsize=12)
    plt.ylabel('Accuracy (%)', fontsize=12)
    plt.title('MobileNetV2: Accuracy vs Sparsity', fontsize=14, fontweight='bold')
    plt.legend(fontsize=12)
    plt.grid(True, alpha=0.3)
    plt.tight_layout()

    plot_path = os.path.join(output_dir, 'accuracy_vs_sparsity.png')
    plt.savefig(plot_path, dpi=300, bbox_inches='tight')
    plt.close()
    print(f"✅ Accuracy plot saved to {plot_path}")

    # Plot 2: Efficiency frontier (Accuracy vs MACs)
    plt.figure(figsize=(10, 6))
    for strategy in strategies:
        strategy_data = summary_df[summary_df['strategy'] == strategy].sort_values('sparsity_ratio')
        plt.scatter(strategy_data['macs_millions'], strategy_data['accuracy'],
                   s=100, label=strategy, alpha=0.8)
        plt.plot(strategy_data['macs_millions'], strategy_data['accuracy'],
                '--', alpha=0.6)

    plt.xlabel('MACs (Millions)', fontsize=12)
    plt.ylabel('Accuracy (%)', fontsize=12)
    plt.title('MobileNetV2: Efficiency Frontier (Accuracy vs MACs)', fontsize=14, fontweight='bold')
    plt.legend(fontsize=12)
    plt.grid(True, alpha=0.3)
    plt.tight_layout()

    plot_path = os.path.join(output_dir, 'efficiency_frontier.png')
    plt.savefig(plot_path, dpi=300, bbox_inches='tight')
    plt.close()
    print(f"✅ Efficiency frontier plot saved to {plot_path}")

def print_results_table(summary_df):
    """Print formatted results table"""
    print("\n" + "="*80)
    print("EXPERIMENTAL RESULTS SUMMARY")
    print("="*80)

    # Baseline results
    baseline_results = summary_df[summary_df['sparsity_ratio'] == 0.0].iloc[0]
    print(f"\nBaseline Performance:")
    print(f"  Accuracy: {baseline_results['accuracy']:.2f}%")
    print(f"  MACs: {baseline_results['macs_millions']:.2f}M")
    print(f"  Parameters: {baseline_results['params_millions']:.2f}M")
    print(f"  Model Size: {baseline_results['size_mb']:.2f}MB")

    # Strategy comparison at 50% sparsity
    print(f"\nStrategy Comparison at 50% Sparsity:")
    sparsity_50_data = summary_df[summary_df['sparsity_ratio'] == 0.5]
    for _, row in sparsity_50_data.iterrows():
        degradation = baseline_results['accuracy'] - row['accuracy']
        retention = (row['accuracy'] / baseline_results['accuracy']) * 100
        print(f"  {row['strategy']:>12}: {row['accuracy']:>6.2f}% accuracy ({degradation:>+5.2f}%, {retention:>5.1f}% retention)")

    # Complete results table
    print(f"\nComplete Results Table:")
    print("-" * 80)
    print(f"{'Strategy':<12} {'Sparsity':<8} {'Accuracy':<8} {'MACs(M)':<8} {'Params(M)':<9} {'Size(MB)':<8}")
    print("-" * 80)

    for _, row in summary_df.sort_values(['strategy', 'sparsity_ratio']).iterrows():
        print(f"{row['strategy']:<12} {row['sparsity_ratio']*100:>6.0f}% "
              f"{row['accuracy']:>7.2f}% {row['macs_millions']:>7.2f} "
              f"{row['params_millions']:>8.2f} {row['size_mb']:>7.2f}")

def main():
    """Main experimental workflow"""
    print("Starting Enhanced MobileNetV2 CIFAR-10 Pruning Experiments")
    print("=" * 60)

    # Configuration
    config = {
        'strategies': {
            'BNScale': {
                'pruner': tp.pruner.BNScalePruner,
                'importance': tp.importance.BNScaleImportance()
            },
            'MagnitudeL2': {
                'pruner': tp.pruner.MagnitudePruner,
                'importance': tp.importance.MagnitudeImportance(p=2)
            },
            'Random': {
                'pruner': tp.pruner.MagnitudePruner,
                'importance': tp.importance.RandomImportance()
            },
        },
        'pruning_ratios': [0.0, 0.2, 0.5, 0.7],
        'num_classes': 10,
        'batch_size': 128,
        'learning_rate': 0.001,  # Higher initial learning rate
        'learning_rate_finetune': 0.001,  # Lower for fine-tuning
        'epochs': 1000,  # More epochs for better training
        'patience': 20,  # More patience
        'weight_decay': 1e-4,  # Add weight decay
        'use_mixup': True,  # Enable mixup
        'mixup_alpha': 0.2,
        'output_dir': './results_mobilenetv2_cifar10_enhanced',
        'models_dir': './models_mobilenetv2_enhanced'
    }

    # Create output directories
    os.makedirs(config['output_dir'], exist_ok=True)
    os.makedirs(config['models_dir'], exist_ok=True)

    # Load data with augmentation
    print("Loading CIFAR-10 dataset with data augmentation...")
    train_loader, val_loader, test_loader = get_data_loaders(
        batch_size=config['batch_size']
    )

    # Prepare inputs and criterion
    example_input_cpu = torch.randn(1, 3, 32, 32)
    example_input_device = example_input_cpu.to(DEVICE)
    criterion = nn.CrossEntropyLoss()

    # Get baseline model and train it
    print("\nCreating and training baseline model...")
    model = get_mobilenetv2_model(num_classes=config['num_classes'], use_pretrained=True)
    model.to(DEVICE)

    # Train baseline model with enhanced settings
    optimizer = optim.Adam(model.parameters(), lr=config['learning_rate'],
                          weight_decay=config['weight_decay'])

    # Use cosine annealing scheduler for better convergence
    scheduler = CosineAnnealingLR(optimizer, T_max=config['epochs'], eta_min=1e-6)

    trained_model, training_history = train_model(
        model, train_loader, criterion, optimizer, DEVICE,
        config['epochs'], val_loader, config['patience'], "Baseline Training",
        scheduler=scheduler, use_mixup=config['use_mixup'],
        mixup_alpha=config['mixup_alpha']
    )

    # Save baseline model
    baseline_model_path = os.path.join(config['models_dir'], 'baseline_model.pth')
    save_model(trained_model, baseline_model_path, example_input_cpu)

    # Plot training history
    plt.figure(figsize=(12, 5))

    plt.subplot(1, 2, 1)
    plt.plot(training_history['train_loss'], label='Train')
    plt.plot(training_history['val_loss'], label='Validation')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Training History - Loss')
    plt.legend()
    plt.grid(True, alpha=0.3)

    plt.subplot(1, 2, 2)
    plt.plot(training_history['train_acc'], label='Train')
    plt.plot(training_history['val_acc'], label='Validation')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy (%)')
    plt.title('Training History - Accuracy')
    plt.legend()
    plt.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig(os.path.join(config['output_dir'], 'training_history.png'))
    plt.close()

    # Evaluate baseline
    print("\nEvaluating baseline model...")
    baseline_metrics = evaluate_model(trained_model, test_loader, example_input_device, criterion, DEVICE)
    print(f"Baseline Results: Accuracy={baseline_metrics['accuracy']:.2f}%, "
          f"MACs={baseline_metrics['macs']/1e6:.2f}M, "
          f"Params={baseline_metrics['params']/1e6:.2f}M")

    # Initialize results storage
    all_results = {}
    for strategy_name in config['strategies'].keys():
        all_results[strategy_name] = {0.0: baseline_metrics}

    # Get ignored layers
    ignored_layers = get_ignored_layers(trained_model)

    # Run pruning experiments
    print("\nStarting pruning experiments...")
    for strategy_name, strategy_config in config['strategies'].items():
        print(f"\n--- Strategy: {strategy_name} ---")

        for sparsity_ratio in config['pruning_ratios']:
            if sparsity_ratio == 0.0:
                continue  # Skip baseline (already done)

            print(f"\nProcessing {strategy_name} at {sparsity_ratio:.1%} sparsity...")

            # Load fresh copy of trained baseline
            model_copy = get_mobilenetv2_model(num_classes=config['num_classes'], use_pretrained=False)
            model_copy.load_state_dict(torch.load(baseline_model_path, map_location=DEVICE))
            model_copy.to(DEVICE)

            # Apply pruning
            pruned_model = prune_model(
                model_copy, strategy_config, sparsity_ratio,
                example_input_device, ignored_layers
            )

            # Fine-tune pruned model with lower learning rate
            print("Fine-tuning pruned model...")
            optimizer_ft = optim.Adam(pruned_model.parameters(),
                                     lr=config['learning_rate_finetune'],
                                     weight_decay=config['weight_decay'])

            scheduler_ft = CosineAnnealingLR(optimizer_ft, T_max=config['epochs'], eta_min=1e-7)

            fine_tuned_model, ft_history = train_model(
                pruned_model, train_loader, criterion, optimizer_ft, DEVICE,
                config['epochs'], val_loader, config['patience'],
                f"{strategy_name}-{sparsity_ratio:.1%}",
                scheduler=scheduler_ft, use_mixup=False  # No mixup for fine-tuning
            )

            # Evaluate fine-tuned model
            final_metrics = evaluate_model(fine_tuned_model, test_loader, example_input_device, criterion, DEVICE)
            all_results[strategy_name][sparsity_ratio] = final_metrics

            print(f"Results: Accuracy={final_metrics['accuracy']:.2f}%, "
                  f"MACs={final_metrics['macs']/1e6:.2f}M")

            # Save fine-tuned model
            model_filename = f"{strategy_name.lower()}_sparsity_{sparsity_ratio:.1f}.pth"
            model_path = os.path.join(config['models_dir'], model_filename)
            save_model(fine_tuned_model, model_path, example_input_cpu)

    # Save and analyze results
    print("\nSaving results...")
    summary_df = save_results_to_files(all_results, config['output_dir'])

    # Create plots
    print("Creating plots...")
    create_results_plots(summary_df, config['output_dir'])

    # Print summary
    print_results_table(summary_df)

    print(f"\n🎉 All experiments completed!")
    print(f"📁 Results saved to: {os.path.abspath(config['output_dir'])}")
    print(f"📁 Models saved to: {os.path.abspath(config['models_dir'])}")

if __name__ == "__main__":
    main()


Using device: cuda
Starting Enhanced MobileNetV2 CIFAR-10 Pruning Experiments
Loading CIFAR-10 dataset with data augmentation...
Loading CIFAR-10 from: /home/muis/thesis/github-repo/master-thesis/cnn/mobile_net_v2/data
DataLoaders created - Train: 45000, Val: 5000, Test: 10000

Creating and training baseline model...
✅ Created MobileNetV2 without pretrained weights
✅ Adapted classifier for 10 classes
Epoch 1/250 (Baseline Training): Train Loss: 2.2837, Train Acc: 17.79%, Val Loss: 2.0143, Val Acc: 26.04% (Best)
Epoch 2/250 (Baseline Training): Train Loss: 1.9982, Train Acc: 26.11%, Val Loss: 1.7804, Val Acc: 33.08% (Best)
Epoch 3/250 (Baseline Training): Train Loss: 1.8710, Train Acc: 32.05%, Val Loss: 1.6899, Val Acc: 37.88% (Best)
Epoch 4/250 (Baseline Training): Train Loss: 1.7915, Train Acc: 35.89%, Val Loss: 1.5276, Val Acc: 43.54% (Best)
Epoch 5/250 (Baseline Training): Train Loss: 1.7253, Train Acc: 38.98%, Val Loss: 1.5453, Val Acc: 45.70% (Best)
Epoch 6/250 (Baseline Training)



Final MACs: 6.00M (Reduction: 8.0%)
Fine-tuning pruned model...
Epoch 1/250 (BNScale-20.0%): Train Loss: 0.3536, Train Acc: 88.23%, Val Loss: 0.4270, Val Acc: 85.46% (Best)
Epoch 2/250 (BNScale-20.0%): Train Loss: 0.3145, Train Acc: 89.42%, Val Loss: 0.4210, Val Acc: 85.62% (Best)
Epoch 3/250 (BNScale-20.0%): Train Loss: 0.3053, Train Acc: 89.57%, Val Loss: 0.4116, Val Acc: 85.94% (Best)
Epoch 4/250 (BNScale-20.0%): Train Loss: 0.2946, Train Acc: 89.83%, Val Loss: 0.4068, Val Acc: 86.62% (Best)
Epoch 5/250 (BNScale-20.0%): Train Loss: 0.2896, Train Acc: 90.04%, Val Loss: 0.4106, Val Acc: 86.44%
Epoch 6/250 (BNScale-20.0%): Train Loss: 0.2822, Train Acc: 90.47%, Val Loss: 0.4060, Val Acc: 86.24%
Epoch 7/250 (BNScale-20.0%): Train Loss: 0.2782, Train Acc: 90.56%, Val Loss: 0.4085, Val Acc: 86.28%
Epoch 8/250 (BNScale-20.0%): Train Loss: 0.2704, Train Acc: 90.84%, Val Loss: 0.4144, Val Acc: 86.08%
Epoch 9/250 (BNScale-20.0%): Train Loss: 0.2668, Train Acc: 90.81%, Val Loss: 0.4095, Val Ac