<a href="https://colab.research.google.com/github/tamara-kostova/MSc_Thesis_Neuroimaging/blob/master/04_models_training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [22]:
import os
import json
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torchvision.models as models
import torchvision.transforms as transforms
from torch.optim.lr_scheduler import ReduceLROnPlateau, CosineAnnealingLR
import numpy as np
import pandas as pd
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, confusion_matrix, classification_report
)
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import warnings
from pathlib import Path
from datetime import datetime
import pickle

warnings.filterwarnings('ignore')

In [23]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [24]:
class Config:
    """Global configuration for all models"""

    # Paths
    BASE_DIR = "/content/drive/MyDrive/MSc_Thesis_Neuroimaging"
    SPLIT_DIR = f"{BASE_DIR}/data/split"
    RESULTS_DIR = f"{BASE_DIR}/results/benchmarks"
    CHECKPOINT_DIR = f"{BASE_DIR}/checkpoints"

    # Training parameters
    BATCH_SIZE = 32
    NUM_EPOCHS = 5
    LEARNING_RATE = 1e-4
    WEIGHT_DECAY = 1e-5

    # Device
    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Random seed
    SEED = 42

    # Datasets to train on
    DATASETS = [
        "MRI_tumor_binary_norm",
        "MRI_tumor_multiclass_norm",
        "MRI_ms_norm",
        "CT_stroke_binary_norm"
    ]

    # Models to benchmark
    MODELS = [
        "resnet50",
        "resnet101",
        "vgg16",
        "densenet121",
        "densenet169",
        "inception_v3",
        "mobilenet_v2",
        "efficientnet_b0",
        "efficientnet_b4",
    ]

    # Early stopping
    PATIENCE = 10
    MIN_DELTA = 1e-3

    def __init__(self):
        os.makedirs(self.RESULTS_DIR, exist_ok=True)
        os.makedirs(self.CHECKPOINT_DIR, exist_ok=True)

In [25]:
class MedicalImageDataset(Dataset):
    """PyTorch Dataset for medical images with stratified splits"""

    def __init__(self, split_dir, split_type="train", transform=None):
        """
        Args:
            split_dir: path to split directory
            split_type: "train", "val", or "test"
            transform: image transformations
        """
        self.split_dir = split_dir
        self.split_type = split_type
        self.transform = transform

        self.samples = []
        self.class_to_idx = {}
        self._build_samples()

    def _build_samples(self):
        """Build list of (path, label) tuples"""
        split_path = os.path.join(self.split_dir, self.split_type)

        idx = 0
        for class_name in sorted(os.listdir(split_path)):
            class_path = os.path.join(split_path, class_name)

            if not os.path.isdir(class_path):
                continue

            if class_name not in self.class_to_idx:
                self.class_to_idx[class_name] = idx
                idx += 1

            label = self.class_to_idx[class_name]

            for img_name in os.listdir(class_path):
                if img_name.lower().endswith(('.png', '.jpg', '.jpeg')):
                    img_path = os.path.join(class_path, img_name)
                    self.samples.append((img_path, label))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        from PIL import Image

        img_path, label = self.samples[idx]

        # Load as grayscale and convert to RGB (3 channels for pretrained models)
        image = Image.open(img_path).convert('L')
        image_rgb = Image.new('RGB', image.size)
        image_rgb.paste(image)

        if self.transform:
            image_rgb = self.transform(image_rgb)

        return image_rgb, label

In [26]:
def get_data_loaders(split_dir, batch_size=32, num_workers=2):
    """Create train/val/test DataLoaders"""

    # ImageNet normalization
    normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )

    # Training transforms (with augmentation)
    train_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(10),
        transforms.ColorJitter(brightness=0.2, contrast=0.2),
        transforms.RandomAffine(degrees=5, scale=(0.9, 1.1)),
        transforms.ToTensor(),
        normalize,
    ])

    # Val/Test transforms (no augmentation)
    test_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        normalize,
    ])

    # Create datasets
    train_ds = MedicalImageDataset(split_dir, "train", train_transform)
    val_ds = MedicalImageDataset(split_dir, "val", test_transform)
    test_ds = MedicalImageDataset(split_dir, "test", test_transform)

    # Create loaders
    loaders = {
        'train': DataLoader(train_ds, batch_size=batch_size, shuffle=True,
                           num_workers=num_workers, pin_memory=True),
        'val': DataLoader(val_ds, batch_size=batch_size, shuffle=False,
                         num_workers=num_workers, pin_memory=True),
        'test': DataLoader(test_ds, batch_size=batch_size, shuffle=False,
                          num_workers=num_workers, pin_memory=True),
    }

    return loaders, train_ds.class_to_idx

In [27]:
def create_model(model_name, num_classes, pretrained=True):
    """Create model with specified architecture"""

    if model_name == "resnet50":
        model = models.resnet50(pretrained=pretrained)
        model.fc = nn.Linear(model.fc.in_features, num_classes)

    elif model_name == "resnet101":
        model = models.resnet101(pretrained=pretrained)
        model.fc = nn.Linear(model.fc.in_features, num_classes)

    elif model_name == "vgg16":
        model = models.vgg16(pretrained=pretrained)
        model.classifier[6] = nn.Linear(model.classifier[6].in_features, num_classes)

    elif model_name == "vgg19":
        model = models.vgg19(pretrained=pretrained)
        model.classifier[6] = nn.Linear(model.classifier[6].in_features, num_classes)

    elif model_name == "densenet121":
        model = models.densenet121(pretrained=pretrained)
        model.classifier = nn.Linear(model.classifier.in_features, num_classes)

    elif model_name == "densenet169":
        model = models.densenet169(pretrained=pretrained)
        model.classifier = nn.Linear(model.classifier.in_features, num_classes)

    elif model_name == "inception_v3":
        model = models.inception_v3(pretrained=pretrained)
        model.fc = nn.Linear(model.fc.in_features, num_classes)
        model.AuxLogits.fc = nn.Linear(model.AuxLogits.fc.in_features, num_classes)

    elif model_name == "mobilenet_v2":
        model = models.mobilenet_v2(pretrained=pretrained)
        model.classifier[1] = nn.Linear(model.classifier[1].in_features, num_classes)

    elif model_name == "efficientnet_b0":
        model = models.efficientnet_b0(pretrained=pretrained)
        model.classifier[1] = nn.Linear(model.classifier[1].in_features, num_classes)

    elif model_name == "efficientnet_b4":
        model = models.efficientnet_b4(pretrained=pretrained)
        model.classifier[1] = nn.Linear(model.classifier[1].in_features, num_classes)

    else:
        raise ValueError(f"Unknown model: {model_name}")

    return model

In [28]:
class EarlyStopping:
    """Early stopping to prevent overfitting"""

    def __init__(self, patience=10, min_delta=0.0, restore_best=True):
        self.patience = patience
        self.min_delta = min_delta
        self.restore_best = restore_best
        self.counter = 0
        self.best_loss = None
        self.best_epoch = None
        self.best_state = None

    def __call__(self, val_loss, model):
        if self.best_loss is None:
            self.best_loss = val_loss
            self.best_state = model.state_dict().copy()
            self.best_epoch = 0
        elif val_loss < self.best_loss - self.min_delta:
            self.best_loss = val_loss
            self.counter = 0
            self.best_state = model.state_dict().copy()
            self.best_epoch = 0
        else:
            self.counter += 1
            self.best_epoch += 1

        return self.counter >= self.patience

    def restore_best_weights(self, model):
        if self.best_state is not None and self.restore_best:
            model.load_state_dict(self.best_state)

In [29]:
def train_epoch(model, loader, criterion, optimizer, device):
    """Train for one epoch"""
    model.train()

    total_loss = 0.0
    all_preds = []
    all_labels = []

    pbar = tqdm(loader, desc="Training", leave=False)
    for images, labels in pbar:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()

        total_loss += loss.item() * images.size(0)

        with torch.no_grad():
            preds = outputs.argmax(dim=1).cpu().numpy()  # [batch_size]
            all_preds.append(preds)
            all_labels.append(labels.cpu().numpy())  # [batch_size]

        pbar.update(1)

    avg_loss = total_loss / len(loader.dataset)

    all_preds = np.concatenate(all_preds, axis=0)  # [total_samples]
    all_labels = np.concatenate(all_labels, axis=0)  # [total_samples]

    avg_acc = accuracy_score(all_labels, all_preds)

    return avg_loss, avg_acc

In [30]:
def validate_epoch(model, loader, criterion, device):
    """Validate for one epoch"""
    model.eval()

    total_loss = 0.0
    all_preds = []
    all_labels = []
    all_probs = []

    with torch.no_grad():
        pbar = tqdm(loader, desc="Validating", leave=False)
        for images, labels in pbar:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            total_loss += loss.item() * images.size(0)

            preds = outputs.argmax(dim=1).cpu().numpy()  # [batch_size]
            probs = torch.softmax(outputs, dim=1).cpu().numpy()  # [batch_size, num_classes]

            all_preds.append(preds)
            all_labels.append(labels.cpu().numpy())  # [batch_size]
            all_probs.append(probs)

            pbar.update(1)

    avg_loss = total_loss / len(loader.dataset)

    all_preds = np.concatenate(all_preds, axis=0)  # [total_samples]
    all_labels = np.concatenate(all_labels, axis=0)  # [total_samples]
    all_probs = np.concatenate(all_probs, axis=0)  # [total_samples, num_classes]

    avg_acc = accuracy_score(all_labels, all_preds)

    # Compute AUC
    try:
        if len(np.unique(all_labels)) == 2:
            # Binary: use positive class probabilities (class 1)
            avg_auc = roc_auc_score(all_labels, all_probs[:, 1])
        else:
            # Multiclass: one-vs-rest
            avg_auc = roc_auc_score(all_labels, all_probs, multi_class='ovr')
    except Exception as e:
        print(f"  Warning: Could not compute AUC: {e}")
        avg_auc = 0.0

    return avg_loss, avg_acc, avg_auc

In [31]:
def train_model(model, loaders, criterion, optimizer, scheduler, device,
                num_epochs, model_name, dataset_name, checkpoint_dir):
    """Train model with early stopping"""

    checkpoint_path = os.path.join(checkpoint_dir, f"{model_name}_{dataset_name}.pt")

    early_stop = EarlyStopping(patience=Config.PATIENCE, min_delta=Config.MIN_DELTA)

    history = {
        'train_loss': [], 'train_acc': [],
        'val_loss': [], 'val_acc': [], 'val_auc': []
    }

    print(f"\n{'='*70}")
    print(f"Training {model_name} on {dataset_name}")
    print(f"{'='*70}")

    for epoch in range(num_epochs):
        train_loss, train_acc = train_epoch(
            model, loaders['train'], criterion, optimizer, device
        )

        val_loss, val_acc, val_auc = validate_epoch(
            model, loaders['val'], criterion, device
        )

        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)
        history['val_auc'].append(val_auc)

        print(f"Epoch {epoch+1:3d}/{num_epochs} | "
              f"TrLoss: {train_loss:.4f} | TrAcc: {train_acc:.4f} | "
              f"VaLoss: {val_loss:.4f} | VaAcc: {val_acc:.4f} | VaAUC: {val_auc:.4f}")

        # Learning rate scheduling
        if scheduler is not None:
            scheduler.step(val_loss)

        # Early stopping
        if early_stop(val_loss, model):
            print(f"Early stopping at epoch {epoch+1}")
            early_stop.restore_best_weights(model)
            break

        # Save checkpoint
        if epoch % 5 == 0:
            torch.save(model.state_dict(), checkpoint_path)

    return history, checkpoint_path

In [32]:
def evaluate_model(model, loader, device):
    """Full evaluation metrics"""
    model.eval()

    all_preds = []
    all_labels = []
    all_probs = []

    with torch.no_grad():
        for images, labels in tqdm(loader, desc="Evaluating", leave=False):
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)

            preds = outputs.argmax(dim=1).cpu().numpy()
            probs = torch.softmax(outputs, dim=1).cpu().numpy()

            all_preds.append(preds)
            all_labels.append(labels.cpu().numpy())
            all_probs.append(probs)

    all_preds = np.concatenate(all_preds, axis=0)
    all_labels = np.concatenate(all_labels, axis=0)
    all_probs = np.concatenate(all_probs, axis=0)

    # Compute metrics
    metrics = {
        'accuracy': accuracy_score(all_labels, all_preds),
        'precision': precision_score(all_labels, all_preds, average='weighted', zero_division=0),
        'recall': recall_score(all_labels, all_preds, average='weighted', zero_division=0),
        'f1': f1_score(all_labels, all_preds, average='weighted', zero_division=0),
    }

    # AUC
    try:
        if len(np.unique(all_labels)) == 2:
            # Binary: use positive class probabilities (class 1)
            metrics['auc'] = roc_auc_score(all_labels, all_probs[:, 1])
        else:
            # Multiclass: one-vs-rest
            metrics['auc'] = roc_auc_score(all_labels, all_probs, multi_class='ovr')
    except Exception as e:
        print(f"  Warning: Could not compute AUC: {e}")
        metrics['auc'] = 0.0

    return metrics, all_preds, all_labels

In [33]:
def save_results(results, output_path):
    """Save results to JSON"""
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w') as f:
        json.dump(results, f, indent=2, default=str)

In [34]:
config = Config()
torch.manual_seed(config.SEED)
np.random.seed(config.SEED)
all_results = {}

In [None]:
for dataset_name in config.DATASETS:
        dataset_path = os.path.join(config.SPLIT_DIR, dataset_name)

        if not os.path.exists(dataset_path):
            print(f"Dataset not found: {dataset_path}")
            continue

        print(f"\n\n{'#'*70}")
        print(f"# DATASET: {dataset_name}")
        print(f"{'#'*70}")

        # Load data
        loaders, class_to_idx = get_data_loaders(
            dataset_path,
            batch_size=config.BATCH_SIZE,
            num_workers=2
        )

        num_classes = len(class_to_idx)
        print(f"Number of classes: {num_classes}")
        print(f"Classes: {list(class_to_idx.keys())}")

        dataset_results = {}

        # Iterate over models
        for model_name in config.MODELS:
            try:
                print(f"\n--- Training {model_name} ---")

                # Create model
                model = create_model(model_name, num_classes, pretrained=True)
                model = model.to(config.DEVICE)

                # Count parameters
                total_params = sum(p.numel() for p in model.parameters())
                trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
                print(f"Parameters: {total_params/1e6:.2f}M (trainable: {trainable_params/1e6:.2f}M)")

                # Loss function
                criterion = nn.CrossEntropyLoss()

                # Optimizer
                optimizer = optim.AdamW(
                    model.parameters(),
                    lr=config.LEARNING_RATE,
                    weight_decay=config.WEIGHT_DECAY
                )

                # Scheduler
                scheduler = ReduceLROnPlateau(
                    optimizer, mode='min', factor=0.5, patience=5
                )

                # Train
                history, checkpoint_path = train_model(
                    model, loaders, criterion, optimizer, scheduler, config.DEVICE,
                    config.NUM_EPOCHS, model_name, dataset_name, config.CHECKPOINT_DIR
                )

                # Load best model
                model.load_state_dict(torch.load(checkpoint_path, map_location=config.DEVICE))

                # Evaluate
                test_metrics, _, _ = evaluate_model(model, loaders['test'], config.DEVICE)

                print(f"\nTest Results:")
                for metric, value in test_metrics.items():
                    print(f"  {metric}: {value:.4f}")

                dataset_results[model_name] = {
                    'test_metrics': test_metrics,
                    'history': history,
                    'params': trainable_params,
                }

            except Exception as e:
                print(f"Error training {model_name}: {str(e)}")
                dataset_results[model_name] = {'error': str(e)}

        all_results[dataset_name] = dataset_results



######################################################################
# DATASET: MRI_tumor_binary_norm
######################################################################
Number of classes: 2
Classes: ['normal', 'tumor']

--- Training resnet50 ---
Parameters: 23.51M (trainable: 23.51M)

Training resnet50 on MRI_tumor_binary_norm




Epoch   1/5 | TrLoss: 0.1662 | TrAcc: 0.9343 | VaLoss: 0.0391 | VaAcc: 0.9867 | VaAUC: 0.9988




Epoch   2/5 | TrLoss: 0.0568 | TrAcc: 0.9829 | VaLoss: 0.0074 | VaAcc: 0.9978 | VaAUC: 1.0000




Epoch   3/5 | TrLoss: 0.0324 | TrAcc: 0.9886 | VaLoss: 0.0083 | VaAcc: 0.9978 | VaAUC: 1.0000




Epoch   4/5 | TrLoss: 0.0073 | TrAcc: 0.9971 | VaLoss: 0.0112 | VaAcc: 0.9978 | VaAUC: 0.9999




Epoch   5/5 | TrLoss: 0.0176 | TrAcc: 0.9952 | VaLoss: 0.0097 | VaAcc: 0.9978 | VaAUC: 1.0000





Test Results:
  accuracy: 0.9822
  precision: 0.9824
  recall: 0.9822
  f1: 0.9822
  auc: 0.9991

--- Training resnet101 ---
Parameters: 42.50M (trainable: 42.50M)

Training resnet101 on MRI_tumor_binary_norm




Epoch   1/5 | TrLoss: 0.1481 | TrAcc: 0.9390 | VaLoss: 0.0574 | VaAcc: 0.9867 | VaAUC: 0.9963




Epoch   2/5 | TrLoss: 0.0555 | TrAcc: 0.9857 | VaLoss: 0.0809 | VaAcc: 0.9822 | VaAUC: 0.9980




Epoch   3/5 | TrLoss: 0.0465 | TrAcc: 0.9852 | VaLoss: 0.1446 | VaAcc: 0.9844 | VaAUC: 0.9929




Epoch   4/5 | TrLoss: 0.0312 | TrAcc: 0.9919 | VaLoss: 0.0067 | VaAcc: 0.9978 | VaAUC: 1.0000




Epoch   5/5 | TrLoss: 0.0256 | TrAcc: 0.9924 | VaLoss: 0.0136 | VaAcc: 0.9933 | VaAUC: 0.9999





Test Results:
  accuracy: 0.9956
  precision: 0.9956
  recall: 0.9956
  f1: 0.9956
  auc: 0.9993

--- Training vgg16 ---
Parameters: 134.27M (trainable: 134.27M)

Training vgg16 on MRI_tumor_binary_norm




Epoch   1/5 | TrLoss: 0.3378 | TrAcc: 0.8638 | VaLoss: 0.0917 | VaAcc: 0.9689 | VaAUC: 0.9964




Epoch   2/5 | TrLoss: 0.1178 | TrAcc: 0.9719 | VaLoss: 0.0800 | VaAcc: 0.9822 | VaAUC: 0.9986




Epoch   3/5 | TrLoss: 0.0662 | TrAcc: 0.9838 | VaLoss: 0.1331 | VaAcc: 0.9844 | VaAUC: 0.9972




Epoch   4/5 | TrLoss: 0.0621 | TrAcc: 0.9829 | VaLoss: 0.0318 | VaAcc: 0.9867 | VaAUC: 0.9999




Epoch   5/5 | TrLoss: 0.0440 | TrAcc: 0.9895 | VaLoss: 0.1014 | VaAcc: 0.9844 | VaAUC: 0.9995





Test Results:
  accuracy: 0.9800
  precision: 0.9801
  recall: 0.9800
  f1: 0.9800
  auc: 0.9991

--- Training densenet121 ---
Parameters: 6.96M (trainable: 6.96M)

Training densenet121 on MRI_tumor_binary_norm




Epoch   1/5 | TrLoss: 0.2146 | TrAcc: 0.9143 | VaLoss: 0.0477 | VaAcc: 0.9867 | VaAUC: 0.9992




Epoch   2/5 | TrLoss: 0.0423 | TrAcc: 0.9876 | VaLoss: 0.0203 | VaAcc: 0.9978 | VaAUC: 0.9996




Epoch   3/5 | TrLoss: 0.0245 | TrAcc: 0.9914 | VaLoss: 0.0143 | VaAcc: 0.9956 | VaAUC: 1.0000




Epoch   4/5 | TrLoss: 0.0173 | TrAcc: 0.9943 | VaLoss: 0.0120 | VaAcc: 0.9978 | VaAUC: 0.9998




Epoch   5/5 | TrLoss: 0.0117 | TrAcc: 0.9971 | VaLoss: 0.0045 | VaAcc: 0.9978 | VaAUC: 1.0000





Test Results:
  accuracy: 0.9778
  precision: 0.9779
  recall: 0.9778
  f1: 0.9778
  auc: 0.9992

--- Training densenet169 ---
Parameters: 12.49M (trainable: 12.49M)

Training densenet169 on MRI_tumor_binary_norm




Epoch   1/5 | TrLoss: 0.1632 | TrAcc: 0.9414 | VaLoss: 0.0247 | VaAcc: 0.9911 | VaAUC: 0.9998




Epoch   2/5 | TrLoss: 0.0447 | TrAcc: 0.9871 | VaLoss: 0.0191 | VaAcc: 0.9933 | VaAUC: 0.9998




Epoch   3/5 | TrLoss: 0.0118 | TrAcc: 0.9967 | VaLoss: 0.0222 | VaAcc: 0.9911 | VaAUC: 0.9998




Epoch   4/5 | TrLoss: 0.0302 | TrAcc: 0.9910 | VaLoss: 0.0095 | VaAcc: 0.9956 | VaAUC: 1.0000




Epoch   5/5 | TrLoss: 0.0116 | TrAcc: 0.9957 | VaLoss: 0.0232 | VaAcc: 0.9889 | VaAUC: 0.9997





Test Results:
  accuracy: 0.9822
  precision: 0.9823
  recall: 0.9822
  f1: 0.9822
  auc: 0.9972

--- Training inception_v3 ---
Parameters: 24.35M (trainable: 24.35M)

Training inception_v3 on MRI_tumor_binary_norm




Error training inception_v3: Calculated padded input size per channel: (3 x 3). Kernel size: (5 x 5). Kernel size can't be greater than actual input size

--- Training mobilenet_v2 ---
Parameters: 2.23M (trainable: 2.23M)

Training mobilenet_v2 on MRI_tumor_binary_norm




Epoch   1/5 | TrLoss: 0.2277 | TrAcc: 0.9057 | VaLoss: 0.0692 | VaAcc: 0.9800 | VaAUC: 0.9975




Epoch   2/5 | TrLoss: 0.0650 | TrAcc: 0.9800 | VaLoss: 0.0450 | VaAcc: 0.9844 | VaAUC: 0.9990




Epoch   3/5 | TrLoss: 0.0316 | TrAcc: 0.9890 | VaLoss: 0.0513 | VaAcc: 0.9844 | VaAUC: 0.9981




Epoch   4/5 | TrLoss: 0.0285 | TrAcc: 0.9929 | VaLoss: 0.0353 | VaAcc: 0.9889 | VaAUC: 0.9997




Epoch   5/5 | TrLoss: 0.0269 | TrAcc: 0.9933 | VaLoss: 0.0373 | VaAcc: 0.9889 | VaAUC: 0.9995





Test Results:
  accuracy: 0.9867
  precision: 0.9867
  recall: 0.9867
  f1: 0.9867
  auc: 0.9986

--- Training efficientnet_b0 ---
Parameters: 4.01M (trainable: 4.01M)

Training efficientnet_b0 on MRI_tumor_binary_norm


Training:  42%|████▏     | 28/66 [00:04<00:07,  5.37it/s]

In [None]:
results_path = os.path.join(config.RESULTS_DIR, f"benchmark_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json")
save_results(all_results, results_path)

    # Print summary
print(f"\n\n{'#'*70}")
print(f"# SUMMARY")
print(f"{'#'*70}")

summary_df = []
for dataset_name, models in all_results.items():
    for model_name, results in models.items():
        if 'test_metrics' in results:
            row = {
                'dataset': dataset_name,
                'model': model_name,
                'accuracy': results['test_metrics']['accuracy'],
                'f1': results['test_metrics']['f1'],
                'auc': results['test_metrics']['auc'],
            }
            summary_df.append(row)

summary_df = pd.DataFrame(summary_df)
summary_df = summary_df.sort_values('accuracy', ascending=False)

print("\nTop Results (by Accuracy):")
print(summary_df.head(10).to_string(index=False))

# Save summary
summary_path = os.path.join(config.RESULTS_DIR, f"summary_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv")
summary_df.to_csv(summary_path, index=False)

print(f"\nResults saved to: {results_path}")
print(f"Summary saved to: {summary_path}")