In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively walk the Kaggle input mount and print the full path of every
# file found, so the dataset layout is visible in the cell output.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!pip install thop scikit-learn torch torchvision seaborn matplotlib numpy pillow -q

import os
import random
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    classification_report, confusion_matrix, accuracy_score,
    f1_score, precision_score, recall_score,
    precision_recall_curve, auc
)
from sklearn.preprocessing import label_binarize
from PIL import Image
import time
import warnings
warnings.filterwarnings('ignore')

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from thop import profile


def set_seed(seed):
    """Seed every RNG used in this file and force deterministic cuDNN kernels.

    Seeds Python's ``random``, NumPy, and PyTorch (CPU plus all CUDA devices),
    then disables cuDNN autotuning and enables its deterministic mode.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Autotuning (benchmark) may pick different kernels between runs, so it is
    # switched off together with enabling deterministic mode.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU; the
# training and evaluation functions below move tensors to `device`.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")


def load_benchmark_dataset_fixed(base_path=None):
    """Collect image paths and integer labels for the benchmark imaging dataset.

    The dataset is expected to be laid out as
    ``<base_path>/<class folder>/<inner folder>/<image files>``; the mapping
    below gives the inner folder name for every class folder.

    Args:
        base_path: Root directory holding the class folders. ``None`` (the
            default) uses the Kaggle input location of the dataset.

    Returns:
        A tuple ``(image_paths, encoded_labels, unique_labels, label_to_idx)``:
        the file paths, the matching integer label per path, the sorted class
        folder names that contributed at least one image, and the
        name -> index mapping used for the encoding.

    Class folders that cannot be found are skipped and reported in a warning
    line rather than raising.
    """
    if base_path is None:
        base_path = "/kaggle/input/benchmark/Benchmark Diagnostic MRI and Medical Imaging Dataset/Medical Imaging Dataset"

    # Outer class folder -> inner folder that actually contains the images.
    class_mapping = {
        "Low Medial Insertion of Common Bile Duct with Pancreas Divisum-20240916T165825Z-001": "Low Medial Insertion of Common Bile Duct with Pancreas Divisum",
        "Inferior Vena Cava (IVC) Leiomyosarcoma-20240916T165709Z-001": "Inferior Vena Cava (IVC) Leiomyosarcoma",
        "Acute Cerebellitis in HIV": "Acute Cerebellitis in HIV",
        "Acute Unilateral Cerebellitis in HIV": "Acute Unilateral Cerebellitis in HIV",
        "Adenomyosis in Gravid Uterus": "Adenomyosis in Gravid Uterus",
        "Balloon Cell Cortical Dysplasia": "Balloon Cell Cortical Dysplasia",
        "Bilateral Osgood-Schlatter Disease with Chronic Inflammatory Arthritis": "Bilateral Osgood-Schlatter Disease with Chronic Inflammatory Arthritis",
        "Bilateral Ulnar Impaction Syndrome": "Bilateral Ulnar Impaction Syndrome",
        "Carolis Disease": "Carolis Disease",
        "Congenital Toxoplasmosis": "Congenital Toxoplasmosis",
        "Congenital Vaginal Cyst": "Congenital Vaginal Cyst",
        "Dermatomyositis": "Dermatomyositis",
        "Fukuyama Muscular Dystrophy": "Fukuyama Muscular Dystrophy",
        "Gamekeepers Thumb": "Gamekeepers Thumb",
        "Hallervorden-Spatz Disease (now called Pantothenate Kinase-Associated Neurodegeneration)": "Hallervorden-Spatz Disease (now called Pantothenate Kinase-Associated Neurodegeneration)",
        "Hepatocellular Carcinoma (HCC) and Dysplastic Nodules with Cirrhosis": "Hepatocellular Carcinoma (HCC) and Dysplastic Nodules with Cirrhosis",
        "Japanese B Encephalitis or Epstein-Barr Encephalitis": "Japanese B Encephalitis or Epstein-Barr Encephalitis",
        "Leighs Disease in Spinal Cord and Inferior Colliculi": "Leighs Disease in Spinal Cord and Inferior Colliculi",
        "Lumbosacral Plexitis": "Lumbosacral Plexitis",
        "Magnetic Resonance (MR) Brain": "Magnetic Resonance (MR) Brain",
        "Magnetic Resonance (MR) Spine": "Magnetic Resonance (MR) Spine",
        "Moyamoya Disease with Intraventricular Hemorrhage": "Moyamoya Disease with Intraventricular Hemorrhage",
        "Myositis Ossificans Progressiva": "Myositis Ossificans Progressiva",
        "Neurofibromatosis Type 1 (NF1) with Optic Glioma and Intracranial Extension": "Neurofibromatosis Type 1 (NF1) with Optic Glioma and Intracranial Extension",
        "Optic Glioma": "Optic Glioma",
        "Osmotic Demyelination Syndrome": "Osmotic Demyelination Syndrome",
        "Pachygyria with Cerebellar Hypoplasia": "Pachygyria with Cerebellar Hypoplasia",
        "Perisylvian Syndrome": "Perisylvian Syndrome",
        "Pigmented Villonodular Synovitis (PVNS) of Ankle": "Pigmented Villonodular Synovitis (PVNS) of Ankle",
        "Plexiform Neurofibroma with Sphenoid Wing Absence": "Plexiform Neurofibroma with Sphenoid Wing Absence",
        "Rasmussens Encephalitis": "Rasmussens Encephalitis",
        "Retinoblastoma with Intracranial Spread Along Cranial Nerve": "Retinoblastoma with Intracranial Spread Along Cranial Nerve",
        "Right Brachial Plexitis": "Right Brachial Plexitis",
        "Sjögrens Syndrome": "Sjögrens Syndrome",
        "Sural Nerve Neurofibroma": "Sural Nerve Neurofibroma",
        "Thoracic Outlet Syndrome": "Thoracic Outlet Syndrome",
        "Tuberous Sclerosis": "Tuberous Sclerosis",
        "Two-Week Follow-Up with Spectroscopy": "Two-Week Follow-Up with Spectroscopy",
        "Typical Adrenoleukodystrophy": "Typical Adrenoleukodystrophy",
        "Walker-Warburg Syndrome": "Walker-Warburg Syndrome"
    }

    # NOTE(review): '.dcm' and '.nii' files are collected here, but
    # BenchmarkMRIDataset opens files with PIL — confirm PIL can decode them,
    # otherwise those samples fall back to all-zero images.
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff', '.dcm', '.nii')

    image_paths = []
    labels = []
    missing_classes = []

    for class_name, subdir_name in class_mapping.items():
        subdir = os.path.join(base_path, class_name, subdir_name)
        if not os.path.isdir(subdir):
            missing_classes.append(class_name)
            continue
        # os.listdir returns entries in arbitrary, filesystem-dependent order.
        # Sorting keeps the sample order stable so that seeded train/test
        # splits select the same files on every machine.
        for file in sorted(os.listdir(subdir)):
            if file.lower().endswith(image_extensions):
                image_paths.append(os.path.join(subdir, file))
                labels.append(class_name)

    if missing_classes:
        print(f"Warning: {len(missing_classes)} class folder(s) not found under {base_path}")
    print(f"Total images found: {len(image_paths)}")

    # Labels are the outer class folder names, encoded by sorted position.
    unique_labels = sorted(set(labels))
    label_to_idx = {label: idx for idx, label in enumerate(unique_labels)}
    encoded_labels = [label_to_idx[label] for label in labels]

    return image_paths, encoded_labels, unique_labels, label_to_idx


class BenchmarkMRIDataset(Dataset):
    """Dataset that loads images from disk as single-channel (grayscale) samples.

    Each item is ``(image, label)``. The image is opened with PIL, converted
    to mode ``'L'`` and passed through ``transform`` when one is given. If
    loading or transforming fails for any reason, the error is printed and an
    all-zero ``1 x 224 x 224`` tensor is returned in place of the image.
    """

    def __init__(self, file_paths, labels, transform=None):
        self.file_paths = file_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.file_paths)

    def __getitem__(self, idx):
        path, target = self.file_paths[idx], self.labels[idx]
        try:
            image = Image.open(path).convert('L')
            if self.transform:
                image = self.transform(image)
        except Exception as err:
            # Best-effort: keep the batch going with a blank placeholder
            # instead of aborting the whole epoch on one unreadable file.
            print(f"Error loading image {path}: {err}")
            image = torch.zeros(1, 224, 224)
        return image, target


class MedicalMicroNet(nn.Module):
    """Tiny CNN classifier: three conv stages, global average pooling, small MLP head.

    Each stage is Conv3x3 -> BatchNorm -> ReLU -> 2x2 max-pool, with channel
    widths ``in_channels -> 8 -> 16 -> 32``. The pooled 32-dim feature vector
    goes through ``Linear(32, 64) -> ReLU -> Dropout(0.2) -> Linear(64, num_classes)``.
    """

    def __init__(self, in_channels, num_classes):
        super().__init__()
        widths = (in_channels, 8, 16, 32)
        stages = []
        # Layers are appended in the same order a hand-written Sequential
        # would list them, so module indices (and state_dict keys) are stable.
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            stages.extend([
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(2),
            ])
        stages.append(nn.AdaptiveAvgPool2d(1))
        self.features = nn.Sequential(*stages)

        self.classifier = nn.Sequential(
            nn.Linear(32, 64),
            nn.ReLU(inplace=True),
            nn.Dropout(0.2),
            nn.Linear(64, num_classes),
        )

    def forward(self, x):
        pooled = self.features(x)
        # Collapse (batch, 32, 1, 1) to (batch, 32) for the linear head.
        flat = pooled.flatten(1)
        return self.classifier(flat)

class TextureNet(nn.Module):
    """Ensemble member that passes the single-channel input straight to a MedicalMicroNet."""

    def __init__(self, num_classes):
        super().__init__()
        self.net = MedicalMicroNet(1, num_classes)

    def forward(self, x):
        logits = self.net(x)
        return logits

class ShapeNet(nn.Module):
    """Ensemble member that adds a Sobel edge-magnitude channel to the input.

    The single-channel input is convolved with horizontal and vertical Sobel
    kernels; the gradient magnitude ``sqrt(gx**2 + gy**2)`` is concatenated to
    the original image and the resulting 2-channel tensor is classified by a
    MedicalMicroNet.

    The kernels are fixed constants, so they are created once and stored as
    non-persistent buffers: they follow the module across ``.to(device)`` calls
    but are not written to (or expected in) the state_dict.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.net = MedicalMicroNet(2, num_classes)
        sobel_x = torch.tensor(
            [[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]], dtype=torch.float32
        ).view(1, 1, 3, 3)
        sobel_y = torch.tensor(
            [[-1, -2, -1], [0, 0, 0], [1, 2, 1]], dtype=torch.float32
        ).view(1, 1, 3, 3)
        self.register_buffer('sobel_x', sobel_x, persistent=False)
        self.register_buffer('sobel_y', sobel_y, persistent=False)

    def forward(self, x):
        # .to() is a no-op when the buffers already live on x's device; it only
        # guards against the module and the input being on different devices.
        kernel_x = self.sobel_x.to(x.device)
        kernel_y = self.sobel_y.to(x.device)
        # The (1, 1, 3, 3) kernels require x to have exactly one channel.
        edges_x = F.conv2d(x, kernel_x, padding=1)
        edges_y = F.conv2d(x, kernel_y, padding=1)
        edges = torch.sqrt(edges_x**2 + edges_y**2)
        x_combined = torch.cat([x, edges], dim=1)
        return self.net(x_combined)

class IntensityNet(nn.Module):
    """Ensemble member that standardises each image before classification.

    Every (sample, channel) plane is shifted to zero mean and scaled by its
    standard deviation over the two trailing (spatial) dimensions, then fed to
    a single-channel MedicalMicroNet.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.net = MedicalMicroNet(1, num_classes)

    def forward(self, x):
        spatial_dims = (2, 3)
        centre = x.mean(dim=spatial_dims, keepdim=True)
        spread = x.std(dim=spatial_dims, keepdim=True)
        # The small epsilon keeps the division finite for constant images.
        standardised = (x - centre) / (spread + 1e-8)
        return self.net(standardised)

class SpatialNet(nn.Module):
    """Ensemble member wrapping a single-channel MedicalMicroNet.

    No extra preprocessing is applied; the input is classified as-is.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.net = MedicalMicroNet(1, num_classes)

    def forward(self, x):
        logits = self.net(x)
        return logits

class MultiScaleNet(nn.Module):
    """Ensemble member wrapping a single-channel MedicalMicroNet.

    The input is forwarded unchanged; no multi-resolution processing is
    performed inside this class.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.net = MedicalMicroNet(1, num_classes)

    def forward(self, x):
        logits = self.net(x)
        return logits

class UltraLightUALE(nn.Module):
    """Five-member ensemble that returns averaged logits plus a disagreement score.

    ``forward`` returns a 3-tuple:
        * the mean of the five members' outputs,
        * a per-sample uncertainty: the variance across members, averaged over
          the class dimension,
        * the stacked member outputs (members along dim 0).
    """

    def __init__(self, num_classes):
        super().__init__()
        # Members are created in a fixed order so weight initialisation under a
        # given seed stays the same.
        self.texture_net = TextureNet(num_classes)
        self.shape_net = ShapeNet(num_classes)
        self.intensity_net = IntensityNet(num_classes)
        self.spatial_net = SpatialNet(num_classes)
        self.multiscale_net = MultiScaleNet(num_classes)
        self.num_classes = num_classes

    def forward(self, x):
        members = (
            self.texture_net,
            self.shape_net,
            self.intensity_net,
            self.spatial_net,
            self.multiscale_net,
        )
        preds = torch.stack([member(x) for member in members], dim=0)
        ensemble_pred = preds.mean(dim=0)
        # Variance over the member axis, then averaged across classes.
        uncertainty = preds.var(dim=0).mean(dim=1)
        return ensemble_pred, uncertainty, preds


def get_transforms():
    """Build the training and validation image pipelines.

    Both pipelines resize to 256, centre-crop to 224, convert to a tensor and
    normalise with mean 0.5 / std 0.5. The training pipeline additionally
    applies a random horizontal flip and a random rotation of up to 10 degrees
    between the crop and the tensor conversion.

    Returns:
        ``(train_transform, val_transform)``.
    """
    def resize_and_crop():
        return [transforms.Resize(256), transforms.CenterCrop(224)]

    def to_normalised_tensor():
        return [transforms.ToTensor(), transforms.Normalize([0.5], [0.5])]

    augmentations = [transforms.RandomHorizontalFlip(), transforms.RandomRotation(10)]

    train_transform = transforms.Compose(
        resize_and_crop() + augmentations + to_normalised_tensor()
    )
    val_transform = transforms.Compose(resize_and_crop() + to_normalised_tensor())
    return train_transform, val_transform

def train_one_run(model, train_loader, val_loader, epochs=100, patience=15):
    """Train ``model`` with early stopping on validation accuracy.

    Uses AdamW (lr 1e-3, weight decay 1e-5), cross-entropy on the ensemble
    output (the first element of the model's return tuple) and a
    ReduceLROnPlateau scheduler driven by validation accuracy.

    Args:
        model: Module whose forward returns a 3-tuple with the logits first.
        train_loader: Iterable of ``(images, labels)`` training batches.
        val_loader: Iterable of ``(images, labels)`` validation batches.
        epochs: Maximum number of epochs.
        patience: Stop after this many consecutive epochs without a new best
            validation accuracy.

    Returns:
        The same ``model`` with the weights of its best validation epoch
        loaded. If no epoch ran (``epochs == 0``) the model is returned as-is.

    Side effects:
        Writes the best weights to ``best_model_temp.pth`` in the current
        working directory.
    """
    checkpoint_path = 'best_model_temp.pth'

    model.to(device)
    optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=1e-5)
    # `verbose` is omitted: it is deprecated in recent PyTorch releases and the
    # previous value (False) was the default anyway.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', patience=5)
    criterion = nn.CrossEntropyLoss()

    # Start below any achievable accuracy so the first epoch always writes a
    # checkpoint. Starting at 0 with a strict '>' meant a run that never beat
    # 0 saved nothing and then loaded a stale file from an earlier run.
    best_val_acc = -1.0
    no_improve = 0
    checkpoint_saved = False

    for _ in range(epochs):
        model.train()
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs, _, _ = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        # Validation
        model.eval()
        val_preds, val_targets = [], []
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs, _, _ = model(images)
                _, predicted = outputs.max(1)
                val_preds.extend(predicted.cpu().numpy())
                val_targets.extend(labels.cpu().numpy())

        val_acc = accuracy_score(val_targets, val_preds)
        scheduler.step(val_acc)

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            no_improve = 0
            torch.save(model.state_dict(), checkpoint_path)
            checkpoint_saved = True
        else:
            no_improve += 1
            if no_improve >= patience:
                break

    # Only restore a checkpoint this call actually wrote.
    if checkpoint_saved:
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    return model


def evaluate_with_auprc(model, test_loader, num_classes):
    """Evaluate ``model`` and return accuracy, weighted F1 and support-weighted AUPRC.

    For every class that occurs in the test labels, a one-vs-rest
    precision-recall curve is computed from the softmax probability of that
    class and integrated with the trapezoidal rule (``auc``). The per-class
    areas are averaged with weights proportional to class support.

    Args:
        model: Module whose forward returns a 3-tuple with the logits first.
        test_loader: Iterable of ``(images, labels)`` batches.
        num_classes: Number of output classes of the model.

    Returns:
        ``(accuracy, weighted_f1, weighted_auprc)``, each in ``[0, 1]``.

    Raises:
        ValueError: If ``test_loader`` yields no batches.
    """
    model.eval()
    prob_batches, pred_batches, label_batches = [], [], []

    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            outputs, _, _ = model(images)
            probs = F.softmax(outputs, dim=1)
            _, pred = torch.max(outputs, 1)

            prob_batches.append(probs.cpu().numpy())
            pred_batches.append(pred.cpu().numpy())
            label_batches.append(labels.cpu().numpy())

    if not label_batches:
        raise ValueError("test_loader yielded no batches; nothing to evaluate")

    all_probs = np.concatenate(prob_batches, axis=0)
    all_preds = np.concatenate(pred_batches, axis=0)
    all_labels = np.concatenate(label_batches, axis=0)

    # Weighted average by support
    support = np.bincount(all_labels, minlength=num_classes)
    weights = support / support.sum()

    weighted_auprc = 0.0
    for i in range(num_classes):
        if support[i] == 0:
            # A class with no positives has no defined PR curve and zero weight.
            continue
        # A boolean mask is used instead of label_binarize, which collapses
        # two-class problems to a single column and breaks per-class indexing.
        is_class_i = (all_labels == i).astype(int)
        precision, recall, _ = precision_recall_curve(is_class_i, all_probs[:, i])
        weighted_auprc += auc(recall, precision) * weights[i]

    acc = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds, average='weighted')

    return acc, f1, weighted_auprc


def run_multiple_experiments(n_runs=5):
    """Train and evaluate the ensemble ``n_runs`` times on fresh random splits.

    Each run seeds the RNGs with ``1000 + run``, makes a stratified 80/20
    train/test split (``random_state = 42 + run``), holds out 15% of the
    training part for validation, trains a new UltraLightUALE with early
    stopping, and evaluates it on the test split.

    Args:
        n_runs: Number of independent runs.

    Returns:
        Dict with keys ``'accuracy'``, ``'f1'`` and ``'auprc'``, each a list of
        per-run scores in percent.

    Raises:
        ValueError: If the dataset loader finds no images.
    """
    print("Loading dataset...")
    image_paths, encoded_labels, unique_labels, _ = load_benchmark_dataset_fixed()
    if not image_paths:
        raise ValueError("No images found; check the dataset path before running experiments")
    num_classes = len(unique_labels)

    results = {'accuracy': [], 'f1': [], 'auprc': []}

    for run in range(1, n_runs + 1):
        print(f"\n{'='*20} RUN {run}/{n_runs} {'='*20}")
        set_seed(1000 + run)

        # Split off the test set first, then carve validation out of the rest.
        train_paths, test_paths, train_labels, test_labels = train_test_split(
            image_paths, encoded_labels, test_size=0.2, random_state=42+run, stratify=encoded_labels)

        train_paths, val_paths, train_labels, val_labels = train_test_split(
            train_paths, train_labels, test_size=0.15, random_state=42+run, stratify=train_labels)

        train_transform, val_transform = get_transforms()

        # Only the training set gets augmentation; val/test share the plain pipeline.
        train_dataset = BenchmarkMRIDataset(train_paths, train_labels, transform=train_transform)
        val_dataset   = BenchmarkMRIDataset(val_paths,   val_labels,   transform=val_transform)
        test_dataset  = BenchmarkMRIDataset(test_paths,  test_labels,  transform=val_transform)

        train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True,  num_workers=2, pin_memory=True)
        val_loader   = DataLoader(val_dataset,   batch_size=32, shuffle=False, num_workers=2, pin_memory=True)
        test_loader  = DataLoader(test_dataset,  batch_size=32, shuffle=False, num_workers=2, pin_memory=True)

        model = UltraLightUALE(num_classes=num_classes)
        print(f"Run {run} - Total params: {sum(p.numel() for p in model.parameters())/1e6:.3f}M")

        model = train_one_run(model, train_loader, val_loader, epochs=100, patience=5)

        acc, f1, auprc = evaluate_with_auprc(model, test_loader, num_classes)

        results['accuracy'].append(acc * 100)
        results['f1'].append(f1 * 100)
        results['auprc'].append(auprc * 100)

        print(f"Run {run} → Acc: {acc*100:.2f}% | | F1: {f1*100:.2f}% | AUPRC: {auprc*100:.2f}%")

    # Final summary (mean ± population std over the runs). The header reports
    # the actual number of runs instead of a hard-coded 5.
    print("\n" + "="*60)
    print(f"FINAL RESULTS ACROSS {n_runs} RUNS")
    print("="*60)
    print(f"Accuracy : {np.mean(results['accuracy']):.2f}% ± {np.std(results['accuracy']):.2f}%")
    print(f"F1-Score : {np.mean(results['f1']):.2f}% ± {np.std(results['f1']):.2f}%")
    print(f"AUPRC    : {np.mean(results['auprc']):.2f}% ± {np.std(results['auprc']):.2f}%")
    print("="*60)

    return results


# Entry point: run the five-run experiment and keep the per-run metric lists
# in `final_results` for later inspection.
if __name__ == "__main__":
    final_results = run_multiple_experiments(n_runs=5)

Using device: cuda
Loading dataset...
Total images found: 34192

Run 1 - Total params: 0.054M
Run 1 → Acc: 57.63% | | F1: 56.54% | AUPRC: 62.71%

Run 2 - Total params: 0.054M
Run 2 → Acc: 66.79% | | F1: 65.64% | AUPRC: 73.43%

Run 3 - Total params: 0.054M
Run 3 → Acc: 54.36% | | F1: 52.79% | AUPRC: 62.25%

Run 4 - Total params: 0.054M
Run 4 → Acc: 62.39% | | F1: 61.51% | AUPRC: 68.02%

Run 5 - Total params: 0.054M
Run 5 → Acc: 59.22% | | F1: 58.16% | AUPRC: 65.15%

FINAL RESULTS ACROSS 5 RUNS
Accuracy : 60.08% ± 4.24%
F1-Score : 58.93% ± 4.37%
AUPRC    : 66.31% ± 4.11%
