In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix, roc_auc_score, roc_curve
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import label_binarize
import matplotlib.pyplot as plt
import seaborn as sns
import time
import numpy as np
from scipy import stats
from PIL import Image
import os

# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Data augmentation for training
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# No augmentation transform (for ablation)
no_aug_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Noise transform for robustness test
noise_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.GaussianBlur(kernel_size=5, sigma=(0.1, 2.0)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Validation/test transform
test_transform = no_aug_transform  # Alias for clarity

In [2]:
# Custom Dataset class
class CustomDataset(Dataset):
    def __init__(self, samples, transform=None):
        self.samples = samples  # list of (path, label)
        self.transform = transform

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, index):
        path, target = self.samples[index]
        img = Image.open(path).convert('RGB')
        if self.transform:
            img = self.transform(img)
        return img, target

# Define relevant classes for lost-and-found items (10 classes selected)
relevant_classes = [
    '003.backpack',      # Backpack
    '033.cd',            # CD (proxy for disc-like items, e.g., wallet)
    '041.coffee-mug',    # Mug (portable)
    '047.computer-mouse',# Computer mouse (peripheral)
    '067.eyeglasses',    # Eyeglasses (sunglasses proxy)
    '101.head-phones',   # Headphones
    '117.ipod',          # iPod (phone-like device)
    '127.laptop-101',    # Laptop
    '235.umbrella-101',  # Umbrella
    '240.watch-101'      # Watch
]

# Load the Caltech-256 dataset without transform
dataset_folder = '/kaggle/input/caltech256/256_ObjectCategories'
orig_dataset = datasets.ImageFolder(root=dataset_folder, transform=None)

# Filter samples to only include relevant classes
class_to_idx = {cls: idx for idx, cls in enumerate(relevant_classes)}
full_samples = []
full_targets = []
for path, target in orig_dataset.samples:
    class_dir = os.path.basename(os.path.dirname(path))
    if class_dir in relevant_classes:
        new_target = class_to_idx[class_dir]
        full_samples.append(path)
        full_targets.append(new_target)

classes = relevant_classes
num_classes = len(classes)
print(f"Selected {num_classes} classes: {classes}")
print(f"Total filtered images: {len(full_samples)}")

Selected 10 classes: ['003.backpack', '033.cd', '041.coffee-mug', '047.computer-mouse', '067.eyeglasses', '101.head-phones', '117.ipod', '127.laptop-101', '235.umbrella-101', '240.watch-101']
Total filtered images: 1219


In [3]:
# Split indices: 70% train, 15% val, 15% test (stratified)
indices = list(range(len(full_samples)))
train_val_idx, test_idx = train_test_split(indices, test_size=0.15, stratify=full_targets, random_state=42)
train_val_targets = [full_targets[i] for i in train_val_idx]
train_idx, val_idx = train_test_split(train_val_idx, test_size=0.15 / 0.85, stratify=train_val_targets, random_state=42)

print(f"Train size: {len(train_idx)} ({len(train_idx)/len(indices)*100:.1f}%)")
print(f"Val size: {len(val_idx)} ({len(val_idx)/len(indices)*100:.1f}%)")
print(f"Test size: {len(test_idx)} ({len(test_idx)/len(indices)*100:.1f}%)")

# Create split samples (path, label)
train_samples = [(full_samples[i], full_targets[i]) for i in train_idx]
val_samples = [(full_samples[i], full_targets[i]) for i in val_idx]
test_samples = [(full_samples[i], full_targets[i]) for i in test_idx]

# Create datasets
train_dataset = CustomDataset(train_samples, train_transform)
val_dataset = CustomDataset(val_samples, test_transform)
test_dataset = CustomDataset(test_samples, test_transform)
noise_test_dataset = CustomDataset(test_samples, noise_transform)

# Loaders with increased batch size and num_workers for efficiency
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)
noise_test_loader = DataLoader(noise_test_dataset, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)

Train size: 853 (70.0%)
Val size: 183 (15.0%)
Test size: 183 (15.0%)


In [5]:
# Function to get model by name (dynamic num_classes)
def get_model(model_name, num_classes):
    if model_name == 'resnet50':
        model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
        num_ftrs = model.fc.in_features
        model.fc = nn.Linear(num_ftrs, num_classes)
        params_to_optimize = list(model.fc.parameters())
    elif model_name == 'vgg16':
        model = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)
        num_ftrs = model.classifier[6].in_features
        model.classifier[6] = nn.Linear(num_ftrs, num_classes)
        params_to_optimize = list(model.classifier[6].parameters())
    elif model_name == 'mobilenet_v3':
        model = models.mobilenet_v3_large(weights=models.MobileNet_V3_Large_Weights.IMAGENET1K_V1)
        num_ftrs = model.classifier[3].in_features
        model.classifier[3] = nn.Linear(num_ftrs, num_classes)
        params_to_optimize = list(model.classifier[3].parameters())
    else:
        raise ValueError("Unknown model name")
    
    # Freeze all layers except the classifier
    for param in model.parameters():
        param.requires_grad = False
    for param in params_to_optimize:
        param.requires_grad = True
    
    model = model.to(device)
    return model, params_to_optimize

# Training function with early stopping and validation (increased epochs/patience)
def train_model(model, train_loader, val_loader, criterion, optimizer, epochs=30, patience=5):
    best_val_loss = float('inf')
    patience_counter = 0
    train_losses = []
    val_losses = []
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        avg_train_loss = running_loss / len(train_loader)
        train_losses.append(avg_train_loss)
        
        # Validate
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
        avg_val_loss = val_loss / len(val_loader)
        val_losses.append(avg_val_loss)
        print(f"Epoch {epoch+1}/{epochs}, Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}")
        
        # Early stopping
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"Early stopping at epoch {epoch+1}")
                break
    return train_losses, val_losses

In [6]:
# Evaluation function (unchanged, but handles more classes)
def evaluate_model(model, loader, classes, alpha=0.05, save_prefix=''):
    model.eval()
    all_labels = []
    all_preds = []
    all_probs = []
    inference_times = []
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            start_time = time.time()
            outputs = model(inputs)
            end_time = time.time()
            _, predicted = torch.max(outputs.data, 1)
            probs = torch.softmax(outputs, 1)
            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(predicted.cpu().numpy())
            all_probs.extend(probs.cpu().numpy())
            inference_times.append((end_time - start_time) / inputs.size(0))
    all_labels = np.array(all_labels)
    all_preds = np.array(all_preds)
    all_probs = np.array(all_probs)
    # Metrics
    accuracy = np.mean(all_preds == all_labels)
    precision = precision_score(all_labels, all_preds, average='macro')
    recall = recall_score(all_labels, all_preds, average='macro')
    f1 = f1_score(all_labels, all_preds, average='macro')
    avg_inference_time = np.mean(inference_times)
    # AUC
    bin_labels = label_binarize(all_labels, classes=range(len(classes)))
    auc_macro = roc_auc_score(bin_labels, all_probs, average='macro', multi_class='ovr')
    auc_per_class = roc_auc_score(bin_labels, all_probs, average=None, multi_class='ovr')
    # Confidence intervals (normal approximation)
    n = len(all_labels)
    def ci(metric):
        se = np.sqrt(metric * (1 - metric) / n if n > 0 else 0)
        h = se * stats.norm.ppf(1 - alpha/2)
        return h
    acc_ci = ci(accuracy) * 100
    prec_ci = ci(precision) * 100
    recall_ci = ci(recall) * 100
    f1_ci = ci(f1) * 100
    print(f"Accuracy: {accuracy*100:.2f}% ± {acc_ci:.2f}%")
    print(f"Precision: {precision*100:.2f}% ± {prec_ci:.2f}%")
    print(f"Recall: {recall*100:.2f}% ± {recall_ci:.2f}%")
    print(f"F1-score: {f1*100:.2f}% ± {f1_ci:.2f}%")
    print(f"Macro AUC: {auc_macro:.4f}")
    print(f"Per-class AUC: {', '.join([f'{classes[i]}: {auc_per_class[i]:.4f}' for i in range(len(classes))])}")
    print(f"Average Inference Time: {avg_inference_time:.4f}s per image")
    # Confusion matrix
    cm = confusion_matrix(all_labels, all_preds)
    plt.figure(figsize=(10,8), dpi=300)  # Larger for more classes
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=classes, yticklabels=classes)
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Confusion Matrix')
    plt.xticks(rotation=45, ha='right')
    plt.yticks(rotation=0)
    plt.savefig(f'{save_prefix}confusion_matrix_300dpi.png', dpi=300, bbox_inches='tight')
    plt.close()
    # Per-class metrics bar chart
    class_prec = precision_score(all_labels, all_preds, average=None)
    class_recall = recall_score(all_labels, all_preds, average=None)
    class_f1 = f1_score(all_labels, all_preds, average=None)
    x = np.arange(len(classes))
    width = 0.2
    plt.figure(figsize=(12,6), dpi=300)  # Larger for more classes
    plt.bar(x - width, class_prec*100, width, label='Precision')
    plt.bar(x, class_recall*100, width, label='Recall')
    plt.bar(x + width, class_f1*100, width, label='F1-score')
    plt.xticks(x, classes, rotation=45, ha='right')
    plt.ylabel('Score (%)')
    plt.title('Per-Class Metrics')
    plt.legend()
    plt.savefig(f'{save_prefix}per_class_metrics_300dpi.png', dpi=300, bbox_inches='tight')
    plt.close()
    # ROC curves
    plt.figure(figsize=(10,8), dpi=300)
    for i in range(len(classes)):
        fpr, tpr, _ = roc_curve(bin_labels[:, i], all_probs[:, i])
        plt.plot(fpr, tpr, label=f'{classes[i]} (AUC = {auc_per_class[i]:.2f})')
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curves')
    plt.legend(loc='lower right', fontsize='small')
    plt.savefig(f'{save_prefix}roc_curves_300dpi.png', dpi=300, bbox_inches='tight')
    plt.close()
    return accuracy, precision, recall, f1, auc_macro, avg_inference_time

In [7]:
# Function to run 5-fold CV and report mean ± std (updated for larger dataset)
def run_cv(model_name, train_transform, classes, num_classes, patience=5, epochs=30, save_prefix=''):
    print(f"\nRunning 5-fold CV for {model_name} with augmentation: {train_transform != test_transform}")
    # Prepare train_val data for CV
    train_val_samples = [(full_samples[i], full_targets[i]) for i in train_val_idx]
    train_val_targets_cv = [full_targets[i] for i in train_val_idx]  # For stratify
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    fold_accuracies = []
    fold_precisions = []
    fold_recalls = []
    fold_f1s = []
    fold_aucs = []
    for fold, (fold_train_idx, fold_val_idx) in enumerate(skf.split(range(len(train_val_idx)), train_val_targets_cv)):
        print(f"Fold {fold+1}")
        # Create fold datasets
        fold_train_samples = [train_val_samples[j] for j in fold_train_idx]
        fold_val_samples = [train_val_samples[j] for j in fold_val_idx]
        fold_train_dataset = CustomDataset(fold_train_samples, train_transform)
        fold_val_dataset = CustomDataset(fold_val_samples, test_transform)
        fold_train_loader = DataLoader(fold_train_dataset, batch_size=64, shuffle=True, num_workers=4, pin_memory=True)
        fold_val_loader = DataLoader(fold_val_dataset, batch_size=64, shuffle=False, num_workers=4, pin_memory=True)
        # Get model
        model, params_to_optimize = get_model(model_name, num_classes)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(params_to_optimize, lr=0.001)
        # Train with early stopping
        train_losses, val_losses = train_model(model, fold_train_loader, fold_val_loader, criterion, optimizer, epochs, patience)
        # Plot learning curve for fold
        plt.figure(figsize=(8,6), dpi=300)
        plt.plot(train_losses, label='Train Loss')
        plt.plot(val_losses, label='Val Loss')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.title(f'Learning Curve - {model_name} Fold {fold+1}')
        plt.legend()
        plt.savefig(f'{save_prefix}{model_name}_fold{fold+1}_learning_curve_300dpi.png', dpi=300)
        plt.close()
        # Evaluate on fold val
        acc, prec, rec, f1, auc, _ = evaluate_model(model, fold_val_loader, classes, save_prefix=f'{save_prefix}{model_name}_fold{fold+1}_')
        fold_accuracies.append(acc)
        fold_precisions.append(prec)
        fold_recalls.append(rec)
        fold_f1s.append(f1)
        fold_aucs.append(auc)
    # Report mean ± std
    print("\n5-Fold CV Results:")
    print(f"Accuracy: {np.mean(fold_accuracies)*100:.2f}% ± {np.std(fold_accuracies)*100:.2f}%")
    print(f"Precision: {np.mean(fold_precisions)*100:.2f}% ± {np.std(fold_precisions)*100:.2f}%")
    print(f"Recall: {np.mean(fold_recalls)*100:.2f}% ± {np.std(fold_recalls)*100:.2f}%")
    print(f"F1-score: {np.mean(fold_f1s)*100:.2f}% ± {np.std(fold_f1s)*100:.2f}%")
    print(f"Macro AUC: {np.mean(fold_aucs):.4f} ± {np.std(fold_aucs):.4f}")

In [8]:
# Main execution
model_names = ['resnet50', 'vgg16', 'mobilenet_v3']

# Run CV for each model with augmentation
for model_name in model_names:
    run_cv(model_name, train_transform, classes, num_classes, save_prefix='aug_')

# Ablation: Without augmentation
for model_name in model_names:
    run_cv(model_name, no_aug_transform, classes, num_classes, save_prefix='noaug_')

# Final evaluation on holdout test (using best model, e.g., resnet50 trained on full train+val)
print("\nTraining final model (ResNet50) on full train+val and evaluating on test/noise test")
train_val_samples_full = [(full_samples[i], full_targets[i]) for i in train_val_idx]
train_val_dataset_full = CustomDataset(train_val_samples_full, train_transform)
train_val_loader_full = DataLoader(train_val_dataset_full, batch_size=64, shuffle=True, num_workers=4, pin_memory=True)

model, params_to_optimize = get_model('resnet50', num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params_to_optimize, lr=0.001)

# Train on full train_val (fixed epochs, increased to 20)
model.train()
for epoch in range(20):
    running_loss = 0.0
    for inputs, labels in train_val_loader_full:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Final Train Epoch {epoch+1}, Loss: {running_loss / len(train_val_loader_full):.4f}")

# Evaluate on test
print("\nHoldout Test Evaluation:")
evaluate_model(model, test_loader, classes, save_prefix='final_test_')

# Evaluate on noise test (robustness)
print("\nNoise Test Evaluation (Robustness to Gaussian Blur):")
evaluate_model(model, noise_test_loader, classes, save_prefix='noise_test_')

# List all generated PNG files at the end
print("\nGenerated PNG files:")
png_files = [f for f in os.listdir('.') if f.endswith('.png')]
print('\n'.join(sorted(png_files)))


Running 5-fold CV for resnet50 with augmentation: True
Fold 1


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 215MB/s]


Epoch 1/30, Train Loss: 1.7684, Val Loss: 0.9979
Epoch 2/30, Train Loss: 0.9255, Val Loss: 0.5717
Epoch 3/30, Train Loss: 0.5894, Val Loss: 0.4089
Epoch 4/30, Train Loss: 0.4292, Val Loss: 0.3083
Epoch 5/30, Train Loss: 0.3615, Val Loss: 0.2708
Epoch 6/30, Train Loss: 0.3296, Val Loss: 0.2315
Epoch 7/30, Train Loss: 0.2714, Val Loss: 0.2213
Epoch 8/30, Train Loss: 0.2506, Val Loss: 0.2109
Epoch 9/30, Train Loss: 0.2483, Val Loss: 0.1977
Epoch 10/30, Train Loss: 0.2223, Val Loss: 0.1776
Epoch 11/30, Train Loss: 0.2080, Val Loss: 0.1735
Epoch 12/30, Train Loss: 0.1915, Val Loss: 0.1861
Epoch 13/30, Train Loss: 0.1763, Val Loss: 0.1618
Epoch 14/30, Train Loss: 0.1783, Val Loss: 0.1662
Epoch 15/30, Train Loss: 0.1745, Val Loss: 0.1683
Epoch 16/30, Train Loss: 0.1599, Val Loss: 0.1562
Epoch 17/30, Train Loss: 0.1601, Val Loss: 0.1587
Epoch 18/30, Train Loss: 0.1482, Val Loss: 0.1507
Epoch 19/30, Train Loss: 0.1287, Val Loss: 0.1408
Epoch 20/30, Train Loss: 0.1351, Val Loss: 0.1412
Epoch 21/

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:02<00:00, 210MB/s] 


Epoch 1/30, Train Loss: 1.2379, Val Loss: 0.3613
Epoch 2/30, Train Loss: 0.4445, Val Loss: 0.2772
Epoch 3/30, Train Loss: 0.3382, Val Loss: 0.2284
Epoch 4/30, Train Loss: 0.2723, Val Loss: 0.2131
Epoch 5/30, Train Loss: 0.2376, Val Loss: 0.2067
Epoch 6/30, Train Loss: 0.2105, Val Loss: 0.2172
Epoch 7/30, Train Loss: 0.2232, Val Loss: 0.2050
Epoch 8/30, Train Loss: 0.1794, Val Loss: 0.2188
Epoch 9/30, Train Loss: 0.1538, Val Loss: 0.2066
Epoch 10/30, Train Loss: 0.1744, Val Loss: 0.2095
Epoch 11/30, Train Loss: 0.1550, Val Loss: 0.1874
Epoch 12/30, Train Loss: 0.1307, Val Loss: 0.2080
Epoch 13/30, Train Loss: 0.1256, Val Loss: 0.1959
Epoch 14/30, Train Loss: 0.1365, Val Loss: 0.2003
Epoch 15/30, Train Loss: 0.1240, Val Loss: 0.1929
Epoch 16/30, Train Loss: 0.1344, Val Loss: 0.1902
Early stopping at epoch 16
Accuracy: 93.27% ± 3.41%
Precision: 93.39% ± 3.38%
Recall: 92.66% ± 3.54%
F1-score: 92.87% ± 3.50%
Macro AUC: 0.9976
Per-class AUC: 003.backpack: 0.9978, 033.cd: 0.9988, 041.coffee-m

Downloading: "https://download.pytorch.org/models/mobilenet_v3_large-8738ca79.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v3_large-8738ca79.pth



5-Fold CV Results:
Accuracy: 94.60% ± 0.98%
Precision: 94.61% ± 0.85%
Recall: 94.33% ± 1.04%
F1-score: 94.33% ± 0.96%
Macro AUC: 0.9981 ± 0.0006

Running 5-fold CV for mobilenet_v3 with augmentation: True
Fold 1


100%|██████████| 21.1M/21.1M [00:00<00:00, 118MB/s]


Epoch 1/30, Train Loss: 1.7545, Val Loss: 0.9038
Epoch 2/30, Train Loss: 0.8822, Val Loss: 0.4563
Epoch 3/30, Train Loss: 0.5495, Val Loss: 0.3183
Epoch 4/30, Train Loss: 0.4117, Val Loss: 0.2650
Epoch 5/30, Train Loss: 0.3448, Val Loss: 0.2364
Epoch 6/30, Train Loss: 0.2802, Val Loss: 0.2165
Epoch 7/30, Train Loss: 0.2700, Val Loss: 0.2024
Epoch 8/30, Train Loss: 0.2326, Val Loss: 0.1907
Epoch 9/30, Train Loss: 0.2171, Val Loss: 0.1833
Epoch 10/30, Train Loss: 0.1859, Val Loss: 0.1793
Epoch 11/30, Train Loss: 0.1862, Val Loss: 0.1763
Epoch 12/30, Train Loss: 0.1639, Val Loss: 0.1722
Epoch 13/30, Train Loss: 0.1484, Val Loss: 0.1692
Epoch 14/30, Train Loss: 0.1344, Val Loss: 0.1653
Epoch 15/30, Train Loss: 0.1458, Val Loss: 0.1589
Epoch 16/30, Train Loss: 0.1254, Val Loss: 0.1568
Epoch 17/30, Train Loss: 0.1132, Val Loss: 0.1548
Epoch 18/30, Train Loss: 0.1113, Val Loss: 0.1522
Epoch 19/30, Train Loss: 0.1072, Val Loss: 0.1516
Epoch 20/30, Train Loss: 0.1028, Val Loss: 0.1511
Epoch 21/