In [3]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/shoe-images/train_B.npz
/kaggle/input/shoe-images/test.npz
/kaggle/input/shoe-images/train_A.npz


In [4]:
import os, random, numpy as np
import torch
from torchvision import models
from torch import nn, optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import gc

In [5]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)

Using device: cuda


In [6]:
data_dir = "/kaggle/input/shoe-images"
data = np.load(os.path.join(data_dir, "train_B.npz"))  # or train_A.npz
X, y = data["X"], data["y"]
print("Loaded:", X.shape, y.shape)

# ---------- encode string labels to ints ----------
encoder = LabelEncoder()
y = encoder.fit_transform(y)            # e.g. Boot→0, Sandal→1, Shoe→2
print("Label mapping:", dict(zip(encoder.classes_,
                                 range(len(encoder.classes_)))))

# ---------- normalise images ----------
X = X.astype("float32") / 255.0

# ---------- reshape for PyTorch (N,C,H,W) ----------
X = np.transpose(X, (0,3,1,2))
y = y.astype("int64")
num_classes = len(np.unique(y))
print("Final tensors:", X.shape, "Classes:", num_classes)

Loaded: (12000, 224, 224, 3) (12000,)
Label mapping: {'Boot': 0, 'Sandal': 1, 'Shoe': 2}
Final tensors: (12000, 3, 224, 224) Classes: 3


In [7]:
def make_folds(n, k=10, seed=42, y=None):
    """
    Create stratified k-fold splits that preserve class distribution.
    
    Parameters:
    - n: Total number of samples
    - k: Number of folds (default=10)
    - seed: Random seed for reproducibility (default=42)
    - y: Labels for stratification (required)
    
    Returns:
    - List of k arrays containing indices for each fold
    """
    np.random.seed(seed)
    
    unique_classes = np.unique(y)
    folds = [[] for _ in range(k)]
    
    # For each class, split its samples across k folds
    for cls in unique_classes:
        cls_indices = np.where(y == cls)[0]
        np.random.shuffle(cls_indices)
        cls_splits = np.array_split(cls_indices, k)
        
        # Add class samples to each fold
        for fold_idx, split in enumerate(cls_splits):
            folds[fold_idx].extend(split)
    
    # Shuffle within each fold and convert to numpy arrays
    for i in range(k):
        np.random.shuffle(folds[i])
        folds[i] = np.array(folds[i])
    
    return folds

In [8]:
def confusion_matrix_manual(y_true, y_pred, labels):
    n = len(labels)
    label_to_idx = {lab: i for i, lab in enumerate(labels)}
    cm = np.zeros((n, n), dtype=int)
    for yt, yp in zip(y_true, y_pred):
        i = label_to_idx[yt]
        j = label_to_idx[yp]
        cm[i, j] += 1
    return cm

In [9]:
def calc_metrics(cm):
    TP = np.diag(cm)
    FP = cm.sum(0) - TP
    FN = cm.sum(1) - TP
    precision = np.mean(TP / (TP + FP + 1e-9))
    recall    = np.mean(TP / (TP + FN + 1e-9))
    f1 = 2 * precision * recall / (precision + recall + 1e-9)
    acc = TP.sum() / cm.sum()
    return acc, precision, recall, f1

In [10]:
def train_one_fold(X_train, y_train, X_val, y_val, model_builder,
                   lr=1e-3, epochs=5, batch=64, device="cpu"):
    """Train one fold and return model + predictions on validation set."""
    
    train_ds = TensorDataset(torch.tensor(X_train), torch.tensor(y_train))
    val_ds   = TensorDataset(torch.tensor(X_val), torch.tensor(y_val))
    train_dl = DataLoader(train_ds, batch_size=batch, shuffle=True)
    val_dl   = DataLoader(val_ds, batch_size=batch, shuffle=False)

    # note: difference here — build model dynamically
    model = model_builder().to(device)
    loss_fn = nn.CrossEntropyLoss()
    opt = optim.Adam(model.parameters(), lr=lr)

    for ep in range(epochs):
        model.train()
        total_loss = 0.0
        for xb, yb in train_dl:
            xb, yb = xb.to(device), yb.to(device)
            out = model(xb)
            loss = loss_fn(out, yb)
            opt.zero_grad()
            loss.backward()
            opt.step()
            total_loss += loss.item()
        avg_loss = total_loss / len(train_dl)
        print(f"  Epoch {ep+1}/{epochs}, Train loss={avg_loss:.4f}")

    # ----- validation predictions -----
    model.eval()
    preds = []
    with torch.no_grad():
        for xb, _ in val_dl:
            xb = xb.to(device)
            probs = torch.softmax(model(xb), 1)
            preds.append(torch.argmax(probs, 1).cpu().numpy())
    preds = np.concatenate(preds)
    return model, preds

In [11]:
def evaluate_model_nested_cv(
    X, y, model_builder,
    candidate_lr=[1e-3, 3e-4, 1e-4],
    k_outer=10, k_inner=3, epochs=5,
    device="cpu"
):
    """Generic nested cross‑validation for any model."""

    folds = make_folds(len(X), k_outer, seed=42, y=y)
    metrics_all = []

    for i in range(k_outer):
        print(f"\n=== Outer Fold {i+1}/{k_outer} ===")

        test_idx = folds[i]
        train_idx = np.concatenate([folds[j] for j in range(k_outer) if j != i])
        X_train, y_train = X[train_idx], y[train_idx]
        X_test,  y_test  = X[test_idx],  y[test_idx]

        # ---- inner loop: tuning learning rate ----
        inner_folds = make_folds(len(X_train), k_inner, seed=42, y=y_train)
        mean_accs = []

        for lr in candidate_lr:
            inner_scores = []
            for j in range(k_inner):
                val_idx = inner_folds[j]
                tr_idx  = np.concatenate([inner_folds[m] for m in range(k_inner) if m != j])

                _, y_pred_val = train_one_fold(
                    X_train[tr_idx], y_train[tr_idx],
                    X_train[val_idx], y_train[val_idx],
                    model_builder=model_builder,
                    lr=lr, epochs=2, device=device
                )

                cm = confusion_matrix_manual(y_train[val_idx], y_pred_val, labels=np.unique(y))
                acc, prec, rec, f1 = calc_metrics(cm)
                inner_scores.append(acc)

            mean_accs.append(np.mean(inner_scores))

        best_lr = candidate_lr[int(np.argmax(mean_accs))]
        print(f"Best LR = {best_lr:.0e}")

        # ---- outer test fold ----
        model, y_pred = train_one_fold(
            X_train, y_train, X_test, y_test,
            model_builder=model_builder,
            lr=best_lr, epochs=epochs, device=device
        )

        cm = confusion_matrix_manual(y_test, y_pred, labels=np.unique(y))
        acc, prec, rec, f1 = calc_metrics(cm)
        metrics_all.append([acc, prec, rec, f1])

        print(f"Fold {i+1}: Acc={acc:.3f}, P={prec:.3f}, R={rec:.3f}, F1={f1:.3f}")

    # ---- summary ----
    metrics_all = np.array(metrics_all)
    mean, std = metrics_all.mean(0), metrics_all.std(0)

    print("\n=== Nested CV Results ===")
    print(f"Accuracy : {mean[0]:.3f} ± {std[0]:.3f}")
    print(f"Precision: {mean[1]:.3f} ± {std[1]:.3f}")
    print(f"Recall   : {mean[2]:.3f} ± {std[2]:.3f}")
    print(f"F1-score : {mean[3]:.3f} ± {std[3]:.3f}")

    return mean, std

In [12]:
class DeepCNN(nn.Module):
    def __init__(self, n_classes):
        super().__init__()
        # Feature extractor
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, 3, padding=1),  # 32 filters
            nn.ReLU(),
            nn.MaxPool2d(2),                 # Downsample

            nn.Conv2d(32, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(64, 128, 3, padding=1),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.MaxPool2d(2)
        )

        self.flatten_dim = None
        self.classifier = None
        self.n_classes = n_classes

    def _get_flatten_dim(self, x):
        with torch.no_grad():
            f = self.features(x)
            return f.view(f.size(0), -1).shape[1]

    def forward(self, x):
        if self.classifier is None:
            flat_dim = self._get_flatten_dim(x)
            self.classifier = nn.Sequential(
                nn.Flatten(),
                nn.Linear(flat_dim, 128), nn.ReLU(),
                nn.Dropout(0.4),
                nn.Linear(128, self.n_classes)
            ).to(x.device)
        out = self.features(x)
        out = self.classifier(out)
        return out

In [None]:
# --- DeepCNN ---
print("\n### Evaluating DeepCNN ###")
deep_mean, deep_std = evaluate_model_nested_cv(
    X, y,
    model_builder=lambda: DeepCNN(num_classes),
    candidate_lr=[1e-3, 3e-4, 1e-4],
    k_outer=10,
    k_inner=3,
    epochs=3,
    device=device
)


### Evaluating DeepCNN ###

=== Outer Fold 1/10 ===
  Epoch 1/2, Train loss=0.8032
  Epoch 2/2, Train loss=0.6589
  Epoch 1/2, Train loss=0.8462
  Epoch 2/2, Train loss=0.6724
  Epoch 1/2, Train loss=0.8590
  Epoch 2/2, Train loss=0.6707
  Epoch 1/2, Train loss=0.9411
  Epoch 2/2, Train loss=0.7641
  Epoch 1/2, Train loss=0.9211
  Epoch 2/2, Train loss=0.7310
  Epoch 1/2, Train loss=0.9102
  Epoch 2/2, Train loss=0.7200
  Epoch 1/2, Train loss=1.0020
  Epoch 2/2, Train loss=0.8750
  Epoch 1/2, Train loss=1.0085
  Epoch 2/2, Train loss=0.8685
  Epoch 1/2, Train loss=1.0003
  Epoch 2/2, Train loss=0.8759
Best LR = 1e-03
  Epoch 1/3, Train loss=0.7936
  Epoch 2/3, Train loss=0.6336
  Epoch 3/3, Train loss=0.5724
Fold 1: Acc=0.816, P=0.822, R=0.816, F1=0.819

=== Outer Fold 2/10 ===
  Epoch 1/2, Train loss=0.8975
  Epoch 2/2, Train loss=0.7105
  Epoch 1/2, Train loss=0.8501
  Epoch 2/2, Train loss=0.6593
  Epoch 1/2, Train loss=0.8332
