In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/shoe-images/train_B.npz
/kaggle/input/shoe-images/test.npz
/kaggle/input/shoe-images/train_A.npz


In [2]:
import os, random, numpy as np
import torch
from torchvision import models
from torch import nn, optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import gc

In [3]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)

Using device: cuda


In [4]:
data_dir = "/kaggle/input/shoe-images"
data = np.load(os.path.join(data_dir, "train_B.npz"))  # or train_A.npz
X, y = data["X"], data["y"]
print("Loaded:", X.shape, y.shape)

# ---------- encode string labels to ints ----------
encoder = LabelEncoder()
y = encoder.fit_transform(y)            # e.g. Boot→0, Sandal→1, Shoe→2
print("Label mapping:", dict(zip(encoder.classes_,
                                 range(len(encoder.classes_)))))

# ---------- normalise images ----------
X = X.astype("float32") / 255.0

# ---------- reshape for PyTorch (N,C,H,W) ----------
X = np.transpose(X, (0,3,1,2))
y = y.astype("int64")
num_classes = len(np.unique(y))
print("Final tensors:", X.shape, "Classes:", num_classes)

Loaded: (12000, 224, 224, 3) (12000,)
Label mapping: {'Boot': 0, 'Sandal': 1, 'Shoe': 2}
Final tensors: (12000, 3, 224, 224) Classes: 3


In [9]:
def make_folds(n, k=10, seed=42, y=None):
    """
    Create stratified k-fold splits that preserve class distribution.
    
    Parameters:
    - n: Total number of samples
    - k: Number of folds (default=10)
    - seed: Random seed for reproducibility (default=42)
    - y: Labels for stratification (required)
    
    Returns:
    - List of k arrays containing indices for each fold
    """
    np.random.seed(seed)
    
    unique_classes = np.unique(y)
    folds = [[] for _ in range(k)]
    
    # For each class, split its samples across k folds
    for cls in unique_classes:
        cls_indices = np.where(y == cls)[0]
        np.random.shuffle(cls_indices)
        cls_splits = np.array_split(cls_indices, k)
        
        # Add class samples to each fold
        for fold_idx, split in enumerate(cls_splits):
            folds[fold_idx].extend(split)
    
    # Shuffle within each fold and convert to numpy arrays
    for i in range(k):
        np.random.shuffle(folds[i])
        folds[i] = np.array(folds[i])
    
    return folds

In [10]:
def confusion_matrix_manual(y_true, y_pred, labels):
    n = len(labels)
    label_to_idx = {lab: i for i, lab in enumerate(labels)}
    cm = np.zeros((n, n), dtype=int)
    for yt, yp in zip(y_true, y_pred):
        i = label_to_idx[yt]
        j = label_to_idx[yp]
        cm[i, j] += 1
    return cm

In [11]:
def calc_metrics(cm):
    TP = np.diag(cm)
    FP = cm.sum(0) - TP
    FN = cm.sum(1) - TP
    precision = np.mean(TP / (TP + FP + 1e-9))
    recall    = np.mean(TP / (TP + FN + 1e-9))
    f1 = 2 * precision * recall / (precision + recall + 1e-9)
    acc = TP.sum() / cm.sum()
    return acc, precision, recall, f1

In [12]:
def train_one_fold(X_train, y_train, X_val, y_val, model_builder,
                   lr=1e-3, epochs=5, batch=64, device="cpu"):
    """Train one fold and return model + predictions on validation set."""
    
    train_ds = TensorDataset(torch.tensor(X_train), torch.tensor(y_train))
    val_ds   = TensorDataset(torch.tensor(X_val), torch.tensor(y_val))
    train_dl = DataLoader(train_ds, batch_size=batch, shuffle=True)
    val_dl   = DataLoader(val_ds, batch_size=batch, shuffle=False)

    # note: difference here — build model dynamically
    model = model_builder().to(device)
    loss_fn = nn.CrossEntropyLoss()
    opt = optim.Adam(model.parameters(), lr=lr)

    for ep in range(epochs):
        model.train()
        total_loss = 0.0
        for xb, yb in train_dl:
            xb, yb = xb.to(device), yb.to(device)
            out = model(xb)
            loss = loss_fn(out, yb)
            opt.zero_grad()
            loss.backward()
            opt.step()
            total_loss += loss.item()
        avg_loss = total_loss / len(train_dl)
        print(f"  Epoch {ep+1}/{epochs}, Train loss={avg_loss:.4f}")

    # ----- validation predictions -----
    model.eval()
    preds = []
    with torch.no_grad():
        for xb, _ in val_dl:
            xb = xb.to(device)
            probs = torch.softmax(model(xb), 1)
            preds.append(torch.argmax(probs, 1).cpu().numpy())
    preds = np.concatenate(preds)
    return model, preds

In [13]:
def evaluate_model_nested_cv(
    X, y, model_builder,
    candidate_lr=[1e-3, 3e-4, 1e-4],
    k_outer=10, k_inner=3, epochs=5,
    device="cpu"
):
    """Generic nested cross‑validation for any model."""

    folds = make_folds(len(X), k_outer, seed=42, y=y)
    metrics_all = []

    for i in range(k_outer):
        print(f"\n=== Outer Fold {i+1}/{k_outer} ===")

        test_idx = folds[i]
        train_idx = np.concatenate([folds[j] for j in range(k_outer) if j != i])
        X_train, y_train = X[train_idx], y[train_idx]
        X_test,  y_test  = X[test_idx],  y[test_idx]

        # ---- inner loop: tuning learning rate ----
        inner_folds = make_folds(len(X_train), k_inner, seed=42, y=y_train)
        mean_accs = []

        for lr in candidate_lr:
            inner_scores = []
            for j in range(k_inner):
                val_idx = inner_folds[j]
                tr_idx  = np.concatenate([inner_folds[m] for m in range(k_inner) if m != j])

                _, y_pred_val = train_one_fold(
                    X_train[tr_idx], y_train[tr_idx],
                    X_train[val_idx], y_train[val_idx],
                    model_builder=model_builder,
                    lr=lr, epochs=2, device=device
                )

                cm = confusion_matrix_manual(y_train[val_idx], y_pred_val, labels=np.unique(y))
                acc, prec, rec, f1 = calc_metrics(cm)
                inner_scores.append(acc)

            mean_accs.append(np.mean(inner_scores))

        best_lr = candidate_lr[int(np.argmax(mean_accs))]
        print(f"Best LR = {best_lr:.0e}")

        # ---- outer test fold ----
        model, y_pred = train_one_fold(
            X_train, y_train, X_test, y_test,
            model_builder=model_builder,
            lr=best_lr, epochs=epochs, device=device
        )

        cm = confusion_matrix_manual(y_test, y_pred, labels=np.unique(y))
        acc, prec, rec, f1 = calc_metrics(cm)
        metrics_all.append([acc, prec, rec, f1])

        print(f"Fold {i+1}: Acc={acc:.3f}, P={prec:.3f}, R={rec:.3f}, F1={f1:.3f}")

    # ---- summary ----
    metrics_all = np.array(metrics_all)
    mean, std = metrics_all.mean(0), metrics_all.std(0)

    print("\n=== Nested CV Results ===")
    print(f"Accuracy : {mean[0]:.3f} ± {std[0]:.3f}")
    print(f"Precision: {mean[1]:.3f} ± {std[1]:.3f}")
    print(f"Recall   : {mean[2]:.3f} ± {std[2]:.3f}")
    print(f"F1-score : {mean[3]:.3f} ± {std[3]:.3f}")

    return mean, std

In [14]:
class DeepCNN(nn.Module):
    def __init__(self, n_classes):
        super().__init__()
        # Feature extractor
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, 3, padding=1),  # 32 filters
            nn.ReLU(),
            nn.MaxPool2d(2),                 # Downsample

            nn.Conv2d(32, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(64, 128, 3, padding=1),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.MaxPool2d(2)
        )

        self.flatten_dim = None
        self.classifier = None
        self.n_classes = n_classes

    def _get_flatten_dim(self, x):
        with torch.no_grad():
            f = self.features(x)
            return f.view(f.size(0), -1).shape[1]

    def forward(self, x):
        if self.classifier is None:
            flat_dim = self._get_flatten_dim(x)
            self.classifier = nn.Sequential(
                nn.Flatten(),
                nn.Linear(flat_dim, 128), nn.ReLU(),
                nn.Dropout(0.4),
                nn.Linear(128, self.n_classes)
            ).to(x.device)
        out = self.features(x)
        out = self.classifier(out)
        return out

In [11]:
# --- DeepCNN ---
print("\n### Evaluating DeepCNN ###")
deep_mean_B, deep_std_B = evaluate_model_nested_cv(
    X, y,
    model_builder=lambda: DeepCNN(num_classes),
    candidate_lr=[3e-3, 1e-3, 3e-4],
    k_outer=10,
    k_inner=3,
    epochs=3,
    device=device
)


### Evaluating DeepCNN ###

=== Outer Fold 1/10 ===
  Epoch 1/2, Train loss=0.8530
  Epoch 2/2, Train loss=0.6977
  Epoch 1/2, Train loss=0.8345
  Epoch 2/2, Train loss=0.6829
  Epoch 1/2, Train loss=0.8199
  Epoch 2/2, Train loss=0.6747
  Epoch 1/2, Train loss=0.8483
  Epoch 2/2, Train loss=0.6708
  Epoch 1/2, Train loss=0.8495
  Epoch 2/2, Train loss=0.6716
  Epoch 1/2, Train loss=0.8362
  Epoch 2/2, Train loss=0.6569
  Epoch 1/2, Train loss=0.9176
  Epoch 2/2, Train loss=0.7366
  Epoch 1/2, Train loss=0.9269
  Epoch 2/2, Train loss=0.7455
  Epoch 1/2, Train loss=0.9199
  Epoch 2/2, Train loss=0.7238
Best LR = 3e-04
  Epoch 1/3, Train loss=0.8641
  Epoch 2/3, Train loss=0.6765
  Epoch 3/3, Train loss=0.5997
Fold 1: Acc=0.787, P=0.791, R=0.787, F1=0.789

=== Outer Fold 2/10 ===
  Epoch 1/2, Train loss=0.8492
  Epoch 2/2, Train loss=0.7184
  Epoch 1/2, Train loss=0.8304
  Epoch 2/2, Train loss=0.6707
  Epoch 1/2, Train loss=0.8230
  Epoch 2/2, Train loss=0.6967
  Epoch 1/2, Train loss

In [None]:
import pickle

cv_results_B = {
    'mean': deep_mean_B,
    'std': deep_std_B,
    'accuracy': deep_mean_B[0],
    'precision': deep_mean_B[1],
    'recall': deep_mean_B[2],
    'f1': deep_mean_B[3]
}

with open('/kaggle/working/cv_results_trainB.pkl', 'wb') as f:
    pickle.dump(cv_results_B, f)

print("CV results saved!")

Training Final Model

In [7]:
def train_final_model(X, y, model_builder, lr=1e-3, epochs=5, batch=64, device="cpu"):
    """Train final model on full dataset (no validation split)."""
    
    train_ds = TensorDataset(torch.tensor(X), torch.tensor(y))
    train_dl = DataLoader(train_ds, batch_size=batch, shuffle=True)
    
    model = model_builder().to(device)
    loss_fn = nn.CrossEntropyLoss()
    opt = optim.Adam(model.parameters(), lr=lr)
    
    for ep in range(epochs):
        model.train()
        total_loss = 0.0
        for xb, yb in train_dl:
            xb, yb = xb.to(device), yb.to(device)
            out = model(xb)
            loss = loss_fn(out, yb)
            opt.zero_grad()
            loss.backward()
            opt.step()
            total_loss += loss.item()
        avg_loss = total_loss / len(train_dl)
        print(f"  Epoch {ep+1}/{epochs}, Train loss={avg_loss:.4f}")
    
    return model

In [15]:
# After nested CV
print("\n=== Training Final Model on Full Dataset ===")

final_model = train_final_model(
    X, y,
    model_builder=lambda: DeepCNN(num_classes),
    lr=1e-3,
    epochs=5,
    batch=64,
    device=device
)

torch.save(final_model.state_dict(), '/kaggle/working/deepcnn_final.pth')
print("Final model saved!")


=== Training Final Model on Full Dataset ===
  Epoch 1/5, Train loss=0.7666
  Epoch 2/5, Train loss=0.5881
  Epoch 3/5, Train loss=0.5281
  Epoch 4/5, Train loss=0.4872
  Epoch 5/5, Train loss=0.4560
Final model saved!


Testing Model

In [16]:
# ===== LOAD AND PREPARE TEST DATA =====
print("\n=== Loading Test Data ===")
test_data = np.load(os.path.join(data_dir, "test.npz"))
X_test, y_test = test_data["X"], test_data["y"]
print("Loaded test data:", X_test.shape, y_test.shape)

# Encode labels (same encoder from training)
y_test = encoder.transform(y_test)

# Normalize images
X_test = X_test.astype("float32") / 255.0

# Reshape for PyTorch (N,C,H,W)
X_test = np.transpose(X_test, (0,3,1,2))
y_test = y_test.astype("int64")

print("Test data prepared:", X_test.shape)

# ===== MAKE PREDICTIONS =====
print("\n=== Evaluating Final Model on Test Set ===")

test_ds = TensorDataset(torch.tensor(X_test), torch.tensor(y_test))
test_dl = DataLoader(test_ds, batch_size=64, shuffle=False)

final_model.eval()
y_pred_test = []

with torch.no_grad():
    for xb, _ in test_dl:
        xb = xb.to(device)
        probs = torch.softmax(final_model(xb), 1)
        y_pred_test.append(torch.argmax(probs, 1).cpu().numpy())

y_pred_test = np.concatenate(y_pred_test)

# ===== CALCULATE METRICS =====
cm_test = confusion_matrix_manual(y_test, y_pred_test, labels=np.unique(y_test))
acc_test, prec_test, rec_test, f1_test = calc_metrics(cm_test)

print(f"\n=== Test Set Results ===")
print(f"Accuracy : {acc_test:.3f}")
print(f"Precision: {prec_test:.3f}")
print(f"Recall   : {rec_test:.3f}")
print(f"F1-score : {f1_test:.3f}")

print(f"\nConfusion Matrix:")
print(cm_test)
print(f"Classes: {encoder.classes_}")

# ===== COMPARISON WITH CV =====
print(f"\n=== Performance Comparison ===")
print(f"CV Performance:   {0.808:.3f} ± {0.024:.3f}")
print(f"Test Performance: {acc_test:.3f}")
print(f"Difference:       {acc_test - 0.808:.3f}")


=== Loading Test Data ===
Loaded test data: (3000, 224, 224, 3) (3000,)
Test data prepared: (3000, 3, 224, 224)

=== Evaluating Final Model on Test Set ===

=== Test Set Results ===
Accuracy : 0.856
Precision: 0.860
Recall   : 0.856
F1-score : 0.858

Confusion Matrix:
[[874  67  59]
 [ 25 866 109]
 [ 22 150 828]]
Classes: ['Boot' 'Sandal' 'Shoe']

=== Performance Comparison ===
CV Performance:   0.808 ± 0.024
Test Performance: 0.856
Difference:       0.048


### Repeating the above, but this time using train_A (which is the image set with only clean images)

In [4]:
data_dir = "/kaggle/input/shoe-images"
data = np.load(os.path.join(data_dir, "train_A.npz"))  #changed to train_A.npz
X, y = data["X"], data["y"]
print("Loaded:", X.shape, y.shape)

# ---------- encode string labels to ints ----------
encoder = LabelEncoder()
y = encoder.fit_transform(y)            # e.g. Boot→0, Sandal→1, Shoe→2
print("Label mapping:", dict(zip(encoder.classes_,
                                 range(len(encoder.classes_)))))

# ---------- normalise images ----------
X = X.astype("float32") / 255.0

# ---------- reshape for PyTorch (N,C,H,W) ----------
X = np.transpose(X, (0,3,1,2))
y = y.astype("int64")
num_classes = len(np.unique(y))
print("Final tensors:", X.shape, "Classes:", num_classes)

Loaded: (12000, 224, 224, 3) (12000,)
Label mapping: {'Boot': 0, 'Sandal': 1, 'Shoe': 2}
Final tensors: (12000, 3, 224, 224) Classes: 3


In [11]:
# --- DeepCNN --- 
print("\n### Evaluating DeepCNN ###")
deep_mean_A, deep_std_A = evaluate_model_nested_cv(
    X, y,
    model_builder=lambda: DeepCNN(num_classes),
    candidate_lr=[3e-3, 1e-3, 3e-4],
    k_outer=10,
    k_inner=3,
    epochs=3,
    device=device
)


### Evaluating DeepCNN ###

=== Outer Fold 1/10 ===
  Epoch 1/2, Train loss=0.5666
  Epoch 2/2, Train loss=0.4016
  Epoch 1/2, Train loss=0.6100
  Epoch 2/2, Train loss=0.4277
  Epoch 1/2, Train loss=0.5683
  Epoch 2/2, Train loss=0.3877
  Epoch 1/2, Train loss=0.5922
  Epoch 2/2, Train loss=0.3824
  Epoch 1/2, Train loss=0.6348
  Epoch 2/2, Train loss=0.4321
  Epoch 1/2, Train loss=0.6062
  Epoch 2/2, Train loss=0.4086
  Epoch 1/2, Train loss=0.7221
  Epoch 2/2, Train loss=0.4885
  Epoch 1/2, Train loss=0.7342
  Epoch 2/2, Train loss=0.4893
  Epoch 1/2, Train loss=0.7527
  Epoch 2/2, Train loss=0.5159
Best LR = 1e-03
  Epoch 1/3, Train loss=0.5930
  Epoch 2/3, Train loss=0.3864
  Epoch 3/3, Train loss=0.3236
Fold 1: Acc=0.929, P=0.929, R=0.929, F1=0.929

=== Outer Fold 2/10 ===
  Epoch 1/2, Train loss=0.5528
  Epoch 2/2, Train loss=0.4022
  Epoch 1/2, Train loss=0.5897
  Epoch 2/2, Train loss=0.4258
  Epoch 1/2, Train loss=0.5813
  Epoch 2/2, Train loss=0.4219
  Epoch 1/2, Train loss

In [12]:
import pickle

cv_results_A = {
    'mean': deep_mean_A,
    'std': deep_std_A,
    'accuracy': deep_mean_A[0],
    'precision': deep_mean_A[1],
    'recall': deep_mean_A[2],
    'f1': deep_mean_A[3]
}

with open('/kaggle/working/cv_results_trainA.pkl', 'wb') as f:
    pickle.dump(cv_results_A, f)

print("CV results saved!")

CV results saved!


In [15]:
# After nested CV
print("\n=== Training Final Model on Full Dataset (Clean Images) ===")

final_model_clean = train_final_model(
    X, y,
    model_builder=lambda: DeepCNN(num_classes),
    lr=1e-3,
    epochs=5,
    batch=64,
    device=device
)

torch.save(final_model_clean.state_dict(), '/kaggle/working/deepcnn_final_clean.pth')
print("Final model saved!")


=== Training Final Model on Full Dataset (Clean Images) ===
  Epoch 1/5, Train loss=0.5589
  Epoch 2/5, Train loss=0.3616
  Epoch 3/5, Train loss=0.3032
  Epoch 4/5, Train loss=0.2716
  Epoch 5/5, Train loss=0.2443
Final model saved!


In [19]:
# ===== LOAD AND PREPARE TEST DATA =====
print("\n=== Loading Test Data (for Train_A Model) ===")
test_data = np.load(os.path.join(data_dir, "test.npz"))
X_test, y_test = test_data["X"], test_data["y"]
print("Loaded test data:", X_test.shape, y_test.shape)

# Encode labels (use encoder from train_A)
y_test = encoder.transform(y_test)  # Make sure this is the encoder fitted on train_A

# Normalize images
X_test = X_test.astype("float32") / 255.0

# Reshape for PyTorch (N,C,H,W)
X_test = np.transpose(X_test, (0,3,1,2))
y_test = y_test.astype("int64")

print("Test data prepared:", X_test.shape)

# ===== MAKE PREDICTIONS =====
print("\n=== Evaluating Train_A Model on Test Set ===")

test_ds = TensorDataset(torch.tensor(X_test), torch.tensor(y_test))
test_dl = DataLoader(test_ds, batch_size=64, shuffle=False)

final_model_clean.eval()  # ← CHANGED: Use train_A model
y_pred_test = []

with torch.no_grad():
    for xb, _ in test_dl:
        xb = xb.to(device)
        probs = torch.softmax(final_model_clean(xb), 1)  # ← CHANGED
        y_pred_test.append(torch.argmax(probs, 1).cpu().numpy())

y_pred_test = np.concatenate(y_pred_test)

# ===== CALCULATE METRICS =====
cm_test = confusion_matrix_manual(y_test, y_pred_test, labels=np.unique(y_test))
acc_test, prec_test, rec_test, f1_test = calc_metrics(cm_test)

print(f"\n=== Test Set Results (Train_A Model - Clean Training) ===")
print(f"Accuracy : {acc_test:.3f}")
print(f"Precision: {prec_test:.3f}")
print(f"Recall   : {rec_test:.3f}")
print(f"F1-score : {f1_test:.3f}")

print(f"\nConfusion Matrix:")
print(cm_test)
print(f"Classes: {encoder.classes_}")

# ===== COMPARISON WITH CV =====

# Load the saved CV results
import pickle
with open('/kaggle/working/cv_results_trainA.pkl', 'rb') as f:
    cv_results_A = pickle.load(f)

# Extract the variables you need
deep_mean_A = cv_results_A['mean']
deep_std_A = cv_results_A['std']

print(f"\n=== Performance Comparison (Train_A) ===")
print(f"CV Performance:   {deep_mean_A[0]:.3f} ± {deep_std_A[0]:.3f}")  # ← Use train_A CV results
print(f"Test Performance: {acc_test:.3f}")
print(f"Difference:       {acc_test - deep_mean_A[0]:.3f}")


=== Loading Test Data (for Train_A Model) ===
Loaded test data: (3000, 224, 224, 3) (3000,)
Test data prepared: (3000, 3, 224, 224)

=== Evaluating Train_A Model on Test Set ===

=== Test Set Results (Train_A Model - Clean Training) ===
Accuracy : 0.696
Precision: 0.711
Recall   : 0.696
F1-score : 0.704

Confusion Matrix:
[[661 251  88]
 [100 764 136]
 [ 84 252 664]]
Classes: ['Boot' 'Sandal' 'Shoe']

=== Performance Comparison (Train_A) ===
CV Performance:   0.907 ± 0.017
Test Performance: 0.696
Difference:       -0.211
