In [None]:
import os
import pickle
import numpy as np
import pandas as pd

from glob import glob
from datetime import datetime
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split
from torchvision import transforms

from PIL import Image

In [None]:
def get_cifar10_datasets(root_dir="cifar-10-batches-py", seed=42):
    """Load the pickled CIFAR-10 batches as train / validation / test datasets.

    Reads ``data_batch_1`` .. ``data_batch_5`` and ``test_batch`` from
    ``root_dir``, converts the pixel rows to ``float32``, divides them by 255
    and reshapes them to ``(N, 3, 32, 32)``.  The five training batches are
    concatenated and split 80/20 into training and validation subsets.

    Args:
        root_dir: Directory that contains the batch files.
        seed: Seed of the generator that drives the train/validation split.
            A fixed seed keeps the validation subset identical across kernel
            restarts, so checkpoints evaluated in a later session are not
            scored on images they were trained on.  Pass ``None`` to fall
            back to PyTorch's global random state.

    Returns:
        ``(train_dataset, validation_dataset, test_dataset)``.  The first two
        are the subsets produced by ``random_split``; the last one is a
        ``TensorDataset`` over the test batch.
    """

    def read_batch(file_name):
        # SECURITY: pickle.load can execute arbitrary code while unpickling.
        # Only point ``root_dir`` at batch files from a trusted source.
        with open(os.path.join(root_dir, file_name), "rb") as handle:
            content = pickle.load(handle, encoding="bytes")
        return content[b"data"], content[b"labels"]

    def to_image_tensor(pixel_rows):
        scaled = np.asarray(pixel_rows).astype(np.float32) / 255.0
        return torch.from_numpy(scaled.reshape(-1, 3, 32, 32))

    # ---- Load training batches ----
    train_blocks = []
    train_targets = []
    for batch_id in range(1, 6):
        pixels, labels = read_batch(f"data_batch_{batch_id}")
        train_blocks.append(pixels)
        train_targets.extend(labels)

    # ---- Load test batch ----
    test_pixels, test_targets = read_batch("test_batch")

    # ---- Convert to tensors ----
    train_tensor = to_image_tensor(np.concatenate(train_blocks, axis=0))
    train_labels = torch.tensor(np.array(train_targets), dtype=torch.long)

    test_tensor = to_image_tensor(test_pixels)
    test_labels = torch.tensor(np.array(test_targets), dtype=torch.long)

    # ---- Dataset creation ----
    full_train_dataset = TensorDataset(train_tensor, train_labels)
    test_dataset = TensorDataset(test_tensor, test_labels)

    # A dedicated generator keeps the split independent of whatever else has
    # consumed the global RNG before this call.
    split_kwargs = {}
    if seed is not None:
        split_kwargs["generator"] = torch.Generator().manual_seed(seed)

    split_index = int(len(full_train_dataset) * 0.8)
    train_dataset, validation_dataset = random_split(
        full_train_dataset,
        [split_index, len(full_train_dataset) - split_index],
        **split_kwargs
    )

    return train_dataset, validation_dataset, test_dataset


def get_catdog_datasets(data_dir="dogs-vs-cats/train", target_size=(64, 64),
                        seed=42):
    """Load the ``*.jpg`` files in ``data_dir`` as a cat/dog classification set.

    Every image is converted to RGB, resized to ``target_size``, turned into a
    tensor and normalised with mean 0.5 / std 0.5 per channel.  The label is
    derived from the lower-cased file name: ``0`` when it contains ``"cat"``,
    ``1`` when it contains ``"dog"`` (``"cat"`` is checked first); files
    matching neither are skipped.  All images are held in memory at once.

    Args:
        data_dir: Directory that is searched (non-recursively) for ``*.jpg``.
        target_size: Size passed to ``transforms.Resize``.
        seed: Seed of the generator that drives the 80/20 train/validation
            split, so the validation subset is the same on every run.  Pass
            ``None`` to use PyTorch's global random state instead.

    Returns:
        ``(train_set, val_set)`` as produced by ``random_split``.

    Raises:
        RuntimeError: If no usable image is found in ``data_dir``.
    """
    # glob returns files in filesystem order; sorting makes the sample order
    # (and with it the seeded split) independent of the machine.
    file_list = sorted(glob(os.path.join(data_dir, "*.jpg")))

    preprocessing = transforms.Compose([
        transforms.Resize(target_size),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5),
                             (0.5, 0.5, 0.5))
    ])

    processed_images = []
    class_ids = []

    for img_path in file_list:
        img_name = os.path.basename(img_path).lower()

        if "cat" in img_name:
            label_value = 0
        elif "dog" in img_name:
            label_value = 1
        else:
            # Neither class name appears in the file name: not a sample.
            continue

        # The context manager releases the file handle as soon as the image
        # has been decoded instead of leaving that to garbage collection.
        with Image.open(img_path) as raw_image:
            tensor_img = preprocessing(raw_image.convert("RGB"))

        processed_images.append(tensor_img)
        class_ids.append(label_value)

    if not processed_images:
        # torch.stack on an empty list fails with an unrelated-looking
        # message; report the actual problem instead.
        raise RuntimeError(
            f"No cat/dog .jpg images found in {data_dir!r}"
        )

    image_stack = torch.stack(processed_images)
    label_tensor = torch.tensor(class_ids, dtype=torch.long)

    dataset = TensorDataset(image_stack, label_tensor)

    split_kwargs = {}
    if seed is not None:
        split_kwargs["generator"] = torch.Generator().manual_seed(seed)

    cutoff = int(0.8 * len(dataset))
    train_set, val_set = random_split(
        dataset,
        [cutoff, len(dataset) - cutoff],
        **split_kwargs
    )

    return train_set, val_set


In [None]:
class ConvClassifier(nn.Module):
    """Three-stage convolutional classifier with a small dense head.

    Each stage is ``Conv2d(3x3, padding=1) -> BatchNorm2d -> activation ->
    MaxPool2d(2, 2)`` with 32, 64 and 128 output channels.  The flattened
    features go through ``Linear(., 256) -> activation -> Dropout(0.5) ->
    Linear(256, classes)``; ``forward`` returns the raw logits.

    Args:
        classes: Number of output logits.
        activation_type: ``"relu"``, ``"tanh"`` or ``"leaky_relu"``
            (case-insensitive).
        in_shape: Shape of one input sample as ``(channels, height, width)``.
            The channel count sizes the first convolution and the whole shape
            is used to size the first linear layer.

    Raises:
        ValueError: If ``activation_type`` is not one of the supported names.
    """

    def __init__(self, classes=10, activation_type="relu",
                 in_shape=(3, 32, 32)):
        super().__init__()

        self.activation = self._build_activation(activation_type)

        # ----- convolutional feature extractor -----
        # The input channel count comes from ``in_shape`` rather than being
        # fixed at 3, so non-RGB inputs work as well.
        self.block1 = nn.Sequential(
            nn.Conv2d(in_shape[0], 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32)
        )

        self.block2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64)
        )

        self.block3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128)
        )

        self.downsample = nn.MaxPool2d(2, 2)

        flattened = self._infer_flatten_dim(in_shape)

        # ----- classifier head -----
        self.hidden = nn.Linear(flattened, 256)
        self.regularizer = nn.Dropout(0.5)
        self.output = nn.Linear(256, classes)

    def _build_activation(self, name):
        """Return the activation module registered under ``name``."""
        activation_classes = {
            "relu": nn.ReLU,
            "tanh": nn.Tanh,
            "leaky_relu": nn.LeakyReLU
        }

        key = name.lower()
        if key not in activation_classes:
            raise ValueError("Invalid activation")

        return activation_classes[key]()

    def _infer_flatten_dim(self, shape):
        """Return the number of features the extractor yields for one sample.

        The probe runs in eval mode: in training mode the BatchNorm layers
        would fold the all-zero dummy batch into their running statistics,
        so a freshly built model would already carry altered buffers.
        """
        was_training = self.training
        self.eval()
        try:
            with torch.no_grad():
                probe = self._feature_pass(torch.zeros(1, *shape))
        finally:
            # Leave the module in whatever mode it was in before the probe.
            self.train(was_training)

        return probe.numel()

    def _feature_pass(self, x):
        """Apply the three conv/BN/activation/pool stages in order."""
        for block in (self.block1, self.block2, self.block3):
            x = self.downsample(self.activation(block(x)))
        return x

    def forward(self, x):
        """Map a batch of images to class logits."""
        x = self._feature_pass(x)
        x = torch.flatten(x, start_dim=1)
        x = self.regularizer(self.activation(self.hidden(x)))
        return self.output(x)


# -------------------------------------------------

def apply_weight_initialization(net, scheme="xavier"):
    """Re-initialise every ``Conv2d`` / ``Linear`` layer of ``net`` in place.

    Args:
        net: Module whose sub-modules are visited via ``net.modules()``.
        scheme: Case-insensitive name of the weight initialiser:
            ``"xavier"`` (``xavier_uniform_``), ``"kaiming"``
            (``kaiming_uniform_`` with ``nonlinearity="relu"``, regardless of
            the activation the network actually uses) or ``"random"``
            (normal, mean 0.0, std 0.05).

    Biases of the visited layers are set to zero.  Other layer types are left
    untouched.

    Raises:
        ValueError: If ``scheme`` is not one of the names above.  An unknown
            name used to be accepted silently and only zeroed the biases.
    """
    initializers = {
        "xavier": nn.init.xavier_uniform_,
        "kaiming": lambda weight: nn.init.kaiming_uniform_(
            weight,
            nonlinearity="relu"
        ),
        "random": lambda weight: nn.init.normal_(weight, mean=0.0, std=0.05),
    }

    scheme = scheme.lower()
    if scheme not in initializers:
        raise ValueError(f"Invalid initialization scheme: {scheme!r}")
    init_weight = initializers[scheme]

    for layer in net.modules():
        if isinstance(layer, (nn.Conv2d, nn.Linear)):
            init_weight(layer.weight)

            if layer.bias is not None:
                nn.init.constant_(layer.bias, 0.0)


# -------------------------------------------------

def build_optimizer(net, optimizer_name="adam", lr=1e-3):
    """Create an optimizer over ``net.parameters()``.

    Args:
        net: Module whose parameters will be optimised.
        optimizer_name: ``"sgd"`` (momentum 0.9), ``"adam"`` or ``"rmsprop"``;
            matched case-insensitively.
        lr: Learning rate handed to the optimizer.

    Returns:
        The constructed ``torch.optim`` optimizer.

    Raises:
        ValueError: If ``optimizer_name`` is not one of the supported names.
    """
    choice = optimizer_name.lower()

    if choice == "sgd":
        return optim.SGD(net.parameters(), lr=lr, momentum=0.9)

    if choice == "adam":
        return optim.Adam(net.parameters(), lr=lr)

    if choice == "rmsprop":
        return optim.RMSprop(net.parameters(), lr=lr)

    raise ValueError("Invalid optimizer")


In [None]:
def run_training_loop(net, train_dl, val_dl, opt, loss_fn,
                      num_epochs=10,
                      dataset_tag="default",
                      tag="default",
                      device="cuda"):
    """Train ``net`` and write its checkpoints to ``models/<dataset_tag>/``.

    Every epoch performs one pass over ``train_dl`` followed by an accuracy
    measurement on ``val_dl``.  Whenever the validation accuracy exceeds the
    best value seen so far (starting from 0.0), the weights are saved as
    ``model_<tag>_best.pth``.  After the last epoch the best accuracy is
    printed and the current weights are saved as ``model_<tag>_final.pth``.

    Args:
        net: Model to train; it is moved to ``device`` in place.
        train_dl: Iterable of batches whose first two items are inputs and
            targets.
        val_dl: Iterable of validation batches with the same layout.
        opt: Optimizer that updates the parameters of ``net``.
        loss_fn: Callable ``loss_fn(predictions, targets)`` returning the
            loss tensor to back-propagate.
        num_epochs: Number of training epochs.
        dataset_tag: Sub-directory of ``models`` used for the checkpoints.
        tag: Run identifier embedded in the checkpoint file names.
        device: Device the model and the batches are moved to.
    """
    save_dir = os.path.join("models", dataset_tag)
    os.makedirs(save_dir, exist_ok=True)

    # Both paths are fixed for the whole run, so they are built once.
    best_path = os.path.join(save_dir, f"model_{tag}_best.pth")
    final_path = os.path.join(save_dir, f"model_{tag}_final.pth")

    net.to(device)
    highest_acc = 0.0

    for _ in tqdm(range(num_epochs)):

        # ---------- training phase ----------
        net.train()

        for batch_data in train_dl:
            x, y = batch_data[:2]
            x, y = x.to(device), y.to(device)

            opt.zero_grad()

            # The loss is not accumulated: the running total was never read
            # and calling .item() on it forced a device sync on every step.
            loss = loss_fn(net(x), y)

            loss.backward()
            opt.step()

        # ---------- validation phase ----------
        net.eval()
        correct_preds = 0
        sample_count = 0

        with torch.no_grad():
            for batch_data in val_dl:
                x, y = batch_data[:2]
                x, y = x.to(device), y.to(device)

                logits = net(x)
                _, pred_labels = torch.max(logits, dim=1)

                sample_count += y.size(0)
                correct_preds += (pred_labels == y).sum().item()

        # An empty validation loader counts as 0 % rather than dividing by 0.
        accuracy = (100.0 * correct_preds / sample_count) if sample_count else 0.0

        if accuracy > highest_acc:
            highest_acc = accuracy
            torch.save(net.state_dict(), best_path)

    print(
        f"Best Validation Accuracy for {tag}: {highest_acc:.2f}% "
        f"\nFinished: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
    )

    torch.save(net.state_dict(), final_path)


# ------------------------------------------------------------


def benchmark_saved_models(model_ctor,
                           experiment_configs,
                           loaders,
                           device="mps",
                           output_csv="results.csv"):
    """Evaluate saved "best" checkpoints and write the results to a CSV file.

    For every configuration the checkpoint
    ``models/<dataset>/model_<activation>_<init>_<optimizer>_best.pth`` is
    loaded into a freshly constructed model and scored on
    ``loaders[<dataset>]["val"]``.  Configurations without a checkpoint file
    are reported and skipped.

    Args:
        model_ctor: Callable invoked as ``model_ctor(num_classes, activation,
            input_shape)``.  The arguments are passed positionally because
            ``ConvClassifier`` names them ``classes`` / ``activation_type`` /
            ``in_shape``; the former ``num_classes=`` / ``activation=`` /
            ``input_shape=`` keywords made that call raise ``TypeError``.
        experiment_configs: Iterable of dicts with the keys ``"dataset"``,
            ``"activation"``, ``"init"``, ``"optimizer"``, ``"num_classes"``
            and ``"input_shape"``.
        loaders: Mapping ``dataset -> {"val": loader}``.
        device: Device used for the model and the batches.
        output_csv: Path of the CSV file to write.

    The CSV has the columns ``dataset``, ``activation``, ``init``,
    ``optimizer``, ``accuracy`` (percent) and ``val_loss`` (mean of the
    per-batch losses).  The dataset column reads ``"Cifar-10"`` for the
    ``"cifar"`` key and ``"Dogs vs Cats"`` for any other key.
    """
    result_columns = [
        "dataset", "activation", "init", "optimizer", "accuracy", "val_loss"
    ]

    summary = []
    loss_fn = nn.CrossEntropyLoss()

    for cfg in experiment_configs:

        ds_name = cfg["dataset"]
        validation_dl = loaders[ds_name]["val"]
        config_tag = f"{cfg['activation']}_{cfg['init']}_{cfg['optimizer']}"

        weight_file = os.path.join(
            "models",
            ds_name,
            f"model_{config_tag}_best.pth"
        )

        # Check for the checkpoint first so that no model is built (and moved
        # to the device) for a configuration that is skipped anyway.
        if not os.path.exists(weight_file):
            print(f"Model file not found: {weight_file}, skipping...")
            continue

        model = model_ctor(
            cfg["num_classes"],
            cfg["activation"],
            cfg["input_shape"]
        ).to(device)

        model.load_state_dict(
            torch.load(weight_file, map_location=device)
        )
        model.eval()

        total_loss = 0.0
        total_correct = 0
        total_samples = 0
        batch_count = 0

        with torch.no_grad():
            for batch_data in validation_dl:
                x, y = batch_data[:2]
                x, y = x.to(device), y.to(device)

                logits = model(x)
                total_loss += loss_fn(logits, y).item()
                batch_count += 1

                _, pred = torch.max(logits, dim=1)
                total_samples += y.size(0)
                total_correct += (pred == y).sum().item()

        # An empty loader yields NaN loss / 0 % accuracy instead of raising
        # ZeroDivisionError in the middle of the benchmark.
        mean_loss = total_loss / batch_count if batch_count else float("nan")
        acc = 100.0 * total_correct / total_samples if total_samples else 0.0

        dataset_label = (
            "Cifar-10" if ds_name == "cifar"
            else "Dogs vs Cats"
        )

        summary.append({
            "dataset": dataset_label,
            "activation": cfg["activation"],
            "init": cfg["init"],
            "optimizer": cfg["optimizer"],
            "accuracy": acc,
            "val_loss": mean_loss
        })

        print(
            f"[{ds_name}] Config: "
            f"{config_tag} | "
            f"Accuracy: {acc:.2f}% | Loss: {mean_loss:.4f}"
        )

    # Explicit columns keep the CSV header intact even when every
    # configuration was skipped and ``summary`` is empty.
    df = pd.DataFrame(summary, columns=result_columns)
    df.to_csv(output_csv, index=False)

    print(f"Results saved to {output_csv}")


In [None]:
# ---------- dataset preparation ----------

# One batch size shared by all four loaders below.
BATCH_SIZE = 64

# CIFAR-10: the loader function also returns the test split, which is not
# wrapped in a DataLoader here.
cifar_train_set, cifar_val_set, cifar_test_set = get_cifar10_datasets()
cifar_train_dl = DataLoader(cifar_train_set, batch_size=BATCH_SIZE, shuffle=True)
cifar_val_dl = DataLoader(cifar_val_set, batch_size=BATCH_SIZE, shuffle=False)

# Dogs vs Cats: only the training batches are shuffled, the validation order
# stays fixed.
catdog_train_set, catdog_val_set = get_catdog_datasets()
catdog_train_dl = DataLoader(catdog_train_set, batch_size=BATCH_SIZE, shuffle=True)
catdog_val_dl = DataLoader(catdog_val_set, batch_size=BATCH_SIZE, shuffle=False)


# ---------- experiment configuration ----------

# The experiment grid is the cross product of these three axes.
activation_choices = ["relu", "tanh", "leaky_relu"]
initialization_methods = ["xavier", "kaiming", "random"]
optimizer_choices = ["sgd", "adam", "rmsprop"]


# ---------- compute device selection ----------

# Preference order: MPS first, then CUDA, with the CPU as the fallback.
compute_device = "cpu"
if torch.backends.mps.is_available():
    compute_device = "mps"
elif torch.cuda.is_available():
    compute_device = "cuda"

print(f"Using device: {compute_device}")


  batch = pickle.load(f, encoding='bytes')
  batch = pickle.load(f, encoding='bytes')


Using device: mps


In [None]:
# Per-dataset settings, built once instead of on every grid iteration.
dataset_settings = {
    "cifar": {
        "classes": 10,
        "train": cifar_train_dl,
        "val": cifar_val_dl,
        "shape": (3, 32, 32),
    },
    "dvc": {
        "classes": 2,
        "train": catdog_train_dl,
        "val": catdog_val_dl,
        "shape": (3, 64, 64),
    },
}

for activation_name in activation_choices:
    for init_scheme in initialization_methods:
        for optimizer_name in optimizer_choices:
            for dataset_id, settings in dataset_settings.items():

                run_tag = f"{activation_name}_{init_scheme}_{optimizer_name}"

                # A run only counts as done once its *final* checkpoint
                # exists.  The "best" checkpoint is written during training,
                # so checking for it would make an interrupted run look
                # finished and it would never be resumed.
                checkpoint_path = os.path.join(
                    "models",
                    dataset_id,
                    f"model_{run_tag}_final.pth"
                )

                if os.path.exists(checkpoint_path):
                    print(
                        f"✅ Model already trained: {dataset_id} "
                        f"with config {run_tag}, skipping..."
                    )
                    continue

                print(
                    f"\n✅ Running config: {run_tag} on {dataset_id}\n"
                    f"Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
                )

                net = ConvClassifier(
                    classes=settings["classes"],
                    activation_type=activation_name,
                    in_shape=settings["shape"]
                )

                apply_weight_initialization(
                    net,
                    scheme=init_scheme
                )

                optimizer = build_optimizer(
                    net,
                    optimizer_name=optimizer_name,
                    lr=1e-3
                )

                loss_function = nn.CrossEntropyLoss()

                run_training_loop(
                    net,
                    settings["train"],
                    settings["val"],
                    optimizer,
                    loss_function,
                    num_epochs=10,
                    dataset_tag=dataset_id,
                    tag=run_tag,
                    device=compute_device
                )


✅ Model already trained: cifar with config relu_xavier_sgd, skipping...
✅ Model already trained: dvc with config relu_xavier_sgd, skipping...
✅ Model already trained: cifar with config relu_xavier_adam, skipping...
✅ Model already trained: dvc with config relu_xavier_adam, skipping...
✅ Model already trained: cifar with config relu_xavier_rmsprop, skipping...
✅ Model already trained: dvc with config relu_xavier_rmsprop, skipping...
✅ Model already trained: cifar with config relu_kaiming_sgd, skipping...
✅ Model already trained: dvc with config relu_kaiming_sgd, skipping...
✅ Model already trained: cifar with config relu_kaiming_adam, skipping...
✅ Model already trained: dvc with config relu_kaiming_adam, skipping...
✅ Model already trained: cifar with config relu_kaiming_rmsprop, skipping...
✅ Model already trained: dvc with config relu_kaiming_rmsprop, skipping...
✅ Model already trained: cifar with config relu_random_sgd, skipping...
✅ Model already trained: dvc with config relu_rand

In [None]:
# ---------- validation loader registry ----------

# benchmark_saved_models looks the loader up as registry[dataset]["val"].
validation_registry = {
    "cifar": {"val": cifar_val_dl},
    "dvc": {"val": catdog_val_dl}
}


# ---------- experiment configuration builder ----------

# (dataset key, number of classes, input shape) for each benchmarked dataset.
dataset_specs = (
    ("cifar", 10, (3, 32, 32)),
    ("dvc", 2, (3, 64, 64)),
)

# One entry per (activation, init, optimizer, dataset) combination; for each
# hyper-parameter triple the CIFAR entry precedes the Dogs-vs-Cats entry.
experiment_grid = []

for activation_name in activation_choices:
    for init_scheme in initialization_methods:
        for optimizer_name in optimizer_choices:
            for dataset_key, class_total, sample_shape in dataset_specs:
                experiment_grid.append({
                    "dataset": dataset_key,
                    "activation": activation_name,
                    "init": init_scheme,
                    "optimizer": optimizer_name,
                    "num_classes": class_total,
                    "input_shape": sample_shape
                })


# ---------- evaluation execution ----------

benchmark_saved_models(
    ConvClassifier,
    experiment_grid,
    validation_registry,
    device=compute_device,
    output_csv="experiment_results.csv"
)


[cifar] Config: relu_xavier_sgd | Accuracy: 73.53% | Loss: 0.7427
[dvc] Config: relu_xavier_sgd | Accuracy: 87.50% | Loss: 0.3020
[cifar] Config: relu_xavier_adam | Accuracy: 77.60% | Loss: 0.6383
[dvc] Config: relu_xavier_adam | Accuracy: 87.70% | Loss: 0.2929
[cifar] Config: relu_xavier_rmsprop | Accuracy: 80.74% | Loss: 0.5714
[dvc] Config: relu_xavier_rmsprop | Accuracy: 86.14% | Loss: 0.3036
[cifar] Config: relu_kaiming_sgd | Accuracy: 70.41% | Loss: 0.8383
[dvc] Config: relu_kaiming_sgd | Accuracy: 87.68% | Loss: 0.3176
[cifar] Config: relu_kaiming_adam | Accuracy: 78.00% | Loss: 0.6295
[dvc] Config: relu_kaiming_adam | Accuracy: 88.64% | Loss: 0.2590
[cifar] Config: relu_kaiming_rmsprop | Accuracy: 80.14% | Loss: 0.5628
[dvc] Config: relu_kaiming_rmsprop | Accuracy: 86.48% | Loss: 0.3128
[cifar] Config: relu_random_sgd | Accuracy: 70.15% | Loss: 0.8540
[dvc] Config: relu_random_sgd | Accuracy: 83.46% | Loss: 0.3798
[cifar] Config: relu_random_adam | Accuracy: 80.31% | Loss: 0.55

In [None]:
# read_csv already returns a DataFrame, so no extra constructor call is needed.
df = pd.read_csv('experiment_results.csv')
df

Unnamed: 0,dataset,activation,init,optimizer,accuracy,val_loss
0,Cifar-10,relu,xavier,sgd,73.53,0.742685
1,Dogs vs Cats,relu,xavier,sgd,87.5,0.302032
2,Cifar-10,relu,xavier,adam,77.6,0.638299
3,Dogs vs Cats,relu,xavier,adam,87.7,0.292867
4,Cifar-10,relu,xavier,rmsprop,80.74,0.571371
5,Dogs vs Cats,relu,xavier,rmsprop,86.14,0.303589
6,Cifar-10,relu,kaiming,sgd,70.41,0.838343
7,Dogs vs Cats,relu,kaiming,sgd,87.68,0.317576
8,Cifar-10,relu,kaiming,adam,78.0,0.629464
9,Dogs vs Cats,relu,kaiming,adam,88.64,0.259028


In [None]:
# Row with the highest validation accuracy among the CIFAR-10 runs.
best_cifar = df.loc[df["dataset"] == "Cifar-10"].nlargest(1, "accuracy")
best_cifar

Unnamed: 0,dataset,activation,init,optimizer,accuracy,val_loss
52,Cifar-10,leaky_relu,random,rmsprop,84.03,0.463258


In [None]:
# Row with the highest validation accuracy among the Dogs vs Cats runs.
best_dvc = df.loc[df["dataset"] == "Dogs vs Cats"].nlargest(1, "accuracy")
best_dvc

Unnamed: 0,dataset,activation,init,optimizer,accuracy,val_loss
45,Dogs vs Cats,leaky_relu,kaiming,adam,89.74,0.234952
