In [1]:
import os
import pickle
import numpy as np
import pandas as pd

from glob import glob
from datetime import datetime
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split
from torchvision import transforms

from PIL import Image

In [None]:
def load_cifar10(data_path="cifar-10-batches-py"):
    X_train_list, y_train_list = [], []

    for i in range(1, 6):
        with open(os.path.join(data_path, f"data_batch_{i}"), "rb") as f:
            batch = pickle.load(f, encoding='bytes')
            X_train_list.append(batch[b'data'])
            y_train_list.append(batch[b'labels'])

    X_train = np.vstack(X_train_list).astype(np.float32) / 255.0
    y_train = np.hstack(y_train_list)

    with open(os.path.join(data_path, "test_batch"), "rb") as f:
        batch = pickle.load(f, encoding='bytes')
        X_test = batch[b'data'].astype(np.float32) / 255.0
        y_test = np.array(batch[b'labels'])

    X_train = X_train.reshape(-1, 3, 32, 32)
    X_test = X_test.reshape(-1, 3, 32, 32)

    X_train_tensor = torch.tensor(X_train)
    y_train_tensor = torch.tensor(y_train, dtype=torch.long)

    X_test_tensor = torch.tensor(X_test)
    y_test_tensor = torch.tensor(y_test, dtype=torch.long)

    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

    train_size = int(0.8 * len(train_dataset))
    val_size = len(train_dataset) - train_size
    train_dataset, val_dataset = random_split(
        train_dataset, [train_size, val_size])

    return train_dataset, val_dataset, test_dataset


def load_cats_dogs(folder_path="dogs-vs-cats/train", img_size=(64, 64)):
    image_paths = glob(os.path.join(folder_path, "*.jpg"))
    images, labels = [], []

    transform = transforms.Compose([
        transforms.Resize(img_size),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    for path in image_paths:
        img = Image.open(path).convert("RGB")
        img_tensor = transform(img)
        images.append(img_tensor)

        if "cat" in os.path.basename(path).lower():
            labels.append(0)
        elif "dog" in os.path.basename(path).lower():
            labels.append(1)
        else:
            continue

    images_tensor = torch.stack(images)
    labels_tensor = torch.tensor(labels, dtype=torch.long)
    dataset = TensorDataset(images_tensor, labels_tensor)

    train_size = int(0.8 * len(dataset))
    val_size = len(dataset) - train_size
    train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

    return train_dataset, val_dataset

In [3]:
class CustomCNN(nn.Module):
    def __init__(self, num_classes=10, activation="relu", input_shape=(3, 32, 32)):
        super().__init__()

        act = activation.lower()
        if act == "relu":
            self.act = nn.ReLU()
        elif act == "tanh":
            self.act = nn.Tanh()
        elif act == "leaky_relu":
            self.act = nn.LeakyReLU()
        else:
            raise ValueError("Invalid activation")

        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.pool = nn.MaxPool2d(2, 2)

        self.flatten_size = self._get_flatten_size(input_shape)

        self.fc1 = nn.Linear(self.flatten_size, 256)
        self.drop = nn.Dropout(0.5)
        self.fc2 = nn.Linear(256, num_classes)

    def _get_flatten_size(self, input_shape):
        with torch.no_grad():
            x = torch.zeros(1, *input_shape)
            x = self.pool(self.act(self.bn1(self.conv1(x))))
            x = self.pool(self.act(self.bn2(self.conv2(x))))
            x = self.pool(self.act(self.bn3(self.conv3(x))))
            return x.numel()

    def forward(self, x):
        x = self.pool(self.act(self.bn1(self.conv1(x))))
        x = self.pool(self.act(self.bn2(self.conv2(x))))
        x = self.pool(self.act(self.bn3(self.conv3(x))))
        x = x.view(x.size(0), -1)
        x = self.drop(self.act(self.fc1(x)))
        x = self.fc2(x)
        return x


def init_weights(model, method="xavier"):
    for m in model.modules():
        if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
            if method == "xavier":
                nn.init.xavier_uniform_(m.weight)
            elif method == "kaiming":
                nn.init.kaiming_uniform_(m.weight, nonlinearity='relu')
            elif method == "random":
                nn.init.normal_(m.weight, 0.0, 0.05)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)


def get_optimizer(model, method="adam", lr=0.001):
    method = method.lower()
    if method == "sgd":
        return optim.SGD(model.parameters(), lr=lr, momentum=0.9)
    elif method == "adam":
        return optim.Adam(model.parameters(), lr=lr)
    elif method == "rmsprop":
        return optim.RMSprop(model.parameters(), lr=lr)
    else:
        raise ValueError("Invalid optimizer")

In [4]:
def train(model, train_loader, val_loader, optimizer, criterion, epochs=10,
          data="default", config_name="default", device="cuda"):
    os.makedirs(f"models/{data}", exist_ok=True)
    model.to(device)
    best_val_acc = 0.0

    for _ in tqdm(range(epochs)):
        model.train()
        running_loss = 0
        for batch in train_loader:
            inputs, labels = batch[:2]
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        model.eval()
        correct, total = 0, 0
        with torch.no_grad():
            for batch in val_loader:
                inputs, labels = batch[:2]
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        val_acc = 100 * correct / total if total > 0 else 0
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(),
                       f"models/{data}/model_{config_name}_best.pth")

    print(
        f"Best Validation Accuracy for {config_name}: {best_val_acc:.2f}% \nFinished: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    torch.save(model.state_dict(),
               f"models/{data}/model_{config_name}_final.pth")


def evaluate_models(model_class, configs, dataset_loaders, device='mps', save_csv='results.csv'):
    results = []
    criterion = nn.CrossEntropyLoss()

    for config in configs:
        dataset_name = config['dataset']
        val_loader = dataset_loaders[dataset_name]['val']

        model = model_class(num_classes=config['num_classes'],
                            activation=config['activation'],
                            input_shape=config['input_shape'])
        model.to(device)

        model_path = os.path.join(
            "models", dataset_name, f"model_{config['activation']}_{config['init']}_{config['optimizer']}_best.pth")
        if not os.path.exists(model_path):
            print(f"Model file not found: {model_path}, skipping...")
            continue

        model.load_state_dict(torch.load(model_path, map_location=device))
        model.eval()

        running_loss = 0
        correct = 0
        total = 0

        with torch.no_grad():
            for batch in val_loader:
                inputs, labels = batch[:2]
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                running_loss += loss.item()

                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        avg_loss = running_loss / len(val_loader)
        accuracy = 100 * correct / total

        results.append({
            'dataset': 'Cifar-10' if dataset_name == 'cifar' else 'Dogs vs Cats',
            'activation': config['activation'],
            'init': config['init'],
            'optimizer': config['optimizer'],
            'accuracy': accuracy,
            'val_loss': avg_loss
        })

        print(f"[{dataset_name}] Config: {config['activation']}_{config['init']}_{config['optimizer']} | Accuracy: {accuracy:.2f}% | Loss: {avg_loss:.4f}")

    df = pd.DataFrame(results)
    df.to_csv(save_csv, index=False)
    print(f"Results saved to {save_csv}")


In [5]:
cifar_train, cifar_val, cifar_test = load_cifar10()
cifar_trainloader = DataLoader(cifar_train, batch_size=64, shuffle=True)
cifar_valloader = DataLoader(cifar_val, batch_size=64, shuffle=False)

dvc_train, dvc_val = load_cats_dogs()
dvc_trainloader = DataLoader(dvc_train, batch_size=64, shuffle=True)
dvc_valloader = DataLoader(dvc_val, batch_size=64, shuffle=False)

activations = ["relu", "tanh", "leaky_relu"]
inits = ["xavier", "kaiming", "random"]
optimizers = ["sgd", "adam", "rmsprop"]

device = "mps" if torch.backends.mps.is_available() else "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

Using device: mps


In [6]:
for act in activations:
    for init in inits:
        for opt in optimizers:
            for dataset_name in ["cifar", "dvc"]:
                config_name = f"{act}_{init}_{opt}"
                if os.path.exists(f"models/{dataset_name}/model_{config_name}_best.pth"):
                    print(
                        f"✅ Model already trained: {dataset_name} with config {config_name}, skipping...")
                    continue
                
                print(
                    f"\n✅ Running config: {config_name} on {dataset_name} \nStarted: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
                if dataset_name == "cifar":
                    num_classes = 10
                    train_loader, val_loader = cifar_trainloader, cifar_valloader
                else:
                    num_classes = 2
                    train_loader, val_loader = dvc_trainloader, dvc_valloader

                model = CustomCNN(num_classes=num_classes, activation=act, input_shape=(
                    3, 32, 32) if dataset_name == "cifar" else (3, 64, 64))
                init_weights(model, method=init)
                optimizer = get_optimizer(model, method=opt, lr=0.001)
                criterion = nn.CrossEntropyLoss()

                train(model, train_loader, val_loader, optimizer, criterion,
                        epochs=10, data=dataset_name, config_name=config_name, device=device)


✅ Running config: relu_xavier_sgd on cifar 
Started: 2026-02-06 19:27:56


100%|██████████| 10/10 [03:45<00:00, 22.55s/it]


Best Validation Accuracy for relu_xavier_sgd: 68.57% 
Finished: 2026-02-06 19:31:42

✅ Running config: relu_xavier_sgd on dvc 
Started: 2026-02-06 19:31:42


100%|██████████| 10/10 [04:53<00:00, 29.32s/it]


Best Validation Accuracy for relu_xavier_sgd: 81.24% 
Finished: 2026-02-06 19:36:35

✅ Running config: relu_xavier_adam on cifar 
Started: 2026-02-06 19:36:35


100%|██████████| 10/10 [02:06<00:00, 12.68s/it]


Best Validation Accuracy for relu_xavier_adam: 74.61% 
Finished: 2026-02-06 19:38:42

✅ Running config: relu_xavier_adam on dvc 
Started: 2026-02-06 19:38:42


100%|██████████| 10/10 [03:27<00:00, 20.78s/it]


Best Validation Accuracy for relu_xavier_adam: 85.26% 
Finished: 2026-02-06 19:42:10

✅ Running config: relu_xavier_rmsprop on cifar 
Started: 2026-02-06 19:42:10


100%|██████████| 10/10 [01:57<00:00, 11.74s/it]


Best Validation Accuracy for relu_xavier_rmsprop: 74.24% 
Finished: 2026-02-06 19:44:08

✅ Running config: relu_xavier_rmsprop on dvc 
Started: 2026-02-06 19:44:08


100%|██████████| 10/10 [03:12<00:00, 19.28s/it]


Best Validation Accuracy for relu_xavier_rmsprop: 84.28% 
Finished: 2026-02-06 19:47:21

✅ Running config: relu_kaiming_sgd on cifar 
Started: 2026-02-06 19:47:21


100%|██████████| 10/10 [02:04<00:00, 12.49s/it]


Best Validation Accuracy for relu_kaiming_sgd: 66.86% 
Finished: 2026-02-06 19:49:26

✅ Running config: relu_kaiming_sgd on dvc 
Started: 2026-02-06 19:49:26


100%|██████████| 10/10 [03:43<00:00, 22.31s/it]


Best Validation Accuracy for relu_kaiming_sgd: 80.66% 
Finished: 2026-02-06 19:53:09

✅ Running config: relu_kaiming_adam on cifar 
Started: 2026-02-06 19:53:09


100%|██████████| 10/10 [01:59<00:00, 11.97s/it]


Best Validation Accuracy for relu_kaiming_adam: 72.04% 
Finished: 2026-02-06 19:55:09

✅ Running config: relu_kaiming_adam on dvc 
Started: 2026-02-06 19:55:09


100%|██████████| 10/10 [03:15<00:00, 19.58s/it]


Best Validation Accuracy for relu_kaiming_adam: 84.84% 
Finished: 2026-02-06 19:58:25

✅ Running config: relu_kaiming_rmsprop on cifar 
Started: 2026-02-06 19:58:25


100%|██████████| 10/10 [01:51<00:00, 11.17s/it]


Best Validation Accuracy for relu_kaiming_rmsprop: 74.31% 
Finished: 2026-02-06 20:00:16

✅ Running config: relu_kaiming_rmsprop on dvc 
Started: 2026-02-06 20:00:16


100%|██████████| 10/10 [03:13<00:00, 19.35s/it]


Best Validation Accuracy for relu_kaiming_rmsprop: 83.90% 
Finished: 2026-02-06 20:03:30

✅ Running config: relu_random_sgd on cifar 
Started: 2026-02-06 20:03:30


100%|██████████| 10/10 [01:44<00:00, 10.49s/it]


Best Validation Accuracy for relu_random_sgd: 63.47% 
Finished: 2026-02-06 20:05:15

✅ Running config: relu_random_sgd on dvc 
Started: 2026-02-06 20:05:15


100%|██████████| 10/10 [03:14<00:00, 19.46s/it]


Best Validation Accuracy for relu_random_sgd: 78.24% 
Finished: 2026-02-06 20:08:30

✅ Running config: relu_random_adam on cifar 
Started: 2026-02-06 20:08:30


100%|██████████| 10/10 [01:47<00:00, 10.73s/it]


Best Validation Accuracy for relu_random_adam: 73.08% 
Finished: 2026-02-06 20:10:17

✅ Running config: relu_random_adam on dvc 
Started: 2026-02-06 20:10:17


100%|██████████| 10/10 [03:17<00:00, 19.75s/it]


Best Validation Accuracy for relu_random_adam: 83.88% 
Finished: 2026-02-06 20:13:34

✅ Running config: relu_random_rmsprop on cifar 
Started: 2026-02-06 20:13:34


100%|██████████| 10/10 [01:52<00:00, 11.24s/it]


Best Validation Accuracy for relu_random_rmsprop: 74.43% 
Finished: 2026-02-06 20:15:27

✅ Running config: relu_random_rmsprop on dvc 
Started: 2026-02-06 20:15:27


100%|██████████| 10/10 [03:13<00:00, 19.32s/it]


Best Validation Accuracy for relu_random_rmsprop: 84.96% 
Finished: 2026-02-06 20:18:40

✅ Running config: tanh_xavier_sgd on cifar 
Started: 2026-02-06 20:18:40


100%|██████████| 10/10 [02:10<00:00, 13.08s/it]


Best Validation Accuracy for tanh_xavier_sgd: 68.63% 
Finished: 2026-02-06 20:20:51

✅ Running config: tanh_xavier_sgd on dvc 
Started: 2026-02-06 20:20:51


100%|██████████| 10/10 [02:54<00:00, 17.42s/it]


Best Validation Accuracy for tanh_xavier_sgd: 78.90% 
Finished: 2026-02-06 20:23:45

✅ Running config: tanh_xavier_adam on cifar 
Started: 2026-02-06 20:23:46


100%|██████████| 10/10 [01:27<00:00,  8.72s/it]


Best Validation Accuracy for tanh_xavier_adam: 72.53% 
Finished: 2026-02-06 20:25:13

✅ Running config: tanh_xavier_adam on dvc 
Started: 2026-02-06 20:25:13


100%|██████████| 10/10 [01:58<00:00, 11.81s/it]


Best Validation Accuracy for tanh_xavier_adam: 79.80% 
Finished: 2026-02-06 20:27:11

✅ Running config: tanh_xavier_rmsprop on cifar 
Started: 2026-02-06 20:27:11


100%|██████████| 10/10 [01:52<00:00, 11.28s/it]


Best Validation Accuracy for tanh_xavier_rmsprop: 70.88% 
Finished: 2026-02-06 20:29:04

✅ Running config: tanh_xavier_rmsprop on dvc 
Started: 2026-02-06 20:29:04


100%|██████████| 10/10 [01:59<00:00, 11.99s/it]


Best Validation Accuracy for tanh_xavier_rmsprop: 75.72% 
Finished: 2026-02-06 20:31:04

✅ Running config: tanh_kaiming_sgd on cifar 
Started: 2026-02-06 20:31:04


100%|██████████| 10/10 [01:49<00:00, 10.91s/it]


Best Validation Accuracy for tanh_kaiming_sgd: 64.92% 
Finished: 2026-02-06 20:32:53

✅ Running config: tanh_kaiming_sgd on dvc 
Started: 2026-02-06 20:32:53


100%|██████████| 10/10 [02:00<00:00, 12.02s/it]


Best Validation Accuracy for tanh_kaiming_sgd: 78.36% 
Finished: 2026-02-06 20:34:53

✅ Running config: tanh_kaiming_adam on cifar 
Started: 2026-02-06 20:34:53


100%|██████████| 10/10 [01:41<00:00, 10.12s/it]


Best Validation Accuracy for tanh_kaiming_adam: 71.08% 
Finished: 2026-02-06 20:36:35

✅ Running config: tanh_kaiming_adam on dvc 
Started: 2026-02-06 20:36:35


100%|██████████| 10/10 [02:02<00:00, 12.29s/it]


Best Validation Accuracy for tanh_kaiming_adam: 79.20% 
Finished: 2026-02-06 20:38:38

✅ Running config: tanh_kaiming_rmsprop on cifar 
Started: 2026-02-06 20:38:38


100%|██████████| 10/10 [01:52<00:00, 11.29s/it]


Best Validation Accuracy for tanh_kaiming_rmsprop: 70.63% 
Finished: 2026-02-06 20:40:31

✅ Running config: tanh_kaiming_rmsprop on dvc 
Started: 2026-02-06 20:40:31


100%|██████████| 10/10 [02:02<00:00, 12.26s/it]


Best Validation Accuracy for tanh_kaiming_rmsprop: 77.26% 
Finished: 2026-02-06 20:42:33

✅ Running config: tanh_random_sgd on cifar 
Started: 2026-02-06 20:42:34


100%|██████████| 10/10 [01:49<00:00, 10.90s/it]


Best Validation Accuracy for tanh_random_sgd: 64.16% 
Finished: 2026-02-06 20:44:23

✅ Running config: tanh_random_sgd on dvc 
Started: 2026-02-06 20:44:23


100%|██████████| 10/10 [01:59<00:00, 11.98s/it]


Best Validation Accuracy for tanh_random_sgd: 73.94% 
Finished: 2026-02-06 20:46:23

✅ Running config: tanh_random_adam on cifar 
Started: 2026-02-06 20:46:23


100%|██████████| 10/10 [01:29<00:00,  8.96s/it]


Best Validation Accuracy for tanh_random_adam: 69.71% 
Finished: 2026-02-06 20:47:52

✅ Running config: tanh_random_adam on dvc 
Started: 2026-02-06 20:47:52


100%|██████████| 10/10 [01:59<00:00, 11.90s/it]


Best Validation Accuracy for tanh_random_adam: 76.76% 
Finished: 2026-02-06 20:49:51

✅ Running config: tanh_random_rmsprop on cifar 
Started: 2026-02-06 20:49:51


100%|██████████| 10/10 [01:50<00:00, 11.10s/it]


Best Validation Accuracy for tanh_random_rmsprop: 70.34% 
Finished: 2026-02-06 20:51:42

✅ Running config: tanh_random_rmsprop on dvc 
Started: 2026-02-06 20:51:42


100%|██████████| 10/10 [01:59<00:00, 11.96s/it]


Best Validation Accuracy for tanh_random_rmsprop: 76.30% 
Finished: 2026-02-06 20:53:42

✅ Running config: leaky_relu_xavier_sgd on cifar 
Started: 2026-02-06 20:53:42


100%|██████████| 10/10 [01:40<00:00, 10.02s/it]


Best Validation Accuracy for leaky_relu_xavier_sgd: 68.77% 
Finished: 2026-02-06 20:55:23

✅ Running config: leaky_relu_xavier_sgd on dvc 
Started: 2026-02-06 20:55:23


100%|██████████| 10/10 [01:55<00:00, 11.54s/it]


Best Validation Accuracy for leaky_relu_xavier_sgd: 82.06% 
Finished: 2026-02-06 20:57:18

✅ Running config: leaky_relu_xavier_adam on cifar 
Started: 2026-02-06 20:57:18


100%|██████████| 10/10 [01:41<00:00, 10.17s/it]


Best Validation Accuracy for leaky_relu_xavier_adam: 77.13% 
Finished: 2026-02-06 20:59:00

✅ Running config: leaky_relu_xavier_adam on dvc 
Started: 2026-02-06 20:59:00


100%|██████████| 10/10 [01:55<00:00, 11.52s/it]


Best Validation Accuracy for leaky_relu_xavier_adam: 84.04% 
Finished: 2026-02-06 21:00:55

✅ Running config: leaky_relu_xavier_rmsprop on cifar 
Started: 2026-02-06 21:00:55


100%|██████████| 10/10 [01:37<00:00,  9.72s/it]


Best Validation Accuracy for leaky_relu_xavier_rmsprop: 77.47% 
Finished: 2026-02-06 21:02:32

✅ Running config: leaky_relu_xavier_rmsprop on dvc 
Started: 2026-02-06 21:02:32


100%|██████████| 10/10 [01:52<00:00, 11.23s/it]


Best Validation Accuracy for leaky_relu_xavier_rmsprop: 84.40% 
Finished: 2026-02-06 21:04:25

✅ Running config: leaky_relu_kaiming_sgd on cifar 
Started: 2026-02-06 21:04:25


100%|██████████| 10/10 [01:41<00:00, 10.16s/it]


Best Validation Accuracy for leaky_relu_kaiming_sgd: 66.30% 
Finished: 2026-02-06 21:06:06

✅ Running config: leaky_relu_kaiming_sgd on dvc 
Started: 2026-02-06 21:06:06


100%|██████████| 10/10 [01:54<00:00, 11.46s/it]


Best Validation Accuracy for leaky_relu_kaiming_sgd: 80.56% 
Finished: 2026-02-06 21:08:01

✅ Running config: leaky_relu_kaiming_adam on cifar 
Started: 2026-02-06 21:08:01


100%|██████████| 10/10 [01:38<00:00,  9.88s/it]


Best Validation Accuracy for leaky_relu_kaiming_adam: 74.94% 
Finished: 2026-02-06 21:09:40

✅ Running config: leaky_relu_kaiming_adam on dvc 
Started: 2026-02-06 21:09:40


100%|██████████| 10/10 [01:52<00:00, 11.28s/it]


Best Validation Accuracy for leaky_relu_kaiming_adam: 86.46% 
Finished: 2026-02-06 21:11:33

✅ Running config: leaky_relu_kaiming_rmsprop on cifar 
Started: 2026-02-06 21:11:33


100%|██████████| 10/10 [01:37<00:00,  9.77s/it]


Best Validation Accuracy for leaky_relu_kaiming_rmsprop: 74.51% 
Finished: 2026-02-06 21:13:10

✅ Running config: leaky_relu_kaiming_rmsprop on dvc 
Started: 2026-02-06 21:13:10


100%|██████████| 10/10 [01:51<00:00, 11.19s/it]


Best Validation Accuracy for leaky_relu_kaiming_rmsprop: 85.06% 
Finished: 2026-02-06 21:15:02

✅ Running config: leaky_relu_random_sgd on cifar 
Started: 2026-02-06 21:15:02


100%|██████████| 10/10 [01:39<00:00,  9.97s/it]


Best Validation Accuracy for leaky_relu_random_sgd: 64.96% 
Finished: 2026-02-06 21:16:42

✅ Running config: leaky_relu_random_sgd on dvc 
Started: 2026-02-06 21:16:42


100%|██████████| 10/10 [01:53<00:00, 11.31s/it]


Best Validation Accuracy for leaky_relu_random_sgd: 78.32% 
Finished: 2026-02-06 21:18:35

✅ Running config: leaky_relu_random_adam on cifar 
Started: 2026-02-06 21:18:35


100%|██████████| 10/10 [01:43<00:00, 10.30s/it]


Best Validation Accuracy for leaky_relu_random_adam: 75.52% 
Finished: 2026-02-06 21:20:18

✅ Running config: leaky_relu_random_adam on dvc 
Started: 2026-02-06 21:20:18


100%|██████████| 10/10 [02:04<00:00, 12.41s/it]


Best Validation Accuracy for leaky_relu_random_adam: 82.94% 
Finished: 2026-02-06 21:22:23

✅ Running config: leaky_relu_random_rmsprop on cifar 
Started: 2026-02-06 21:22:23


100%|██████████| 10/10 [01:42<00:00, 10.28s/it]


Best Validation Accuracy for leaky_relu_random_rmsprop: 74.51% 
Finished: 2026-02-06 21:24:06

✅ Running config: leaky_relu_random_rmsprop on dvc 
Started: 2026-02-06 21:24:06


100%|██████████| 10/10 [01:57<00:00, 11.79s/it]

Best Validation Accuracy for leaky_relu_random_rmsprop: 84.56% 
Finished: 2026-02-06 21:26:04





In [7]:
dataset_loaders = {
    'cifar': {'val': cifar_valloader},
    'dvc': {'val': dvc_valloader}
}

configs = []
activations = ["relu", "tanh", "leaky_relu"]
inits = ["xavier", "kaiming", "random"]
optimizers = ["sgd", "adam", "rmsprop"]

for act in activations:
    for init in inits:
        for opt in optimizers:
            configs.append({
                'dataset': 'cifar',
                'activation': act,
                'init': init,
                'optimizer': opt,
                'num_classes': 10,
                'input_shape': (3, 32, 32)
            })
            configs.append({
                'dataset': 'dvc',
                'activation': act,
                'init': init,
                'optimizer': opt,
                'num_classes': 2,
                'input_shape': (3, 64, 64)
            })

evaluate_models(CustomCNN, configs, dataset_loaders,
                device=device, save_csv='experiment_results.csv')

[cifar] Config: relu_xavier_sgd | Accuracy: 68.57% | Loss: 0.8959
[dvc] Config: relu_xavier_sgd | Accuracy: 81.24% | Loss: 0.4110
[cifar] Config: relu_xavier_adam | Accuracy: 74.61% | Loss: 0.7277
[dvc] Config: relu_xavier_adam | Accuracy: 85.26% | Loss: 0.3229
[cifar] Config: relu_xavier_rmsprop | Accuracy: 74.24% | Loss: 0.7441
[dvc] Config: relu_xavier_rmsprop | Accuracy: 84.28% | Loss: 0.3769
[cifar] Config: relu_kaiming_sgd | Accuracy: 66.86% | Loss: 0.9477
[dvc] Config: relu_kaiming_sgd | Accuracy: 80.66% | Loss: 0.4338
[cifar] Config: relu_kaiming_adam | Accuracy: 72.04% | Loss: 0.8423
[dvc] Config: relu_kaiming_adam | Accuracy: 84.84% | Loss: 0.3449
[cifar] Config: relu_kaiming_rmsprop | Accuracy: 74.31% | Loss: 0.7376
[dvc] Config: relu_kaiming_rmsprop | Accuracy: 83.90% | Loss: 0.3566
[cifar] Config: relu_random_sgd | Accuracy: 63.47% | Loss: 1.0293
[dvc] Config: relu_random_sgd | Accuracy: 78.24% | Loss: 0.4684
[cifar] Config: relu_random_adam | Accuracy: 73.08% | Loss: 0.77

In [8]:
df = pd.DataFrame(pd.read_csv('experiment_results.csv'))
df

Unnamed: 0,dataset,activation,init,optimizer,accuracy,val_loss
0,Cifar-10,relu,xavier,sgd,68.57,0.895872
1,Dogs vs Cats,relu,xavier,sgd,81.24,0.410965
2,Cifar-10,relu,xavier,adam,74.61,0.72774
3,Dogs vs Cats,relu,xavier,adam,85.26,0.32286
4,Cifar-10,relu,xavier,rmsprop,74.24,0.744053
5,Dogs vs Cats,relu,xavier,rmsprop,84.28,0.376858
6,Cifar-10,relu,kaiming,sgd,66.86,0.94766
7,Dogs vs Cats,relu,kaiming,sgd,80.66,0.433828
8,Cifar-10,relu,kaiming,adam,72.04,0.84234
9,Dogs vs Cats,relu,kaiming,adam,84.84,0.344865


In [9]:
best_cifar = df[df.dataset == "Cifar-10"].nlargest(1, "accuracy")
best_cifar

Unnamed: 0,dataset,activation,init,optimizer,accuracy,val_loss
40,Cifar-10,leaky_relu,xavier,rmsprop,77.47,0.682075


In [10]:
best_dvc = df[df.dataset == "Dogs vs Cats"].nlargest(1, "accuracy")
best_dvc

Unnamed: 0,dataset,activation,init,optimizer,accuracy,val_loss
45,Dogs vs Cats,leaky_relu,kaiming,adam,86.46,0.315837
