In [None]:
import os
import pickle
import numpy as np
import pandas as pd

from glob import glob
from datetime import datetime
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split
from torchvision import transforms

from PIL import Image

In [None]:
def load_cifar10(data_path="cifar-10-batches-py"):
    """Read the pickled CIFAR-10 batches and build train/val/test datasets.

    Args:
        data_path: Directory holding ``data_batch_1`` .. ``data_batch_5`` and
            ``test_batch``.

    Returns:
        ``(train_dataset, val_dataset, test_dataset)``. The five training
        batches are concatenated and randomly split 80/20 into train and
        validation subsets; the test batch becomes its own ``TensorDataset``.
        Images are float32 tensors reshaped to ``(3, 32, 32)`` after the raw
        values are divided by 255; labels are ``torch.long``.
    """
    def read_batch(file_name):
        # NOTE(review): pickle executes arbitrary code on load; only point
        # this at batch files from a trusted source.
        with open(os.path.join(data_path, file_name), "rb") as handle:
            return pickle.load(handle, encoding='bytes')

    def as_image_tensor(raw_pixels):
        # Rows are flat pixel vectors; scale first, then restore (C, H, W).
        scaled = raw_pixels.astype(np.float32) / 255.0
        return torch.tensor(scaled.reshape(-1, 3, 32, 32))

    train_batches = [read_batch(f"data_batch_{idx}") for idx in range(1, 6)]
    train_pixels = np.vstack([chunk[b'data'] for chunk in train_batches])
    train_targets = np.hstack([chunk[b'labels'] for chunk in train_batches])

    held_out = read_batch("test_batch")
    test_targets = np.array(held_out[b'labels'])

    full_train = TensorDataset(
        as_image_tensor(train_pixels),
        torch.tensor(train_targets, dtype=torch.long),
    )
    test_dataset = TensorDataset(
        as_image_tensor(held_out[b'data']),
        torch.tensor(test_targets, dtype=torch.long),
    )

    # 80% of the training samples are kept for training, the rest validate.
    n_train = int(0.8 * len(full_train))
    n_val = len(full_train) - n_train
    train_dataset, val_dataset = random_split(full_train, [n_train, n_val])

    return train_dataset, val_dataset, test_dataset


def load_cats_dogs(folder_path="dogs-vs-cats/train", img_size=(64, 64)):
    """Load the Dogs-vs-Cats JPEGs into memory and split them 80/20.

    Labels come from the file name: a name containing "cat" is class 0,
    otherwise a name containing "dog" is class 1. Files matching neither are
    skipped entirely.

    Args:
        folder_path: Directory that is searched (non-recursively) for
            ``*.jpg`` files.
        img_size: Target ``(height, width)`` passed to ``transforms.Resize``.

    Returns:
        ``(train_dataset, val_dataset)`` produced by ``random_split`` over a
        ``TensorDataset`` of normalised image tensors and ``torch.long``
        labels.

    Raises:
        RuntimeError: If no labelled ``*.jpg`` file is found in
            ``folder_path``.
    """
    # glob returns paths in arbitrary order; sorting keeps the sample index
    # of each image stable between runs.
    image_paths = sorted(glob(os.path.join(folder_path, "*.jpg")))

    transform = transforms.Compose([
        transforms.Resize(img_size),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    images, labels = [], []
    for path in image_paths:
        file_name = os.path.basename(path).lower()
        if "cat" in file_name:
            label = 0
        elif "dog" in file_name:
            label = 1
        else:
            # Decide on the label BEFORE storing the image: appending the
            # image first would leave images and labels with different
            # lengths and break TensorDataset.
            continue

        # The context manager releases the file handle even if decoding or
        # the transform raises.
        with Image.open(path) as img:
            images.append(transform(img.convert("RGB")))
        labels.append(label)

    if not images:
        raise RuntimeError(
            f"No cat/dog .jpg images found in '{folder_path}'")

    images_tensor = torch.stack(images)
    labels_tensor = torch.tensor(labels, dtype=torch.long)
    dataset = TensorDataset(images_tensor, labels_tensor)

    train_size = int(0.8 * len(dataset))
    val_size = len(dataset) - train_size
    train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

    return train_dataset, val_dataset

In [None]:
class CustomCNN(nn.Module):
    """Three conv/batch-norm/pool stages followed by a two-layer classifier.

    Args:
        num_classes: Size of the output layer.
        activation: One of ``"relu"``, ``"tanh"`` or ``"leaky_relu"``
            (case-insensitive); used after every batch norm and after ``fc1``.
        input_shape: ``(channels, height, width)`` of a single input image.
            It determines the input channels of the first convolution and the
            input width of ``fc1``.

    Raises:
        ValueError: If ``activation`` is not one of the supported names.
    """

    def __init__(self, num_classes=10, activation="relu", input_shape=(3, 32, 32)):
        super().__init__()

        act = activation.lower()
        if act == "relu":
            self.act = nn.ReLU()
        elif act == "tanh":
            self.act = nn.Tanh()
        elif act == "leaky_relu":
            self.act = nn.LeakyReLU()
        else:
            raise ValueError("Invalid activation")

        # Take the channel count from input_shape instead of hard-coding 3 so
        # non-RGB inputs work as well; the default shape keeps the old layout.
        self.conv1 = nn.Conv2d(input_shape[0], 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.pool = nn.MaxPool2d(2, 2)

        self.flatten_size = self._get_flatten_size(input_shape)

        self.fc1 = nn.Linear(self.flatten_size, 256)
        self.drop = nn.Dropout(0.5)
        self.fc2 = nn.Linear(256, num_classes)

    def _features(self, x):
        """Run the three conv -> batch norm -> activation -> pool stages."""
        x = self.pool(self.act(self.bn1(self.conv1(x))))
        x = self.pool(self.act(self.bn2(self.conv2(x))))
        x = self.pool(self.act(self.bn3(self.conv3(x))))
        return x

    def _get_flatten_size(self, input_shape):
        """Return the number of features the conv stack yields per sample.

        The probe runs in eval mode: a training-mode pass would update the
        batch-norm running statistics (and ``num_batches_tracked``) with an
        all-zero dummy image before any real data is seen.
        """
        was_training = self.training
        self.eval()
        try:
            with torch.no_grad():
                probe = torch.zeros(1, *input_shape)
                return self._features(probe).numel()
        finally:
            self.train(was_training)

    def forward(self, x):
        """Map a batch of images to ``(batch, num_classes)`` raw scores."""
        x = self._features(x)
        x = torch.flatten(x, 1)
        x = self.drop(self.act(self.fc1(x)))
        x = self.fc2(x)
        return x


def init_weights(model, method="xavier"):
    """Re-initialise every ``Conv2d`` and ``Linear`` layer of ``model`` in place.

    Args:
        model: Module whose sub-modules are visited via ``model.modules()``.
        method: ``"xavier"`` (Xavier uniform), ``"kaiming"`` (Kaiming uniform
            with the ReLU gain) or ``"random"`` (normal, mean 0, std 0.05);
            case-insensitive.

    Biases of the visited layers are set to zero.

    Raises:
        ValueError: If ``method`` is not a supported name. An unknown name
            used to leave the weights untouched while still zeroing biases.
    """
    initializers = {
        "xavier": nn.init.xavier_uniform_,
        "kaiming": lambda weight: nn.init.kaiming_uniform_(weight, nonlinearity='relu'),
        "random": lambda weight: nn.init.normal_(weight, 0.0, 0.05),
    }

    # Validate before touching the model so a typo cannot half-modify it.
    key = method.lower()
    if key not in initializers:
        raise ValueError("Invalid init method")
    init_fn = initializers[key]

    for m in model.modules():
        if isinstance(m, (nn.Conv2d, nn.Linear)):
            init_fn(m.weight)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)


def get_optimizer(model, method="adam", lr=0.001):
    """Build an optimizer over ``model.parameters()``.

    Args:
        model: Module whose parameters will be optimised.
        method: ``"sgd"`` (with momentum 0.9), ``"adam"`` or ``"rmsprop"``;
            matched case-insensitively.
        lr: Learning rate handed to the optimizer.

    Returns:
        The constructed ``torch.optim`` optimizer.

    Raises:
        ValueError: If ``method`` names no supported optimizer.
    """
    factories = {
        "sgd": lambda params: optim.SGD(params, lr=lr, momentum=0.9),
        "adam": lambda params: optim.Adam(params, lr=lr),
        "rmsprop": lambda params: optim.RMSprop(params, lr=lr),
    }

    build = factories.get(method.lower())
    if build is None:
        raise ValueError("Invalid optimizer")
    return build(model.parameters())

In [None]:
def train(model, train_loader, val_loader, optimizer, criterion, epochs=10,
          data="default", config_name="default", device="cuda"):
    """Train ``model`` and checkpoint its weights under ``models/<data>/``.

    After every epoch the model is scored on ``val_loader``; whenever the
    validation accuracy strictly improves, the state dict is written to
    ``model_<config_name>_best.pth``. The weights of the last epoch are always
    written to ``model_<config_name>_final.pth``.

    Args:
        model: Network to train; it is moved to ``device``.
        train_loader: Iterable of batches whose first two items are
            ``(inputs, labels)``.
        val_loader: Same batch layout, used for the per-epoch accuracy.
        optimizer: Optimizer already bound to ``model``'s parameters.
        criterion: Loss called as ``criterion(outputs, labels)``.
        epochs: Number of passes over ``train_loader``.
        data: Sub-directory of ``models/`` that receives the checkpoints.
        config_name: Tag embedded in the checkpoint file names.
        device: Device the model and every batch are moved to.
    """
    os.makedirs(f"models/{data}", exist_ok=True)
    model.to(device)
    best_val_acc = 0.0

    for _ in tqdm(range(epochs)):
        model.train()
        for batch in train_loader:
            inputs, labels = batch[:2]
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            loss = criterion(model(inputs), labels)
            loss.backward()
            optimizer.step()
            # The former running-loss accumulator was never read; dropping it
            # also removes a host/device sync (`loss.item()`) on every step.

        model.eval()
        correct, total = 0, 0
        with torch.no_grad():
            for batch in val_loader:
                inputs, labels = batch[:2]
                inputs, labels = inputs.to(device), labels.to(device)
                predicted = model(inputs).argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        # An empty validation loader counts as 0% rather than dividing by 0.
        val_acc = 100 * correct / total if total > 0 else 0
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(),
                       f"models/{data}/model_{config_name}_best.pth")

    print(
        f"Best Validation Accuracy for {config_name}: {best_val_acc:.2f}% \nFinished: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    torch.save(model.state_dict(),
               f"models/{data}/model_{config_name}_final.pth")


def evaluate_models(model_class, configs, dataset_loaders, device='mps', save_csv='results.csv'):
    """Score every saved ``*_best.pth`` checkpoint and write a CSV summary.

    Args:
        model_class: Callable accepting ``num_classes``, ``activation`` and
            ``input_shape`` keyword arguments and returning a module.
        configs: Iterable of dicts with the keys ``dataset``, ``activation``,
            ``init``, ``optimizer``, ``num_classes`` and ``input_shape``.
        dataset_loaders: Mapping ``dataset name -> {'val': loader}``.
        device: Device the model and batches are moved to.
        save_csv: Path of the CSV file that receives one row per evaluated
            checkpoint.

    Configurations whose checkpoint file is missing are reported and skipped.
    The reported loss is the mean over samples; if the validation loader
    yields no samples the accuracy is recorded as 0.0 and the loss as NaN.
    """
    results = []
    # Default reduction is the per-batch mean; it is re-weighted by batch
    # size below to obtain a true per-sample average.
    criterion = nn.CrossEntropyLoss()

    for config in configs:
        dataset_name = config['dataset']
        val_loader = dataset_loaders[dataset_name]['val']

        # Check for the checkpoint first so no model is built for configs
        # that get skipped anyway.
        model_path = os.path.join(
            "models", dataset_name, f"model_{config['activation']}_{config['init']}_{config['optimizer']}_best.pth")
        if not os.path.exists(model_path):
            print(f"Model file not found: {model_path}, skipping...")
            continue

        model = model_class(num_classes=config['num_classes'],
                            activation=config['activation'],
                            input_shape=config['input_shape'])
        model.to(device)

        # NOTE(review): torch.load unpickles the file; only checkpoints
        # written by this project should be placed under models/.
        model.load_state_dict(torch.load(model_path, map_location=device))
        model.eval()

        loss_sum = 0.0
        correct = 0
        total = 0

        with torch.no_grad():
            for batch in val_loader:
                inputs, labels = batch[:2]
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                batch_size = labels.size(0)

                # Weight each batch mean by its size: averaging batch means
                # would over-weight a smaller final batch.
                loss_sum += criterion(outputs, labels).item() * batch_size

                predicted = outputs.argmax(dim=1)
                total += batch_size
                correct += (predicted == labels).sum().item()

        if total > 0:
            avg_loss = loss_sum / total
            accuracy = 100 * correct / total
        else:
            # Nothing to evaluate: avoid ZeroDivisionError.
            avg_loss = float('nan')
            accuracy = 0.0

        results.append({
            'dataset': 'Cifar-10' if dataset_name == 'cifar' else 'Dogs vs Cats',
            'activation': config['activation'],
            'init': config['init'],
            'optimizer': config['optimizer'],
            'accuracy': accuracy,
            'val_loss': avg_loss
        })

        print(f"[{dataset_name}] Config: {config['activation']}_{config['init']}_{config['optimizer']} | Accuracy: {accuracy:.2f}% | Loss: {avg_loss:.4f}")

    df = pd.DataFrame(results)
    df.to_csv(save_csv, index=False)
    print(f"Results saved to {save_csv}")


In [None]:
# Seed PyTorch's global RNG before anything random happens. Checkpoints are
# cached on disk between kernel sessions, so the random train/val split must
# be identical after a restart; otherwise a cached model can end up being
# evaluated on samples it was trained on.
SEED = 42
torch.manual_seed(SEED)

BATCH_SIZE = 64

cifar_train, cifar_val, cifar_test = load_cifar10()
cifar_trainloader = DataLoader(cifar_train, batch_size=BATCH_SIZE, shuffle=True)
cifar_valloader = DataLoader(cifar_val, batch_size=BATCH_SIZE, shuffle=False)

dvc_train, dvc_val = load_cats_dogs()
dvc_trainloader = DataLoader(dvc_train, batch_size=BATCH_SIZE, shuffle=True)
dvc_valloader = DataLoader(dvc_val, batch_size=BATCH_SIZE, shuffle=False)

# Hyper-parameter grid swept by the training cell.
activations = ["relu", "tanh", "leaky_relu"]
inits = ["xavier", "kaiming", "random"]
optimizers = ["sgd", "adam", "rmsprop"]

# Prefer Apple's MPS backend, then CUDA, and fall back to the CPU.
if torch.backends.mps.is_available():
    device = "mps"
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

In [None]:
# Sweep every activation / init / optimizer combination on both datasets.
# A combination whose "best" checkpoint already exists on disk is skipped,
# which lets an interrupted sweep resume where it stopped.
dataset_setups = {
    # name: (num_classes, input_shape, train loader, validation loader)
    "cifar": (10, (3, 32, 32), cifar_trainloader, cifar_valloader),
    "dvc": (2, (3, 64, 64), dvc_trainloader, dvc_valloader),
}

for act in activations:
    for init in inits:
        for opt in optimizers:
            config_name = f"{act}_{init}_{opt}"
            for dataset_name in ("cifar", "dvc"):
                best_checkpoint = f"models/{dataset_name}/model_{config_name}_best.pth"
                if os.path.exists(best_checkpoint):
                    print(f"✅ Model already trained: {dataset_name} with config {config_name}, skipping...")
                    continue

                started_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                print(f"\n✅ Running config: {config_name} on {dataset_name} \nStarted: {started_at}")

                num_classes, input_shape, train_loader, val_loader = dataset_setups[dataset_name]

                # Fresh model, initialisation and optimizer for every run.
                model = CustomCNN(num_classes=num_classes,
                                  activation=act,
                                  input_shape=input_shape)
                init_weights(model, method=init)
                optimizer = get_optimizer(model, method=opt, lr=0.001)
                criterion = nn.CrossEntropyLoss()

                train(model, train_loader, val_loader, optimizer, criterion,
                      epochs=10, data=dataset_name, config_name=config_name,
                      device=device)

In [None]:
# Validation loaders keyed by the dataset names used in the configs below.
dataset_loaders = dict(
    cifar=dict(val=cifar_valloader),
    dvc=dict(val=dvc_valloader),
)

activations = ["relu", "tanh", "leaky_relu"]
inits = ["xavier", "kaiming", "random"]
optimizers = ["sgd", "adam", "rmsprop"]

# (dataset key, number of classes, input shape) for each evaluated dataset.
dataset_specs = (
    ('cifar', 10, (3, 32, 32)),
    ('dvc', 2, (3, 64, 64)),
)

# One config per hyper-parameter combination and dataset, CIFAR entry first.
configs = []
for act in activations:
    for init in inits:
        for opt in optimizers:
            for spec_name, spec_classes, spec_shape in dataset_specs:
                configs.append(dict(
                    dataset=spec_name,
                    activation=act,
                    init=init,
                    optimizer=opt,
                    num_classes=spec_classes,
                    input_shape=spec_shape,
                ))

evaluate_models(
    CustomCNN,
    configs,
    dataset_loaders,
    device=device,
    save_csv='experiment_results.csv',
)

In [None]:
# read_csv already returns a DataFrame, so no extra pd.DataFrame(...) wrap.
df = pd.read_csv('experiment_results.csv')
df

In [None]:
# Row with the highest accuracy among the CIFAR-10 results.
is_cifar = df["dataset"] == "Cifar-10"
best_cifar = df.loc[is_cifar].nlargest(1, "accuracy")
best_cifar

In [None]:
# Row with the highest accuracy among the Dogs vs Cats results.
is_dvc = df["dataset"] == "Dogs vs Cats"
best_dvc = df.loc[is_dvc].nlargest(1, "accuracy")
best_dvc