#### Imports


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

import matplotlib.pyplot as plt
import numpy as np
import os
import json
from datetime import datetime

#### Device Init

In [None]:
# Pick the compute device once: the GPU when CUDA is usable, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print("Using device:", device)

#### CNN Def

In [None]:
class Net(nn.Module):
    """Small CNN: two conv + max-pool stages followed by two linear layers.

    The size of the first linear layer is derived by pushing a dummy
    ``1 x 3 x 32 x 32`` tensor through the conv stack, so the network is
    built for 3-channel 32x32 inputs and emits 26 logits per sample.

    Args:
        activation: Name of the non-linearity shared by every layer. One of
            ``"relu"``, ``"leaky_relu"``, ``"gelu"``, ``"tanh"``, ``"sigmoid"``.
        dropout_rate: Probability passed to ``nn.Dropout``; applied to the
            flattened conv features before the first linear layer.
        num_filters: Output channel counts of the first and second conv layer.
        kernel_sizes: Kernel sizes of the first and second conv layer.

    Raises:
        ValueError: If ``activation`` is not one of the supported names.
    """

    # Name -> module class. Kept as classes so each Net gets its own instance.
    _ACTIVATIONS = {
        "relu": nn.ReLU,
        "leaky_relu": nn.LeakyReLU,
        "gelu": nn.GELU,
        "tanh": nn.Tanh,
        "sigmoid": nn.Sigmoid,
    }

    def __init__(self,
                 activation="relu",
                 dropout_rate=0.0,
                 num_filters=(16, 32),
                 kernel_sizes=(3, 3)):

        super().__init__()

        # select activation func
        try:
            self.act = self._ACTIVATIONS[activation]()
        except (KeyError, TypeError):
            # TypeError covers unhashable values such as a list.
            supported = ", ".join(sorted(self._ACTIVATIONS))
            raise ValueError(
                f"Unknown activation {activation!r}; expected one of: {supported}"
            ) from None

        # conv stack
        self.conv1 = nn.Conv2d(3, num_filters[0], kernel_size=kernel_sizes[0])
        self.conv2 = nn.Conv2d(num_filters[0], num_filters[1], kernel_size=kernel_sizes[1])

        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(dropout_rate)

        # final flat size: run a single dummy image through the conv stack so
        # the linear layer adapts to whatever filters/kernels were requested
        example = torch.zeros(1, 3, 32, 32)
        with torch.no_grad():
            example = self.pool(self.act(self.conv1(example)))
            example = self.pool(self.act(self.conv2(example)))
            flat_size = example.numel()

        self.fc1 = nn.Linear(flat_size, 128)

        # 26 ASL letters total
        self.fc2 = nn.Linear(128, 26)

    def forward(self, x):
        """Return raw class logits of shape ``(batch, 26)`` for a batch ``x``."""
        x = self.pool(self.act(self.conv1(x)))
        x = self.pool(self.act(self.conv2(x)))
        x = x.view(x.size(0), -1)  # flatten everything except the batch dim
        x = self.dropout(x)
        x = self.act(self.fc1(x))
        x = self.fc2(x)
        return x

#### Hyperparameter Test Set

In [None]:
def _hp(activation="relu", dropout=0.25, lr=0.001, weight_decay=0.0, filters=(16, 32)):
    """Build one hyperparameter dict; every run in this sweep uses Adam."""
    return {
        "activation": activation,
        "dropout": dropout,
        "lr": lr,
        "optimizer": "adam",
        "weight_decay": weight_decay,
        "filters": list(filters),  # fresh list per config
    }


# dropout modifications
group_A = [_hp(dropout=rate) for rate in (0.0, 0.1, 0.25, 0.5, 0.75)]

# learning rate modifications
group_B = [_hp(lr=rate) for rate in (0.0001, 0.0005, 0.001, 0.005, 0.01)]

# activation function modifications
group_C = [
    _hp(activation=name, weight_decay=0.001, filters=(32, 64))
    for name in ("relu", "leaky_relu", "gelu", "tanh", "sigmoid")
]

# filter size modifications
group_D = [
    _hp(weight_decay=0.0001, filters=pair)
    for pair in ((8, 16), (16, 32), (32, 64), (64, 128), (96, 192))
]

# Full sweep, in group order.
hyperparameter_sets = [*group_A, *group_B, *group_C, *group_D]



#### run_id gen

In [None]:

def make_run_id(params):
    """Return the identifier string for one hyperparameter configuration.

    The id is a ``_``-joined sequence of ``<tag>-<value>`` pairs covering
    activation, dropout, learning rate, optimizer, weight decay and filters,
    in that order. ``params`` must contain all six keys.
    """
    tagged_keys = (
        ("a", "activation"),
        ("d", "dropout"),
        ("lr", "lr"),
        ("opt", "optimizer"),
        ("wd", "weight_decay"),
        ("f", "filters"),
    )
    return "_".join(f"{tag}-{params[key]}" for tag, key in tagged_keys)

#### Directory for Results


In [None]:
# Create the results directory. exist_ok avoids the check-then-create race and
# makes re-running this cell a no-op when the directory is already there.
os.makedirs("experiment_runs", exist_ok=True)

#### Optimizer

In [None]:
def create_optimizer(params, name, lr, weight_decay):
    """Build the optimizer selected by ``name`` for the given parameters.

    Args:
        params: Iterable of parameters to optimize (e.g. ``net.parameters()``).
        name: ``"adam"`` or ``"sgd"``. SGD is created with momentum 0.9.
        lr: Learning rate.
        weight_decay: Weight-decay coefficient passed to the optimizer.

    Returns:
        An ``optim.Adam`` or ``optim.SGD`` instance.

    Raises:
        ValueError: If ``name`` is not a supported optimizer.
    """
    if name == "adam":
        return optim.Adam(params, lr=lr, weight_decay=weight_decay)
    if name == "sgd":
        return optim.SGD(params, lr=lr, momentum=0.9, weight_decay=weight_decay)
    # A single formatted message; passing two args would render as a tuple.
    raise ValueError(f"Unknown optimizer: {name!r}")

#### Training Func

In [None]:
def _evaluate_accuracy(net, loader):
    """Return the top-1 accuracy of ``net`` over ``loader`` as a percentage.

    Puts ``net`` into eval mode and runs without gradient tracking.
    """
    net.eval()
    hits = 0
    seen = 0
    with torch.no_grad():
        for data in loader:
            images, labels = data[0].to(device), data[1].to(device)
            _, predicted = net(images).max(1)
            seen += labels.size(0)
            hits += predicted.eq(labels).sum().item()
    return 100 * hits / seen


def train_model(run_id, params, trainloader, testloader, epochs=15):
    """Train one ``Net`` configuration and save its weights and metrics.

    Args:
        run_id: Identifier used in log lines and in the output file names.
        params: Dict with keys ``"activation"``, ``"dropout"``, ``"filters"``,
            ``"optimizer"``, ``"lr"`` and ``"weight_decay"``.
        trainloader: Iterable of batches; each batch is indexed as
            ``batch[0]`` (inputs) and ``batch[1]`` (labels). Must support
            ``len()``.
        testloader: Iterable of batches with the same layout, evaluated after
            every epoch.
        epochs: Number of passes over ``trainloader``.

    Returns:
        Dict with ``"params"`` and per-epoch lists ``"train_loss"``,
        ``"train_acc"`` and ``"test_acc"`` (accuracies in percent). The same
        dict is written to ``experiment_runs/history_run_<run_id>.json`` and
        the model state dict to ``experiment_runs/model_run_<run_id>.pt``.

    Raises:
        ZeroDivisionError: If either loader yields no samples.
    """

    # record storage
    train_losses = []
    train_accs = []
    test_accs = []

    # model
    net = Net(
        activation=params["activation"],
        dropout_rate=params["dropout"],
        num_filters=params["filters"]
    ).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = create_optimizer(net.parameters(), params["optimizer"],
                                 params["lr"], params["weight_decay"])

    for epoch in range(epochs):

        # for analytics
        running_loss = 0.0        # summed over the whole epoch
        temp_running_loss = 0.0   # summed over the current 2000-batch window
        correct = 0
        total = 0

        net.train()
        for i, data in enumerate(trainloader):

            inputs, labels = data[0].to(device), data[1].to(device)

            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Read the scalar once; each .item() is a device-to-host transfer.
            batch_loss = loss.item()
            running_loss += batch_loss
            temp_running_loss += batch_loss

            # accuracy
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

            if i % 2000 == 1999:
                print(f"[Run {run_id}] Epoch {epoch+1}, Batch {i+1} — Loss: {temp_running_loss/2000:.3f}")
                temp_running_loss = 0.0

        # epoch metrics
        train_losses.append(running_loss / len(trainloader))
        train_accs.append(100 * correct / total)

        # test loop
        test_acc = _evaluate_accuracy(net, testloader)
        test_accs.append(test_acc)

        print(f"[Run {run_id}] Epoch {epoch+1} — Train Acc: {train_accs[-1]:.2f}%  Test Acc: {test_acc:.2f}%")

    print("Finished Training Run", run_id)

    # Make sure the output directory exists so a finished run is never lost
    # to a missing folder.
    os.makedirs("experiment_runs", exist_ok=True)

    # save model
    model_path = f"experiment_runs/model_run_{run_id}.pt"
    torch.save(net.state_dict(), model_path)

    # save metrics
    history = {
        "params": params,
        "train_loss": train_losses,
        "train_acc": train_accs,
        "test_acc": test_accs
    }

    json_path = f"experiment_runs/history_run_{run_id}.json"
    with open(json_path, "w") as f:
        json.dump(history, f, indent=4)

    return history

#### Run All

In [None]:
# Train every configuration in the sweep and collect the histories by run id.
all_histories = {}

for hp in hyperparameter_sets:

    run_id = make_run_id(hp)

    # Identical configurations map to the same run id (and the same output
    # files), so training them again would only overwrite the earlier result.
    if run_id in all_histories:
        print("Skipping duplicate run", run_id)
        continue

    print("\n------------------------------------")
    print("Starting Run", run_id)
    print("Hyperparameters:", hp)
    print("------------------------------------\n")

    history = train_model(run_id, hp, trainloader, testloader, epochs=50)
    all_histories[run_id] = history

#### Graphs

In [None]:
import json

def load_all_histories():
    """Load every saved training history from ``experiment_runs``.

    Files named ``history_run_<run_id>.json`` are read; everything else in
    the directory is ignored. Files are visited in sorted name order.

    Returns:
        Dict mapping run id to the decoded JSON history. The run id is the
        text between the ``history_run_`` prefix and the ``.json`` suffix;
        purely numeric ids are returned as ``int``, all others as ``str``.
    """
    prefix, suffix = "history_run_", ".json"
    histories = {}
    for file in sorted(os.listdir("experiment_runs")):
        if not (file.startswith(prefix) and file.endswith(suffix)):
            continue
        # Strip prefix/suffix instead of splitting on "_" and ".": run ids
        # themselves contain both characters (e.g. "a-relu_d-0.25_...").
        stem = file[len(prefix):-len(suffix)]
        run_id = int(stem) if stem.isdecimal() else stem
        with open(os.path.join("experiment_runs", file), "r") as f:
            histories[run_id] = json.load(f)
    return histories

histories = load_all_histories()

# plot: one figure per run, loss on the left and accuracy on the right
for run_id, h in histories.items():
    fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(10, 4))

    # loss
    loss_ax.plot(h["train_loss"])
    loss_ax.set_title(f"Run {run_id} – Train Loss")
    loss_ax.set_xlabel("Epoch")
    loss_ax.set_ylabel("Loss")

    # accuracy
    for key, label in (("train_acc", "Train Acc"), ("test_acc", "Test Acc")):
        acc_ax.plot(h[key], label=label)
    acc_ax.set_title(f"Run {run_id} – Accuracy")
    acc_ax.set_xlabel("Epoch")
    acc_ax.set_ylabel("Accuracy (%)")
    acc_ax.legend()

    plt.show()
