In [1]:
import os
import sys
import time
import pickle

sys.path.append("../src")

import torch
from torch import nn

import torchvision
import torchvision.transforms as transforms


from baseline_model import BaselineModel
from enhanced_model import EnhancedModel
from helper_functions import run_model


# Run on the GPU when torch reports one as available, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"


# Keyword arguments unpacked into every DataLoader constructed in this notebook.
data_loader_args = dict(
    batch_size=64,
    shuffle=True,
    num_workers=6,
    pin_memory=True,
    persistent_workers=True,
)


# Seed torch's CPU and CUDA random number generators with one fixed value.
seed = 213
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)

### Try to capture GPU utilization from btop during these tests

In [2]:
# Experiment "general_parameters": sweep BaselineModel over every
# (hidden_unit, learning rate) pair, training each pair `num_of_repeats` times.
# After each pair finishes, its results are written to the results pickle, so
# the file on disk always holds every completed configuration.

# initial setup
metadata = """"""
epochs = 30
lr = 0.01  # rebound by the learning-rate loop below
num_of_repeats = 5
dropout = 0.75
regularization = 0.001  # handed to SGD as weight_decay
hidden_units = [10, 20]  # [10, 20, 30, 40, 50, 60, 70, 80]
learning_rates = [0.01, 0.001]  # [0.01, 0.001, 0.0001]
experiment_dict = {
    "experiment name": "general_parameters",
    "experiments metadata": metadata,
}

# Single definition of the checkpoint location used by every read and write.
results_path = "../results/general_parameters.pkl"

# Refuse to overwrite results from an earlier run of this cell.
if os.path.exists(results_path):
    raise FileExistsError(
        "File already exists, this notebook is not intended to run multiple times"
    )
# `with` guarantees the handle is flushed and closed before the file is reread.
with open(results_path, "wb") as results_file:
    pickle.dump(experiment_dict, results_file)

# time and progress
start_time = time.time()
total_number_of_runs = num_of_repeats * len(hidden_units) * len(learning_rates)
number_of_completed_runs = 0

cinic_directory = "../data"
cinic_train = torch.utils.data.DataLoader(
    torchvision.datasets.ImageFolder(
        cinic_directory + "/train", transform=transforms.ToTensor()
    ),
    **data_loader_args,
)

cinic_test = torch.utils.data.DataLoader(
    torchvision.datasets.ImageFolder(
        cinic_directory + "/test", transform=transforms.ToTensor()
    ),
    **data_loader_args,
)

for hidden_unit in hidden_units:
    for lr in learning_rates:
        # Reload the checkpoint written by this cell (trusted, self-produced
        # pickle) so the new entry is added on top of what is already on disk.
        with open(results_path, "rb") as results_file:
            experiment_dict = pickle.load(results_file)
        metrics_list = []
        total_time_list = []
        for i in range(num_of_repeats):
            # Fresh model and optimizer for every repeat.
            model = BaselineModel(
                input_shape=3,
                hidden_units=hidden_unit,
                output_shape=10,
                dropout_p=dropout,
            ).to(device)

            loss_fn = nn.CrossEntropyLoss()
            optimizer = torch.optim.SGD(
                params=model.parameters(),
                lr=lr,
                momentum=0.9,
                weight_decay=regularization,
            )

            # run_model comes from helper_functions; it yields a metrics object
            # and a total time for the run (presumably it trains and evaluates
            # the model -- confirm against helper_functions).
            metrics, total_time = run_model(
                cinic_train,
                cinic_test,
                model,
                loss_fn,
                optimizer,
                device=device,
                epochs=epochs,
            )

            metrics_list.append(metrics)
            total_time_list.append(total_time)
            number_of_completed_runs += 1

        # add info about number of parameters
        # (counted on the model from the last repeat of this configuration)
        total_number_params = sum(p.numel() for p in model.parameters())
        experiment_dict[(hidden_unit, lr)] = (
            metrics_list,  # accuracy and loss for each epoch
            total_time_list,  # time it took to train
            total_number_params,  # number of parameters
        )
        # Persist after every configuration; closing the handle here ensures
        # the data is on disk before the next iteration reads it back.
        with open(results_path, "wb") as results_file:
            pickle.dump(experiment_dict, results_file)
        seconds = time.time() - start_time
        hours = int(seconds // 3600)
        minutes = int((seconds % 3600) // 60)
        print(
            f"{number_of_completed_runs}/{total_number_of_runs} ({round(100* number_of_completed_runs/total_number_of_runs)}%) done, runs {hours}h:{minutes}min"
        )

5/20 (25%) done, runs 0h:10min
10/20 (50%) done, runs 0h:20min
15/20 (75%) done, runs 0h:31min
20/20 (100%) done, runs 0h:42min


In [None]:
# Experiment "enhanced_model_time": sweep EnhancedModel over every
# (hidden_unit, learning rate) pair, training each pair `num_of_repeats` times.
# After each pair finishes, its results are written to the results pickle, so
# the file on disk always holds every completed configuration.

# initial setup
metadata = """"""
epochs = 30
lr = 0.01  # rebound by the learning-rate loop below
num_of_repeats = 3
dropout = 0.75
regularization = 0.001  # handed to SGD as weight_decay
hidden_units = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
learning_rates = [0.01]
experiment_dict = {
    "experiment name": "enhanced_model_time",
    "experiments metadata": metadata,
}

# Single definition of the checkpoint location used by every read and write.
results_path = "../results/enhanced_model_time.pkl"

# Refuse to overwrite results from an earlier run of this cell.
if os.path.exists(results_path):
    raise FileExistsError(
        "File already exists, this notebook is not intended to run multiple times"
    )
# `with` guarantees the handle is flushed and closed before the file is reread.
with open(results_path, "wb") as results_file:
    pickle.dump(experiment_dict, results_file)

# time and progress
start_time = time.time()
total_number_of_runs = num_of_repeats * len(hidden_units) * len(learning_rates)
number_of_completed_runs = 0

cinic_directory = "../data"
cinic_train = torch.utils.data.DataLoader(
    torchvision.datasets.ImageFolder(
        cinic_directory + "/train", transform=transforms.ToTensor()
    ),
    **data_loader_args,
)

cinic_test = torch.utils.data.DataLoader(
    torchvision.datasets.ImageFolder(
        cinic_directory + "/test", transform=transforms.ToTensor()
    ),
    **data_loader_args,
)

for hidden_unit in hidden_units:
    for lr in learning_rates:
        # Reload the checkpoint written by this cell (trusted, self-produced
        # pickle) so the new entry is added on top of what is already on disk.
        with open(results_path, "rb") as results_file:
            experiment_dict = pickle.load(results_file)
        metrics_list = []
        total_time_list = []
        for i in range(num_of_repeats):
            # Fresh model and optimizer for every repeat.
            model = EnhancedModel(
                input_shape=3,
                hidden_units=hidden_unit,
                output_shape=10,
                dropout_p=dropout,
            ).to(device)

            loss_fn = nn.CrossEntropyLoss()
            optimizer = torch.optim.SGD(
                params=model.parameters(),
                lr=lr,
                momentum=0.9,
                weight_decay=regularization,
            )

            # run_model comes from helper_functions; it yields a metrics object
            # and a total time for the run (presumably it trains and evaluates
            # the model -- confirm against helper_functions).
            metrics, total_time = run_model(
                cinic_train,
                cinic_test,
                model,
                loss_fn,
                optimizer,
                device=device,
                epochs=epochs,
            )

            metrics_list.append(metrics)
            total_time_list.append(total_time)
            number_of_completed_runs += 1

        # add info about number of parameters
        # (counted on the model from the last repeat of this configuration)
        total_number_params = sum(p.numel() for p in model.parameters())
        experiment_dict[(hidden_unit, lr)] = (
            metrics_list,  # accuracy and loss for each epoch
            total_time_list,  # time it took to train
            total_number_params,  # number of parameters
        )
        # Persist after every configuration; closing the handle here ensures
        # the data is on disk before the next iteration reads it back.
        with open(results_path, "wb") as results_file:
            pickle.dump(experiment_dict, results_file)
        seconds = time.time() - start_time
        hours = int(seconds // 3600)
        minutes = int((seconds % 3600) // 60)
        print(
            f"{number_of_completed_runs}/{total_number_of_runs} ({round(100* number_of_completed_runs/total_number_of_runs)}%) done, runs {hours}h:{minutes}min"
        )