# Training Loop for Hyperparameter Experimentation

- A class to figure out the best hyperparameters
- The class will contain a static method get_runs() that expands the hyperparameters into every combination of their values (the Cartesian product), making it easier to conduct experiments

<blockquote>Static methods, much like class methods, are methods that are bound to a class rather than its object. They do not require a class instance creation. So, they are not dependent on the state of the object.

    ...
When you need a utility function that doesn't access any properties of a class but makes sense that it belongs to the class, we use static functions.
</blockquote>

- get_runs() takes the hyperparameters as an ordered dictionary and returns a list of named tuples, one per combination of values

In [16]:
import torch
import torchvision
import torch.nn as nn
import torchvision.transforms as transforms 
import torch.nn.functional as F
import torch.optim as optim

from torch.utils.data import Dataset

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# print format
torch.set_printoptions(linewidth=120) 
from torch.utils.tensorboard import SummaryWriter
#! tensorboard --version

In [2]:
from collections import OrderedDict
from collections import namedtuple
from itertools import product

In [3]:
class RunBuilder:
    """Helper that expands a hyperparameter grid into individual runs."""

    @staticmethod
    def get_runs(params):
        """Return one ``Run`` named tuple per combination of values.

        Args:
            params: Mapping from hyperparameter name to an iterable of
                candidate values (e.g. an ``OrderedDict``).

        Returns:
            A list of ``Run`` named tuples whose fields are the keys of
            ``params``. The list covers the Cartesian product of the
            candidate values, in ``itertools.product`` order.
        """
        # Field names come from the keys, so each value is reachable by name.
        run_type = namedtuple('Run', params.keys())
        return [run_type(*combo) for combo in product(*params.values())]

In [11]:
# Demo grid: two candidate values for each of three hyperparameters.
params = OrderedDict([
    ('lr', [0.01, 0.001]),
    ('batch_size', [1000, 10000]),
    ('device', ['cpu', 'gpu']),
])

# Expand the grid into one named tuple per combination.
runs = RunBuilder.get_runs(params)

# Print each run, then its fields accessed by attribute name.
for run in runs:
    print(
        run,
        run.lr,
        run.batch_size,
        run.device,
    )

Run(lr=0.01, batch_size=1000, device='cpu') 0.01 1000 cpu
Run(lr=0.01, batch_size=1000, device='gpu') 0.01 1000 gpu
Run(lr=0.01, batch_size=10000, device='cpu') 0.01 10000 cpu
Run(lr=0.01, batch_size=10000, device='gpu') 0.01 10000 gpu
Run(lr=0.001, batch_size=1000, device='cpu') 0.001 1000 cpu
Run(lr=0.001, batch_size=1000, device='gpu') 0.001 1000 gpu
Run(lr=0.001, batch_size=10000, device='cpu') 0.001 10000 cpu
Run(lr=0.001, batch_size=10000, device='gpu') 0.001 10000 gpu


In [24]:
import my_model


# Additional items required
# FashionMNIST training split, stored under `root`.
train_set = torchvision.datasets.FashionMNIST(
    root='./Documents/data',
    train=True,
    # Downloads the data locally (checks existence beforehand).
    download=True,
    # Built-in transform that converts each image to a tensor.
    transform=transforms.Compose([transforms.ToTensor()]),
)

def get_num_correct(preds, labels):
    """Count how many predictions match their labels.

    Args:
        preds: Tensor of per-class scores; the predicted class for each
            row is the argmax along dimension 1.
        labels: Tensor of target class indices, compared element-wise
            with the predicted classes.

    Returns:
        The number of matching entries as a plain Python number.
    """
    predicted_classes = preds.argmax(dim=1)
    matches = predicted_classes.eq(labels)
    return matches.sum().item()

In [28]:
# Define Parameters
params = OrderedDict(
    lr=[0.01, 0.001],
    batch_size=[1000, 10000],
)

# Every run trains for the same number of epochs, so set it once up front.
num_epochs = 5

# Train a fresh network for each hyperparameter combination and log the
# results to TensorBoard under a run-specific name.
for run in RunBuilder.get_runs(params):
    network = my_model.Network()

    train_loader = torch.utils.data.DataLoader(train_set, batch_size=run.batch_size)
    optimizer = optim.Adam(network.parameters(), lr=run.lr)

    # A single batch is used for the image grid and for tracing the graph.
    images, labels = next(iter(train_loader))
    grid = torchvision.utils.make_grid(images)

    # Tag the writer with the run's hyperparameters so runs can be told apart.
    comment = f' -{run}'

    # The context manager closes the writer when the run ends (even if
    # training raises), so pending events are flushed and the file handle
    # is released before the next run opens its own writer.
    with SummaryWriter(comment=comment) as tb:
        tb.add_image('images', grid)
        tb.add_graph(network, images)

        # loop over all epochs
        for epoch in range(num_epochs):

            # per-epoch accumulators
            total_loss = 0
            total_correct = 0

            # loop over all batches in the train loader
            for batch in train_loader:
                images, labels = batch

                preds = network(images)
                loss = F.cross_entropy(preds, labels)

                optimizer.zero_grad()  # zero grad because pytorch accumulates gradient
                loss.backward()  # calculate gradients
                optimizer.step()  # update weights

                # cross_entropy averages over the batch by default, so scale
                # by the number of samples actually in this batch; the final
                # batch can be smaller than run.batch_size.
                total_loss += loss.item() * len(labels)
                total_correct += get_num_correct(preds, labels)

            accuracy = total_correct / len(train_set)

            # Add metrics to TensorBoard
            # scalar -> tag, value, epoch
            tb.add_scalar('Loss', total_loss, epoch)
            tb.add_scalar('Number Correct', total_correct, epoch)
            tb.add_scalar('Accuracy', accuracy, epoch)
            # histograms
            tb.add_histogram('conv1.bias', network.conv1.bias, epoch)
            tb.add_histogram('conv1.weight', network.conv1.weight, epoch)
            tb.add_histogram('conv1.weight.grad', network.conv1.weight.grad, epoch)

            # print information for selected epochs (every second one)
            if (epoch + 1) % 2 == 0:
                # The label says percent, so convert the 0-1 fraction.
                print("Epoch: ", epoch+1, "\n\tAccuracy (%):", 100 * accuracy,
                      "\n\tLoss ", total_loss)

    # One optimizer step is taken per batch, in every epoch.
    print("\nNumber of steps taken towards the loss minimum:", num_epochs * len(train_loader))

# Resources

- https://www.programiz.com/python-programming/methods/built-in/staticmethod
- https://deeplizard.com/learn/video/NSKghk0pcco