# Hyperparameter Tuning Optimization

In [2]:
## Setting up environment

# Main imports
import numpy as np
import pandas as pd
import torch
import time
import matplotlib.pyplot as plt
import itertools
from itertools import product
from collections import OrderedDict
from collections import namedtuple
from IPython.display import clear_output
import json
import torchvision
import msms

# PyTorch shorthands
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# For viewing different hyperparameter combinations
from torch.utils.tensorboard import SummaryWriter

# Set print options
torch.set_printoptions(linewidth=120)

In [3]:
# Initialize dataset
test_set = msms.Dataset("data", "metadata/lookup.npy")

In [4]:
# Initialize data loader with dataset
test_loader = torch.utils.data.DataLoader(test_set, batch_size=2, shuffle=True, num_workers=0)

In [102]:
# Load data with dataloader
for snp, pos, label in test_loader:
    
    snp = snp.reshape(-1, test_set.num_indivs, test_set.num_sites) #raw_sim
    pos = pos.reshape(-1, test_set.num_sites) #pos
    label = label.reshape(-1) # raw_param

In [103]:
snp = (snp * 255).double()

In [104]:
snp[0][0]

tensor([  0.,   0.,   0.,  ..., 255.,   0.,   0.], device='cuda:0', dtype=torch.float64)

In [105]:
snp = snp.unsqueeze(1)

In [123]:
snp.shape

torch.Size([20, 1, 10, 1263])

In [67]:
label[0]

tensor(0, device='cuda:0', dtype=torch.int32)

In [106]:
# Specify training dataset
train = torch.utils.data.TensorDataset(snp, label)

In [56]:
train[0][0].shape

torch.Size([10, 1263])

In [134]:
# Design a standard network
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.batch1 = nn.BatchNorm2d(6)
        self.pool = nn.MaxPool2d(4, 4)
        self.conv2 = nn.Conv2d(6, 16, 5)

        self.fc1 = nn.Linear(16 * 3 * 77, 120)
        self.batch2 = nn.BatchNorm1d(120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 5)

    def forward(self, x):
        #x = self.batch1(self.pool(F.relu(self.conv1(x))))
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))

        x = x.view(-1, 16 * 3 * 77)

        #x = self.batch2(F.relu(self.fc1(x)))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [140]:
# Create helper classes

# Builds run with specified hyperparameters to feed into network
class RunBuilder():
  @staticmethod
  def get_runs(params):
    
    Run = namedtuple('Run', params.keys())

    runs = []
    for v in product(*params.values()):
        runs.append(Run(*v))

    return runs

# Store epoch information
class Epoch():
    def __init__(self):
        self.count = 0
        self.loss = 0
        self.start_time = None

# Stores run information
class Run():
    def __init__(self):
        self.params = None
        self.count = 0
        self.data = []
        self.start_time = None

# Collects information for tensorboard to compare between hyperparameters
# Convert to just a dictionary to store information b/c can't use tensorboard on cluster
class RunManager():
    def __init__(self):

        self.epoch = Epoch()

        self.run = Run()

        self.network = None
        self.loader = None
        #self.tb = None

    def begin_run(self, run, network, loader):

        self.run.start_time = time.time()

        self.run.params = run
        self.run.count += 1

        self.network = network
        self.loader = loader
        #self.tb = SummaryWriter(comment = f' -{run}')

        #images, labels = next(iter(self.loader))
        #grid = torchvision.utils.make_grid(images)

        #self.tb.add_image('images', grid)
        #self.tb.add_graph(self.network, images)

    def end_run(self):
        #self.tb.close()
        self.epoch.count = 0

    def begin_epoch(self):
        self.epoch.start_time = time.time()

        self.epoch.count += 1
        self.epoch.loss = 0

    def end_epoch(self):

        epoch_duration = time.time() - self.epoch.start_time
        run_duration = time.time() - self.run.start_time

        loss = self.epoch.loss / len(self.loader.dataset)

        #self.tb.add_scalar('Loss', loss, self.epoch.count)

        results = OrderedDict()
        results["run"] = self.run.count
        results["epoch"] = self.epoch.count
        results["loss"] = loss
        results["epoch duration"] = epoch_duration
        results["run duration"] = run_duration
        for k,v in self.run.params._asdict().items():
            results[k] = v
        self.run.data.append(results)
        df = pd.DataFrame.from_dict(self.run.data, orient='columns')

        # Specific to jupyter notebook
        clear_output(wait=True)
        display(df)

    def track_loss(self, loss):
        self.epoch.loss += loss.item() * self.loader.batch_size

    def save(self, fileName):

    # Save to csv
        pd.DataFrame.from_dict(
            self.run.data,
            orient='columns'
        ).to_csv(f'{fileName}.csv')

        # Save to json - use to build tensorboards
        #with open(f'{fileName}.json', 'w', encoding='utf-8') as f:
        #    json.dump(self.run.data, f, ensure_ascii=False, indent=4)

In [141]:
images.shape

torch.Size([20, 1, 10, 1263])

In [142]:
# Change these params depending on what you want to test
params = OrderedDict(
    lr = [0.01, 0.001, 0.0001],
    batch_size = [20, 100],
    shuffle = [True],
    optim_type = ['Adam', 'SGD'],
    eps = [1e-8, 1e-4, 0.1],
    momentum = [0.9, 0.6, 0.3]
)
m = RunManager()
for run in RunBuilder.get_runs(params):

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("Using device: %s" % device)
    net = Net().double().to(device)
    # Access hyperparameters using the run
    loader = torch.utils.data.DataLoader(train,
                                         batch_size=run.batch_size,
                                         num_workers=0,
                                         shuffle=run.shuffle)
    if run.optim_type == 'Adam':
        optimizer = optim.Adam(net.parameters(),
                               lr=run.lr,
                               eps=run.eps)
    if run.optim_type == 'SGD':
        optimizer = optim.SGD(net.parameters(),
                              lr=run.lr,
                              momentum=run.momentum)

    m.begin_run(run, net, loader)
  # Set number of epochs in range
    for epoch in range(20):
        m.begin_epoch()
        for images, labels in loader:

            images, labels = images.to(device), labels.to(device)
            print(images.shape)
            preds = net(images)
            #print(preds.shape)
            #print(labels.shape)
            loss = F.mse_loss(preds, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            m.track_loss(loss)
            # m.track_num_correct(preds, labels)
        m.end_epoch()
    m.end_run()
m.save('results')

Using device: cuda:0
torch.Size([20, 1, 10, 1263])


RuntimeError: Calculated padded input size per channel: (1 x 314). Kernel size: (5 x 5). Kernel size can't be greater than actual input size