# Dependencies

In [1]:
import itertools as it
from collections import OrderedDict
import time
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F
import torchvision.utils 
import torchvision.models
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from helpers import *
from models import *
from train import *

import os

# set_seed(0)
# A Python variable called PYTHONHASHSEED has no effect on hashing: the
# interpreter only reads the *environment variable* of that name, and only at
# start-up. Exporting it here cannot re-seed the already-running kernel, but it
# is inherited by child processes started from this point on.
os.environ["PYTHONHASHSEED"] = "0"
PYTHONHASHSEED = 0  # kept so existing references to the notebook variable still resolve
# Enable IPython's autoreload extension; mode 2 re-imports modules before each
# cell executes, so edits to imported modules such as helpers/models/train are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# VGG16

In [2]:
# Build two VGG16 instances from torchvision: one requested with pretrained
# weights (pretrained=True) and one from the default constructor call.
vgg16_trained = torchvision.models.vgg16(pretrained=True)
vgg16_untrained = torchvision.models.vgg16()

In [1]:
def modify_model(model, input_channels, output_units):
    '''
    Adapt a VGG-style network in place to a new input depth and output width.

    The first layer of ``model.features`` and the last layer of
    ``model.classifier`` are replaced by freshly initialised layers, so any
    weights those two layers previously held (e.g. pretrained ones) are
    discarded. All other layers are left untouched.

    Parameters

    model: instance of a pytorch model to be modified; must expose
        ``features`` (starting with an ``nn.Conv2d``) and ``classifier``
        (ending with an ``nn.Linear``) as indexable containers
    input_channels: channels of input tensor
    output_units: number of units in the last layer

    Returns

    None; ``model`` is modified in place.
    '''
    # Read the layer geometry from the layers being replaced instead of
    # hard-coding VGG16's 64 filters / 4096 features, so the untouched rest of
    # the network keeps receiving tensors of the shape it expects.
    old_conv = model.features[0]
    model.features[0] = nn.Conv2d(
        input_channels,
        old_conv.out_channels,
        kernel_size=old_conv.kernel_size,
        stride=old_conv.stride,
        padding=old_conv.padding,
        dilation=old_conv.dilation,
        groups=1,
        bias=True,
    )

    # Index -1 is the final classifier layer (index 6 for torchvision's VGG16).
    old_head = model.classifier[-1]
    model.classifier[-1] = nn.Linear(old_head.in_features, output_units)

In [4]:
# Replace the pretrained model's first and last layers: 2 input channels, 512 output units.
modify_model(vgg16_trained, 2, 512)

Test with a random input tensor:

In [5]:
# Random batch of shape (1, 2, 256, 256): one sample with 2 channels, matching
# the input_channels given to modify_model. The recorded output shape is (1, 512).
x = torch.randn(1, 2, 256, 256)
output = vgg16_trained(x)
print(output.shape)

torch.Size([1, 512])


In [7]:
# Inspect the full architecture after the layer replacement.
vgg16_trained

VGG(
  (features): Sequential(
    (0): Conv2d(2, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

Features:

In [8]:
# Convolutional part only; entry (0) is the replaced 2-channel convolution.
vgg16_trained.features

Sequential(
  (0): Conv2d(2, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace=True)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU(inplace=True)
  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU(inplace=True)
  (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): ReLU(inplace=True)
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace=True)
  (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU(inplace=True)
  (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): ReLU(inplace=True)
  (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (17): Conv2d(256, 512, kernel_si

In [9]:
# Fully connected part only; entry (6) is the replaced 512-unit output layer.
vgg16_trained.classifier

Sequential(
  (0): Linear(in_features=25088, out_features=4096, bias=True)
  (1): ReLU(inplace=True)
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=4096, out_features=4096, bias=True)
  (4): ReLU(inplace=True)
  (5): Dropout(p=0.5, inplace=False)
  (6): Linear(in_features=4096, out_features=512, bias=True)
)

Do the same with the untrained version:

In [10]:
# Same layer replacement for the model that was built without pretrained=True.
modify_model(vgg16_untrained, 2, 512)

# Settings

Define the loss criterion and create a dictionary with the hyperparameter settings:

In [3]:
# Loss object: negative log-likelihood.
criterion = nn.NLLLoss()

# Hyperparameter grid; every key maps to the list of candidate values for it.
params = OrderedDict([
    ('lr', [0.01, 0.001]),
    ('batch_size', [128, 256]),
    ('patience', [1]),
])

Check whether CUDA is available (the actual move of the model to the GPU is commented out below):

In [4]:
# Remember CUDA availability; the commented-out cell below would use it to move the model.
use_cuda = torch.cuda.is_available() # True if cuda is available

In [41]:
# if use_cuda:
#     model = model.cuda()

## RunBuilder() functionality:

In [4]:
# Expand the settings dictionary into a list of Run records; per the recorded
# output, each Run exposes one chosen value per hyperparameter as an attribute.
runs = RunBuilder.get_runs(params)
runs 

[Run(lr=0.01, batch_size=128),
 Run(lr=0.01, batch_size=256),
 Run(lr=0.001, batch_size=128),
 Run(lr=0.001, batch_size=256)]

In [5]:
# example: pick the first generated configuration
run = runs[0]
run

Run(lr=0.01, batch_size=128)

In [6]:
# Individual hyperparameters are read from a Run by attribute name.
print(run.lr, run.batch_size)

0.01 128


In [7]:
# Walk over every configuration and show it next to its individual values.
# Note: the loop variable overwrites the notebook-level `run` set earlier.
for run in runs:
    print(run, run.lr, run.batch_size)

Run(lr=0.01, batch_size=128) 0.01 128
Run(lr=0.01, batch_size=256) 0.01 256
Run(lr=0.001, batch_size=128) 0.001 128
Run(lr=0.001, batch_size=256) 0.001 256


In [8]:
# Skeleton of a sweep: one loop iteration per hyperparameter configuration.
for run in RunBuilder.get_runs(params):
    # do stuff 
    pass

## RunManager() functionality:

In [5]:
# Convert images to tensors; no other preprocessing is applied.
transform = transforms.Compose([transforms.ToTensor()])

# FashionMNIST with train=True, stored under ./data (download=True).
dataset = datasets.FashionMNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform)

# Split into 50,000 training and 10,000 validation samples. The split draws from
# its own seeded generator so the same partition is produced after every kernel
# restart, independent of how much of the global RNG earlier cells consumed.
train_set, valid_set = torch.utils.data.random_split(
    dataset, [50000, 10000], generator=torch.Generator().manual_seed(0))

In [6]:
# Seeded training sweep over `params`. The first argument (5) appears to be the
# maximum number of epochs per run, judging by the recorded output — TODO confirm.
# NOTE(review): `network` is not used again in this cell; train() receives a second,
# separately constructed Tester(). Both are built after set_seed(0), so dropping or
# reusing `network` would presumably change the trained model's initial weights —
# confirm which instance was intended.
# NOTE(review): in the recorded output, run 2 starts at a train loss of 0.33, right
# where run 1 ended (0.34) — looks like the model is not re-initialised between
# runs; verify inside train().
set_seed(0)
network = Tester()
train(5, train_set, valid_set, Tester(), params)

Unnamed: 0,run,epoch,train loss,train accuracy,valid loss,valid accuracy,epoch duration,run duration,lr,batch_size
0,1,1,0.589115,0.78408,0.460994,0.8306,7.279961,7.804944,0.01,128
1,1,2,0.411284,0.85142,0.414401,0.8492,7.298037,15.204984,0.01,128
2,1,3,0.37428,0.86508,0.401429,0.8574,7.274001,22.583943,0.01,128
3,1,4,0.362278,0.86878,0.380946,0.864,7.285668,29.972612,0.01,128
4,1,5,0.340657,0.87564,0.396597,0.8607,7.568052,37.63962,0.01,128
5,2,1,0.33075,0.8817,0.393873,0.8644,6.442004,6.744039,0.01,256
6,2,2,0.306686,0.88924,0.374132,0.8732,6.412036,13.262038,0.01,256
7,2,3,0.305773,0.8892,0.370928,0.8718,6.630085,20.034085,0.01,256
8,2,4,0.300116,0.8908,0.372921,0.8718,6.493041,26.631087,0.01,256
9,3,1,0.255227,0.90546,0.333829,0.8869,7.575001,7.762,0.001,128


In [7]:
# Repeat of the seeded sweep with identical arguments. The recorded loss and
# accuracy columns match the first execution exactly; only the timing columns differ.
# NOTE(review): `network` is unused here as well — train() gets a fresh Tester().
set_seed(0)
network = Tester()
train(5, train_set, valid_set, Tester(), params)

Unnamed: 0,run,epoch,train loss,train accuracy,valid loss,valid accuracy,epoch duration,run duration,lr,batch_size
0,1,1,0.589115,0.78408,0.460994,0.8306,7.536038,7.719034,0.01,128
1,1,2,0.411284,0.85142,0.414401,0.8492,7.628034,15.447034,0.01,128
2,1,3,0.37428,0.86508,0.401429,0.8574,7.635037,23.185036,0.01,128
3,1,4,0.362278,0.86878,0.380946,0.864,7.608467,30.89447,0.01,128
4,1,5,0.340657,0.87564,0.396597,0.8607,7.693042,38.69651,0.01,128
5,2,1,0.33075,0.8817,0.393873,0.8644,6.395995,6.701038,0.01,256
6,2,2,0.306686,0.88924,0.374132,0.8732,6.434001,13.237001,0.01,256
7,2,3,0.305773,0.8892,0.370928,0.8718,6.443194,19.7802,0.01,256
8,2,4,0.300116,0.8908,0.372921,0.8718,6.357043,26.23524,0.01,256
9,3,1,0.255227,0.90546,0.333829,0.8869,7.370038,7.552039,0.001,128


In [8]:
# Third execution of the same seeded sweep; the recorded metrics again match the
# earlier executions, and this one ended with a KeyboardInterrupt.
# NOTE(review): `network` is unused here as well — train() gets a fresh Tester().
set_seed(0)
network = Tester()
train(5, train_set, valid_set, Tester(), params)

Unnamed: 0,run,epoch,train loss,train accuracy,valid loss,valid accuracy,epoch duration,run duration,lr,batch_size
0,1,1,0.589115,0.78408,0.460994,0.8306,7.747185,7.921819,0.01,128
1,1,2,0.411284,0.85142,0.414401,0.8492,7.632035,15.654853,0.01,128
2,1,3,0.37428,0.86508,0.401429,0.8574,7.750005,23.496822,0.01,128
3,1,4,0.362278,0.86878,0.380946,0.864,7.629,31.227815,0.01,128
4,1,5,0.340657,0.87564,0.396597,0.8607,7.593642,38.92646,0.01,128
5,2,1,0.33075,0.8817,0.393873,0.8644,6.467028,6.775038,0.01,256
6,2,2,0.306686,0.88924,0.374132,0.8732,6.480982,13.357982,0.01,256
7,2,3,0.305773,0.8892,0.370928,0.8718,6.403417,19.871399,0.01,256
8,2,4,0.300116,0.8908,0.372921,0.8718,6.124031,26.098393,0.01,256
9,3,1,0.255227,0.90546,0.333829,0.8869,7.138931,7.332999,0.001,128


KeyboardInterrupt: 