# Dependencies

In [1]:
import itertools as it
from collections import OrderedDict
import time
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F
import torchvision.utils 
import torchvision.models
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from helpers import *
from models import *
from train import *

# Fix the seed up front, before any model, data split or loader is created.
# NOTE(review): set_seed comes from one of the star-imported project modules; presumably it
# seeds the Python/NumPy/PyTorch RNGs — confirm there.
set_seed(0)

%load_ext autoreload
%autoreload 2

# VGG16

In [2]:
# Two VGG16 instances: one requested with pretrained weights, one left at its default initialisation.
# NOTE(review): newer torchvision releases replace the `pretrained=` flag with `weights=` — confirm
# against the installed version before upgrading.
vgg16_trained = torchvision.models.vgg16(pretrained=True)
vgg16_untrained = torchvision.models.vgg16()

In [3]:
def modify_model(model, input_channels, output_units):
    '''
    Adapt a VGG-style model in place to a new input depth and output size.

    The first layer of ``model.features`` and the layer at index 6 of
    ``model.classifier`` are replaced by freshly initialised layers, so any
    trained weights held by those two layers are discarded. The sizes that
    must match the neighbouring layers (number of filters of the first
    convolution, input width of the last linear layer) are read from the
    layers being replaced rather than hard-coded.

    Parameters

    model: instance of a pytorch model to be modified; ``model.features[0]``
        must be an ``nn.Conv2d`` and ``model.classifier[6]`` an ``nn.Linear``
    input_channels: channels of input tensor
    output_units: number of units in the last layer

    Returns

    None. ``model`` is modified in place.
    '''
    old_conv = model.features[0]
    new_conv = nn.Conv2d(
        input_channels,
        old_conv.out_channels,  # keep the width expected by the next layer
        kernel_size=3, stride=1, padding=1, dilation=1, groups=1, bias=True,
    )
    # Keep the replacement on the same device as the layer it replaces.
    model.features[0] = new_conv.to(old_conv.weight.device)

    old_fc = model.classifier[6]
    new_fc = nn.Linear(old_fc.in_features, output_units)
    model.classifier[6] = new_fc.to(old_fc.weight.device)

In [4]:
# Adapt the pretrained VGG16 to 2-channel inputs and a 512-unit output layer.
# The first conv and the last linear layer are replaced by new layers, so those two start untrained.
modify_model(vgg16_trained, 2, 512)

Test with a random input tensor:

In [5]:
# Random input of shape (batch=1, channels=2, height=256, width=256).
x = torch.randn(1, 2, 256, 256)
output = vgg16_trained(x)
print(output.shape)

torch.Size([1, 512])


In [7]:
vgg16_trained

VGG(
  (features): Sequential(
    (0): Conv2d(2, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

Features:

In [8]:
vgg16_trained.features

Sequential(
  (0): Conv2d(2, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace=True)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU(inplace=True)
  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU(inplace=True)
  (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): ReLU(inplace=True)
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace=True)
  (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU(inplace=True)
  (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): ReLU(inplace=True)
  (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (17): Conv2d(256, 512, kernel_si

In [9]:
vgg16_trained.classifier

Sequential(
  (0): Linear(in_features=25088, out_features=4096, bias=True)
  (1): ReLU(inplace=True)
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=4096, out_features=4096, bias=True)
  (4): ReLU(inplace=True)
  (5): Dropout(p=0.5, inplace=False)
  (6): Linear(in_features=4096, out_features=512, bias=True)
)

Do the same with the untrained version:

In [10]:
# Same adaptation for the untrained VGG16: 2 input channels, 512 output units.
modify_model(vgg16_untrained, 2, 512)

# Settings

Define the loss function and a dictionary of hyperparameter values to sweep over:

In [4]:
# Loss used by the training loops: negative log-likelihood.
criterion = nn.NLLLoss()

# Hyperparameter grid: each key maps to the list of values to try for it.
params = OrderedDict([
    ('lr', [0.01, 0.001]),
    ('batch_size', [128, 256]),
    ('patience', [1]),
])

Check whether CUDA is available (each network is moved to the GPU later, inside the training loop):

In [3]:
use_cuda = torch.cuda.is_available() # True if cuda is available

In [41]:
# if use_cuda:
#     model = model.cuda()

## RunBuilder() functionality:

In [4]:
# Expand the hyperparameter grid into a list of Run objects and display it.
# NOTE(review): the saved output shows one Run per (lr, batch_size) combination without the
# `patience` field that `params` defines, so it looks like it predates that key — re-run to refresh.
runs = RunBuilder.get_runs(params)
runs 

[Run(lr=0.01, batch_size=128),
 Run(lr=0.01, batch_size=256),
 Run(lr=0.001, batch_size=128),
 Run(lr=0.001, batch_size=256)]

In [5]:
# example 
run = runs[0]
run

Run(lr=0.01, batch_size=128)

In [6]:
print(run.lr, run.batch_size)

0.01 128


In [7]:
for run in runs:
    print(run, run.lr, run.batch_size)

Run(lr=0.01, batch_size=128) 0.01 128
Run(lr=0.01, batch_size=256) 0.01 256
Run(lr=0.001, batch_size=128) 0.001 128
Run(lr=0.001, batch_size=256) 0.001 256


In [8]:
for run in RunBuilder.get_runs(params):
    # do stuff 
    pass

## RunManager() functionality:

In [6]:
# Convert each image to a tensor; no other preprocessing is applied.
transform = transforms.Compose([transforms.ToTensor()])

# Training split of FashionMNIST, stored under ./data (downloaded if it is not there yet).
dataset = datasets.FashionMNIST(
                    root='./data',
                    train=True,
                    download=True,
                    transform=transform)

# Random 50000/10000 train/validation split; the two lengths must add up to len(dataset).
train_set, valid_set = torch.utils.data.random_split(dataset, [50000, 10000])

In [6]:
# Manual hyperparameter sweep: every run gets a fresh network, its own loaders and optimizer.
# A single device object derived from `use_cuda` is used for both the network and the batches,
# so the loop also runs on CPU-only machines.
device = torch.device('cuda' if use_cuda else 'cpu')

m = RunManager()
for run in RunBuilder.get_runs(params):
    network = Tester().to(device)
    train_loader = torch.utils.data.DataLoader(train_set, num_workers=0, batch_size=run.batch_size, shuffle=True)
    valid_loader = torch.utils.data.DataLoader(valid_set, num_workers=0, batch_size=run.batch_size, shuffle=True)
    loaders = OrderedDict(train=train_loader, valid=valid_loader)
    optimizer = optim.Adam(network.parameters(), lr=run.lr)

    m.begin_run(run, network, loaders, stop_early=True, save_best_model=False)
    for epoch in range(5):
        m.begin_epoch()

        # Train
        # Switch back to training mode at the start of EVERY epoch: the eval() call in the
        # validation phase below would otherwise stay in effect from the second epoch onwards.
        network.train()
        for images, labels in loaders['train']:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            preds = network(images)
            loss = criterion(preds, labels)
            loss.backward()
            optimizer.step()

            m.track_loss(loss, 'train')
            m.track_num_correct(preds, labels, 'train')

        # Validation
        network.eval()  # evaluation mode for layers such as dropout and batch norm
        with torch.no_grad():  # no backward pass here, so skip building the autograd graph
            for images, labels in loaders['valid']:
                images, labels = images.to(device), labels.to(device)
                preds = network(images)
                loss = criterion(preds, labels)

                m.track_loss(loss, 'valid')
                m.track_num_correct(preds, labels, 'valid')

        m.end_epoch()
        # NOTE(review): this reaches into a private RunManager method; consider exposing a public accessor.
        if m._get_early_stopping():
            break

    m.end_run()

m.save_results('results')

Unnamed: 0,run,epoch,train loss,train accuracy,valid loss,valid accuracy,epoch duration,run duration,lr,batch_size,patience
0,1,1,0.566757,0.79274,0.420409,0.8486,10.911892,11.1494,0.01,128,1
1,1,2,0.406573,0.85468,0.435747,0.8424,10.433193,21.797973,0.01,128,1
2,2,1,0.620359,0.76804,0.511703,0.8248,8.098631,8.580635,0.01,256,1
3,2,2,0.405826,0.8526,0.389906,0.8582,7.992383,16.766012,0.01,256,1
4,2,3,0.361857,0.8672,0.366714,0.8642,8.411379,25.376378,0.01,256,1
5,2,4,0.345572,0.87376,0.366822,0.8698,8.129065,33.706468,0.01,256,1
6,3,1,0.642201,0.77724,0.472499,0.8341,11.062692,11.34593,0.001,128,1
7,3,2,0.414406,0.85356,0.4051,0.858,10.991151,22.598069,0.001,128,1
8,3,3,0.367667,0.86694,0.393372,0.8597,10.765774,33.591837,0.001,128,1


KeyboardInterrupt: 

In [7]:
# Run the sweep through the project's train() helper; the first argument (5) is presumably the
# maximum number of epochs per run — confirm against train().
# NOTE(review): in the logged results each run's first train loss is already below the previous
# run's last one, which suggests the single Tester() instance passed here keeps its weights
# across runs — confirm in train() whether the network is re-initialised per run.
train(5, train_set, valid_set, Tester(), params)

Unnamed: 0,run,epoch,train loss,train accuracy,valid loss,valid accuracy,epoch duration,run duration,lr,batch_size
0,1,1,0.563706,0.7896,0.451975,0.8383,10.005036,10.789036,0.01,128
1,1,2,0.410346,0.85158,0.421728,0.8462,11.065007,22.094037,0.01,128
2,1,3,0.375602,0.86336,0.371908,0.8653,10.583821,32.889851,0.01,128
3,1,4,0.357481,0.86948,0.361633,0.8694,10.513258,43.602103,0.01,128
4,1,5,0.338986,0.87558,0.410348,0.8538,11.606247,55.388344,0.01,128
5,2,1,0.321635,0.88432,0.35507,0.8733,8.211949,8.701956,0.01,256
6,2,2,0.303294,0.88896,0.37441,0.8711,8.528903,17.447852,0.01,256
7,3,1,0.250761,0.90472,0.331485,0.8859,11.549921,11.815923,0.001,128
8,3,2,0.23977,0.90848,0.327325,0.8882,10.818786,22.820709,0.001,128
9,3,3,0.23231,0.91138,0.332776,0.887,10.374007,33.379709,0.001,128


In [8]:
# Second invocation of the train() helper with a new Tester() instance and the same `params` grid.
train(5, train_set, valid_set, Tester(), params)

Unnamed: 0,run,epoch,train loss,train accuracy,valid loss,valid accuracy,epoch duration,run duration,lr,batch_size
0,1,1,0.566757,0.79274,0.420409,0.8486,9.732938,10.01,0.01,128
1,1,2,0.406573,0.85468,0.435747,0.8424,10.004054,20.223058,0.01,128
2,2,1,0.366097,0.86704,0.376634,0.8643,7.768,8.185004,0.01,256
3,2,2,0.338971,0.87756,0.380391,0.8638,7.206,15.572944,0.01,256
4,3,1,0.28177,0.89426,0.332029,0.8836,9.949938,10.200008,0.001,128
5,3,2,0.268156,0.89898,0.325463,0.8859,9.605062,20.005071,0.001,128
6,3,3,0.259982,0.90196,0.328347,0.8854,9.947004,30.124006,0.001,128
7,4,1,0.25001,0.90518,0.339482,0.887,7.455935,7.881999,0.001,256
8,4,2,0.243001,0.90714,0.342818,0.8862,7.239057,15.342057,0.001,256
