# Dependencies

In [3]:
import itertools as it
from collections import OrderedDict
import time
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F
import torchvision.utils 
import torchvision.models as models
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from helpers import RunBuilder, RunManager

%load_ext autoreload
%autoreload 2

# VGG16

In [2]:
# Two copies of the torchvision VGG16 architecture:
# one created with pretrained=True (torchvision's pretrained weights) ...
vgg16_trained = models.vgg16(pretrained=True)
# ... and one created with the default arguments (no pretrained weights requested).
vgg16_untrained = models.vgg16()

In [3]:
def modify_model(model, input_channels, output_units):
    '''
    Adapt a VGG-style model to a new input depth and output size, in place.

    The first layer of ``model.features`` and the layer at index 6 of
    ``model.classifier`` are replaced by newly constructed layers, so whatever
    weights those two layers held before the call are discarded.

    Parameters

    model: instance of a pytorch model to be modified; must expose
        ``features[0]`` (with an ``out_channels`` attribute) and
        ``classifier[6]`` (with an ``in_features`` attribute)
    input_channels: channels of input tensor
    output_units: number of units in the last layer

    Returns

    None. ``model`` is modified in place.
    '''
    # Read the sizes the neighbouring layers expect from the layers being
    # replaced instead of hard-coding the VGG16 values (64 and 4096).
    conv_out_channels = model.features[0].out_channels
    head_in_features = model.classifier[6].in_features

    model.features[0] = nn.Conv2d(
        input_channels,
        conv_out_channels,
        kernel_size=3,
        stride=1,
        padding=1,
        dilation=1,
        groups=1,
        bias=True,
    )
    model.classifier[6] = nn.Linear(head_in_features, output_units)

In [4]:
modify_model(vgg16_trained, 2, 512)

Test with a random input tensor:

In [5]:
# Smoke test: a batch of one random 2-channel 256x256 input, i.e. shape (1, 2, 256, 256).
x = torch.randn(1, 2, 256, 256)
# Only the output shape is of interest here, so skip building the autograd graph.
with torch.no_grad():
    output = vgg16_trained(x)
print(output.shape)

torch.Size([1, 512])


In [7]:
vgg16_trained

VGG(
  (features): Sequential(
    (0): Conv2d(2, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

Features:

In [8]:
vgg16_trained.features

Sequential(
  (0): Conv2d(2, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace=True)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU(inplace=True)
  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU(inplace=True)
  (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): ReLU(inplace=True)
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace=True)
  (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU(inplace=True)
  (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): ReLU(inplace=True)
  (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (17): Conv2d(256, 512, kernel_si

In [9]:
vgg16_trained.classifier

Sequential(
  (0): Linear(in_features=25088, out_features=4096, bias=True)
  (1): ReLU(inplace=True)
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=4096, out_features=4096, bias=True)
  (4): ReLU(inplace=True)
  (5): Dropout(p=0.5, inplace=False)
  (6): Linear(in_features=4096, out_features=512, bias=True)
)

Do the same with the untrained version:

In [10]:
modify_model(vgg16_untrained, 2, 512)

# Settings

Create a dictionary with settings:

In [12]:
# Negative log-likelihood loss, applied to the network outputs in the training loop.
criterion = nn.NLLLoss()

# Hyper-parameter grid: each key maps to the list of values to try.
params = OrderedDict([
    ('lr', [0.01, 0.001]),
    ('batch_size', [128, 256]),
])

Check whether CUDA is available (the model itself is moved to the GPU later, inside the training loop):

In [5]:
use_cuda = torch.cuda.is_available() # True if cuda is available

In [41]:
# if use_cuda:
#     model = model.cuda()

## RunBuilder() functionality:

In [4]:
# Build the list of Run tuples from `params`; judging by the saved output,
# there is one Run per (lr, batch_size) combination.
# NOTE(review): the saved output below lists batch sizes 10/128/256/512, which
# does not match the `params` cell above (128/256) -- it appears to come from
# an earlier kernel state. Re-run after Restart & Run All to refresh it.
runs = RunBuilder.get_runs(params)
runs 

[Run(lr=0.01, batch_size=10),
 Run(lr=0.01, batch_size=128),
 Run(lr=0.01, batch_size=256),
 Run(lr=0.01, batch_size=512),
 Run(lr=0.001, batch_size=10),
 Run(lr=0.001, batch_size=128),
 Run(lr=0.001, batch_size=256),
 Run(lr=0.001, batch_size=512)]

In [7]:
# example 
run = runs[0]
run

Run(lr=0.01, batch_size=10)

In [8]:
print(run.lr, run.batch_size)

0.01 10


In [9]:
for run in runs:
    print(run, run.lr, run.batch_size)

Run(lr=0.01, batch_size=10) 0.01 10
Run(lr=0.01, batch_size=128) 0.01 128
Run(lr=0.01, batch_size=256) 0.01 256
Run(lr=0.01, batch_size=512) 0.01 512
Run(lr=0.001, batch_size=10) 0.001 10
Run(lr=0.001, batch_size=128) 0.001 128
Run(lr=0.001, batch_size=256) 0.001 256
Run(lr=0.001, batch_size=512) 0.001 512


In [10]:
for run in RunBuilder.get_runs(params):
    # do stuff 
    pass

## RunManager() functionality:

In [6]:
# Only transformation applied to the samples: conversion to tensors.
transform = transforms.Compose([transforms.ToTensor()])

# Fashion-MNIST training split, downloaded into ./data when not already present.
dataset = datasets.FashionMNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)

# Hold out VALID_SIZE samples for validation and train on the rest.
# The split is drawn from a dedicated, seeded generator so that every run of
# the notebook trains and validates on the same samples.
VALID_SIZE = 10000
SPLIT_SEED = 42
train_set, valid_set = torch.utils.data.random_split(
    dataset,
    [len(dataset) - VALID_SIZE, VALID_SIZE],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

In [14]:
# Train and validate one fresh Classifier per hyper-parameter combination,
# recording per-epoch metrics through RunManager.
# NOTE: `Classifier` is defined in a later cell of this notebook ("Test a simple
# model on Fashion MNIST dataset"); on a fresh kernel run that cell first.
NUM_EPOCHS = 5
# Single source of truth for where both the network and the batches live, so the
# loop also works on machines without CUDA.
device = torch.device('cuda' if use_cuda else 'cpu')

m = RunManager()
for run in RunBuilder.get_runs(params):

    network = Classifier().to(device)
    train_loader = torch.utils.data.DataLoader(train_set, num_workers=1, batch_size=run.batch_size, shuffle=True)
    valid_loader = torch.utils.data.DataLoader(valid_set, num_workers=1, batch_size=run.batch_size, shuffle=True)
    loaders = OrderedDict(train=train_loader, valid=valid_loader)
    optimizer = optim.Adam(network.parameters(), lr=run.lr)

    m.begin_run(run, network, loaders)
    for epoch in range(NUM_EPOCHS):
        m.begin_epoch()

        # Train
        # Switch back to training mode every epoch: the validation pass below
        # leaves the network in eval mode.
        network.train()
        for images, labels in loaders['train']:

            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            preds = network(images)
            loss = criterion(preds, labels)
            loss.backward()
            optimizer.step()

            m.track_loss(loss, 'train')
            m.track_num_correct(preds, labels, 'train')

        # Validation
        network.eval()
        # No parameter update happens here, so gradient tracking is disabled.
        with torch.no_grad():
            for images, labels in loaders['valid']:

                images, labels = images.to(device), labels.to(device)
                preds = network(images)
                loss = criterion(preds, labels)

                m.track_loss(loss, 'valid')
                m.track_num_correct(preds, labels, 'valid')

        m.end_epoch()
    m.end_run()
m.save_results('results')

Unnamed: 0,run,epoch,train loss,train accuracy,valid loss,valid accuracy,epoch duration,run duration,lr,batch_size
0,1,1,0.559358,0.79278,0.500068,0.8389,6.943191,9.17567,0.01,128
1,1,2,0.408982,0.8522,0.416265,0.852,7.270731,16.551406,0.01,128
2,1,3,0.387214,0.85982,0.393767,0.8625,7.123136,23.775504,0.01,128
3,1,4,0.357659,0.87004,0.376747,0.8689,6.945115,30.820619,0.01,128
4,1,5,0.344398,0.8756,0.406553,0.8553,6.959184,37.875805,0.01,128
5,2,1,0.599918,0.78016,0.520792,0.8226,6.7255,9.065559,0.01,256
6,2,2,0.409569,0.85288,0.438792,0.8478,8.216111,17.380623,0.01,256
7,2,3,0.369095,0.86372,0.427583,0.8481,7.819015,25.362638,0.01,256
8,2,4,0.343454,0.87448,0.356983,0.877,7.720002,33.191637,0.01,256
9,2,5,0.325467,0.87994,0.379268,0.8679,7.920476,41.234078,0.01,256


# Test a simple model on Fashion MNIST dataset 

In [7]:
class Classifier(nn.Module):
    """Fully connected network: 784 input features -> 10 log-probabilities."""

    def __init__(self):
        super().__init__()
        # Layer widths: 784 -> 256 -> 128 -> 64 -> 10.
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 10)

    def forward(self, x):
        """Flatten each sample, apply the ReLU hidden layers, return log-softmax scores."""
        # Collapse everything except the batch dimension.
        hidden = x.view(x.shape[0], -1)

        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = F.relu(layer(hidden))

        scores = self.fc4(hidden)
        return F.log_softmax(scores, dim=1)
    
def get_num_correct(preds, labels):
    """Count how many rows of ``preds`` have their arg-max (over dim 1) equal to ``labels``.

    Returns the count as a plain Python number.
    """
    predicted_classes = preds.argmax(dim=1)
    matches = predicted_classes.eq(labels)
    return matches.sum().item()

# Train model

Define a train and test function: