# Dependencies

In [1]:
import itertools as it
from collections import OrderedDict
import time
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F
import torchvision.utils 
import torchvision.models
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from helpers import *
from models import *
from train import *

set_seed(0) # important

%load_ext autoreload
%autoreload 2

# VGG16

In [2]:
vgg16_trained = torchvision.models.vgg16(pretrained=True)
vgg16_untrained = torchvision.models.vgg16()

In [3]:
def modify_model(model, input_channels, output_units):
    '''
    Parameters
    
    model: instance of a pytorch model to be modified
    input_channels: channels of input tensor
    output_units: number of units in the last layer
    '''
    model.features[0] = nn.Conv2d(input_channels, 64, kernel_size=3, stride=1, padding=1, dilation=1, groups=1, bias=True)
    model.classifier[6] = nn.Linear(4096, output_units)

In [4]:
modify_model(vgg16_trained, 2, 512)

Test with a random input tensor:

In [5]:
x = torch.randn(1, 2, 256, 256) # (256, 256, 3)
output = vgg16_trained(x)
print(output.shape)

torch.Size([1, 512])


In [7]:
vgg16_trained

VGG(
  (features): Sequential(
    (0): Conv2d(2, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

Features:

In [8]:
vgg16_trained.features

Sequential(
  (0): Conv2d(2, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace=True)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU(inplace=True)
  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU(inplace=True)
  (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): ReLU(inplace=True)
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace=True)
  (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU(inplace=True)
  (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): ReLU(inplace=True)
  (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (17): Conv2d(256, 512, kernel_si

In [9]:
vgg16_trained.classifier

Sequential(
  (0): Linear(in_features=25088, out_features=4096, bias=True)
  (1): ReLU(inplace=True)
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=4096, out_features=4096, bias=True)
  (4): ReLU(inplace=True)
  (5): Dropout(p=0.5, inplace=False)
  (6): Linear(in_features=4096, out_features=512, bias=True)
)

Do the same with the untrained version:

In [10]:
modify_model(vgg16_untrained, 2, 512)

# Settings

Create a dictionary with settings:

In [2]:
criterion = nn.NLLLoss()
params = OrderedDict(
        lr=[0.01],#, 0.001],
        batch_size=[128],#, 256],
        patience=[1]
)

Move model to cuda:

In [3]:
use_cuda = torch.cuda.is_available() # True if cuda is available

In [41]:
# if use_cuda:
#     model = model.cuda()

## RunBuilder() functionality:

In [4]:
runs = RunBuilder.get_runs(params)
runs 

[Run(lr=0.01, batch_size=128),
 Run(lr=0.01, batch_size=256),
 Run(lr=0.001, batch_size=128),
 Run(lr=0.001, batch_size=256)]

In [5]:
# example 
run = runs[0]
run

Run(lr=0.01, batch_size=128)

In [6]:
print(run.lr, run.batch_size)

0.01 128


In [7]:
for run in runs:
    print(run, run.lr, run.batch_size)

Run(lr=0.01, batch_size=128) 0.01 128
Run(lr=0.01, batch_size=256) 0.01 256
Run(lr=0.001, batch_size=128) 0.001 128
Run(lr=0.001, batch_size=256) 0.001 256


In [8]:
for run in RunBuilder.get_runs(params):
    # do stuff 
    pass

## RunManager() functionality:

In [4]:
transform = transforms.Compose([transforms.ToTensor()])

dataset = datasets.FashionMNIST(
                    root='./data',
                    train=True,
                    download=True,
                    transform=transform)

train_set, valid_set = torch.utils.data.random_split(dataset, [50000, 10000])

In [8]:
# m = RunManager()
# for run in RunBuilder.get_runs(params):
#     network = Tester().cuda() if use_cuda else Tester()
#     train_loader = torch.utils.data.DataLoader(train_set, num_workers=0, batch_size=run.batch_size, shuffle=True)
#     valid_loader = torch.utils.data.DataLoader(valid_set, num_workers=0, batch_size=run.batch_size, shuffle=True)
#     loaders = OrderedDict(train=train_loader, valid=valid_loader)
#     optimizer = optim.Adam(network.parameters(), lr=run.lr)
    
#     m.begin_run(run, network, loaders, stop_early=True, save_best_model=False)
#     network.train() # keep grads
#     for epoch in range(2):
#         m.begin_epoch()
        
#         # Train
#         for batch_idx, (images, labels) in enumerate(loaders['train']):
            
#             images, labels = images.cuda(), labels.cuda()
#             optimizer.zero_grad()
#             preds = network(images)
#             loss = criterion(preds, labels)
#             loss.backward()
#             optimizer.step()
            
#             m.track_loss(loss, 'train')
#             m.track_num_correct(preds, labels, 'train')
        
#         # Validation
#         network.eval() # skips dropout and batch_norm 
#         for batch_idx, (images, labels) in enumerate(loaders['valid']):

#             images, labels = images.cuda(), labels.cuda()
#             preds = network(images)
#             loss = criterion(preds, labels)

#             m.track_loss(loss, 'valid')
#             m.track_num_correct(preds, labels, 'valid')
            
#         m.end_epoch()
#         if m._get_early_stopping():
#             break
        
#     m.end_run()
    
# m.save_results('results')

In [6]:
network = Tester()

In [5]:
train(1, train_set, valid_set, network, params)

Unnamed: 0,run,epoch,train loss,train accuracy,valid loss,valid accuracy,epoch duration,run duration,lr,batch_size
0,1,1,0.578018,0.78402,0.420826,0.8497,7.216822,7.732822,0.01,128


In [7]:
train(1, train_set, valid_set, network, params)

Unnamed: 0,run,epoch,train loss,train accuracy,valid loss,valid accuracy,epoch duration,run duration,lr,batch_size
0,1,1,0.563706,0.7896,0.451975,0.8383,7.741999,8.260997,0.01,128


In [8]:
train(1, train_set, valid_set, network, params)

Unnamed: 0,run,epoch,train loss,train accuracy,valid loss,valid accuracy,epoch duration,run duration,lr,batch_size
0,1,1,0.427253,0.84558,0.38384,0.8566,7.768963,7.937996,0.01,128


In [9]:
train(1, train_set, valid_set, network, params)

Unnamed: 0,run,epoch,train loss,train accuracy,valid loss,valid accuracy,epoch duration,run duration,lr,batch_size
0,1,1,0.394611,0.85992,0.432699,0.8475,7.681118,7.857155,0.01,128


In [10]:
train(1, train_set, valid_set, network, params)

Unnamed: 0,run,epoch,train loss,train accuracy,valid loss,valid accuracy,epoch duration,run duration,lr,batch_size
0,1,1,0.379779,0.86736,0.407654,0.8517,7.839986,8.025997,0.01,128
