# Dependencies

In [7]:
import itertools as it
import time
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torchvision.utils 
import torchvision.models as models
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter

# VGG16

In [2]:
# VGG16 constructed with pretrained weights requested from torchvision.
vgg16_trained = models.vgg16(pretrained=True)
# VGG16 constructed with torchvision's default arguments (no pretrained flag passed).
vgg16_untrained = models.vgg16()

In [3]:
def modify_model(model, input_channels, output_units):
    '''
    Replace, in place, the first feature layer and the last classifier layer
    of a VGG-style model so that it accepts `input_channels` channels and
    produces `output_units` outputs.

    Both replacement layers are newly constructed, so whatever weights the
    model held in those two positions are discarded. Nothing is returned.

    Parameters

    model: instance of a pytorch model to be modified; it must expose
        indexable `features` and `classifier` containers with at least
        1 and 7 entries respectively
    input_channels: channels of input tensor
    output_units: number of units in the last layer
    '''
    old_conv = model.features[0]
    old_head = model.classifier[6]

    # Take the layer sizes from the layers being replaced so the rest of the
    # network still lines up; fall back to the VGG16 values when the existing
    # layer is not of the expected type.
    if isinstance(old_conv, nn.Conv2d):
        conv_kwargs = dict(
            out_channels=old_conv.out_channels,
            kernel_size=old_conv.kernel_size,
            stride=old_conv.stride,
            padding=old_conv.padding,
            dilation=old_conv.dilation,
        )
    else:
        conv_kwargs = dict(out_channels=64, kernel_size=3, stride=1, padding=1, dilation=1)

    head_in_features = old_head.in_features if isinstance(old_head, nn.Linear) else 4096

    model.features[0] = nn.Conv2d(input_channels, groups=1, bias=True, **conv_kwargs)
    model.classifier[6] = nn.Linear(head_in_features, output_units)

In [4]:
# Adapt the pretrained VGG16 in place: 2 input channels, 512 output units.
modify_model(vgg16_trained, 2, 512)

Test with a random input tensor:

In [5]:
# One random 2-channel 256x256 input, laid out as (batch, channels, height, width).
x = torch.randn(1, 2, 256, 256)
# Only the output shape is inspected, so skip building an autograd graph.
with torch.no_grad():
    output = vgg16_trained(x)
print(output.shape)

torch.Size([1, 512])


In [7]:
vgg16_trained

VGG(
  (features): Sequential(
    (0): Conv2d(2, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

Features:

In [8]:
vgg16_trained.features

Sequential(
  (0): Conv2d(2, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace=True)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU(inplace=True)
  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU(inplace=True)
  (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): ReLU(inplace=True)
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace=True)
  (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU(inplace=True)
  (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): ReLU(inplace=True)
  (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (17): Conv2d(256, 512, kernel_si

In [9]:
vgg16_trained.classifier

Sequential(
  (0): Linear(in_features=25088, out_features=4096, bias=True)
  (1): ReLU(inplace=True)
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=4096, out_features=4096, bias=True)
  (4): ReLU(inplace=True)
  (5): Dropout(p=0.5, inplace=False)
  (6): Linear(in_features=4096, out_features=512, bias=True)
)

Do the same with the untrained version:

In [10]:
# Same in-place adaptation for the untrained VGG16: 2 input channels, 512 output units.
modify_model(vgg16_untrained, 2, 512)

# Settings

Create a dictionary with settings:

In [108]:
# settings = {
#                 'criterion': nn.MSELoss,
#                 'optimizer': optim.Adam,
#                 'lr': 0.001
#            }

In [3]:
use_cuda = torch.cuda.is_available() # True if cuda is available

Move model to cuda:

In [41]:
# if use_cuda:
#     model = model.cuda()

# Test a simple model on the Fashion-MNIST dataset and set up TensorBoard

In [4]:
# Small fully connected network used for the Fashion-MNIST experiments below.
class Classifier(nn.Module):
    '''Four linear layers (784 -> 256 -> 128 -> 64 -> 10) ending in log-softmax.'''

    def __init__(self):
        super().__init__()
        # Layers are created in forward order; attribute names are kept
        # because other cells read e.g. `model.fc1` directly.
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 10)

    def forward(self, x):
        '''Return per-class log-probabilities for a batch `x`.'''
        # collapse every dimension after the batch dimension
        hidden = x.view(x.size(0), -1)

        # three ReLU-activated hidden layers
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = F.relu(layer(hidden))

        logits = self.fc4(hidden)
        return F.log_softmax(logits, dim=1)
    
def get_num_correct(preds, labels):
    '''Count the rows of `preds` whose highest-scoring column equals the matching entry of `labels`.'''
    predicted_classes = preds.argmax(dim=1)
    matches = predicted_classes == labels
    return matches.sum().item()

In [5]:
# Fashion-MNIST training split stored under ./data (download requested),
# with each image converted to a tensor.
train_set = datasets.FashionMNIST(
    './data',
    train=True,
    transform=transforms.Compose([transforms.ToTensor()]),
    download=True,
)

In [9]:
# train_loader = torch.utils.data.DataLoader(train_set, batch_size=100, shuffle=True)

Test tensorboard setup:

In [11]:
# Negative log-likelihood loss, paired with the log-softmax output of Classifier.
criterion = nn.NLLLoss()

# Hyper-parameter grid: every learning rate is combined with every batch size.
parameters = {
    'lr_list': [0.01, 0.001],
    'batch_size_list': [10, 128, 256, 512],
}

In [12]:
# Hyper-parameter sweep: train a fresh Classifier for every (lr, batch_size)
# pair and log each run with its own TensorBoard SummaryWriter.
device = torch.device('cuda' if use_cuda else 'cpu')

# Name the two lists explicitly so the unpacking below does not depend on the
# insertion order of the `parameters` dict.
for lr, batch_size in it.product(parameters['lr_list'], parameters['batch_size_list']):

    print('\nlr={}, batch_size={}:'.format(lr, batch_size))

    # initialize model
    model = Classifier().to(device)

    # create loaders
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)

    optimizer = optim.Adam(model.parameters(), lr=lr)

    # initialize tensorboard
    experiment = f' batch_size={batch_size} lr={lr}'
    tb = SummaryWriter(comment=experiment) # tensorboard instance
    try:
        images, labels = next(iter(train_loader))
        grid = torchvision.utils.make_grid(images) # create a grid of images

        # add some graphs
        tb.add_image('images', grid) # one batch (batch_size images) as a single grid
        tb.add_graph(model, images.to(device)) # example input must live on the model's device

        for epoch in range(10):
            train_loss = 0.0
            train_correct = 0

            # train
            model.train() # put the model in training mode
            for data, target in train_loader:
                # .to(device) is a no-op on CPU, so this works with and without CUDA
                data, target = data.to(device), target.to(device)
                optimizer.zero_grad() # reset gradients before each batch
                output = model(data) # make prediction
                loss = criterion(output, target) # negative log-likelihood, averaged over the batch
                loss.backward() # backpropagation
                optimizer.step() # update weights
                # Convert the batch mean back to a per-sample sum so runs with
                # different batch sizes are comparable. Use the real batch
                # length: the final batch can be shorter than batch_size.
                train_loss += loss.item() * data.size(0)
                train_correct += get_num_correct(output, target)

            accuracy = train_correct / len(train_set)
            tb.add_scalar('Loss', train_loss, epoch)
            tb.add_scalar('Number Correct', train_correct, epoch)
            tb.add_scalar('Accuracy', accuracy, epoch)

            tb.add_histogram('fc1.bias', model.fc1.bias, epoch)
            tb.add_histogram('fc1.weight', model.fc1.weight, epoch)
            # gradient left over from the last batch of this epoch
            tb.add_histogram('fc1.weight.grad', model.fc1.weight.grad, epoch)

            print('\tEpoch {}, Training Loss: {:.6f}, Accuracy: {:.4f}%'.format(epoch + 1, train_loss, accuracy * 100))
    finally:
        # flush and release the event file even if a run fails part-way
        tb.close()


lr=0.01, batch_size=10:
	Epoch 1, Training Loss: 39341.925781, Accuracy: 76.2967%
	Epoch 2, Training Loss: 32931.246094, Accuracy: 80.8117%
	Epoch 3, Training Loss: 33065.984375, Accuracy: 81.2100%
	Epoch 4, Training Loss: 31483.708984, Accuracy: 82.2083%
	Epoch 5, Training Loss: 32361.390625, Accuracy: 81.9967%
	Epoch 6, Training Loss: 33995.960938, Accuracy: 80.9950%
	Epoch 7, Training Loss: 34648.332031, Accuracy: 80.9800%
	Epoch 8, Training Loss: 32875.812500, Accuracy: 81.2833%
	Epoch 9, Training Loss: 32861.453125, Accuracy: 81.9000%
	Epoch 10, Training Loss: 32138.419922, Accuracy: 82.3583%

lr=0.01, batch_size=128:
	Epoch 1, Training Loss: 32581.386719, Accuracy: 80.1950%
	Epoch 2, Training Loss: 23812.414062, Accuracy: 85.7583%
	Epoch 3, Training Loss: 22048.087891, Accuracy: 86.8033%
	Epoch 4, Training Loss: 21340.814453, Accuracy: 87.1100%
	Epoch 5, Training Loss: 20188.761719, Accuracy: 87.9833%
	Epoch 6, Training Loss: 19922.248047, Accuracy: 88.0533%
	Epoch 7, Training L

# Train model

Define a train and test function:

In [21]:
def train(n_epochs, loaders, model, optimizer, criterion, use_cuda, save_path):
    '''
    Train `model`, evaluate it on the validation loader after every epoch and
    save its weights each time the validation loss improves.

    Parameters

    n_epochs: number of passes over loaders['train']
    loaders: mapping with 'train' and 'valid' entries, each an iterable of
        (data, target) batches
    model: network to optimise (updated in place)
    optimizer: optimizer already constructed over the model's parameters
    criterion: callable(output, target) returning a scalar loss tensor
    use_cuda: when True every batch is moved to the GPU; the model itself is
        not moved by this function
    save_path: file that receives model.state_dict() whenever the mean
        validation loss drops below the best value seen so far

    Returns

    The trained model
    '''
    valid_loss_min = float('inf')

    for epoch in range(1, n_epochs + 1):
        start_time = time.time()
        train_loss = 0.
        valid_loss = 0.
        # Loss of the most recent batch; stays NaN if the loader yields nothing.
        mean_train_error = float('nan')
        mean_valid_error = float('nan')

        # train
        model.train() # training mode (affects e.g. dropout)
        for batch_idx, (data, target) in enumerate(loaders['train']):
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            optimizer.zero_grad() # reset gradients before each batch
            output = model(data) # make prediction
            loss = criterion(output, target) # loss as defined by `criterion`
            loss.backward() # backpropagation
            optimizer.step() # update weights
            batch_loss = loss.item() # plain float, detached from the graph
            mean_train_error = batch_loss
            # incremental mean of the per-batch losses
            train_loss += (1 / (batch_idx + 1)) * (batch_loss - train_loss)

        # validation
        model.eval() # evaluation mode
        with torch.no_grad(): # no gradients needed while validating
            for batch_idx, (data, target) in enumerate(loaders['valid']):
                if use_cuda:
                    data, target = data.cuda(), target.cuda()
                output = model(data) # make prediction
                batch_loss = criterion(output, target).item()
                mean_valid_error = batch_loss
                valid_loss += (1 / (batch_idx + 1)) * (batch_loss - valid_loss)

        print('Epoch {}, Training Loss: {:.6f}, Mean Phase Error: {:.6f}, Validation Loss: {:.6f}, Mean Phase Error: {:.6f}'.format(
            epoch,
            train_loss,
            mean_train_error,
            valid_loss,
            mean_valid_error))
        print('Computation time: {:.4f} sec'.format(time.time() - start_time))

        if valid_loss < valid_loss_min:
            print('Validation loss has decreased from {:.6f} -> {:.6f}'.format(valid_loss_min, valid_loss))
            valid_loss_min = valid_loss
            # keep the best weights seen so far
            torch.save(model.state_dict(), save_path)

    return model