# Transfer Learning

This notebook explores different approaches to transfer learning applied to MNIST and CIFAR with augmented images.

In [1]:
from IPython.display import display
from ipywidgets import FloatProgress

import sys
sys.path.append('../src')

import torch
import neuralnetworkclassifier as nnc
import dataset_manipulations as dm
import perturb as per
import numpy as np
import mlutils as ml
import pickle
import os

import matplotlib.pyplot as plt

In [41]:
# Load the CIFAR-10 training and test splits as (inputs, targets) pairs.
# The training path ends in a `*` wildcard -- presumably expanded by
# dm.load_cifar_10 to cover every data_batch file (TODO confirm).
(Xtrain, Ttrain), (Xtest, Ttest) = (
    dm.load_cifar_10(batch_path)
    for batch_path in (
        './cifar-10-batches-py/data_batch_*',
        './cifar-10-batches-py/test_batch',
    )
)

In [42]:
# Build the medium-noise (variance=0.05) copies of the train and test images.
# NOTE(review): no random seed is set in this cell; if add_image_noise draws
# random noise, the perturbed sets will differ between runs -- confirm.
noise_Xtrain, noise_Xtest = (
    dm.apply_manipulations(
        images,
        per_func=lambda x: per.add_image_noise(x, variance=0.05),
    )
    for images in (Xtrain, Xtest)
)

In [43]:
# Build the high-noise (variance=0.1) copies of the train and test images.
# NOTE(review): no random seed is set in this cell; if add_image_noise draws
# random noise, the perturbed sets will differ between runs -- confirm.
morenoise_Xtrain, morenoise_Xtest = (
    dm.apply_manipulations(
        images,
        per_func=lambda x: per.add_image_noise(x, variance=0.1),
    )
    for images in (Xtrain, Xtest)
)

In [44]:
# Build the low-noise (variance=0.025) copies of the train and test images.
# NOTE(review): no random seed is set in this cell; if add_image_noise draws
# random noise, the perturbed sets will differ between runs -- confirm.
lessnoise_Xtrain, lessnoise_Xtest = (
    dm.apply_manipulations(
        images,
        per_func=lambda x: per.add_image_noise(x, variance=0.025),
    )
    for images in (Xtrain, Xtest)
)

In [45]:
# Re-import the local modules so that edits made under ../src are picked up
# without restarting the kernel.
# `imp` is deprecated and was removed in Python 3.12; importlib.reload is the
# supported replacement and has the same call signature.
import importlib

importlib.reload(nnc)
# Left as the last expression so the cell still displays the reloaded module.
importlib.reload(per)

<module 'perturb' from '../src/perturb.py'>

## Load before you Train

If the clean model has already been trained and saved to `./pretrained_cifar_clean.pkl`, load it instead of training again.

In [79]:
# Reuse the cached clean CIFAR model when the checkpoint exists; otherwise
# train a new one on the unperturbed training data.
#
# SECURITY: torch.load unpickles the file, which can execute arbitrary code.
# Only load checkpoints you created yourself or otherwise trust.
# NOTE(review): recent PyTorch releases default torch.load to
# weights_only=True, which rejects whole pickled model objects like this one;
# pass weights_only=False if the installed version requires it.
gpu_available = torch.cuda.is_available()

if os.path.exists('./pretrained_cifar_clean.pkl'):
    with open('./pretrained_cifar_clean.pkl', 'rb') as f:
        # Without a GPU, remap any stored CUDA tensors onto the CPU so the
        # load itself does not fail; with a GPU, keep the default placement.
        nnet = torch.load(f, map_location=None if gpu_available else 'cpu')
    # Only move the model to the GPU when one is actually present; the
    # unconditional .cuda() call raised on CPU-only machines.
    # NOTE(review): the saved wrapper may still carry use_gpu=True; confirm
    # that it behaves correctly when used on a CPU-only host.
    if gpu_available:
        nnet.cuda()
else:
    nnet = per.train_cifar(Xtrain, Ttrain, verbose=True, random_seed=12)

In [80]:
# Display the loaded/trained model so its constructor arguments and layer
# stack can be inspected.
nnet

NeuralNetwork_Convolutional(
                            n_channels_in_image=3,
                            image_size=32,
                            n_units_in_conv_layers=[64, 64, 128, 128, 256, 256, 512, 512],
                            kernels_size_and_stride=[(3, 1, 1), (3, 1, 1), (3, 1, 1), (3, 1, 1), (3, 1, 1), (3, 1, 1), (3, 1, 1), (3, 1, 1)],
                            max_pooling_kernels_and_stride=[(), (2, 2), (), (2, 2), (), (2, 2), (), (2, 2)],
                            n_units_in_fc_hidden_layers=[],
                            classes=[0 1 2 3 4 5 6 7 8 9],
                            use_gpu=True)
Sequential(
  (conv_0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (norm_0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (output_0): ReLU()
  (conv_1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (norm_1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  

In [78]:
# Cache the clean model on disk, but never overwrite an existing checkpoint.
checkpoint_path = './pretrained_cifar_clean.pkl'
checkpoint_missing = not os.path.exists(checkpoint_path)
if checkpoint_missing:
    with open(checkpoint_path, 'wb') as checkpoint_file:
        torch.save(nnet, checkpoint_file)

In [71]:
# Percent of test labels predicted correctly by the current `nnet` on each
# test-set variant (clean, variance 0.05, variance 0.025, variance 0.1).
# 100 is passed as the third argument to ml.batched_use (presumably the
# batch size -- TODO confirm).
clean_pct, noise_pct, lessnoise_pct, morenoise_pct = (
    ml.percent_correct(Ttest, ml.batched_use(nnet, test_inputs, 100))
    for test_inputs in (Xtest, noise_Xtest, lessnoise_Xtest, morenoise_Xtest)
)
(clean_pct, noise_pct, lessnoise_pct, morenoise_pct)

(86.92, 66.69, 83.72, 28.48)

In [72]:
# Input width of the last layer of the wrapped network. The replacement
# heads built in the following cells hard-code this value (2048) as the
# input size of their first Linear layer.
nnet.nnet[-1].in_features

2048

In [73]:
# Deeper replacement head: Linear 2048 -> 256 -> 512 -> 10 with a ReLU after
# each hidden Linear layer. The list is handed to nnet.transfer_learn_setup,
# whose result is displayed below.
deep_head_layers = [
    torch.nn.Linear(2048, 256),
    torch.nn.ReLU(),
    torch.nn.Linear(256, 512),
    torch.nn.ReLU(),
    torch.nn.Linear(512, 10),
]
other_network = nnet.transfer_learn_setup(deep_head_layers)
other_network

Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU()
  (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (5): ReLU()
  (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (7): Dropout(p=0.2, inplace=False)
  (8): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (9): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (10): ReLU()
  (11): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (12): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (13): ReLU()
  (14): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (15): Dropout(p=0.2, inplace=False)
  (16): Conv2d(128, 256, kernel_size=(3, 3),

In [36]:
# Shallower replacement head: Linear 2048 -> 256 -> 10 with one ReLU between.
# NOTE(review): `transfer_network` is not referenced again in the cells
# visible here -- confirm whether it is still needed.
shallow_head_layers = [
    torch.nn.Linear(2048, 256),
    torch.nn.ReLU(),
    torch.nn.Linear(256, 10),
]
transfer_network = nnet.transfer_learn_setup(shallow_head_layers)
transfer_network

Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU()
  (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (5): ReLU()
  (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (7): Dropout(p=0.2, inplace=False)
  (8): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (9): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (10): ReLU()
  (11): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (12): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (13): ReLU()
  (14): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (15): Dropout(p=0.2, inplace=False)
  (16): Conv2d(128, 256, kernel_size=(3, 3),

In [74]:
# Replace the wrapper's underlying layer stack with the transfer-learning
# network built above, so that later calls on `nnet` operate on it.
# NOTE(review): this mutates `nnet` in place -- the original clean network is
# no longer reachable through `nnet` after this cell, and re-running earlier
# cells that read `nnet.nnet` will see the new stack.
nnet.nnet = other_network

In [75]:
# Train `nnet` (now holding the transfer-learning stack) on the high-noise
# (variance=0.1) training images for 10 epochs with Adam.
nnet.train(
    morenoise_Xtrain,
    Ttrain,
    n_epochs=10,
    batch_size=200,
    optim='Adam',
    learning_rate=0.0005,
    verbose=True,
)

Epoch 1 error 0.70836
Epoch 2 error 0.63564
Epoch 3 error 0.59420
Epoch 4 error 0.56575
Epoch 5 error 0.54048
Epoch 6 error 0.52803
Epoch 7 error 0.50638
Epoch 8 error 0.49487
Epoch 9 error 0.48905
Epoch 10 error 0.47610


In [55]:
# Percent of test labels predicted correctly by the current `nnet` on each
# test-set variant (clean, variance 0.05, variance 0.025, variance 0.1).
# NOTE(review): this cell has the same code as the evaluation cell that
# follows it, yet the two recorded outputs differ, which suggests they were
# executed at different points in the session -- consider keeping only one.
clean_pct, noise_pct, lessnoise_pct, morenoise_pct = (
    ml.percent_correct(Ttest, ml.batched_use(nnet, test_inputs, 100))
    for test_inputs in (Xtest, noise_Xtest, lessnoise_Xtest, morenoise_Xtest)
)
(clean_pct, noise_pct, lessnoise_pct, morenoise_pct)

(84.15, 81.85, 85.06, 53.16)

In [76]:
# Re-score the current `nnet` on all four test-set variants; the tuple on the
# last line is displayed in the order clean, variance 0.05, variance 0.025,
# variance 0.1.
clean_pct, noise_pct, lessnoise_pct, morenoise_pct = (
    ml.percent_correct(Ttest, ml.batched_use(nnet, test_inputs, 100))
    for test_inputs in (Xtest, noise_Xtest, lessnoise_Xtest, morenoise_Xtest)
)
(clean_pct, noise_pct, lessnoise_pct, morenoise_pct)

(79.75, 78.13, 80.30000000000001, 70.24000000000001)