https://arxiv.org/pdf/1409.1556.pdf


### Notes
- architecture:
    - Conv layers with 3x3 kernel, stride 1 and padding 1 to keep resolution
    - Max-pooling is performed over a 2 × 2 pixel window, with stride 2.
    - Conv layers are followed by 3 fc layers 4096-4096-1000
    - No Local Response Normalization
- Pre-processing:
    - Subtracting the mean RGB value, computed on the training set, from each pixel.
- Training
    - mini-batch gradient descent
        - b_size = 256
        - momentum = 0.9
        - weight decay (L2 regularization) = 0.0005
        - dropout (p=0.5) to the first two fc layers.
        - learning rate - initialized to 0.01 and then decreased by a factor of 10 when the validation set accuracy stopped improving. In total, the learning rate was decreased 3 times
    - Initialization
- Evaluation
    - "we observe that the classification error decreases with the increased ConvNet depth: from 11 layers in A to 19 layers in E"
    

In [1]:
import os
import re
import sys
import glob
import numpy as np
import matplotlib.pyplot as plt
import unittest
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as T

%matplotlib inline
%load_ext autoreload
%autoreload 2

seed = 42
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# device = 'cpu'
plt.rcParams.update({'font.size': 12})
test = unittest.TestCase()

In [2]:
# Directory where torchvision stores / looks up the downloaded dataset.
data_dir = os.path.expanduser('~/.pytorch-datasets')

# Resize every image to 224x224 only to match the input size of the original
# paper, then convert it to a tensor.
# NOTE: the mean-RGB subtraction listed under "Pre-processing" in the notes is
# not part of this transform.
transform = T.Compose([
    T.Resize((224, 224)),
    T.ToTensor(),
])

# CIFAR-10 is used here in place of ImageNet (the dataset used in the paper).
ds_train = torchvision.datasets.CIFAR10(root=data_dir, download=True, train=True, transform=transform)
ds_test = torchvision.datasets.CIFAR10(root=data_dir, download=True, train=False, transform=transform)

print(f'Train: {len(ds_train)} samples')
print(f'Test: {len(ds_test)} samples')

# Read the input shape from the first transformed sample, and the number of
# classes from the dataset's own class list, so neither value is hard-coded.
x0, _ = ds_train[0]
in_size = x0.shape
num_classes = len(ds_train.classes)
print('input image size =', in_size)

Files already downloaded and verified
Files already downloaded and verified
Train: 50000 samples
Test: 10000 samples
input image size = torch.Size([3, 224, 224])


In [3]:
import utils.training as training
import vgg
# Seed torch's RNG before any model is constructed.
torch.manual_seed(seed)

# Data loaders over the full train / test sets. Shuffling is disabled, so the
# batches come in the same order every epoch; `max_batches` is the intended cap
# on how many batches are consumed per epoch, which restricts training to a
# small fixed part of CIFAR-10 that the model can overfit.
batch_size = 256
max_batches = 25
dl_train = torch.utils.data.DataLoader(ds_train, batch_size, shuffle=False)
dl_test = torch.utils.data.DataLoader(ds_test, batch_size, shuffle=False)

# Create the model. Its input shape and number of outputs are taken from the
# values computed when the dataset was loaded rather than repeated as literals,
# so the model always agrees with the data actually in use.
model = vgg.VGG11(in_size=tuple(in_size), out_classes=num_classes)

In [4]:
# Weights initialization: pass vgg.weights_init to model.apply and keep the
# object it returns as the model.
# NOTE(review): presumably `model` is an nn.Module, in which case apply() calls
# the function on every submodule and returns the model itself — confirm in vgg.py.
model = model.apply(vgg.weights_init)
# Disabled experiment: fill the bias of the feature_extractor layers at
# indices 4, 11 and 14 with 1.
# conv_2_4_5 = [4,11,14]
# for i in conv_2_4_5:
#     model.feature_extractor[i].bias.data.fill_(1)

In [5]:
# Report how many scalar parameters are trainable: keep only parameters with
# requires_grad set, add up their element counts, and print the total with
# thousands separators.
pytorch_total_params = sum(
    param.numel()
    for param in filter(lambda t: t.requires_grad, model.parameters())
)
print(f"{pytorch_total_params:,}")

128,807,306


In [6]:
from utils.train_results import FitResult

# Loss and optimizer. The SGD settings mirror the training notes at the top of
# the notebook: learning rate 0.01, momentum 0.9, weight decay 0.0005.
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9, weight_decay=0.0005)

trainer = training.TorchTrainer(model, loss_fn, optimizer, device)
best_acc = 0
# Alternative (disabled): use TorchTrainer to run only the training loop a few
# times, tracking the best accuracy seen.
# for i in range(30):
#     res = trainer.train_epoch(dl_train, max_batches=max_batches, verbose=(i%2==0))
#     best_acc = res.accuracy if res.accuracy > best_acc else best_acc

# Full fit. The per-epoch batch cap is the shared `max_batches` setting rather
# than a repeated literal, so changing it in one place affects the whole run.
fit_res = trainer.fit(dl_train, dl_test, num_epochs=30, checkpoints=None, early_stopping=3, max_batches=max_batches)

# Unpack the result and rebuild it with every recorded loss converted through
# .item() (presumably the losses are tensors; this yields plain Python numbers).
epo, loss_tr, acc_tr, loss_te, acc_te = fit_res
fit_res = FitResult(epo, [loss.item() for loss in loss_tr], acc_tr, [loss.item() for loss in loss_te], acc_te)

--- EPOCH 1/30 ---
train_batch:   0%|                                                                                                                                                                                                    | 0/25 [01:05<?, ?it/s]


KeyboardInterrupt: 