In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary
from tqdm import tqdm

In [2]:
from model3 import Net
from train_test_loop import *
from data_utils import *

In [3]:
torch.manual_seed(1)

<torch._C.Generator at 0x7f569b75f530>

In [4]:
BATCH_SIZE=128
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [5]:
model = Net().to(device)
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 26, 26]              72
              ReLU-2            [-1, 8, 26, 26]               0
       BatchNorm2d-3            [-1, 8, 26, 26]              16
           Dropout-4            [-1, 8, 26, 26]               0
            Conv2d-5           [-1, 16, 24, 24]           1,152
              ReLU-6           [-1, 16, 24, 24]               0
       BatchNorm2d-7           [-1, 16, 24, 24]              32
           Dropout-8           [-1, 16, 24, 24]               0
            Conv2d-9           [-1, 10, 24, 24]             160
        MaxPool2d-10           [-1, 10, 12, 12]               0
           Conv2d-11           [-1, 16, 10, 10]           1,440
             ReLU-12           [-1, 16, 10, 10]               0
      BatchNorm2d-13           [-1, 16, 10, 10]              32
          Dropout-14           [-1, 16,

In [12]:
default_train_transforms = transforms.Compose([
                                       transforms.ColorJitter(brightness=0.10, contrast=0.1, saturation=0.10, hue=0.1),
                                    transforms.RandomRotation((-7.0, 7.0), fill=(1,)),
                                       transforms.ToTensor(),
                                       transforms.Normalize((0.1307,), (0.3081,))]) #None
default_test_transforms = transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ]) #None

In [13]:
train_dataloader, test_dataloader = get_augmented_MNIST_dataset("~/work/data/", 
                                    train_tfms=default_train_transforms, test_tfms=default_test_transforms,
                                    batch_sz=BATCH_SIZE)

In [16]:
optimizer = torch.optim.SGD(model.parameters(), lr=0.05, momentum=0.9)
scheduler = get_schedulder(optimizer, "StepLR", step_size=8, gamma=0.20)

In [17]:
EPOCHS=15
for i in range(EPOCHS):
    print("Running EPOCH: "+str(i+1))
    train(model, device, train_dataloader, optimizer=optimizer, epoch=i)
    test(model, device, test_dataloader)

Running EPOCH: 1


Loss=0.0029229503124952316 Batch_id=468 Accuracy=99.12: 100%|██████████| 469/469 [00:02<00:00, 158.72it/s]



Test set: Average loss: 0.0216, Accuracy: 9933/10000 (99.33%)

Running EPOCH: 2


Loss=0.011236213147640228 Batch_id=468 Accuracy=99.19: 100%|██████████| 469/469 [00:02<00:00, 159.12it/s] 



Test set: Average loss: 0.0203, Accuracy: 9937/10000 (99.37%)

Running EPOCH: 3


Loss=0.023615345358848572 Batch_id=468 Accuracy=99.19: 100%|██████████| 469/469 [00:02<00:00, 161.13it/s] 



Test set: Average loss: 0.0220, Accuracy: 9929/10000 (99.29%)

Running EPOCH: 4


Loss=0.021831637248396873 Batch_id=468 Accuracy=99.14: 100%|██████████| 469/469 [00:02<00:00, 164.17it/s] 



Test set: Average loss: 0.0186, Accuracy: 9942/10000 (99.42%)

Running EPOCH: 5


Loss=0.0024384830612689257 Batch_id=468 Accuracy=99.19: 100%|██████████| 469/469 [00:02<00:00, 160.82it/s]



Test set: Average loss: 0.0241, Accuracy: 9930/10000 (99.30%)

Running EPOCH: 6


Loss=0.018069881945848465 Batch_id=468 Accuracy=99.22: 100%|██████████| 469/469 [00:02<00:00, 160.37it/s] 



Test set: Average loss: 0.0189, Accuracy: 9944/10000 (99.44%)

Running EPOCH: 7


Loss=0.009719417430460453 Batch_id=468 Accuracy=99.20: 100%|██████████| 469/469 [00:02<00:00, 160.93it/s] 



Test set: Average loss: 0.0215, Accuracy: 9931/10000 (99.31%)

Running EPOCH: 8


Loss=0.032488591969013214 Batch_id=468 Accuracy=99.21: 100%|██████████| 469/469 [00:02<00:00, 159.50it/s] 



Test set: Average loss: 0.0211, Accuracy: 9937/10000 (99.37%)

Running EPOCH: 9


Loss=0.03466048464179039 Batch_id=468 Accuracy=99.19: 100%|██████████| 469/469 [00:02<00:00, 158.92it/s]  



Test set: Average loss: 0.0203, Accuracy: 9939/10000 (99.39%)

Running EPOCH: 10


Loss=0.013729765079915524 Batch_id=468 Accuracy=99.19: 100%|██████████| 469/469 [00:02<00:00, 162.57it/s] 



Test set: Average loss: 0.0205, Accuracy: 9941/10000 (99.41%)

Running EPOCH: 11


Loss=0.05354632809758186 Batch_id=468 Accuracy=99.17: 100%|██████████| 469/469 [00:02<00:00, 160.97it/s]  



Test set: Average loss: 0.0190, Accuracy: 9944/10000 (99.44%)

Running EPOCH: 12


Loss=0.060401156544685364 Batch_id=468 Accuracy=99.24: 100%|██████████| 469/469 [00:02<00:00, 159.94it/s] 



Test set: Average loss: 0.0199, Accuracy: 9938/10000 (99.38%)

Running EPOCH: 13


Loss=0.03345354273915291 Batch_id=468 Accuracy=99.23: 100%|██████████| 469/469 [00:02<00:00, 159.69it/s]  



Test set: Average loss: 0.0196, Accuracy: 9943/10000 (99.43%)

Running EPOCH: 14


Loss=0.008826501667499542 Batch_id=468 Accuracy=99.24: 100%|██████████| 469/469 [00:02<00:00, 158.82it/s] 



Test set: Average loss: 0.0193, Accuracy: 9947/10000 (99.47%)

Running EPOCH: 15


Loss=0.06246912479400635 Batch_id=468 Accuracy=99.19: 100%|██████████| 469/469 [00:02<00:00, 160.97it/s]  



Test set: Average loss: 0.0180, Accuracy: 9945/10000 (99.45%)



#### Target:

Get a consistent accuracy of 99.4% on the test set

#### Result:

The accuracy on the test set for the last few epochs is above 99.4%

#### Analysis: 

We added some augmentations in  addition to random rotation of 7degrees, _i.e._ ColorJitter(brightness=0.10, contrast=0.1, saturation=0.10, hue=0.1) following the example.  I have tweaked the LR and the scheduler. I changed the LR to 0.05 and the shceduler with step size 8 and gamma 0.2 i.e. every 8 steps the LR is multipled the gamma (0.2). This follows the thought in step-2 that we should increase step size or decrease gamma (or both) to get a more stable train/test error rates. If we check the training logs, we see that the test accuracy is consistently above 99% and for the last 3 epochs it is consistently above 99.4% (99.43, 99.47, 99.45)