In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary
from tqdm import tqdm

In [2]:
from model1 import Net
from train_test_loop import *
from data_utils import *

In [4]:
torch.manual_seed(1)

<torch._C.Generator at 0x7f1e45257550>

In [6]:
BATCH_SIZE=128
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [8]:
model = Net().to(device)
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 26, 26]             144
              ReLU-2           [-1, 16, 26, 26]               0
       BatchNorm2d-3           [-1, 16, 26, 26]              32
           Dropout-4           [-1, 16, 26, 26]               0
            Conv2d-5           [-1, 32, 24, 24]           4,608
              ReLU-6           [-1, 32, 24, 24]               0
       BatchNorm2d-7           [-1, 32, 24, 24]              64
           Dropout-8           [-1, 32, 24, 24]               0
            Conv2d-9           [-1, 10, 24, 24]             320
        MaxPool2d-10           [-1, 10, 12, 12]               0
           Conv2d-11           [-1, 16, 10, 10]           1,440
             ReLU-12           [-1, 16, 10, 10]               0
      BatchNorm2d-13           [-1, 16, 10, 10]              32
          Dropout-14           [-1, 16,

In [9]:
default_train_transforms = transforms.Compose([
                                        transforms.RandomRotation((-7.0, 7.0), fill=(1,)),
                                       transforms.ToTensor(),
                                       transforms.Normalize((0.1307,), (0.3081,))]) #None
default_test_transforms = transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ]) #None

In [10]:
train_dataloader, test_dataloader = get_augmented_MNIST_dataset("~/work/data/", 
                                    train_tfms=default_train_transforms, test_tfms=default_test_transforms,
                                    batch_sz=BATCH_SIZE)

In [12]:
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
scheduler = get_schedulder(optimizer, "StepLR", step_size=6, gamma=0.5)

In [13]:
EPOCHS=15
for i in range(EPOCHS):
    print("Running EPOCH: "+str(i+1))
    train(model, device, train_dataloader, optimizer=optimizer, epoch=i)
    test(model, device, test_dataloader)

Running EPOCH: 1


Loss=0.119404636323452 Batch_id=468 Accuracy=86.80: 100%|██████████| 469/469 [00:01<00:00, 281.35it/s]  



Test set: Average loss: 0.0655, Accuracy: 9809/10000 (98.09%)

Running EPOCH: 2


Loss=0.09325933456420898 Batch_id=468 Accuracy=97.68: 100%|██████████| 469/469 [00:01<00:00, 304.56it/s] 



Test set: Average loss: 0.0465, Accuracy: 9858/10000 (98.58%)

Running EPOCH: 3


Loss=0.045991700142621994 Batch_id=468 Accuracy=98.20: 100%|██████████| 469/469 [00:01<00:00, 298.68it/s]



Test set: Average loss: 0.0367, Accuracy: 9886/10000 (98.86%)

Running EPOCH: 4


Loss=0.03604603558778763 Batch_id=468 Accuracy=98.50: 100%|██████████| 469/469 [00:01<00:00, 293.03it/s]  



Test set: Average loss: 0.0355, Accuracy: 9887/10000 (98.87%)

Running EPOCH: 5


Loss=0.0760471299290657 Batch_id=468 Accuracy=98.56: 100%|██████████| 469/469 [00:01<00:00, 300.48it/s]   



Test set: Average loss: 0.0269, Accuracy: 9916/10000 (99.16%)

Running EPOCH: 6


Loss=0.01415489986538887 Batch_id=468 Accuracy=98.61: 100%|██████████| 469/469 [00:01<00:00, 304.48it/s]  



Test set: Average loss: 0.0288, Accuracy: 9906/10000 (99.06%)

Running EPOCH: 7


Loss=0.054149847477674484 Batch_id=468 Accuracy=98.79: 100%|██████████| 469/469 [00:01<00:00, 305.41it/s] 



Test set: Average loss: 0.0259, Accuracy: 9921/10000 (99.21%)

Running EPOCH: 8


Loss=0.04851537570357323 Batch_id=468 Accuracy=98.83: 100%|██████████| 469/469 [00:01<00:00, 293.50it/s] 



Test set: Average loss: 0.0256, Accuracy: 9925/10000 (99.25%)

Running EPOCH: 9


Loss=0.019215712323784828 Batch_id=468 Accuracy=98.87: 100%|██████████| 469/469 [00:01<00:00, 305.26it/s] 



Test set: Average loss: 0.0273, Accuracy: 9915/10000 (99.15%)

Running EPOCH: 10


Loss=0.010558630339801311 Batch_id=468 Accuracy=98.86: 100%|██████████| 469/469 [00:01<00:00, 296.18it/s] 



Test set: Average loss: 0.0218, Accuracy: 9927/10000 (99.27%)

Running EPOCH: 11


Loss=0.005652337800711393 Batch_id=468 Accuracy=98.92: 100%|██████████| 469/469 [00:01<00:00, 300.60it/s] 



Test set: Average loss: 0.0216, Accuracy: 9931/10000 (99.31%)

Running EPOCH: 12


Loss=0.06214965879917145 Batch_id=468 Accuracy=98.94: 100%|██████████| 469/469 [00:01<00:00, 301.26it/s]  



Test set: Average loss: 0.0215, Accuracy: 9931/10000 (99.31%)

Running EPOCH: 13


Loss=0.10331372171640396 Batch_id=468 Accuracy=98.94: 100%|██████████| 469/469 [00:01<00:00, 297.39it/s]  



Test set: Average loss: 0.0200, Accuracy: 9944/10000 (99.44%)

Running EPOCH: 14


Loss=0.017529616132378578 Batch_id=468 Accuracy=98.99: 100%|██████████| 469/469 [00:01<00:00, 304.89it/s] 



Test set: Average loss: 0.0191, Accuracy: 9940/10000 (99.40%)

Running EPOCH: 15


Loss=0.014216586016118526 Batch_id=468 Accuracy=98.98: 100%|██████████| 469/469 [00:01<00:00, 303.24it/s] 



Test set: Average loss: 0.0241, Accuracy: 9927/10000 (99.27%)



#### Target: 

Check the accuracy and size of the model discussed in class

#### Result: 

99.44% accuracy but not consistent, model size 13808

#### Analysis: 

Step 1 is a naive code taken directly from the class, just to see how well the CNN performs, without paying attention to the model size. We just take the MNIST data and convert to Tensor and normalize it and added random rotation from -7 degrees to +7 degrees. It has close to 13,808 model params which are 1.73X our ideal size, but we get a max test accuracy of 99.44 in 13th epoch (but dips down by 15th epoch). The basic backbone is input block -> 1 conv blocks (conv + ReLU) -> transition block (2x2 maxpool) -> 4 conv blocks (conv + ReLU) -> GAP layer -> final conv2d layer. We set the LR to 0.01 with SGD, and a StepLR scheduler with step size 6 and gamma 0.5.