In [1]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

In [2]:
# Mean / std handed to Normalize by both pipelines. Normalize expects sequences,
# so each value is a one-element tuple: (0.1307,) is a tuple, (0.1307) is a float.
NORM_MEAN = (0.1307,)
NORM_STD = (0.3081,)

# Train-phase pipeline: random rotation in [-5, 5] degrees (areas exposed by the
# rotation are filled with value 1), then tensor conversion and normalization.
train_transforms = transforms.Compose([
    transforms.RandomRotation((-5.0, 5.0), fill=(1,)),
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
])

# Test-phase pipeline: no augmentation, only tensor conversion and normalization.
test_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
])


In [3]:
# Directory the MNIST files are downloaded to / read from.
DATA_ROOT = './data'

# Training split uses the augmenting pipeline; the held-out split uses the plain one.
train = datasets.MNIST(root=DATA_ROOT, train=True, download=True, transform=train_transforms)
test = datasets.MNIST(root=DATA_ROOT, train=False, download=True, transform=test_transforms)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 223060021.27it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 39573895.40it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 71489470.71it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 23007884.99it/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



In [4]:
from utils import is_cuda_available

SEED = 1

cuda = is_cuda_available()
print("CUDA Available?", cuda)

# Seed the CPU generator (and the CUDA generator when a GPU is present) so that
# weight initialization, shuffling and augmentation are repeatable across runs.
torch.manual_seed(SEED)

if cuda:
    torch.cuda.manual_seed(SEED)

# Loader settings for training: worker processes and pinned memory are only
# requested when CUDA is available.
dataloader_args = dict(shuffle=True, batch_size=64, num_workers=4, pin_memory=True) if cuda else dict(shuffle=True, batch_size=64)

train_loader = torch.utils.data.DataLoader(train, **dataloader_args)

# Evaluation does not benefit from shuffling: the aggregate loss/accuracy is
# order-independent, and a fixed order keeps per-batch results comparable
# between epochs. Everything else is inherited from the training settings.
test_dataloader_args = dict(dataloader_args, shuffle=False)

test_loader = torch.utils.data.DataLoader(test, **test_dataloader_args)

CUDA Available? True




In [9]:

!pip install torchsummary
from model import Model_2, model_summary

use_cuda = is_cuda_available()
device = torch.device("cuda" if use_cuda else "cpu")
print(device)
model = Model_2().to(device)
model_summary(model, input_size=(1, 28, 28))

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
cuda
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 26, 26]              72
              ReLU-2            [-1, 8, 26, 26]               0
       BatchNorm2d-3            [-1, 8, 26, 26]              16
           Dropout-4            [-1, 8, 26, 26]               0
            Conv2d-5           [-1, 16, 24, 24]           1,152
              ReLU-6           [-1, 16, 24, 24]               0
       BatchNorm2d-7           [-1, 16, 24, 24]              32
           Dropout-8           [-1, 16, 24, 24]               0
            Conv2d-9           [-1, 10, 24, 24]             160
        MaxPool2d-10           [-1, 10, 12, 12]               0
           Conv2d-11           [-1, 16, 10, 10]           1,440
             ReLU-12           [-1, 16, 10, 10]               0

In [10]:
from model import model_train, model_test

In [11]:
from torch.optim.lr_scheduler import StepLR

# Hyperparameters for this run, gathered in one place.
LEARNING_RATE = 0.015
MOMENTUM = 0.9
LR_STEP_SIZE = 5   # epochs between learning-rate decays
LR_GAMMA = 0.1     # multiplicative decay factor applied at each step
EPOCHS = 15

# Start from a freshly initialized model so this cell does not depend on
# whatever state an earlier cell left in `model`.
model = Model_2().to(device)
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM)
scheduler = StepLR(optimizer, step_size=LR_STEP_SIZE, gamma=LR_GAMMA)

for epoch in range(EPOCHS):
    print("EPOCH:", epoch)
    # One pass over the training data, then advance the LR schedule,
    # then evaluate on the test loader.
    model_train(model, device, train_loader, optimizer, epoch)
    scheduler.step()
    model_test(model, device, test_loader)

EPOCH: 0


Loss=0.08024477958679199 Batch_id=937 Accuracy=90.27: 100%|██████████| 938/938 [00:30<00:00, 30.60it/s]



Test set: Average loss: 0.0544, Accuracy: 9840/10000 (98.40%)

EPOCH: 1


Loss=0.017837243154644966 Batch_id=937 Accuracy=97.47: 100%|██████████| 938/938 [00:28<00:00, 32.42it/s]



Test set: Average loss: 0.0361, Accuracy: 9893/10000 (98.93%)

EPOCH: 2


Loss=0.09937196969985962 Batch_id=937 Accuracy=98.00: 100%|██████████| 938/938 [00:27<00:00, 34.56it/s]



Test set: Average loss: 0.0464, Accuracy: 9853/10000 (98.53%)

EPOCH: 3


Loss=0.0073743946850299835 Batch_id=937 Accuracy=98.26: 100%|██████████| 938/938 [00:26<00:00, 35.61it/s]



Test set: Average loss: 0.0318, Accuracy: 9901/10000 (99.01%)

EPOCH: 4


Loss=0.06977936625480652 Batch_id=937 Accuracy=98.42: 100%|██████████| 938/938 [00:26<00:00, 35.57it/s]



Test set: Average loss: 0.0300, Accuracy: 9910/10000 (99.10%)

EPOCH: 5


Loss=0.2841092646121979 Batch_id=937 Accuracy=98.78: 100%|██████████| 938/938 [00:26<00:00, 35.53it/s]



Test set: Average loss: 0.0212, Accuracy: 9942/10000 (99.42%)

EPOCH: 6


Loss=0.04237041622400284 Batch_id=937 Accuracy=98.78: 100%|██████████| 938/938 [00:26<00:00, 35.51it/s]



Test set: Average loss: 0.0213, Accuracy: 9938/10000 (99.38%)

EPOCH: 7


Loss=0.012989741750061512 Batch_id=937 Accuracy=98.90: 100%|██████████| 938/938 [00:27<00:00, 34.04it/s]



Test set: Average loss: 0.0209, Accuracy: 9942/10000 (99.42%)

EPOCH: 8


Loss=0.037813205271959305 Batch_id=937 Accuracy=98.88: 100%|██████████| 938/938 [00:26<00:00, 35.75it/s]



Test set: Average loss: 0.0188, Accuracy: 9944/10000 (99.44%)

EPOCH: 9


Loss=0.11521659791469574 Batch_id=937 Accuracy=98.96: 100%|██████████| 938/938 [00:26<00:00, 35.50it/s]



Test set: Average loss: 0.0191, Accuracy: 9944/10000 (99.44%)

EPOCH: 10


Loss=0.0036091140937060118 Batch_id=937 Accuracy=98.93: 100%|██████████| 938/938 [00:26<00:00, 35.80it/s]



Test set: Average loss: 0.0190, Accuracy: 9944/10000 (99.44%)

EPOCH: 11


Loss=0.015714243054389954 Batch_id=937 Accuracy=99.03: 100%|██████████| 938/938 [00:25<00:00, 36.36it/s]



Test set: Average loss: 0.0197, Accuracy: 9944/10000 (99.44%)

EPOCH: 12


Loss=0.12866824865341187 Batch_id=937 Accuracy=98.97: 100%|██████████| 938/938 [00:26<00:00, 35.73it/s]



Test set: Average loss: 0.0195, Accuracy: 9946/10000 (99.46%)

EPOCH: 13


Loss=0.14687387645244598 Batch_id=937 Accuracy=98.94: 100%|██████████| 938/938 [00:27<00:00, 34.59it/s]



Test set: Average loss: 0.0189, Accuracy: 9943/10000 (99.43%)

EPOCH: 14


Loss=0.005956384353339672 Batch_id=937 Accuracy=98.93: 100%|██████████| 938/938 [00:26<00:00, 35.78it/s]



Test set: Average loss: 0.0189, Accuracy: 9944/10000 (99.44%)



# Summary

Target :

Add augmentation that helps the model learn more invariant and discriminative features.

By observing the dataset images, we can see that training could benefit from rotation augmentation.

Results :

Parameters: 7,720

Best Train Accuracy: 99.03

Best Test Accuracy: 99.46

Analysis :

The model shows a good improvement: it reaches a validation accuracy above 99.40% from Epoch 5 onward and, except for a single epoch (Epoch 6, 99.38%), stays above 99.40% for the rest of training.