In [1]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim 
from torchvision import datasets, transforms

In [2]:
# Mean / std handed to Normalize for the single grey-scale channel.
NORM_MEAN = (0.1307,)
NORM_STD = (0.3081,)

# Training pipeline: random rotation in [-7, 7] degrees as augmentation,
# followed by tensor conversion and normalisation.
# NOTE(review): fill=(1,) is the value written into the pixels exposed by the
# rotation -- confirm this is the intended fill value.
train_transforms = transforms.Compose([
    transforms.RandomRotation((-7.0, 7.0), fill=(1,)),
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
])

# Evaluation pipeline: no augmentation, same normalisation as training.
test_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
])


In [3]:
# Fetch MNIST into ./data (download=True) and attach the matching
# preprocessing pipeline to each split.
# NOTE(review): `train` and `test` are re-bound to functions by the later
# `def train` / `def test` cell, so the DataLoader cell has to run before it.
train = datasets.MNIST(root='./data', train=True, download=True, transform=train_transforms)
test = datasets.MNIST(root='./data', train=False, download=True, transform=test_transforms)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



In [4]:
SEED = 1

# Detect GPU support once; the flag drives both seeding and loader settings.
cuda = torch.cuda.is_available()
print("CUDA available", cuda)

# Seed the torch RNG (and the CUDA RNG when a GPU is present) for reproducibility.
torch.manual_seed(SEED)
if cuda:
  torch.cuda.manual_seed(SEED)

# Loader settings shared by the train and test loaders: CPU defaults first,
# then the larger batch / worker / pinned-memory settings when running on GPU.
# NOTE(review): the test loader inherits shuffle=True from these shared settings.
dataloader_args = dict(shuffle=True, batch_size=64)
if cuda:
  dataloader_args = dict(shuffle=True, batch_size=128, num_workers=4, pin_memory=True)

train_loader = torch.utils.data.DataLoader(train, **dataloader_args)
test_loader = torch.utils.data.DataLoader(test, **dataloader_args)


CUDA available True




In [11]:
# Dropout probability applied after every 3x3 conv block; read when Net is built.
dropout_value = 0.07


class Net(nn.Module):
  """Small fully-convolutional classifier that returns 10 log-probabilities.

  Layer layout (spatial sizes shown for a 1x28x28 input):

    conv1   3x3          1 ->  8    28 -> 26
    conv2   3x3, pad 1   8 -> 16    26 -> 26
    conv3   1x1         16 ->  8    26 -> 26   (transition: conv only)
    pool1   2x2 max                 26 -> 13
    conv4   3x3          8 ->  8    13 -> 11
    conv5   3x3, pad 1   8 -> 16    11 -> 11
    conv6   1x1         16 ->  8    11 -> 11   (transition: conv only)
    conv7   3x3          8 ->  8    11 ->  9
    conv8   3x3          8 -> 16     9 ->  7
    conv9   3x3         16 -> 16     7 ->  5
    gap     global average           5 ->  1
    conv10  1x1         16 -> 10

  Every 3x3 block is Conv2d(bias=False) -> BatchNorm2d -> ReLU -> Dropout.

  The pooling before the classifier is a true global average
  (AdaptiveAvgPool2d(1)) rather than a fixed 5x5 window, and the output is
  flattened per sample. With the fixed window, an input whose final feature
  map was larger than 5x5 produced several spatial positions that
  ``view(-1, 10)`` silently folded into the batch dimension. Sub-module
  names and construction order are unchanged, so the parameter layout is
  the same as before.
  """

  @staticmethod
  def _conv_block(in_channels, out_channels, padding=0):
    """Build one 3x3 Conv -> BatchNorm -> ReLU -> Dropout stack."""
    return nn.Sequential(
        nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                  kernel_size=(3, 3), padding=padding, bias=False),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(),
        nn.Dropout(dropout_value),
    )

  @staticmethod
  def _pointwise(in_channels, out_channels):
    """Build a bare 1x1 convolution used to change the channel count."""
    return nn.Sequential(
        nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                  kernel_size=(1, 1), bias=False),
    )

  def __init__(self):
    super(Net, self).__init__()

    # Input block
    self.conv1 = self._conv_block(1, 8)
    self.conv2 = self._conv_block(8, 16, padding=1)

    # Transition block 1: squeeze channels, then halve the resolution
    self.conv3 = self._pointwise(16, 8)
    self.pool1 = nn.MaxPool2d(2, 2)

    # Convolution block 2
    self.conv4 = self._conv_block(8, 8)
    self.conv5 = self._conv_block(8, 16, padding=1)

    # Transition block 2: squeeze channels only (no pooling)
    self.conv6 = self._pointwise(16, 8)

    # Convolution block 3
    self.conv7 = self._conv_block(8, 8)
    self.conv8 = self._conv_block(8, 16)
    self.conv9 = self._conv_block(16, 16)

    # Output block: global average pool, then a 1x1 conv acting as the classifier
    self.gap = nn.Sequential(
        nn.AdaptiveAvgPool2d(1)
    )
    self.conv10 = self._pointwise(16, 10)

  def forward(self, x):
    """Return log-softmax class scores of shape (batch, 10) for input ``x``."""
    x = self.conv1(x)
    x = self.conv2(x)
    x = self.conv3(x)
    x = self.pool1(x)

    x = self.conv4(x)
    x = self.conv5(x)
    x = self.conv6(x)

    x = self.conv7(x)
    x = self.conv8(x)
    x = self.conv9(x)

    x = self.gap(x)      # (batch, 16, 1, 1)
    x = self.conv10(x)   # (batch, 10, 1, 1)

    # Flatten everything except the batch dimension.
    x = x.flatten(1)
    return F.log_softmax(x, dim=-1)


In [12]:
!pip install torchsummary
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print(device)
model = Net().to(device)
summary(model, input_size = (1,28,28))

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
cuda
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 26, 26]              72
       BatchNorm2d-2            [-1, 8, 26, 26]              16
              ReLU-3            [-1, 8, 26, 26]               0
           Dropout-4            [-1, 8, 26, 26]               0
            Conv2d-5           [-1, 16, 26, 26]           1,152
       BatchNorm2d-6           [-1, 16, 26, 26]              32
              ReLU-7           [-1, 16, 26, 26]               0
           Dropout-8           [-1, 16, 26, 26]               0
            Conv2d-9            [-1, 8, 26, 26]             128
        MaxPool2d-10            [-1, 8, 13, 13]               0
           Conv2d-11            [-1, 8, 11, 11]             576
      BatchNorm2d-12            [-1, 8, 11, 11]              16

In [13]:
from tqdm import tqdm

# Metric histories appended to by train() and test():
#   train_losses / train_acc grow by one entry per training batch,
#   test_losses / test_acc grow by one entry per evaluation pass.
train_losses, test_losses = [], []
train_acc, test_acc = [], []

def train(model, device, train_loader, optimiser, epoch):
  """Train ``model`` for one pass over ``train_loader``.

  For every batch the NLL loss is computed on the model output, gradients
  are back-propagated and ``optimiser`` takes one step. The batch loss is
  appended to the module-level ``train_losses`` and the running accuracy
  (percent, over the samples seen so far in this pass) to ``train_acc``.

  Args:
    model: network to train; switched to training mode here.
    device: device the batches are moved to before the forward pass.
    train_loader: iterable yielding ``(data, target)`` batches.
    optimiser: optimiser holding the model parameters.
    epoch: current epoch index (not used inside the function).
  """
  model.train()
  pbar = tqdm(train_loader)
  correct = 0
  processed = 0
  for batch_idx, (data, target) in enumerate(pbar):
    # Move the batch to the target device.
    data, target = data.to(device), target.to(device)

    # Gradients accumulate by default, so clear them before each step.
    optimiser.zero_grad()

    # Forward pass and loss.
    y_pred = model(data)
    loss = F.nll_loss(y_pred, target)

    # Store a plain Python float rather than the tensor: keeping the tensor
    # would hold on to device memory and its autograd reference for every
    # batch, and the values could not be plotted without conversion.
    loss_value = loss.item()
    train_losses.append(loss_value)

    # Backpropagation and parameter update.
    loss.backward()
    optimiser.step()

    # Index of the max log-probability is the predicted class.
    pred = y_pred.argmax(dim = 1, keepdim = True)
    correct += pred.eq(target.view_as(pred)).sum().item()
    processed += len(data)

    pbar.set_description(desc= f'Loss={loss_value} Batch_id={batch_idx} Accuracy={100*correct/processed:0.2f}')
    train_acc.append(100*correct/processed)

def test(model, device, test_loader):
  """Evaluate ``model`` on ``test_loader`` and record the results.

  Runs in eval mode with gradients disabled, sums the NLL loss over all
  samples, counts correct predictions, prints a one-line report, and
  appends the mean loss to ``test_losses`` and the accuracy (percent) to
  ``test_acc``.

  Args:
    model: network to evaluate.
    device: device the batches are moved to before the forward pass.
    test_loader: loader yielding ``(data, target)`` batches; its
      ``dataset`` length is used as the sample count.
  """
  model.eval()
  total_loss = 0
  num_correct = 0
  with torch.no_grad():
    for inputs, labels in test_loader:
      inputs = inputs.to(device)
      labels = labels.to(device)
      log_probs = model(inputs)
      # reduction='sum' so that dividing by the dataset size gives the true mean
      total_loss += F.nll_loss(log_probs, labels, reduction='sum').item()
      # predicted class = index of the largest log-probability
      predicted = log_probs.argmax(dim=1, keepdim=True)
      num_correct += predicted.eq(labels.view_as(predicted)).sum().item()

  num_samples = len(test_loader.dataset)
  mean_loss = total_loss / num_samples
  test_losses.append(mean_loss)

  accuracy = 100. * num_correct / num_samples
  print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
    mean_loss, num_correct, num_samples, accuracy))

  test_acc.append(accuracy)


In [16]:
from torch.optim.lr_scheduler import StepLR

# Fresh model, SGD with momentum, and a step schedule that multiplies the
# learning rate by 0.1 every 6 epochs.
model = Net().to(device)
optimizer = optim.SGD(params=model.parameters(), lr=0.04, momentum=0.9)
scheduler = StepLR(optimizer=optimizer, step_size=6, gamma=0.1)

EPOCHS = 20
for epoch in range(EPOCHS):
    # Report the learning rate in effect for this epoch.
    current_lr = scheduler.get_last_lr()
    print("EPOCH:", epoch, "Learning Rate: ", current_lr)

    # One training pass, advance the schedule, then evaluate on the test set.
    train(model, device, train_loader, optimizer, epoch)
    scheduler.step()
    test(model, device, test_loader)

EPOCH: 0 Learning Rate:  [0.04]


Loss=0.1202450767159462 Batch_id=468 Accuracy=90.59: 100%|██████████| 469/469 [00:17<00:00, 26.78it/s]



Test set: Average loss: 0.0808, Accuracy: 9739/10000 (97.39%)

EPOCH: 1 Learning Rate:  [0.04]


Loss=0.05749984458088875 Batch_id=468 Accuracy=97.49: 100%|██████████| 469/469 [00:17<00:00, 26.69it/s]



Test set: Average loss: 0.0476, Accuracy: 9841/10000 (98.41%)

EPOCH: 2 Learning Rate:  [0.04]


Loss=0.06992447376251221 Batch_id=468 Accuracy=98.05: 100%|██████████| 469/469 [00:17<00:00, 26.27it/s]



Test set: Average loss: 0.0327, Accuracy: 9898/10000 (98.98%)

EPOCH: 3 Learning Rate:  [0.04]


Loss=0.02567797712981701 Batch_id=468 Accuracy=98.20: 100%|██████████| 469/469 [00:17<00:00, 27.03it/s]



Test set: Average loss: 0.0322, Accuracy: 9906/10000 (99.06%)

EPOCH: 4 Learning Rate:  [0.04]


Loss=0.02320181578397751 Batch_id=468 Accuracy=98.47: 100%|██████████| 469/469 [00:17<00:00, 27.32it/s]



Test set: Average loss: 0.0247, Accuracy: 9916/10000 (99.16%)

EPOCH: 5 Learning Rate:  [0.04]


Loss=0.049136992543935776 Batch_id=468 Accuracy=98.39: 100%|██████████| 469/469 [00:17<00:00, 26.84it/s]



Test set: Average loss: 0.0266, Accuracy: 9914/10000 (99.14%)

EPOCH: 6 Learning Rate:  [0.004]


Loss=0.005780770909041166 Batch_id=468 Accuracy=98.84: 100%|██████████| 469/469 [00:17<00:00, 26.52it/s]



Test set: Average loss: 0.0193, Accuracy: 9940/10000 (99.40%)

EPOCH: 7 Learning Rate:  [0.004]


Loss=0.02239128202199936 Batch_id=468 Accuracy=98.89: 100%|██████████| 469/469 [00:17<00:00, 27.06it/s]



Test set: Average loss: 0.0183, Accuracy: 9944/10000 (99.44%)

EPOCH: 8 Learning Rate:  [0.004]


Loss=0.0025257328525185585 Batch_id=468 Accuracy=98.92: 100%|██████████| 469/469 [00:17<00:00, 27.20it/s]



Test set: Average loss: 0.0187, Accuracy: 9937/10000 (99.37%)

EPOCH: 9 Learning Rate:  [0.004]


Loss=0.012505347840487957 Batch_id=468 Accuracy=98.99: 100%|██████████| 469/469 [00:17<00:00, 26.80it/s]



Test set: Average loss: 0.0184, Accuracy: 9940/10000 (99.40%)

EPOCH: 10 Learning Rate:  [0.004]


Loss=0.027832476422190666 Batch_id=468 Accuracy=98.97: 100%|██████████| 469/469 [00:17<00:00, 27.14it/s]



Test set: Average loss: 0.0179, Accuracy: 9942/10000 (99.42%)

EPOCH: 11 Learning Rate:  [0.004]


Loss=0.00904789287596941 Batch_id=468 Accuracy=98.95: 100%|██████████| 469/469 [00:17<00:00, 26.95it/s]



Test set: Average loss: 0.0181, Accuracy: 9940/10000 (99.40%)

EPOCH: 12 Learning Rate:  [0.0004]


Loss=0.013427689671516418 Batch_id=468 Accuracy=99.00: 100%|██████████| 469/469 [00:17<00:00, 26.87it/s]



Test set: Average loss: 0.0179, Accuracy: 9938/10000 (99.38%)

EPOCH: 13 Learning Rate:  [0.0004]


Loss=0.023236772045493126 Batch_id=468 Accuracy=99.00: 100%|██████████| 469/469 [00:18<00:00, 25.45it/s]



Test set: Average loss: 0.0183, Accuracy: 9935/10000 (99.35%)

EPOCH: 14 Learning Rate:  [0.0004]


Loss=0.04036524146795273 Batch_id=468 Accuracy=99.00: 100%|██████████| 469/469 [00:17<00:00, 26.54it/s]



Test set: Average loss: 0.0172, Accuracy: 9945/10000 (99.45%)

EPOCH: 15 Learning Rate:  [0.0004]


Loss=0.007124283816665411 Batch_id=468 Accuracy=98.99: 100%|██████████| 469/469 [00:17<00:00, 26.70it/s]



Test set: Average loss: 0.0179, Accuracy: 9944/10000 (99.44%)

EPOCH: 16 Learning Rate:  [0.0004]


Loss=0.018567416816949844 Batch_id=468 Accuracy=99.03: 100%|██████████| 469/469 [00:17<00:00, 26.99it/s]



Test set: Average loss: 0.0171, Accuracy: 9948/10000 (99.48%)

EPOCH: 17 Learning Rate:  [0.0004]


Loss=0.015918608754873276 Batch_id=468 Accuracy=99.03: 100%|██████████| 469/469 [00:17<00:00, 26.95it/s]



Test set: Average loss: 0.0178, Accuracy: 9943/10000 (99.43%)

EPOCH: 18 Learning Rate:  [4e-05]


Loss=0.009703991003334522 Batch_id=468 Accuracy=98.99: 100%|██████████| 469/469 [00:17<00:00, 27.14it/s]



Test set: Average loss: 0.0176, Accuracy: 9942/10000 (99.42%)

EPOCH: 19 Learning Rate:  [4e-05]


Loss=0.015707187354564667 Batch_id=468 Accuracy=99.01: 100%|██████████| 469/469 [00:17<00:00, 26.98it/s]



Test set: Average loss: 0.0179, Accuracy: 9941/10000 (99.41%)

