# Import Libraries

In [44]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

# Data Transformations
We start by defining our data transformations. We need to think about what our data is and how we can augment it to correctly represent images that the model might not otherwise see.

In [45]:
# Mean and std handed to Normalize. They have to be sequences (e.g. tuples),
# hence the trailing comma after each single value.
_norm_mean = (0.1307,)
_norm_std = (0.3081,)

# Train Phase transformations: tensor conversion followed by normalisation
train_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(_norm_mean, _norm_std),
])

# Test Phase transformations: identical pipeline to the train phase
test_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(_norm_mean, _norm_std),
])

# Dataset and Creating Train/Test Split

In [46]:
# Both splits are stored under ./data (downloaded if missing); each split is
# paired with its own transformation pipeline.
# NOTE(review): `train` and `test` are rebound to functions of the same name
# further down in this notebook, so the dataloaders have to be built from these
# datasets before those function definitions are executed.
train = datasets.MNIST(root='./data', train=True, transform=train_transforms, download=True)
test = datasets.MNIST(root='./data', train=False, transform=test_transforms, download=True)

# Dataloader Arguments & Test/Train Dataloaders

In [47]:
SEED = 1

# CUDA?
cuda = torch.cuda.is_available()
print("CUDA Available?", cuda)

# For reproducibility
torch.manual_seed(SEED)

if cuda:
    torch.cuda.manual_seed(SEED)

# dataloader arguments - in a script these would typically be read from the command line.
# With CUDA: larger batches, worker processes and pinned memory; otherwise a small default setup.
dataloader_args = dict(shuffle=True, batch_size=128, num_workers=4, pin_memory=True) if cuda else dict(shuffle=True, batch_size=64)

# train dataloader (shuffled every epoch)
train_loader = torch.utils.data.DataLoader(train, **dataloader_args)

# test dataloader: same settings, but without shuffling. The evaluation code only
# sums losses and correct predictions over the whole set, so sample order is
# irrelevant and shuffling would just consume random-number state.
test_loader = torch.utils.data.DataLoader(test, **{**dataloader_args, 'shuffle': False})

CUDA Available? True


# Model
Let's start with the model we first saw

In [49]:
dropout_value = 0.03


class Net(nn.Module):
    """Small all-convolutional classifier for 1x28x28 inputs with 10 output classes.

    The network is three ``nn.Sequential`` stages (``conv1``, ``conv2``,
    ``conv3``) built from 3x3, padding-1 convolutions, each followed by ReLU
    and BatchNorm and, for most of them, Dropout with probability
    ``dropout_value``. The first two stages end in a 2x2 max-pool; the last
    stage ends in a 7x7 average pool that leaves one value per class.
    ``forward`` returns log-probabilities of shape ``(batch, 10)``.
    """

    @staticmethod
    def _conv_unit(in_channels, out_channels, with_dropout=True):
        """Return the layers Conv3x3 -> ReLU -> BatchNorm (-> Dropout) as a list."""
        layers = [
            nn.Conv2d(in_channels, out_channels, 3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(out_channels),
        ]
        if with_dropout:
            layers.append(nn.Dropout(dropout_value))
        return layers

    def __init__(self):
        super().__init__()
        unit = self._conv_unit

        # Receptive-field (RF) figures below follow from kernel sizes and strides:
        # each 3x3 conv adds 2 * (product of earlier strides), each 2x2 pool adds
        # 1 * (product of earlier strides).
        self.conv1 = nn.Sequential(
            *unit(1, 16),                        # 28x28x1  -> 28x28x16, RF 3x3
            *unit(16, 16, with_dropout=False),   # 28x28x16 -> 28x28x16, RF 5x5
            nn.MaxPool2d(2, 2),                  # 28x28x16 -> 14x14x16, RF 6x6
        )
        self.conv2 = nn.Sequential(
            *unit(16, 16),                       # 14x14x16 -> 14x14x16, RF 10x10
            *unit(16, 16, with_dropout=False),   # 14x14x16 -> 14x14x16, RF 14x14
            nn.MaxPool2d(2, 2),                  # 14x14x16 -> 7x7x16,   RF 16x16
        )
        self.conv3 = nn.Sequential(
            *unit(16, 16),                       # 7x7x16 -> 7x7x16, RF 24x24
            *unit(16, 32),                       # 7x7x16 -> 7x7x32, RF 32x32
            *unit(32, 10, with_dropout=False),   # 7x7x32 -> 7x7x10, RF 40x40
            nn.AvgPool2d(7, 7),                  # 7x7x10 -> 1x1x10
        )

    def forward(self, x):
        """Run the three stages and return per-class log-probabilities."""
        for stage in (self.conv1, self.conv2, self.conv3):
            x = stage(x)
        # Collapse the trailing 1x1 spatial dimensions: one row of 10 scores per sample
        scores = x.view(-1, 10)
        return F.log_softmax(scores, dim=1)

# Model Params

In [50]:
!pip install torchsummary
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print(device)
model = Net().to(device)
summary(model, input_size=(1, 28, 28))

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
cuda
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 28, 28]             160
              ReLU-2           [-1, 16, 28, 28]               0
       BatchNorm2d-3           [-1, 16, 28, 28]              32
           Dropout-4           [-1, 16, 28, 28]               0
            Conv2d-5           [-1, 16, 28, 28]           2,320
              ReLU-6           [-1, 16, 28, 28]               0
       BatchNorm2d-7           [-1, 16, 28, 28]              32
         MaxPool2d-8           [-1, 16, 14, 14]               0
            Conv2d-9           [-1, 16, 14, 14]           2,320
             ReLU-10           [-1, 16, 14, 14]               0
      BatchNorm2d-11           [-1, 16, 14, 14]              32
          Dropout-12           [-1, 16, 14, 14]               0

# Training and Testing


Let's write train and test functions

In [51]:
from tqdm import tqdm

# Metric histories filled in by train() (one entry per batch) and
# test() (one entry per call) defined below.
train_losses, test_losses = [], []
train_acc, test_acc = [], []

def train(model, device, train_loader, optimizer, epoch):
    """Train ``model`` for one pass over ``train_loader``.

    For every batch the negative log-likelihood loss is computed on the model
    output, back-propagated, and the optimizer is stepped. The batch loss is
    appended to the module-level ``train_losses`` list and the running
    accuracy (in percent) to ``train_acc``; both are also shown on the tqdm
    progress bar.

    Args:
        model: network to train; put into training mode here.
        device: device the batches are moved to before the forward pass.
        train_loader: iterable yielding ``(data, target)`` batches.
        optimizer: optimizer used to update the model parameters.
        epoch: index of the current epoch (accepted for the caller's
            convenience; not used inside this function).
    """
    model.train()
    pbar = tqdm(train_loader)
    correct = 0
    processed = 0
    for batch_idx, (data, target) in enumerate(pbar):
        # get samples
        data, target = data.to(device), target.to(device)

        # Init
        # PyTorch accumulates gradients on subsequent backward passes, so they
        # must be zeroed before backpropagation for the update to be correct.
        optimizer.zero_grad()

        # Predict
        y_pred = model(data)

        # Calculate loss
        loss = F.nll_loss(y_pred, target)

        # Backpropagation
        loss.backward()
        optimizer.step()

        # Store the loss as a plain Python float. Appending the tensor itself
        # would keep an autograd-attached tensor alive on the training device
        # for every batch; test_losses already holds floats as well.
        batch_loss = loss.item()
        train_losses.append(batch_loss)

        # Update running accuracy and the progress bar
        pred = y_pred.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
        correct += pred.eq(target.view_as(pred)).sum().item()
        processed += len(data)

        pbar.set_description(desc= f'Loss={batch_loss} Batch_id={batch_idx} Accuracy={100*correct/processed:0.2f}')
        train_acc.append(100*correct/processed)

def test(model, device, test_loader):
    """Evaluate ``model`` on every batch of ``test_loader``.

    Runs in eval mode with gradients disabled, sums the negative
    log-likelihood loss and the number of correct predictions over all
    batches, prints a one-line summary, and appends the average loss to the
    module-level ``test_losses`` list and the accuracy (in percent) to
    ``test_acc``.

    Args:
        model: network to evaluate; put into evaluation mode here.
        device: device the batches are moved to before the forward pass.
        test_loader: iterable of ``(data, target)`` batches that exposes a
            ``dataset`` attribute supporting ``len()``.
    """
    model.eval()
    loss_sum = 0
    n_correct = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            log_probs = model(inputs)
            # reduction='sum' so that dividing by the dataset size below yields a per-sample mean
            loss_sum += F.nll_loss(log_probs, labels, reduction='sum').item()
            # index of the largest log-probability is the predicted class
            predicted = log_probs.argmax(dim=1, keepdim=True)
            n_correct += predicted.eq(labels.view_as(predicted)).sum().item()

    n_samples = len(test_loader.dataset)
    mean_loss = loss_sum / n_samples
    test_losses.append(mean_loss)

    accuracy = 100. * n_correct / n_samples
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        mean_loss, n_correct, n_samples, accuracy))

    test_acc.append(accuracy)
     

# Let's Train and test our model

In [52]:
# Number of full passes over the training data
EPOCHS = 20

# Start from a freshly constructed network on the selected device and
# optimise it with SGD plus momentum.
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# One training pass followed by one evaluation pass per epoch
for epoch in range(EPOCHS):
    print("EPOCH:", epoch)
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

EPOCH: 0


Loss=0.15864364802837372 Batch_id=468 Accuracy=93.39: 100%|██████████| 469/469 [00:22<00:00, 21.13it/s]



Test set: Average loss: 0.0813, Accuracy: 9814/10000 (98.14%)

EPOCH: 1


Loss=0.12182533740997314 Batch_id=468 Accuracy=98.37: 100%|██████████| 469/469 [00:20<00:00, 22.41it/s]



Test set: Average loss: 0.0472, Accuracy: 9899/10000 (98.99%)

EPOCH: 2


Loss=0.037838950753211975 Batch_id=468 Accuracy=98.73: 100%|██████████| 469/469 [00:21<00:00, 21.37it/s]



Test set: Average loss: 0.0387, Accuracy: 9914/10000 (99.14%)

EPOCH: 3


Loss=0.042664870619773865 Batch_id=468 Accuracy=98.88: 100%|██████████| 469/469 [00:21<00:00, 21.91it/s]



Test set: Average loss: 0.0306, Accuracy: 9928/10000 (99.28%)

EPOCH: 4


Loss=0.016000309959053993 Batch_id=468 Accuracy=98.99: 100%|██████████| 469/469 [00:24<00:00, 19.39it/s]



Test set: Average loss: 0.0320, Accuracy: 9907/10000 (99.07%)

EPOCH: 5


Loss=0.06371597200632095 Batch_id=468 Accuracy=99.12: 100%|██████████| 469/469 [00:21<00:00, 21.70it/s]



Test set: Average loss: 0.0289, Accuracy: 9918/10000 (99.18%)

EPOCH: 6


Loss=0.05555548146367073 Batch_id=468 Accuracy=99.15: 100%|██████████| 469/469 [00:21<00:00, 22.29it/s]



Test set: Average loss: 0.0271, Accuracy: 9917/10000 (99.17%)

EPOCH: 7


Loss=0.01830575428903103 Batch_id=468 Accuracy=99.24: 100%|██████████| 469/469 [00:21<00:00, 22.27it/s]



Test set: Average loss: 0.0328, Accuracy: 9919/10000 (99.19%)

EPOCH: 8


Loss=0.029013989493250847 Batch_id=468 Accuracy=99.23: 100%|██████████| 469/469 [00:22<00:00, 21.05it/s]



Test set: Average loss: 0.0245, Accuracy: 9931/10000 (99.31%)

EPOCH: 9


Loss=0.008937672711908817 Batch_id=468 Accuracy=99.33: 100%|██████████| 469/469 [00:22<00:00, 21.06it/s]



Test set: Average loss: 0.0229, Accuracy: 9933/10000 (99.33%)

EPOCH: 10


Loss=0.02367362193763256 Batch_id=468 Accuracy=99.46: 100%|██████████| 469/469 [00:21<00:00, 22.15it/s]



Test set: Average loss: 0.0279, Accuracy: 9914/10000 (99.14%)

EPOCH: 11


Loss=0.012812796980142593 Batch_id=468 Accuracy=99.41: 100%|██████████| 469/469 [00:20<00:00, 22.41it/s]



Test set: Average loss: 0.0218, Accuracy: 9934/10000 (99.34%)

EPOCH: 12


Loss=0.0503528006374836 Batch_id=468 Accuracy=99.48: 100%|██████████| 469/469 [00:21<00:00, 21.93it/s]



Test set: Average loss: 0.0247, Accuracy: 9926/10000 (99.26%)

EPOCH: 13


Loss=0.0052206325344741344 Batch_id=468 Accuracy=99.46: 100%|██████████| 469/469 [00:22<00:00, 20.94it/s]



Test set: Average loss: 0.0233, Accuracy: 9931/10000 (99.31%)

EPOCH: 14


Loss=0.02741345949470997 Batch_id=468 Accuracy=99.55: 100%|██████████| 469/469 [00:22<00:00, 20.69it/s]



Test set: Average loss: 0.0221, Accuracy: 9932/10000 (99.32%)

EPOCH: 15


Loss=0.02204611711204052 Batch_id=468 Accuracy=99.56: 100%|██████████| 469/469 [00:22<00:00, 20.74it/s]



Test set: Average loss: 0.0236, Accuracy: 9936/10000 (99.36%)

EPOCH: 16


Loss=0.012274441309273243 Batch_id=468 Accuracy=99.56: 100%|██████████| 469/469 [00:21<00:00, 21.55it/s]



Test set: Average loss: 0.0215, Accuracy: 9929/10000 (99.29%)

EPOCH: 17


Loss=0.04882970452308655 Batch_id=468 Accuracy=99.55: 100%|██████████| 469/469 [00:20<00:00, 22.48it/s]



Test set: Average loss: 0.0189, Accuracy: 9943/10000 (99.43%)

EPOCH: 18


Loss=0.010073154233396053 Batch_id=468 Accuracy=99.60: 100%|██████████| 469/469 [00:21<00:00, 21.86it/s]



Test set: Average loss: 0.0194, Accuracy: 9931/10000 (99.31%)

EPOCH: 19


Loss=0.005589574109762907 Batch_id=468 Accuracy=99.60: 100%|██████████| 469/469 [00:21<00:00, 21.59it/s]



Test set: Average loss: 0.0224, Accuracy: 9928/10000 (99.28%)

