<a href="https://colab.research.google.com/github/peeyushsinghal/ERA/blob/main/S7/ERA_S7_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms



In [2]:
class Net(nn.Module):
    """Small convolutional classifier for 1x28x28 images with 10 output classes.

    Layout (the reduced-capacity "S7 attempt 2" variant):

    * conv block 1       -- 5x5 conv (1 -> 8) then 3x3 conv (8 -> 16)
    * transition block 1 -- stride-2 3x3 conv (16 -> 8), used instead of pooling
    * conv block 2       -- 3x3 conv (8 -> 8) then 3x3 conv (8 -> 16)
    * transition block 2 -- stride-2 3x3 conv (16 -> 16)
    * conv block 3       -- unpadded 3x3 conv (16 -> 10), no activation
    * 5x5 average pool, flatten, log-softmax

    Every convolution except the last is followed by
    ReLU -> BatchNorm -> Dropout.  Earlier attempts at this network used wider
    layers (16-32 channels, up to 1024 in the base version) and max-pooling.

    Args:
        dropout_value: Dropout probability applied after each conv unit.
            Defaults to 0.1.
    """

    def __init__(self, dropout_value: float = 0.1) -> None:
        super().__init__()
        unit = self._conv_unit

        # Conv Block 1
        self.convblock1 = nn.Sequential(
            *unit(1, 8, 5, dropout_value),    # in 28, out 26, RF 5
            *unit(8, 16, 3, dropout_value),   # in 26, out 26, RF 7, jump 1 -> 1
        )

        # Transition Block 1: strided convolution halves the feature map.
        self.transitionblock1 = nn.Sequential(
            *unit(16, 8, 3, dropout_value, stride=2),  # in 26, out 13, RF 9, jump 1 -> 2
        )

        # Conv Block 2
        self.convblock2 = nn.Sequential(
            *unit(8, 8, 3, dropout_value),    # in 13, out 13, RF 13, jump 2 -> 2
            *unit(8, 16, 3, dropout_value),   # in 13, out 13, RF 17, jump 2 -> 2
        )

        # Transition Block 2: strided convolution halves the feature map again.
        self.transitionblock2 = nn.Sequential(
            *unit(16, 16, 3, dropout_value, stride=2),  # in 13, out 7, RF 21, jump 2 -> 4
        )

        # Conv Block 3: maps to one channel per class; no ReLU/BN/Dropout so
        # the raw scores reach the log-softmax unchanged.
        self.convblock3 = nn.Sequential(
            nn.Conv2d(16, 10, 3, stride=1),   # in 7, out 5, RF 29, jump 4 -> 4
        )
        self.gap = nn.AvgPool2d(5)            # in 10x5x5, out 10x1x1

    @staticmethod
    def _conv_unit(in_channels, out_channels, kernel_size, dropout_value,
                   stride=1, padding=1):
        """Return the (Conv2d, ReLU, BatchNorm2d, Dropout) layers of one unit."""
        return (
            nn.Conv2d(in_channels, out_channels, kernel_size,
                      stride=stride, padding=padding),
            nn.ReLU(),
            nn.BatchNorm2d(out_channels),
            nn.Dropout(dropout_value),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return per-class log-probabilities for a batch of images.

        The blocks are applied in order, the result is average-pooled and
        reshaped to ``(-1, 10)``, and log-softmax is taken over the last
        dimension.
        """
        x = self.convblock1(x)
        x = self.transitionblock1(x)
        x = self.convblock2(x)
        x = self.transitionblock2(x)
        x = self.convblock3(x)
        x = self.gap(x)
        x = x.view(-1, 10)
        return F.log_softmax(x, dim=-1)

In [3]:
# Use the GPU when one is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")
model = Net().to(device)
model  # last expression of the cell: displays the layer structure

Net(
  (convblock1): Sequential(
    (0): Conv2d(1, 8, kernel_size=(5, 5), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Dropout(p=0.1, inplace=False)
    (4): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): ReLU()
    (6): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): Dropout(p=0.1, inplace=False)
  )
  (transitionblock1): Sequential(
    (0): Conv2d(16, 8, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): ReLU()
    (2): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Dropout(p=0.1, inplace=False)
  )
  (convblock2): Sequential(
    (0): Conv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Dropout(p=0.1, inplace=False)
    (4): Conv2d(8, 16, kernel_

In [4]:


torch.manual_seed(1)
batch_size = 128

# Extra DataLoader options are only passed when CUDA is in use.
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

# Training images get a random rotation in [-10, 10] degrees before being
# converted to tensors and normalized.
# NOTE(review): the rotation runs before ToTensor, so fill=(1,) is presumably
# on the raw pixel scale rather than the 0-1 tensor scale -- confirm intended.
train_transforms = transforms.Compose([
    transforms.RandomRotation((-10.0, 10.0), fill=(1,)),
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])
train_dataset = datasets.MNIST(
    '../data', train=True, download=True, transform=train_transforms)
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True, **kwargs)

# Test images are only converted and normalized with the same constants.
test_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])
test_dataset = datasets.MNIST(
    '../data', train=False, transform=test_transforms)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, shuffle=True, **kwargs)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 285959909.51it/s]

Extracting ../data/MNIST/raw/train-images-idx3-ubyte.gz to ../data/MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 9302387.79it/s]


Extracting ../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 120307756.75it/s]

Extracting ../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 22229321.78it/s]


Extracting ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw



In [5]:
# Pull a single batch from the test loader and show the image tensor's shape.
first_batch = next(iter(test_loader))
first_batch[0].shape

torch.Size([128, 1, 28, 28])

In [6]:
from tqdm import tqdm

def train(model, device, train_loader, optimizer, epoch):
    """Train ``model`` for one pass over ``train_loader``.

    For every batch the loss is the negative log-likelihood of the model
    output, gradients are back-propagated and ``optimizer`` takes one step.
    A tqdm progress bar shows the latest loss and the running accuracy.

    Side effects: appends the per-batch loss (as a Python float) to the
    module-level ``train_losses`` list and the running accuracy in percent to
    the module-level ``train_acc`` list.

    Args:
        model: Network to train; switched to training mode.
        device: Device the batches are moved to before the forward pass.
        train_loader: Iterable yielding ``(data, target)`` batches.
        optimizer: Optimizer that updates the model parameters.
        epoch: Current epoch number; accepted for the caller's convenience
            and not used in the body.
    """
    model.train()
    pbar = tqdm(train_loader)
    correct = 0
    processed = 0
    for batch_idx, (data, target) in enumerate(pbar):
        # Get samples
        data, target = data.to(device), target.to(device)

        # PyTorch accumulates gradients across backward passes, so they must
        # be zeroed before each backpropagation step for the parameter update
        # to be correct.
        optimizer.zero_grad()

        # Predict
        y_pred = model(data)

        # Calculate loss
        loss = F.nll_loss(y_pred, target)

        # Backpropagation
        loss.backward()
        optimizer.step()

        # Store a plain float rather than the loss tensor: keeping the tensor
        # would hold on to its autograd history and device memory for every
        # batch, and the list could not be plotted directly.
        loss_value = loss.item()
        train_losses.append(loss_value)

        # Update the running accuracy and the progress bar
        pred = y_pred.argmax(dim=1, keepdim=True)  # index of the max log-probability
        correct += pred.eq(target.view_as(pred)).sum().item()
        processed += len(data)
        accuracy = 100*correct/processed

        pbar.set_description(desc= f'Loss={loss_value} Batch_id={batch_idx} Accuracy={accuracy:0.2f}')
        train_acc.append(accuracy)

def test(model, device, test_loader):
    """Evaluate ``model`` on ``test_loader`` and print loss and accuracy.

    The model is put in eval mode and run without gradient tracking.  The
    summed negative log-likelihood is divided by the dataset size to give the
    average loss.  The average loss is appended to the module-level
    ``test_losses`` list and the accuracy in percent to ``test_acc``.

    Args:
        model: Network to evaluate.
        device: Device the batches are moved to before the forward pass.
        test_loader: Loader yielding ``(data, target)`` batches; its
            ``dataset`` attribute supplies the sample count.
    """
    model.eval()
    dataset_size = len(test_loader.dataset)
    summed_loss = 0
    num_correct = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            log_probs = model(images)
            # Sum (not mean) per batch so the final division yields a
            # per-sample average.
            batch_loss = F.nll_loss(log_probs, labels, reduction='sum')
            summed_loss += batch_loss.item()
            # Predicted class = index of the largest log-probability.
            predicted = log_probs.argmax(dim=1, keepdim=True)
            matches = predicted.eq(labels.view_as(predicted))
            num_correct += matches.sum().item()

    mean_loss = summed_loss / dataset_size
    accuracy = 100. * num_correct / dataset_size
    test_losses.append(mean_loss)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        mean_loss, num_correct, dataset_size, accuracy))

    test_acc.append(accuracy)

In [7]:
# Install torchsummary from the notebook (the leading "!" is a shell escape,
# not Python) and import its summary helper.
!pip install torchsummary
from torchsummary import summary
# Recompute the device and instantiate a new Net on it.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)
# Print the per-layer output shapes and parameter counts for a 1x28x28 input.
summary(model, input_size=(1, 28, 28))

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 26, 26]             208
              ReLU-2            [-1, 8, 26, 26]               0
       BatchNorm2d-3            [-1, 8, 26, 26]              16
           Dropout-4            [-1, 8, 26, 26]               0
            Conv2d-5           [-1, 16, 26, 26]           1,168
              ReLU-6           [-1, 16, 26, 26]               0
       BatchNorm2d-7           [-1, 16, 26, 26]              32
           Dropout-8           [-1, 16, 26, 26]               0
            Conv2d-9            [-1, 8, 13, 13]           1,160
             ReLU-10            [-1, 8, 13, 13]               0
      BatchNorm2d-11            [-1, 8, 13, 13]              16
          Dropout-12            [-1, 8, 13, 13]               0
    

In [8]:

# Histories that train() and test() append to.
train_losses, test_losses = [], []
train_acc, test_acc = [], []

# Build a new model on the available device and an SGD optimizer for it.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# One training pass followed by one evaluation pass per epoch.
EPOCHS = 15
for epoch in range(1, EPOCHS + 1):
    print("EPOCH: ", epoch)
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

EPOCH:  1


Loss=0.17998136579990387 Batch_id=468 Accuracy=81.77: 100%|██████████| 469/469 [00:28<00:00, 16.69it/s]



Test set: Average loss: 0.1066, Accuracy: 9691/10000 (96.91%)

EPOCH:  2


Loss=0.24679578840732574 Batch_id=468 Accuracy=96.03: 100%|██████████| 469/469 [00:24<00:00, 19.41it/s]



Test set: Average loss: 0.0673, Accuracy: 9783/10000 (97.83%)

EPOCH:  3


Loss=0.07339576631784439 Batch_id=468 Accuracy=96.90: 100%|██████████| 469/469 [00:24<00:00, 19.41it/s]



Test set: Average loss: 0.0539, Accuracy: 9836/10000 (98.36%)

EPOCH:  4


Loss=0.053709547966718674 Batch_id=468 Accuracy=97.42: 100%|██████████| 469/469 [00:24<00:00, 19.38it/s]



Test set: Average loss: 0.0457, Accuracy: 9854/10000 (98.54%)

EPOCH:  5


Loss=0.04369359835982323 Batch_id=468 Accuracy=97.58: 100%|██████████| 469/469 [00:24<00:00, 18.82it/s]



Test set: Average loss: 0.0461, Accuracy: 9836/10000 (98.36%)

EPOCH:  6


Loss=0.025076506659388542 Batch_id=468 Accuracy=97.76: 100%|██████████| 469/469 [00:25<00:00, 18.58it/s]



Test set: Average loss: 0.0349, Accuracy: 9891/10000 (98.91%)

EPOCH:  7


Loss=0.15627071261405945 Batch_id=468 Accuracy=97.82: 100%|██████████| 469/469 [00:24<00:00, 18.96it/s]



Test set: Average loss: 0.0451, Accuracy: 9844/10000 (98.44%)

EPOCH:  8


Loss=0.06255677342414856 Batch_id=468 Accuracy=98.06: 100%|██████████| 469/469 [00:24<00:00, 19.04it/s]



Test set: Average loss: 0.0384, Accuracy: 9872/10000 (98.72%)

EPOCH:  9


Loss=0.0784931480884552 Batch_id=468 Accuracy=98.03: 100%|██████████| 469/469 [00:25<00:00, 18.62it/s]



Test set: Average loss: 0.0328, Accuracy: 9894/10000 (98.94%)

EPOCH:  10


Loss=0.04321609437465668 Batch_id=468 Accuracy=98.10: 100%|██████████| 469/469 [00:23<00:00, 19.63it/s]



Test set: Average loss: 0.0310, Accuracy: 9898/10000 (98.98%)

EPOCH:  11


Loss=0.1327313929796219 Batch_id=468 Accuracy=98.22: 100%|██████████| 469/469 [00:24<00:00, 19.30it/s]



Test set: Average loss: 0.0352, Accuracy: 9885/10000 (98.85%)

EPOCH:  12


Loss=0.04299899563193321 Batch_id=468 Accuracy=98.26: 100%|██████████| 469/469 [00:24<00:00, 19.18it/s]



Test set: Average loss: 0.0301, Accuracy: 9895/10000 (98.95%)

EPOCH:  13


Loss=0.04475010931491852 Batch_id=468 Accuracy=98.39: 100%|██████████| 469/469 [00:24<00:00, 19.44it/s]



Test set: Average loss: 0.0276, Accuracy: 9906/10000 (99.06%)

EPOCH:  14


Loss=0.03322805091738701 Batch_id=468 Accuracy=98.33: 100%|██████████| 469/469 [00:24<00:00, 19.41it/s]



Test set: Average loss: 0.0267, Accuracy: 9909/10000 (99.09%)

EPOCH:  15


Loss=0.037671636790037155 Batch_id=468 Accuracy=98.33: 100%|██████████| 469/469 [00:25<00:00, 18.43it/s]



Test set: Average loss: 0.0275, Accuracy: 9902/10000 (99.02%)

