In [None]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

In [None]:
class Net(nn.Module):
    """Baseline MNIST classifier built from seven 3x3 convolutions.

    The shape comments assume a 1x28x28 input, which is what the
    ``summary(model, input_size=(1, 28, 28))`` call in this notebook uses.
    The last convolution reduces the 3x3 map to 1x1 with 10 channels, so its
    output doubles as the per-class score vector.

    ``forward`` returns log-probabilities of shape ``(batch, 10)`` suitable
    for ``F.nll_loss``.
    """

    def __init__(self):
        super(Net, self).__init__()
        # first convolution block
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1)     # Input - 1x28x28    Output - 32x28x28
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)    # Input - 32x28x28   Output - 64x28x28
        self.pool1 = nn.MaxPool2d(2, 2)                 # Input - 64x28x28   Output - 64x14x14

        # second convolution block
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)   # Input - 64x14x14   Output - 128x14x14
        self.conv4 = nn.Conv2d(128, 256, 3, padding=1)  # Input - 128x14x14  Output - 256x14x14
        self.pool2 = nn.MaxPool2d(2, 2)                 # Input - 256x14x14  Output - 256x7x7

        # third convolution block (no padding, so each conv trims 2 pixels)
        self.conv5 = nn.Conv2d(256, 512, 3)             # Input - 256x7x7    Output - 512x5x5
        self.conv6 = nn.Conv2d(512, 1024, 3)            # Input - 512x5x5    Output - 1024x3x3

        # fourth convolution block: acts as the classifier head
        self.conv7 = nn.Conv2d(1024, 10, 3)             # Input - 1024x3x3   Output - 10x1x1

    def forward(self, x):
        """Map a batch of images to per-class log-probabilities."""
        # first convolution block
        x = self.pool1(F.relu(self.conv2(F.relu(self.conv1(x)))))

        # second convolution block
        x = self.pool2(F.relu(self.conv4(F.relu(self.conv3(x)))))

        # third convolution block
        x = F.relu(self.conv6(F.relu(self.conv5(x))))

        # fourth convolution block
        # No ReLU here: these are the class scores, and clamping them to >= 0
        # would stop the network from ever pushing a class below "neutral".
        x = self.conv7(x)
        x = x.view(-1, 10)
        # dim=1 is the class axis; passing it explicitly avoids the deprecated
        # implicit-dimension behaviour of log_softmax.
        return F.log_softmax(x, dim=1)

In [None]:
!pip install torchsummary
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 28, 28]             320
            Conv2d-2           [-1, 64, 28, 28]          18,496
         MaxPool2d-3           [-1, 64, 14, 14]               0
            Conv2d-4          [-1, 128, 14, 14]          73,856
            Conv2d-5          [-1, 256, 14, 14]         295,168
         MaxPool2d-6            [-1, 256, 7, 7]               0
            Conv2d-7            [-1, 512, 5, 5]       1,180,160
            Conv2d-8           [-1, 1024, 3, 3]       4,719,616
            Conv2d-9             [-1, 10, 1, 1]          92,170
Total params: 6,379,786
Trainable params: 6,379,786
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 1.51
Params size (MB): 24.34
Estimated Total Size (MB): 25.85
-------------------------------------



In [None]:

torch.manual_seed(1)
batch_size = 128

# Worker/pinned-memory options are only passed when CUDA is in use.
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

# One preprocessing pipeline shared by both splits so they cannot drift apart.
# Normalize receives (mean,), (std,); presumably the MNIST training-set
# statistics -- confirm if the dataset changes.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=mnist_transform),
    batch_size=batch_size, shuffle=True, **kwargs)

# Evaluation aggregates over the whole split, so sample order does not affect
# the reported metrics; keep it fixed instead of reshuffling every epoch.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=mnist_transform),
    batch_size=batch_size, shuffle=False, **kwargs)


In [None]:
from tqdm import tqdm
def train(model, device, train_loader, optimizer, epoch):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: network whose forward pass returns log-probabilities
            (the loss used is ``F.nll_loss``).
        device: device each batch is moved to before the forward pass.
        train_loader: iterable yielding ``(data, target)`` batches.
        optimizer: optimizer stepped once per batch.
        epoch: current epoch number; not used by the function, kept so
            existing call sites keep working.

    Returns:
        float: mean training loss per sample over the pass, or ``nan`` when
        the loader yields no samples. Earlier versions returned ``None``;
        callers that ignore the return value are unaffected.
    """
    model.train()
    loss_sum = None  # running sum of per-sample losses, kept on `device`
    n_seen = 0
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()

        # nll_loss averages over the batch; re-weight by batch size so a
        # smaller final batch does not skew the epoch mean. Accumulating the
        # detached tensor avoids a host sync on every step.
        n_batch = target.size(0)
        weighted = loss.detach() * n_batch
        loss_sum = weighted if loss_sum is None else loss_sum + weighted
        n_seen += n_batch

    if n_seen == 0:
        return float("nan")
    return loss_sum.item() / n_seen


def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

#### Customized model 1

1. The model consists of two convolutional layers (1 -> 32 -> 64 channels) with 3x3 kernels.
2. A global average pooling (GAP) layer is implemented with average pooling of size 28, i.e. an average over the entire 28x28 feature map coming from the previous layer.
3. The pooled features are then passed through a dense (linear) layer, which is the output layer in this case.
4. Batch normalization is applied after the ReLU that follows every convolutional layer.
5. No dropout is used in this model.

In [None]:
class Net(nn.Module):
    """Customized model 1: two 3x3 convolutions, GAP and a linear classifier.

    Each convolution is followed by ReLU and then batch normalization. The
    64x28x28 feature map is averaged down to 64 values and mapped to 10 class
    scores. No dropout is used. Shape comments assume a 1x28x28 input.

    ``forward`` returns log-probabilities of shape ``(batch, 10)``.
    """

    def __init__(self):
        super(Net, self).__init__()
        # first convolution block
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1)   # Input - 1x28x28   Output - 32x28x28
        self.batch1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)  # Input - 32x28x28  Output - 64x28x28
        self.batch2 = nn.BatchNorm2d(64)
        self.pool1 = nn.AvgPool2d(28)                 # global average pooling: 64x28x28 -> 64x1x1
        self.linear1 = nn.Linear(64, 10)

    def forward(self, x):
        """Map a batch of images to per-class log-probabilities."""
        # first convolution block
        x = self.batch1(F.relu(self.conv1(x)))
        x = self.batch2(F.relu(self.conv2(x)))

        x = self.pool1(x)
        # Flatten everything except the batch axis; unlike squeeze() this
        # cannot drop the batch dimension when the batch holds one sample.
        x = torch.flatten(x, 1)
        x = self.linear1(x)
        # dim=1 is the class axis; explicit dim avoids the deprecated
        # implicit-dimension behaviour of log_softmax.
        return F.log_softmax(x, dim=1)

In [None]:
# Pick the GPU when available, instantiate the Net class defined in the cell
# above on that device, and print its layer-by-layer summary for a 1x28x28 input.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 28, 28]             320
       BatchNorm2d-2           [-1, 32, 28, 28]              64
            Conv2d-3           [-1, 64, 28, 28]          18,496
       BatchNorm2d-4           [-1, 64, 28, 28]             128
         AvgPool2d-5             [-1, 64, 1, 1]               0
            Linear-6                   [-1, 10]             650
Total params: 19,658
Trainable params: 19,658
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 1.15
Params size (MB): 0.07
Estimated Total Size (MB): 1.23
----------------------------------------------------------------




In [None]:

# Build a fresh network for training (replaces the instance created for the
# summary) and optimise it with SGD plus momentum.
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# range(1, 20) yields epochs 1..19; the test split is evaluated after each one.
for epoch in range(1, 20):
    print(f'Epoch number {epoch}')
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

Epoch number 1





Test set: Average loss: 1.2356, Accuracy: 6852/10000 (69%)

Epoch number 2

Test set: Average loss: 0.7770, Accuracy: 8250/10000 (82%)

Epoch number 3

Test set: Average loss: 0.6064, Accuracy: 8197/10000 (82%)

Epoch number 4

Test set: Average loss: 0.5183, Accuracy: 8533/10000 (85%)

Epoch number 5

Test set: Average loss: 0.2635, Accuracy: 9364/10000 (94%)

Epoch number 6

Test set: Average loss: 0.2477, Accuracy: 9456/10000 (95%)

Epoch number 7

Test set: Average loss: 0.2153, Accuracy: 9475/10000 (95%)

Epoch number 8

Test set: Average loss: 0.3899, Accuracy: 8859/10000 (89%)

Epoch number 9

Test set: Average loss: 0.1919, Accuracy: 9468/10000 (95%)

Epoch number 10

Test set: Average loss: 0.2169, Accuracy: 9395/10000 (94%)

Epoch number 11

Test set: Average loss: 0.1571, Accuracy: 9614/10000 (96%)

Epoch number 12

Test set: Average loss: 0.2268, Accuracy: 9329/10000 (93%)

Epoch number 13

Test set: Average loss: 0.1494, Accuracy: 9596/10000 (96%)

Epoch number 14

Test s

#### Customized model 2

1. The model consists of two convolutional layers (1 -> 32 -> 64 channels) with 3x3 kernels.
2. A global average pooling (GAP) layer is implemented with average pooling of size 28, i.e. an average over the entire 28x28 feature map coming from the previous layer.
3. The pooled features are then passed through a dense (linear) layer, which is the output layer in this case.
4. Batch normalization is applied after the ReLU that follows every convolutional layer.
5. Dropout with probability 0.20 is applied after every convolutional layer, following its batch normalization.

In [None]:
class Net(nn.Module):
    """Customized model 2: model 1 plus channel dropout (p=0.2) after each block.

    Each convolution is followed by ReLU, batch normalization and 2D dropout.
    The 64x28x28 feature map is then averaged down to 64 values and mapped to
    10 class scores. Shape comments assume a 1x28x28 input.

    ``forward`` returns log-probabilities of shape ``(batch, 10)``.
    """

    def __init__(self):
        super(Net, self).__init__()
        # first convolution block
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1)   # Input - 1x28x28   Output - 32x28x28
        self.batch1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)  # Input - 32x28x28  Output - 64x28x28
        self.batch2 = nn.BatchNorm2d(64)
        self.pool1 = nn.AvgPool2d(28)                 # global average pooling: 64x28x28 -> 64x1x1
        self.linear1 = nn.Linear(64, 10)

    def forward(self, x):
        """Map a batch of images to per-class log-probabilities."""
        # The functional dropout defaults to training=True, so it must be tied
        # to self.training; otherwise model.eval() would not switch it off and
        # evaluation would run with random channels zeroed.
        x = self.batch1(F.relu(self.conv1(x)))
        x = F.dropout2d(x, 0.2, training=self.training)

        x = self.batch2(F.relu(self.conv2(x)))
        x = F.dropout2d(x, 0.2, training=self.training)

        x = self.pool1(x)
        # Flatten everything except the batch axis (safe for a batch of one).
        x = torch.flatten(x, 1)
        x = self.linear1(x)
        # dim=1 is the class axis; explicit dim avoids the deprecation warning.
        return F.log_softmax(x, dim=1)
  
# Instantiate the Net class defined above on the available device and print
# its layer-by-layer summary for a 1x28x28 input.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)
summary(model, input_size=(1, 28, 28))


# Build a fresh network for training (replaces the instance created for the
# summary) and optimise it with SGD plus momentum.
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# range(1, 20) yields epochs 1..19; the test split is evaluated after each one.
for epoch in range(1, 20):
    print(f'Epoch number {epoch}')
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 28, 28]             320
       BatchNorm2d-2           [-1, 32, 28, 28]              64
            Conv2d-3           [-1, 64, 28, 28]          18,496
       BatchNorm2d-4           [-1, 64, 28, 28]             128
         AvgPool2d-5             [-1, 64, 1, 1]               0
            Linear-6                   [-1, 10]             650
Total params: 19,658
Trainable params: 19,658
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 1.15
Params size (MB): 0.07
Estimated Total Size (MB): 1.23
----------------------------------------------------------------
Epoch number 1





Test set: Average loss: 1.4633, Accuracy: 5058/10000 (51%)

Epoch number 2

Test set: Average loss: 1.4338, Accuracy: 4596/10000 (46%)

Epoch number 3

Test set: Average loss: 1.0014, Accuracy: 6941/10000 (69%)

Epoch number 4

Test set: Average loss: 0.8203, Accuracy: 7596/10000 (76%)

Epoch number 5

Test set: Average loss: 0.5766, Accuracy: 8446/10000 (84%)

Epoch number 6

Test set: Average loss: 0.4935, Accuracy: 8780/10000 (88%)

Epoch number 7

Test set: Average loss: 0.4112, Accuracy: 8858/10000 (89%)

Epoch number 8

Test set: Average loss: 0.3805, Accuracy: 9012/10000 (90%)

Epoch number 9

Test set: Average loss: 0.3142, Accuracy: 9145/10000 (91%)

Epoch number 10

Test set: Average loss: 0.2782, Accuracy: 9234/10000 (92%)

Epoch number 11

Test set: Average loss: 0.2633, Accuracy: 9295/10000 (93%)

Epoch number 12

Test set: Average loss: 0.2599, Accuracy: 9277/10000 (93%)

Epoch number 13

Test set: Average loss: 0.2416, Accuracy: 9393/10000 (94%)

Epoch number 14

Test s

### Customized model 3
1. The model consists of two convolutional layers (1 -> 32 -> 64 channels) with 3x3 kernels.
2. A global average pooling (GAP) layer is implemented with average pooling of size 28, i.e. an average over the entire 28x28 feature map coming from the previous layer.
3. The pooled features are then passed through a dense (linear) layer, which is the output layer in this case.
4. Batch normalization is applied after the ReLU that follows every convolutional layer.
5. Dropout with probability 0.10 is applied only after the last convolutional layer, following its batch normalization.

In [None]:
class Net(nn.Module):
    """Customized model 3: model 1 plus channel dropout (p=0.1) after the last conv.

    Each convolution is followed by ReLU and batch normalization; 2D dropout
    is applied once, after the second block. The 64x28x28 feature map is then
    averaged down to 64 values and mapped to 10 class scores. Shape comments
    assume a 1x28x28 input.

    ``forward`` returns log-probabilities of shape ``(batch, 10)``.
    """

    def __init__(self):
        super(Net, self).__init__()
        # first convolution block
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1)   # Input - 1x28x28   Output - 32x28x28
        self.batch1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)  # Input - 32x28x28  Output - 64x28x28
        self.batch2 = nn.BatchNorm2d(64)
        self.pool1 = nn.AvgPool2d(28)                 # global average pooling: 64x28x28 -> 64x1x1
        self.linear1 = nn.Linear(64, 10)

    def forward(self, x):
        """Map a batch of images to per-class log-probabilities."""
        x = self.batch1(F.relu(self.conv1(x)))

        x = self.batch2(F.relu(self.conv2(x)))
        # The functional dropout defaults to training=True, so it must be tied
        # to self.training; otherwise model.eval() would not switch it off.
        x = F.dropout2d(x, 0.1, training=self.training)

        x = self.pool1(x)
        # Flatten everything except the batch axis (safe for a batch of one).
        x = torch.flatten(x, 1)
        x = self.linear1(x)
        # dim=1 is the class axis; explicit dim avoids the deprecation warning.
        return F.log_softmax(x, dim=1)
  
# Instantiate the Net class defined above on the available device and print
# its layer-by-layer summary for a 1x28x28 input.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)
summary(model, input_size=(1, 28, 28))


# Build a fresh network for training (replaces the instance created for the
# summary) and optimise it with SGD plus momentum.
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# range(1, 20) yields epochs 1..19; the test split is evaluated after each one.
for epoch in range(1, 20):
    print(f'Epoch number {epoch}')
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 28, 28]             320
       BatchNorm2d-2           [-1, 32, 28, 28]              64
            Conv2d-3           [-1, 64, 28, 28]          18,496
       BatchNorm2d-4           [-1, 64, 28, 28]             128
         AvgPool2d-5             [-1, 64, 1, 1]               0
            Linear-6                   [-1, 10]             650
Total params: 19,658
Trainable params: 19,658
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 1.15
Params size (MB): 0.07
Estimated Total Size (MB): 1.23
----------------------------------------------------------------
Epoch number 1





Test set: Average loss: 1.2385, Accuracy: 6510/10000 (65%)

Epoch number 2

Test set: Average loss: 0.7689, Accuracy: 8340/10000 (83%)

Epoch number 3

Test set: Average loss: 0.5166, Accuracy: 8757/10000 (88%)

Epoch number 4

Test set: Average loss: 0.3756, Accuracy: 9181/10000 (92%)

Epoch number 5

Test set: Average loss: 0.6206, Accuracy: 7800/10000 (78%)

Epoch number 6

Test set: Average loss: 0.2189, Accuracy: 9480/10000 (95%)

Epoch number 7

Test set: Average loss: 0.2059, Accuracy: 9482/10000 (95%)

Epoch number 8

Test set: Average loss: 0.1769, Accuracy: 9566/10000 (96%)

Epoch number 9

Test set: Average loss: 0.2201, Accuracy: 9432/10000 (94%)

Epoch number 10

Test set: Average loss: 0.1672, Accuracy: 9568/10000 (96%)

Epoch number 11

Test set: Average loss: 0.1562, Accuracy: 9595/10000 (96%)

Epoch number 12

Test set: Average loss: 0.1523, Accuracy: 9582/10000 (96%)

Epoch number 13

Test set: Average loss: 0.1465, Accuracy: 9614/10000 (96%)

Epoch number 14

Test s

### Customized model 4

In [None]:
class Net(nn.Module):
    """Customized model 4: four narrow 3x3 convolutions, GAP and a linear head.

    Channel widths are 1 -> 8 -> 16 -> 32 -> 32. Every convolution is followed
    by ReLU and batch normalization; 2D dropout (p=0.05) is applied after the
    third block. The 32x28x28 feature map is averaged down to 32 values and
    mapped to 10 class scores. Shape comments assume a 1x28x28 input.

    ``forward`` returns log-probabilities of shape ``(batch, 10)``.
    """

    def __init__(self):
        super(Net, self).__init__()
        # convolution stack (padding=1 keeps the 28x28 resolution throughout)
        self.conv1 = nn.Conv2d(1, 8, 3, padding=1)    # Input - 1x28x28   Output - 8x28x28
        self.batch1 = nn.BatchNorm2d(8)
        self.conv2 = nn.Conv2d(8, 16, 3, padding=1)   # Input - 8x28x28   Output - 16x28x28
        self.batch2 = nn.BatchNorm2d(16)
        self.conv3 = nn.Conv2d(16, 32, 3, padding=1)  # Input - 16x28x28  Output - 32x28x28
        self.batch3 = nn.BatchNorm2d(32)
        self.conv4 = nn.Conv2d(32, 32, 3, padding=1)  # Input - 32x28x28  Output - 32x28x28
        self.batch4 = nn.BatchNorm2d(32)

        self.pool1 = nn.AvgPool2d(28)                 # global average pooling: 32x28x28 -> 32x1x1
        self.linear1 = nn.Linear(32, 10)

    def forward(self, x):
        """Map a batch of images to per-class log-probabilities."""
        x = self.batch1(F.relu(self.conv1(x)))
        x = self.batch2(F.relu(self.conv2(x)))

        x = self.batch3(F.relu(self.conv3(x)))
        # The functional dropout defaults to training=True, so it must be tied
        # to self.training; otherwise model.eval() would not switch it off.
        x = F.dropout2d(x, 0.05, training=self.training)

        x = self.batch4(F.relu(self.conv4(x)))

        x = self.pool1(x)
        # Flatten everything except the batch axis (safe for a batch of one).
        x = torch.flatten(x, 1)
        x = self.linear1(x)
        # dim=1 is the class axis; explicit dim avoids the deprecation warning.
        return F.log_softmax(x, dim=1)

# Instantiate the Net class defined above on the available device and print
# its layer-by-layer summary for a 1x28x28 input.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)
summary(model, input_size=(1, 28, 28))

# Build a fresh network for training (replaces the instance created for the
# summary) and optimise it with SGD plus momentum.
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# range(1, 20) yields epochs 1..19; the test split is evaluated after each one.
for epoch in range(1, 20):
    print(f'Epoch number {epoch}')
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 28, 28]              80
       BatchNorm2d-2            [-1, 8, 28, 28]              16
            Conv2d-3           [-1, 16, 28, 28]           1,168
       BatchNorm2d-4           [-1, 16, 28, 28]              32
            Conv2d-5           [-1, 32, 28, 28]           4,640
       BatchNorm2d-6           [-1, 32, 28, 28]              64
            Conv2d-7           [-1, 32, 28, 28]           9,248
       BatchNorm2d-8           [-1, 32, 28, 28]              64
         AvgPool2d-9             [-1, 32, 1, 1]               0
           Linear-10                   [-1, 10]             330
Total params: 15,642
Trainable params: 15,642
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 1.05
Params size (MB): 0.06
Estimated Tot




Test set: Average loss: 0.6199, Accuracy: 8373/10000 (84%)

Epoch number 2

Test set: Average loss: 0.2843, Accuracy: 9335/10000 (93%)

Epoch number 3

Test set: Average loss: 0.1709, Accuracy: 9582/10000 (96%)

Epoch number 4

Test set: Average loss: 0.1251, Accuracy: 9665/10000 (97%)

Epoch number 5

Test set: Average loss: 0.1716, Accuracy: 9525/10000 (95%)

Epoch number 6

Test set: Average loss: 0.1043, Accuracy: 9713/10000 (97%)

Epoch number 7

Test set: Average loss: 0.1034, Accuracy: 9700/10000 (97%)

Epoch number 8

Test set: Average loss: 0.0779, Accuracy: 9798/10000 (98%)

Epoch number 9

Test set: Average loss: 0.0747, Accuracy: 9785/10000 (98%)

Epoch number 10

Test set: Average loss: 0.1030, Accuracy: 9686/10000 (97%)

Epoch number 11

Test set: Average loss: 0.1008, Accuracy: 9709/10000 (97%)

Epoch number 12

Test set: Average loss: 0.0805, Accuracy: 9780/10000 (98%)

Epoch number 13

Test set: Average loss: 0.0665, Accuracy: 9812/10000 (98%)

Epoch number 14

Test s

### Customized model 5

In [None]:
class Net(nn.Module):
    """Customized model 5: four 3x3 convolutions (last one 46 wide), GAP, linear head.

    Channel widths are 1 -> 8 -> 16 -> 32 -> 46. Batch normalization is only
    applied after the second and fourth convolutions; 2D dropout (p=0.005) is
    applied after the third. The 46x28x28 feature map is averaged down to 46
    values and mapped to 10 class scores. Shape comments assume a 1x28x28 input.

    ``forward`` returns log-probabilities of shape ``(batch, 10)``.
    """

    def __init__(self):
        super(Net, self).__init__()
        # convolution stack (padding=1 keeps the 28x28 resolution throughout)
        self.conv1 = nn.Conv2d(1, 8, 3, padding=1)    # Input - 1x28x28   Output - 8x28x28
        # batch1 and batch3 are registered but not called in forward(); they
        # are kept so the module's attributes and state_dict stay unchanged.
        self.batch1 = nn.BatchNorm2d(8)
        self.conv2 = nn.Conv2d(8, 16, 3, padding=1)   # Input - 8x28x28   Output - 16x28x28
        self.batch2 = nn.BatchNorm2d(16)
        self.conv3 = nn.Conv2d(16, 32, 3, padding=1)  # Input - 16x28x28  Output - 32x28x28
        self.batch3 = nn.BatchNorm2d(32)
        self.conv4 = nn.Conv2d(32, 46, 3, padding=1)  # Input - 32x28x28  Output - 46x28x28
        self.batch4 = nn.BatchNorm2d(46)

        self.pool1 = nn.AvgPool2d(28)                 # global average pooling: 46x28x28 -> 46x1x1
        self.linear1 = nn.Linear(46, 10)

    def forward(self, x):
        """Map a batch of images to per-class log-probabilities."""
        # conv1: no batch normalization in this variant
        x = F.relu(self.conv1(x))

        x = self.batch2(F.relu(self.conv2(x)))

        # conv3: no batch normalization in this variant
        x = F.relu(self.conv3(x))
        # The functional dropout defaults to training=True, so it must be tied
        # to self.training; otherwise model.eval() would not switch it off.
        x = F.dropout2d(x, 0.005, training=self.training)

        x = self.batch4(F.relu(self.conv4(x)))

        x = self.pool1(x)
        # Flatten everything except the batch axis (safe for a batch of one).
        x = torch.flatten(x, 1)
        x = self.linear1(x)
        # dim=1 is the class axis; explicit dim avoids the deprecation warning.
        return F.log_softmax(x, dim=1)

# Instantiate the Net class defined above on the available device and print
# its layer-by-layer summary for a 1x28x28 input.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)
summary(model, input_size=(1, 28, 28))

# Build a fresh network for training (replaces the instance created for the
# summary) and optimise it with SGD plus momentum.
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# range(1, 20) yields epochs 1..19; the test split is evaluated after each one.
for epoch in range(1, 20):
    print(f'Epoch number {epoch}')
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 28, 28]              80
            Conv2d-2           [-1, 16, 28, 28]           1,168
       BatchNorm2d-3           [-1, 16, 28, 28]              32
            Conv2d-4           [-1, 32, 28, 28]           4,640
            Conv2d-5           [-1, 46, 28, 28]          13,294
       BatchNorm2d-6           [-1, 46, 28, 28]              92
         AvgPool2d-7             [-1, 46, 1, 1]               0
            Linear-8                   [-1, 10]             470
Total params: 19,776
Trainable params: 19,776
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.98
Params size (MB): 0.08
Estimated Total Size (MB): 1.06
----------------------------------------------------------------
Epoch number 1





Test set: Average loss: 0.4681, Accuracy: 9045/10000 (90.45%)

Epoch number 2

Test set: Average loss: 0.2600, Accuracy: 9318/10000 (93.18%)

Epoch number 3

Test set: Average loss: 0.1490, Accuracy: 9596/10000 (95.96%)

Epoch number 4

Test set: Average loss: 0.1167, Accuracy: 9698/10000 (96.98%)

Epoch number 5

Test set: Average loss: 0.1435, Accuracy: 9592/10000 (95.92%)

Epoch number 6

Test set: Average loss: 0.1708, Accuracy: 9466/10000 (94.66%)

Epoch number 7

Test set: Average loss: 0.0931, Accuracy: 9739/10000 (97.39%)

Epoch number 8

Test set: Average loss: 0.0983, Accuracy: 9723/10000 (97.23%)

Epoch number 9

Test set: Average loss: 0.0773, Accuracy: 9773/10000 (97.73%)

Epoch number 10

Test set: Average loss: 0.0662, Accuracy: 9801/10000 (98.01%)

Epoch number 11

Test set: Average loss: 0.0787, Accuracy: 9756/10000 (97.56%)

Epoch number 12

Test set: Average loss: 0.0579, Accuracy: 9831/10000 (98.31%)

Epoch number 13

Test set: Average loss: 0.0648, Accuracy: 9809/

### Customized model 6

In [None]:
class Net(nn.Module):
    """Customized model 6: model 5 with one 2x2 max-pool after the second conv.

    Channel widths are 1 -> 8 -> 16 -> 32 -> 46. Batch normalization is only
    applied after the second and fourth convolutions; 2D dropout (p=0.005) is
    applied after the third. The max-pool halves the resolution to 14x14, so
    global average pooling uses a 14x14 window. Shape comments assume a
    1x28x28 input.

    ``forward`` returns log-probabilities of shape ``(batch, 10)``.
    """

    def __init__(self):
        super(Net, self).__init__()
        # first convolution block
        self.conv1 = nn.Conv2d(1, 8, 3, padding=1)    # Input - 1x28x28   Output - 8x28x28
        # batch1 and batch3 are registered but not called in forward(); they
        # are kept so the module's attributes and state_dict stay unchanged.
        self.batch1 = nn.BatchNorm2d(8)
        self.conv2 = nn.Conv2d(8, 16, 3, padding=1)   # Input - 8x28x28   Output - 16x28x28
        self.batch2 = nn.BatchNorm2d(16)

        self.poola = nn.MaxPool2d(2, 2)               # Input - 16x28x28  Output - 16x14x14

        self.conv3 = nn.Conv2d(16, 32, 3, padding=1)  # Input - 16x14x14  Output - 32x14x14
        self.batch3 = nn.BatchNorm2d(32)
        self.conv4 = nn.Conv2d(32, 46, 3, padding=1)  # Input - 32x14x14  Output - 46x14x14
        self.batch4 = nn.BatchNorm2d(46)

        self.pool1 = nn.AvgPool2d(14)                 # global average pooling: 46x14x14 -> 46x1x1
        self.linear1 = nn.Linear(46, 10)

    def forward(self, x):
        """Map a batch of images to per-class log-probabilities."""
        # conv1: no batch normalization in this variant
        x = F.relu(self.conv1(x))

        x = self.batch2(F.relu(self.conv2(x)))
        x = self.poola(x)

        # conv3: no batch normalization in this variant
        x = F.relu(self.conv3(x))
        # The functional dropout defaults to training=True, so it must be tied
        # to self.training; otherwise model.eval() would not switch it off.
        x = F.dropout2d(x, 0.005, training=self.training)

        x = self.batch4(F.relu(self.conv4(x)))

        x = self.pool1(x)
        # Flatten everything except the batch axis (safe for a batch of one).
        x = torch.flatten(x, 1)
        x = self.linear1(x)
        # dim=1 is the class axis; explicit dim avoids the deprecation warning.
        return F.log_softmax(x, dim=1)

# Instantiate the Net class defined above on the available device and print
# its layer-by-layer summary for a 1x28x28 input.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)
summary(model, input_size=(1, 28, 28))


# Build a fresh network for training (replaces the instance created for the
# summary) and optimise it with SGD plus momentum.
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# range(1, 20) yields epochs 1..19; the test split is evaluated after each one.
for epoch in range(1, 20):
    print(f'Epoch number {epoch}')
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 28, 28]              80
            Conv2d-2           [-1, 16, 28, 28]           1,168
       BatchNorm2d-3           [-1, 16, 28, 28]              32
         MaxPool2d-4           [-1, 16, 14, 14]               0
            Conv2d-5           [-1, 32, 14, 14]           4,640
            Conv2d-6           [-1, 46, 14, 14]          13,294
       BatchNorm2d-7           [-1, 46, 14, 14]              92
         AvgPool2d-8             [-1, 46, 1, 1]               0
            Linear-9                   [-1, 10]             470
Total params: 19,776
Trainable params: 19,776
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.45
Params size (MB): 0.08
Estimated Total Size (MB): 0.53
---------------------------------------------




Test set: Average loss: 0.2472, Accuracy: 9422/10000 (94.22%)

Epoch number 2

Test set: Average loss: 0.2104, Accuracy: 9416/10000 (94.16%)

Epoch number 3

Test set: Average loss: 0.1239, Accuracy: 9668/10000 (96.68%)

Epoch number 4

Test set: Average loss: 0.0784, Accuracy: 9793/10000 (97.93%)

Epoch number 5

Test set: Average loss: 0.0850, Accuracy: 9766/10000 (97.66%)

Epoch number 6

Test set: Average loss: 0.0472, Accuracy: 9866/10000 (98.66%)

Epoch number 7

Test set: Average loss: 0.0489, Accuracy: 9864/10000 (98.64%)

Epoch number 8

Test set: Average loss: 0.0465, Accuracy: 9865/10000 (98.65%)

Epoch number 9

Test set: Average loss: 0.0431, Accuracy: 9873/10000 (98.73%)

Epoch number 10

Test set: Average loss: 0.0410, Accuracy: 9878/10000 (98.78%)

Epoch number 11

Test set: Average loss: 0.0378, Accuracy: 9892/10000 (98.92%)

Epoch number 12

Test set: Average loss: 0.0340, Accuracy: 9896/10000 (98.96%)

Epoch number 13

Test set: Average loss: 0.0413, Accuracy: 9887/

### Customized model 7

In [None]:
class Net(nn.Module):
    """Customized model 7: four 3x3 convolutions with two 2x2 max-pools.

    Channel widths are 1 -> 8 -> 16 -> 32 -> 46. Every convolution is followed
    by ReLU and batch normalization. Max-pooling after the first and third
    blocks brings the resolution to 7x7; 2D dropout (p=0.005) follows the
    second pool. Global average pooling then uses a 7x7 window. Shape comments
    assume a 1x28x28 input.

    ``forward`` returns log-probabilities of shape ``(batch, 10)``.
    """

    def __init__(self):
        super(Net, self).__init__()
        # first convolution block
        self.conv1 = nn.Conv2d(1, 8, 3, padding=1)    # Input - 1x28x28   Output - 8x28x28
        self.batch1 = nn.BatchNorm2d(8)
        self.poola = nn.MaxPool2d(2, 2)               # Input - 8x28x28   Output - 8x14x14

        self.conv2 = nn.Conv2d(8, 16, 3, padding=1)   # Input - 8x14x14   Output - 16x14x14
        self.batch2 = nn.BatchNorm2d(16)

        self.conv3 = nn.Conv2d(16, 32, 3, padding=1)  # Input - 16x14x14  Output - 32x14x14
        self.batch3 = nn.BatchNorm2d(32)
        self.poolb = nn.MaxPool2d(2, 2)               # Input - 32x14x14  Output - 32x7x7

        self.conv4 = nn.Conv2d(32, 46, 3, padding=1)  # Input - 32x7x7    Output - 46x7x7
        self.batch4 = nn.BatchNorm2d(46)

        self.pool1 = nn.AvgPool2d(7)                  # global average pooling: 46x7x7 -> 46x1x1
        self.linear1 = nn.Linear(46, 10)

    def forward(self, x):
        """Map a batch of images to per-class log-probabilities."""
        x = self.batch1(F.relu(self.conv1(x)))
        x = self.poola(x)

        x = self.batch2(F.relu(self.conv2(x)))

        x = self.batch3(F.relu(self.conv3(x)))
        x = self.poolb(x)
        # The functional dropout defaults to training=True, so it must be tied
        # to self.training; otherwise model.eval() would not switch it off.
        x = F.dropout2d(x, 0.005, training=self.training)

        x = self.batch4(F.relu(self.conv4(x)))

        x = self.pool1(x)
        # Flatten everything except the batch axis (safe for a batch of one).
        x = torch.flatten(x, 1)
        x = self.linear1(x)
        # dim=1 is the class axis; explicit dim avoids the deprecation warning.
        return F.log_softmax(x, dim=1)

# Instantiate the Net class defined above on the available device and print
# its layer-by-layer summary for a 1x28x28 input.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)
summary(model, input_size=(1, 28, 28))


# Build a fresh network for training (replaces the instance created for the
# summary) and optimise it with SGD plus momentum.
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# range(1, 20) yields epochs 1..19; the test split is evaluated after each one.
for epoch in range(1, 20):
    print(f'Epoch number {epoch}')
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 28, 28]              80
       BatchNorm2d-2            [-1, 8, 28, 28]              16
         MaxPool2d-3            [-1, 8, 14, 14]               0
            Conv2d-4           [-1, 16, 14, 14]           1,168
       BatchNorm2d-5           [-1, 16, 14, 14]              32
            Conv2d-6           [-1, 32, 14, 14]           4,640
       BatchNorm2d-7           [-1, 32, 14, 14]              64
         MaxPool2d-8             [-1, 32, 7, 7]               0
            Conv2d-9             [-1, 46, 7, 7]          13,294
      BatchNorm2d-10             [-1, 46, 7, 7]              92
        AvgPool2d-11             [-1, 46, 1, 1]               0
           Linear-12                   [-1, 10]             470
Total params: 19,856
Trainable params: 19,856
Non-trainable params: 0
---------------------------------




Test set: Average loss: 0.1262, Accuracy: 9676/10000 (96.76%)

Epoch number 2

Test set: Average loss: 0.0961, Accuracy: 9726/10000 (97.26%)

Epoch number 3

Test set: Average loss: 0.0571, Accuracy: 9836/10000 (98.36%)

Epoch number 4

Test set: Average loss: 0.0555, Accuracy: 9839/10000 (98.39%)

Epoch number 5

Test set: Average loss: 0.0459, Accuracy: 9870/10000 (98.70%)

Epoch number 6

Test set: Average loss: 0.0732, Accuracy: 9774/10000 (97.74%)

Epoch number 7

Test set: Average loss: 0.0425, Accuracy: 9869/10000 (98.69%)

Epoch number 8

Test set: Average loss: 0.0374, Accuracy: 9885/10000 (98.85%)

Epoch number 9

Test set: Average loss: 0.0388, Accuracy: 9881/10000 (98.81%)

Epoch number 10

Test set: Average loss: 0.0366, Accuracy: 9882/10000 (98.82%)

Epoch number 11

Test set: Average loss: 0.0326, Accuracy: 9896/10000 (98.96%)

Epoch number 12

Test set: Average loss: 0.0333, Accuracy: 9894/10000 (98.94%)

Epoch number 13

Test set: Average loss: 0.0317, Accuracy: 9896/

### Customized model 8

In [None]:
class Net(nn.Module):
    """Compact CNN classifier for single-channel 28x28 inputs with 10 output classes.

    Four 3x3 convolution stages (Conv2d -> ReLU -> BatchNorm2d); the first three
    are each followed by a 2x2 max pool, so a 28x28 input shrinks
    28 -> 14 -> 7 -> 3. The final 46x3x3 feature map is averaged to 46x1x1 and
    mapped to 10 class scores by a linear layer.
    """

    def __init__(self):
        super(Net, self).__init__()
        # first convolution block
        self.conv1 = nn.Conv2d(1, 8, 3, padding=1)    # input 1x28x28  -> output 8x28x28
        self.batch1 = nn.BatchNorm2d(8)
        self.poola = nn.MaxPool2d(2, 2)               # 8x28x28 -> 8x14x14

        # second convolution block
        self.conv2 = nn.Conv2d(8, 16, 3, padding=1)   # input 8x14x14  -> output 16x14x14
        self.batch2 = nn.BatchNorm2d(16)
        self.poolb = nn.MaxPool2d(2, 2)               # 16x14x14 -> 16x7x7

        # third convolution block
        self.conv3 = nn.Conv2d(16, 32, 3, padding=1)  # input 16x7x7   -> output 32x7x7
        self.batch3 = nn.BatchNorm2d(32)
        self.poolc = nn.MaxPool2d(2, 2)               # 32x7x7 -> 32x3x3

        # fourth convolution block
        self.conv4 = nn.Conv2d(32, 46, 3, padding=1)  # input 32x3x3   -> output 46x3x3
        self.batch4 = nn.BatchNorm2d(46)

        # average pooling over the whole remaining 3x3 map: 46x3x3 -> 46x1x1
        self.pool1 = nn.AvgPool2d(3)

        # classifier head: 46 pooled features -> 10 class scores
        self.linear1 = nn.Linear(46, 10)

    def forward(self, x):
        """Return log-probabilities of shape (batch, 10).

        The shape comments in ``__init__`` assume ``x`` is (batch, 1, 28, 28),
        matching the ``summary`` call used with this model.
        """
        # first convolution block
        x = self.conv1(x)
        x = F.relu(x)
        x = self.batch1(x)
        x = self.poola(x)

        # second convolution block
        x = self.conv2(x)
        x = F.relu(x)
        x = self.batch2(x)
        x = self.poolb(x)

        # third convolution block
        x = self.conv3(x)
        x = F.relu(x)
        x = self.batch3(x)
        x = self.poolc(x)
        # The functional dropout defaults to training=True; tie it to the module's
        # mode so that model.eval() really disables it at test time.
        x = F.dropout2d(x, 0.005, training=self.training)

        # fourth convolution block
        x = self.conv4(x)
        x = F.relu(x)
        x = self.batch4(x)

        x = self.pool1(x)
        # Flatten from dim 1 (instead of squeeze()) so the batch axis survives
        # even when the batch holds a single sample.
        x = x.flatten(1)
        x = self.linear1(x)
        # Explicit class dimension; the implicit-dim form is deprecated.
        return F.log_softmax(x, dim=1)

# Run on the GPU when CUDA is available, otherwise on the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")

# Show the layer-by-layer output shapes and parameter counts for a 1x28x28 input.
model = Net().to(device)
summary(model, input_size=(1, 28, 28))

# Train a freshly initialised network (a new instance, not the one summarised above).
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), momentum=0.9, lr=0.01)

# range(1, 20) covers epochs 1..19; the test set is evaluated after every epoch.
for epoch in range(1, 20):
    print(f'Epoch number {epoch}')
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 28, 28]              80
       BatchNorm2d-2            [-1, 8, 28, 28]              16
         MaxPool2d-3            [-1, 8, 14, 14]               0
            Conv2d-4           [-1, 16, 14, 14]           1,168
       BatchNorm2d-5           [-1, 16, 14, 14]              32
         MaxPool2d-6             [-1, 16, 7, 7]               0
            Conv2d-7             [-1, 32, 7, 7]           4,640
       BatchNorm2d-8             [-1, 32, 7, 7]              64
         MaxPool2d-9             [-1, 32, 3, 3]               0
           Conv2d-10             [-1, 46, 3, 3]          13,294
      BatchNorm2d-11             [-1, 46, 3, 3]              92
        AvgPool2d-12             [-1, 46, 1, 1]               0
           Linear-13                   [-1, 10]             470
Total params: 19,856
Trainable params: 




Test set: Average loss: 0.0705, Accuracy: 9795/10000 (97.95%)

Epoch number 2

Test set: Average loss: 0.0486, Accuracy: 9871/10000 (98.71%)

Epoch number 3

Test set: Average loss: 0.0414, Accuracy: 9875/10000 (98.75%)

Epoch number 4

Test set: Average loss: 0.0526, Accuracy: 9829/10000 (98.29%)

Epoch number 5

Test set: Average loss: 0.0380, Accuracy: 9875/10000 (98.75%)

Epoch number 6

Test set: Average loss: 0.0337, Accuracy: 9889/10000 (98.89%)

Epoch number 7

Test set: Average loss: 0.0347, Accuracy: 9895/10000 (98.95%)

Epoch number 8

Test set: Average loss: 0.0351, Accuracy: 9889/10000 (98.89%)

Epoch number 9

Test set: Average loss: 0.0291, Accuracy: 9899/10000 (98.99%)

Epoch number 10

Test set: Average loss: 0.0307, Accuracy: 9899/10000 (98.99%)

Epoch number 11

Test set: Average loss: 0.0325, Accuracy: 9891/10000 (98.91%)

Epoch number 12

Test set: Average loss: 0.0336, Accuracy: 9886/10000 (98.86%)

Epoch number 13

Test set: Average loss: 0.0320, Accuracy: 9893/

### Customized model 9

In [None]:
class Net(nn.Module):
    """Compact CNN classifier for single-channel 28x28 inputs with 10 output classes.

    Two pairs of 3x3 convolution stages (Conv2d -> ReLU -> BatchNorm2d), each
    pair followed by a 2x2 max pool, so a 28x28 input shrinks 28 -> 14 -> 7.
    The final 36x7x7 feature map is averaged to 36x1x1 and mapped to 10 class
    scores by a linear layer.
    """

    def __init__(self):
        super(Net, self).__init__()
        # first convolution block
        self.conv1 = nn.Conv2d(1, 8, 3, padding=1)    # input 1x28x28   -> output 8x28x28
        self.batch1 = nn.BatchNorm2d(8)

        self.conv2 = nn.Conv2d(8, 16, 3, padding=1)   # input 8x28x28   -> output 16x28x28
        self.batch2 = nn.BatchNorm2d(16)
        self.poola = nn.MaxPool2d(2, 2)               # 16x28x28 -> 16x14x14

        # second convolution block
        self.conv3 = nn.Conv2d(16, 24, 3, padding=1)  # input 16x14x14  -> output 24x14x14
        self.batch3 = nn.BatchNorm2d(24)

        self.conv4 = nn.Conv2d(24, 36, 3, padding=1)  # input 24x14x14  -> output 36x14x14
        self.batch4 = nn.BatchNorm2d(36)
        self.poolb = nn.MaxPool2d(2, 2)               # 36x14x14 -> 36x7x7

        # average pooling over the whole remaining 7x7 map: 36x7x7 -> 36x1x1
        self.pool1 = nn.AvgPool2d(7)
        # classifier head: 36 pooled features -> 10 class scores
        self.linear1 = nn.Linear(36, 10)

    def forward(self, x):
        """Return log-probabilities of shape (batch, 10).

        The shape comments in ``__init__`` assume ``x`` is (batch, 1, 28, 28),
        matching the ``summary`` call used with this model.
        """
        # first convolution block
        x = self.conv1(x)
        x = F.relu(x)
        x = self.batch1(x)

        x = self.conv2(x)
        x = F.relu(x)
        x = self.batch2(x)
        x = self.poola(x)
        # The functional dropout defaults to training=True; tie it to the module's
        # mode so that model.eval() really disables it at test time.
        x = F.dropout2d(x, 0.005, training=self.training)

        # second convolution block
        x = self.conv3(x)
        x = F.relu(x)
        x = self.batch3(x)

        x = self.conv4(x)
        x = F.relu(x)
        x = self.batch4(x)
        x = self.poolb(x)

        x = self.pool1(x)
        # Flatten from dim 1 (instead of squeeze()) so the batch axis survives
        # even when the batch holds a single sample.
        x = x.flatten(1)
        x = self.linear1(x)
        # Explicit class dimension; the implicit-dim form is deprecated.
        return F.log_softmax(x, dim=1)

# Run on the GPU when CUDA is available, otherwise on the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")

# Show the layer-by-layer output shapes and parameter counts for a 1x28x28 input.
model = Net().to(device)
summary(model, input_size=(1, 28, 28))

# Train a freshly initialised network (a new instance, not the one summarised above).
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), momentum=0.9, lr=0.01)

# range(1, 20) covers epochs 1..19; the test set is evaluated after every epoch.
for epoch in range(1, 20):
    print(f'Epoch number {epoch}')
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 28, 28]              80
       BatchNorm2d-2            [-1, 8, 28, 28]              16
            Conv2d-3           [-1, 16, 28, 28]           1,168
       BatchNorm2d-4           [-1, 16, 28, 28]              32
         MaxPool2d-5           [-1, 16, 14, 14]               0
            Conv2d-6           [-1, 24, 14, 14]           3,480
       BatchNorm2d-7           [-1, 24, 14, 14]              48
            Conv2d-8           [-1, 36, 14, 14]           7,812
       BatchNorm2d-9           [-1, 36, 14, 14]              72
        MaxPool2d-10             [-1, 36, 7, 7]               0
        AvgPool2d-11             [-1, 36, 1, 1]               0
           Linear-12                   [-1, 10]             370
Total params: 13,078
Trainable params: 13,078
Non-trainable params: 0
---------------------------------




Test set: Average loss: 0.1147, Accuracy: 9712/10000 (97.12%)

Epoch number 2

Test set: Average loss: 0.0717, Accuracy: 9798/10000 (97.98%)

Epoch number 3

Test set: Average loss: 0.0578, Accuracy: 9841/10000 (98.41%)

Epoch number 4

Test set: Average loss: 0.0527, Accuracy: 9861/10000 (98.61%)

Epoch number 5

Test set: Average loss: 0.0504, Accuracy: 9853/10000 (98.53%)

Epoch number 6

Test set: Average loss: 0.0400, Accuracy: 9872/10000 (98.72%)

Epoch number 7

Test set: Average loss: 0.0388, Accuracy: 9880/10000 (98.80%)

Epoch number 8

Test set: Average loss: 0.0438, Accuracy: 9867/10000 (98.67%)

Epoch number 9

Test set: Average loss: 0.0374, Accuracy: 9892/10000 (98.92%)

Epoch number 10

Test set: Average loss: 0.0403, Accuracy: 9880/10000 (98.80%)

Epoch number 11

Test set: Average loss: 0.0387, Accuracy: 9877/10000 (98.77%)

Epoch number 12

Test set: Average loss: 0.0399, Accuracy: 9868/10000 (98.68%)

Epoch number 13

Test set: Average loss: 0.0313, Accuracy: 9898/

### Customized model 10

In [None]:
class Net(nn.Module):
    """Compact CNN classifier for single-channel 28x28 inputs with 10 output classes.

    Four 3x3 convolution stages (Conv2d -> ReLU -> BatchNorm2d) with 10, 20, 30
    and 40 output channels. A 2x2 max pool follows stages two, three and four,
    so a 28x28 input shrinks 28 -> 14 -> 7 -> 3. The final 40x3x3 feature map
    is averaged to 40x1x1 and mapped to 10 class scores by a linear layer.
    """

    def __init__(self):
        super(Net, self).__init__()
        # first convolution block
        self.conv1 = nn.Conv2d(1, 10, 3, padding=1)   # input 1x28x28   -> output 10x28x28
        self.batch1 = nn.BatchNorm2d(10)

        # second convolution block
        self.conv2 = nn.Conv2d(10, 20, 3, padding=1)  # input 10x28x28  -> output 20x28x28
        self.batch2 = nn.BatchNorm2d(20)
        self.poola = nn.MaxPool2d(2, 2)               # 20x28x28 -> 20x14x14

        # third convolution block
        self.conv3 = nn.Conv2d(20, 30, 3, padding=1)  # input 20x14x14  -> output 30x14x14
        self.batch3 = nn.BatchNorm2d(30)
        self.poolb0 = nn.MaxPool2d(2, 2)              # 30x14x14 -> 30x7x7

        # fourth convolution block
        self.conv4 = nn.Conv2d(30, 40, 3, padding=1)  # input 30x7x7    -> output 40x7x7
        self.batch4 = nn.BatchNorm2d(40)
        self.poolb = nn.MaxPool2d(2, 2)               # 40x7x7 -> 40x3x3

        # average pooling over the whole remaining 3x3 map: 40x3x3 -> 40x1x1
        self.pool1 = nn.AvgPool2d(3)
        # classifier head: 40 pooled features -> 10 class scores
        self.linear1 = nn.Linear(40, 10)

    def forward(self, x):
        """Return log-probabilities of shape (batch, 10).

        The shape comments in ``__init__`` assume ``x`` is (batch, 1, 28, 28),
        matching the ``summary`` call used with this model.
        """
        # first convolution block
        x = self.conv1(x)
        x = F.relu(x)
        x = self.batch1(x)

        # second convolution block
        x = self.conv2(x)
        x = F.relu(x)
        x = self.batch2(x)
        x = self.poola(x)
        # The functional dropout defaults to training=True; tie it to the module's
        # mode so that model.eval() really disables it at test time.
        x = F.dropout2d(x, 0.01, training=self.training)

        # third convolution block
        x = self.conv3(x)
        x = F.relu(x)
        x = self.batch3(x)
        x = self.poolb0(x)

        # fourth convolution block
        x = self.conv4(x)
        x = F.relu(x)
        x = self.batch4(x)
        x = self.poolb(x)

        x = self.pool1(x)
        # Flatten from dim 1 (instead of squeeze()) so the batch axis survives
        # even when the batch holds a single sample.
        x = x.flatten(1)
        x = self.linear1(x)
        # Explicit class dimension; the implicit-dim form is deprecated.
        return F.log_softmax(x, dim=1)

# Run on the GPU when CUDA is available, otherwise on the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")

# Show the layer-by-layer output shapes and parameter counts for a 1x28x28 input.
model = Net().to(device)
summary(model, input_size=(1, 28, 28))

# Train a freshly initialised network (a new instance, not the one summarised above).
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), momentum=0.9, lr=0.01)

# range(1, 20) covers epochs 1..19; the test set is evaluated after every epoch.
for epoch in range(1, 20):
    print(f'Epoch number {epoch}')
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 10, 28, 28]             100
       BatchNorm2d-2           [-1, 10, 28, 28]              20
            Conv2d-3           [-1, 20, 28, 28]           1,820
       BatchNorm2d-4           [-1, 20, 28, 28]              40
         MaxPool2d-5           [-1, 20, 14, 14]               0
            Conv2d-6           [-1, 30, 14, 14]           5,430
       BatchNorm2d-7           [-1, 30, 14, 14]              60
         MaxPool2d-8             [-1, 30, 7, 7]               0
            Conv2d-9             [-1, 40, 7, 7]          10,840
      BatchNorm2d-10             [-1, 40, 7, 7]              80
        MaxPool2d-11             [-1, 40, 3, 3]               0
        AvgPool2d-12             [-1, 40, 1, 1]               0
           Linear-13                   [-1, 10]             410
Total params: 18,800
Trainable params: 




Test set: Average loss: 0.0694, Accuracy: 9825/10000 (98.25%)

Epoch number 2

Test set: Average loss: 0.0497, Accuracy: 9857/10000 (98.57%)

Epoch number 3

Test set: Average loss: 0.0392, Accuracy: 9886/10000 (98.86%)

Epoch number 4

Test set: Average loss: 0.0331, Accuracy: 9895/10000 (98.95%)

Epoch number 5

Test set: Average loss: 0.0307, Accuracy: 9903/10000 (99.03%)

Epoch number 6

Test set: Average loss: 0.0301, Accuracy: 9901/10000 (99.01%)

Epoch number 7

Test set: Average loss: 0.0257, Accuracy: 9912/10000 (99.12%)

Epoch number 8

Test set: Average loss: 0.0264, Accuracy: 9909/10000 (99.09%)

Epoch number 9

Test set: Average loss: 0.0315, Accuracy: 9898/10000 (98.98%)

Epoch number 10

Test set: Average loss: 0.0269, Accuracy: 9902/10000 (99.02%)

Epoch number 11

Test set: Average loss: 0.0262, Accuracy: 9909/10000 (99.09%)

Epoch number 12

Test set: Average loss: 0.0269, Accuracy: 9920/10000 (99.20%)

Epoch number 13

Test set: Average loss: 0.0249, Accuracy: 9916/

### Customized model 11


In [None]:
class Net(nn.Module):
    """Compact CNN classifier for single-channel 28x28 inputs with 10 output classes.

    Four 3x3 convolution stages (Conv2d -> ReLU -> BatchNorm2d -> Dropout2d)
    with 10, 20, 30 and 40 output channels. A 2x2 max pool follows stages two,
    three and four, so a 28x28 input shrinks 28 -> 14 -> 7 -> 3. The final
    40x3x3 feature map is averaged to 40x1x1 and mapped to 10 class scores by a
    linear layer.
    """

    def __init__(self):
        super(Net, self).__init__()
        # first convolution block
        self.conv1 = nn.Conv2d(1, 10, 3, padding=1)   # input 1x28x28   -> output 10x28x28
        self.batch1 = nn.BatchNorm2d(10)

        # second convolution block
        self.conv2 = nn.Conv2d(10, 20, 3, padding=1)  # input 10x28x28  -> output 20x28x28
        self.batch2 = nn.BatchNorm2d(20)
        self.poola = nn.MaxPool2d(2, 2)               # 20x28x28 -> 20x14x14

        # third convolution block
        self.conv3 = nn.Conv2d(20, 30, 3, padding=1)  # input 20x14x14  -> output 30x14x14
        self.batch3 = nn.BatchNorm2d(30)
        self.poolb0 = nn.MaxPool2d(2, 2)              # 30x14x14 -> 30x7x7

        # fourth convolution block
        self.conv4 = nn.Conv2d(30, 40, 3, padding=1)  # input 30x7x7    -> output 40x7x7
        self.batch4 = nn.BatchNorm2d(40)
        self.poolb = nn.MaxPool2d(2, 2)               # 40x7x7 -> 40x3x3

        # GAP layer: the kernel size 3 equals the spatial size of the incoming
        # feature map, so the whole map is averaged: 40x3x3 -> 40x1x1
        self.pool1 = nn.AvgPool2d(3)
        # FC layer -> output layer: 40 pooled features -> 10 class scores
        self.linear1 = nn.Linear(40, 10)

    def forward(self, x):
        """Return log-probabilities of shape (batch, 10).

        The shape comments in ``__init__`` assume ``x`` is (batch, 1, 28, 28),
        matching the ``summary`` call used with this model.
        """
        # The functional dropout defaults to training=True; every call below is
        # tied to the module's mode so that model.eval() really disables it.

        # first convolution block
        x = self.conv1(x)
        x = F.relu(x)
        x = self.batch1(x)
        x = F.dropout2d(x, 0.01, training=self.training)

        # second convolution block
        x = self.conv2(x)
        x = F.relu(x)
        x = self.batch2(x)
        x = F.dropout2d(x, 0.01, training=self.training)
        x = self.poola(x)

        # third convolution block
        x = self.conv3(x)
        x = F.relu(x)
        x = self.batch3(x)
        x = F.dropout2d(x, 0.01, training=self.training)
        x = self.poolb0(x)

        # fourth convolution block
        x = self.conv4(x)
        x = F.relu(x)
        x = self.batch4(x)
        x = F.dropout2d(x, 0.01, training=self.training)
        x = self.poolb(x)

        x = self.pool1(x)
        # Flatten from dim 1 (instead of squeeze()) so the batch axis survives
        # even when the batch holds a single sample.
        x = x.flatten(1)
        x = self.linear1(x)
        # Explicit class dimension; the implicit-dim form is deprecated.
        return F.log_softmax(x, dim=1)

# Run on the GPU when CUDA is available, otherwise on the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")

# Show the layer-by-layer output shapes and parameter counts for a 1x28x28 input.
model = Net().to(device)
summary(model, input_size=(1, 28, 28))

# Train a freshly initialised network (a new instance, not the one summarised above).
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), momentum=0.9, lr=0.01)

# range(1, 20) covers epochs 1..19; the test set is evaluated after every epoch.
for epoch in range(1, 20):
    print(f'Epoch number {epoch}')
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 10, 28, 28]             100
       BatchNorm2d-2           [-1, 10, 28, 28]              20
            Conv2d-3           [-1, 20, 28, 28]           1,820
       BatchNorm2d-4           [-1, 20, 28, 28]              40
         MaxPool2d-5           [-1, 20, 14, 14]               0
            Conv2d-6           [-1, 30, 14, 14]           5,430
       BatchNorm2d-7           [-1, 30, 14, 14]              60
         MaxPool2d-8             [-1, 30, 7, 7]               0
            Conv2d-9             [-1, 40, 7, 7]          10,840
      BatchNorm2d-10             [-1, 40, 7, 7]              80
        MaxPool2d-11             [-1, 40, 3, 3]               0
        AvgPool2d-12             [-1, 40, 1, 1]               0
           Linear-13                   [-1, 10]             410
Total params: 18,800
Trainable params: 




Test set: Average loss: 0.0771, Accuracy: 9795/10000 (97.95%)

Epoch number 2

Test set: Average loss: 0.0507, Accuracy: 9862/10000 (98.62%)

Epoch number 3

Test set: Average loss: 0.0403, Accuracy: 9873/10000 (98.73%)

Epoch number 4

Test set: Average loss: 0.0330, Accuracy: 9898/10000 (98.98%)

Epoch number 5

Test set: Average loss: 0.0343, Accuracy: 9893/10000 (98.93%)

Epoch number 6

Test set: Average loss: 0.0270, Accuracy: 9921/10000 (99.21%)

Epoch number 7

Test set: Average loss: 0.0284, Accuracy: 9911/10000 (99.11%)

Epoch number 8

Test set: Average loss: 0.0288, Accuracy: 9909/10000 (99.09%)

Epoch number 9

Test set: Average loss: 0.0287, Accuracy: 9901/10000 (99.01%)

Epoch number 10

Test set: Average loss: 0.0258, Accuracy: 9908/10000 (99.08%)

Epoch number 11

Test set: Average loss: 0.0260, Accuracy: 9917/10000 (99.17%)

Epoch number 12

Test set: Average loss: 0.0244, Accuracy: 9928/10000 (99.28%)

Epoch number 13

Test set: Average loss: 0.0276, Accuracy: 9916/

### Customized model 12

In [None]:
class Net(nn.Module):
    """Compact CNN classifier for single-channel 28x28 inputs with 10 output classes.

    Four 3x3 convolution stages (Conv2d -> ReLU -> BatchNorm2d -> Dropout2d)
    with 10, 20, 30 and 40 output channels; dropout probability is 0.05 in the
    first two stages and 0.01 in the last two. A 2x2 max pool follows stages
    two, three and four, so a 28x28 input shrinks 28 -> 14 -> 7 -> 3. The final
    40x3x3 feature map is averaged to 40x1x1 and mapped to 10 class scores by a
    linear layer.
    """

    def __init__(self):
        super(Net, self).__init__()
        # first convolution block
        self.conv1 = nn.Conv2d(1, 10, 3, padding=1)   # input 1x28x28   -> output 10x28x28
        self.batch1 = nn.BatchNorm2d(10)

        # second convolution block
        self.conv2 = nn.Conv2d(10, 20, 3, padding=1)  # input 10x28x28  -> output 20x28x28
        self.batch2 = nn.BatchNorm2d(20)
        self.poola = nn.MaxPool2d(2, 2)               # 20x28x28 -> 20x14x14

        # third convolution block
        self.conv3 = nn.Conv2d(20, 30, 3, padding=1)  # input 20x14x14  -> output 30x14x14
        self.batch3 = nn.BatchNorm2d(30)
        self.poolb0 = nn.MaxPool2d(2, 2)              # 30x14x14 -> 30x7x7

        # fourth convolution block
        self.conv4 = nn.Conv2d(30, 40, 3, padding=1)  # input 30x7x7    -> output 40x7x7
        self.batch4 = nn.BatchNorm2d(40)
        self.poolb = nn.MaxPool2d(2, 2)               # 40x7x7 -> 40x3x3

        # average pooling over the whole remaining 3x3 map: 40x3x3 -> 40x1x1
        self.pool1 = nn.AvgPool2d(3)
        # classifier head: 40 pooled features -> 10 class scores
        self.linear1 = nn.Linear(40, 10)

    def forward(self, x):
        """Return log-probabilities of shape (batch, 10).

        The shape comments in ``__init__`` assume ``x`` is (batch, 1, 28, 28),
        matching the ``summary`` call used with this model.
        """
        # The functional dropout defaults to training=True; every call below is
        # tied to the module's mode so that model.eval() really disables it.

        # first convolution block
        x = self.conv1(x)
        x = F.relu(x)
        x = self.batch1(x)
        x = F.dropout2d(x, 0.05, training=self.training)

        # second convolution block
        x = self.conv2(x)
        x = F.relu(x)
        x = self.batch2(x)
        x = F.dropout2d(x, 0.05, training=self.training)
        x = self.poola(x)

        # third convolution block
        x = self.conv3(x)
        x = F.relu(x)
        x = self.batch3(x)
        x = F.dropout2d(x, 0.01, training=self.training)
        x = self.poolb0(x)

        # fourth convolution block
        x = self.conv4(x)
        x = F.relu(x)
        x = self.batch4(x)
        x = F.dropout2d(x, 0.01, training=self.training)
        x = self.poolb(x)

        x = self.pool1(x)
        # Flatten from dim 1 (instead of squeeze()) so the batch axis survives
        # even when the batch holds a single sample.
        x = x.flatten(1)
        x = self.linear1(x)
        # Explicit class dimension; the implicit-dim form is deprecated.
        return F.log_softmax(x, dim=1)

# Run on the GPU when CUDA is available, otherwise on the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")

# Show the layer-by-layer output shapes and parameter counts for a 1x28x28 input.
model = Net().to(device)
summary(model, input_size=(1, 28, 28))

# Train a freshly initialised network (a new instance, not the one summarised above).
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), momentum=0.9, lr=0.01)

# range(1, 20) covers epochs 1..19; the test set is evaluated after every epoch.
for epoch in range(1, 20):
    print(f'Epoch number {epoch}')
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 10, 28, 28]             100
       BatchNorm2d-2           [-1, 10, 28, 28]              20
            Conv2d-3           [-1, 20, 28, 28]           1,820
       BatchNorm2d-4           [-1, 20, 28, 28]              40
         MaxPool2d-5           [-1, 20, 14, 14]               0
            Conv2d-6           [-1, 30, 14, 14]           5,430
       BatchNorm2d-7           [-1, 30, 14, 14]              60
         MaxPool2d-8             [-1, 30, 7, 7]               0
            Conv2d-9             [-1, 40, 7, 7]          10,840
      BatchNorm2d-10             [-1, 40, 7, 7]              80
        MaxPool2d-11             [-1, 40, 3, 3]               0
        AvgPool2d-12             [-1, 40, 1, 1]               0
           Linear-13                   [-1, 10]             410
Total params: 18,800
Trainable params: 




Test set: Average loss: 0.0814, Accuracy: 9772/10000 (97.72%)

Epoch number 2

Test set: Average loss: 0.0482, Accuracy: 9866/10000 (98.66%)

Epoch number 3

Test set: Average loss: 0.0453, Accuracy: 9862/10000 (98.62%)

Epoch number 4

Test set: Average loss: 0.0401, Accuracy: 9871/10000 (98.71%)

Epoch number 5

Test set: Average loss: 0.0339, Accuracy: 9903/10000 (99.03%)

Epoch number 6

Test set: Average loss: 0.0344, Accuracy: 9886/10000 (98.86%)

Epoch number 7

Test set: Average loss: 0.0364, Accuracy: 9884/10000 (98.84%)

Epoch number 8

Test set: Average loss: 0.0301, Accuracy: 9902/10000 (99.02%)

Epoch number 9

Test set: Average loss: 0.0309, Accuracy: 9898/10000 (98.98%)

Epoch number 10

Test set: Average loss: 0.0342, Accuracy: 9894/10000 (98.94%)

Epoch number 11

Test set: Average loss: 0.0280, Accuracy: 9910/10000 (99.10%)

Epoch number 12

Test set: Average loss: 0.0274, Accuracy: 9909/10000 (99.09%)

Epoch number 13

Test set: Average loss: 0.0298, Accuracy: 9895/