In [1]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

In [109]:
class Net(nn.Module):
    """Small convolutional classifier for single-channel 28x28 images.

    Data flow for a ``(N, 1, 28, 28)`` input (sizes match the torchsummary
    output recorded in this notebook):

        conv1 + BN + ReLU      -> (N,  8, 28, 28)
        pool1                  -> (N,  8, 14, 14)
        conv2 + BN + ReLU      -> (N, 16, 14, 14)
        dropout1, pool2        -> (N, 16,  7,  7)
        conv3 + BN + ReLU      -> (N, 32,  7,  7)
        dropout2
        conv4 + BN + ReLU      -> (N, 16,  5,  5)   (no padding, so 7 -> 5)
        flatten + fc1          -> (N, 10)
        log_softmax over dim 1 -> (N, 10) log-probabilities

    Returns log-probabilities, so it is meant to be paired with
    ``F.nll_loss`` (as the ``train``/``test`` helpers in this notebook do).
    """

    def __init__(self):
        super(Net, self).__init__()
        # NOTE: layers are created in the same order as before so that seeded
        # weight initialisation and parameter ordering are unchanged.
        self.conv1 = nn.Conv2d(1, 8, 3, padding=1)    # 1 -> 8 channels, 3x3, keeps 28x28
        self.conv1_bn = nn.BatchNorm2d(8)
        self.pool1 = nn.MaxPool2d(2, 2)               # 28x28 -> 14x14
        self.conv2 = nn.Conv2d(8, 16, 3, padding=1)   # 8 -> 16 channels, keeps 14x14
        self.conv2_bn = nn.BatchNorm2d(16)
        self.pool2 = nn.MaxPool2d(2, 2)               # 14x14 -> 7x7
        self.conv3 = nn.Conv2d(16, 32, 3, padding=1)  # 16 -> 32 channels, keeps 7x7
        self.conv3_bn = nn.BatchNorm2d(32)
        self.pool3 = nn.MaxPool2d(2, 2)               # not used in forward(); kept so the attribute still exists
        self.conv4 = nn.Conv2d(32, 16, 3)             # 32 -> 16 channels, no padding: 7x7 -> 5x5
        self.conv4_bn = nn.BatchNorm2d(16)
        self.pool4 = nn.MaxPool2d(2, 2)               # not used in forward(); kept so the attribute still exists
        self.fc1 = nn.Linear(16 * 5 * 5, 10)          # flattened 16x5x5 features -> 10 class scores

        self.dropout1 = nn.Dropout2d(0.25)
        self.dropout2 = nn.Dropout2d(0.25)

    def forward(self, x):
        """Map a batch of images to per-class log-probabilities.

        Args:
            x: tensor of shape ``(N, 1, 28, 28)``. Other spatial sizes do not
               match ``fc1`` and raise a ``RuntimeError``.

        Returns:
            Tensor of shape ``(N, 10)`` holding log-probabilities (each row
            exponentiates to a distribution summing to 1).
        """
        x = self.conv1(x)
        x = self.conv1_bn(x)
        x = F.relu(x)
        x = self.pool1(x)

        x = self.conv2(x)
        x = self.conv2_bn(x)
        x = F.relu(x)
        x = self.dropout1(x)
        x = self.pool2(x)

        x = self.conv3(x)
        x = self.conv3_bn(x)
        x = F.relu(x)
        x = self.dropout2(x)

        x = self.conv4(x)
        x = self.conv4_bn(x)
        x = F.relu(x)

        # Flatten everything except the batch dimension. Unlike
        # reshape(-1, 16*5*5), this can never silently change the batch size
        # when the input has an unexpected spatial size; fc1 fails instead.
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        # Explicit dim: the implicit choice is deprecated and emits a UserWarning.
        return F.log_softmax(x, dim=1)

In [110]:
!pip install torchsummary
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)
summary(model, input_size=(1, 28, 28))

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 28, 28]              80
       BatchNorm2d-2            [-1, 8, 28, 28]              16
         MaxPool2d-3            [-1, 8, 14, 14]               0
            Conv2d-4           [-1, 16, 14, 14]           1,168
       BatchNorm2d-5           [-1, 16, 14, 14]              32
         Dropout2d-6           [-1, 16, 14, 14]               0
         MaxPool2d-7             [-1, 16, 7, 7]               0
            Conv2d-8             [-1, 32, 7, 7]           4,640
       BatchNorm2d-9             [-1, 32, 7, 7]              64
        Dropout2d-10             [-1, 32, 7, 7]               0
           Conv2d-11             [-1, 16, 5, 5]           4,624
      BatchNorm2d-12             [-1, 16, 5, 5]              32
    

  return F.log_softmax(x)


In [111]:


# Seed torch's global RNG so shuffling and weight initialisation are repeatable.
torch.manual_seed(1)
batch_size = 128

# Extra DataLoader options are only passed when CUDA is in use.
kwargs = dict(num_workers=1, pin_memory=True) if use_cuda else {}

# One preprocessing pipeline shared by both splits: convert to a tensor, then
# normalise with a fixed mean/std (presumably the MNIST training-set
# statistics -- confirm before reusing on other data).
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# The training split is created first because it is the one that requests the
# download; the test split is then read from the same '../data' directory.
train_set = datasets.MNIST('../data', train=True, download=True,
                           transform=mnist_transform)
test_set = datasets.MNIST('../data', train=False, transform=mnist_transform)

train_loader = torch.utils.data.DataLoader(
    train_set, batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    test_set, batch_size=batch_size, shuffle=True, **kwargs)


In [112]:
from tqdm import tqdm
def train(model, device, train_loader, optimizer, epoch):
    """Run one pass of gradient updates over ``train_loader``.

    Args:
        model: module whose output is fed to ``F.nll_loss`` together with the
            batch targets.
        device: device every batch is moved to before the forward pass.
        train_loader: iterable yielding ``(data, target)`` pairs.
        optimizer: optimizer that is zeroed before and stepped after each
            backward pass.
        epoch: accepted for the caller's convenience; not used in the body.

    Returns:
        None. The model and optimizer are updated in place, and the progress
        bar description shows the latest batch loss and batch index.
    """
    model.train()  # training mode
    progress = tqdm(train_loader)
    for step, batch in enumerate(progress):
        inputs, labels = batch
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()  # drop gradients left over from the previous step
        batch_loss = F.nll_loss(model(inputs), labels)
        batch_loss.backward()
        optimizer.step()

        progress.set_description(desc= f'loss={batch_loss.item()} batch_id={step}')


def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [113]:

# Hyperparameters for this run.
LEARNING_RATE = 0.01
MOMENTUM = 0.9
EPOCH_NUMBERS = range(1, 20)  # epochs 1..19 inclusive

# Start from a freshly initialised network on the selected device.
model = Net()
model = model.to(device)
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM)

# One training pass followed by one evaluation pass per epoch.
for epoch in EPOCH_NUMBERS:
    print("Epoch: ", epoch)
    print("--------------")
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

Epoch:  1
--------------


  return F.log_softmax(x)
loss=0.14157654345035553 batch_id=468: 100%|██████████| 469/469 [00:17<00:00, 26.12it/s]



Test set: Average loss: 0.0670, Accuracy: 9785/10000 (97.85%)

Epoch:  2
--------------


loss=0.06746566295623779 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 32.93it/s]



Test set: Average loss: 0.0477, Accuracy: 9830/10000 (98.30%)

Epoch:  3
--------------


loss=0.0667879655957222 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 32.63it/s]



Test set: Average loss: 0.0407, Accuracy: 9852/10000 (98.52%)

Epoch:  4
--------------


loss=0.05491636320948601 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 32.16it/s]



Test set: Average loss: 0.0384, Accuracy: 9871/10000 (98.71%)

Epoch:  5
--------------


loss=0.031515952199697495 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 31.30it/s]



Test set: Average loss: 0.0319, Accuracy: 9885/10000 (98.85%)

Epoch:  6
--------------


loss=0.040687981992959976 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 32.01it/s]



Test set: Average loss: 0.0329, Accuracy: 9878/10000 (98.78%)

Epoch:  7
--------------


loss=0.08007332682609558 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 31.48it/s]



Test set: Average loss: 0.0320, Accuracy: 9884/10000 (98.84%)

Epoch:  8
--------------


loss=0.023730916902422905 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 32.97it/s]



Test set: Average loss: 0.0272, Accuracy: 9910/10000 (99.10%)

Epoch:  9
--------------


loss=0.018825998529791832 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 32.66it/s]



Test set: Average loss: 0.0260, Accuracy: 9909/10000 (99.09%)

Epoch:  10
--------------


loss=0.01291016023606062 batch_id=468: 100%|██████████| 469/469 [00:15<00:00, 31.25it/s]



Test set: Average loss: 0.0267, Accuracy: 9904/10000 (99.04%)

Epoch:  11
--------------


loss=0.029386267066001892 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 31.89it/s]



Test set: Average loss: 0.0261, Accuracy: 9905/10000 (99.05%)

Epoch:  12
--------------


loss=0.014224191196262836 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 32.57it/s]



Test set: Average loss: 0.0236, Accuracy: 9920/10000 (99.20%)

Epoch:  13
--------------


loss=0.057570770382881165 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 31.39it/s]



Test set: Average loss: 0.0244, Accuracy: 9913/10000 (99.13%)

Epoch:  14
--------------


loss=0.006078090984374285 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 32.84it/s]



Test set: Average loss: 0.0261, Accuracy: 9909/10000 (99.09%)

Epoch:  15
--------------


loss=0.15223008394241333 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 32.88it/s]



Test set: Average loss: 0.0246, Accuracy: 9913/10000 (99.13%)

Epoch:  16
--------------


loss=0.0050852056592702866 batch_id=468: 100%|██████████| 469/469 [00:15<00:00, 29.56it/s]



Test set: Average loss: 0.0257, Accuracy: 9908/10000 (99.08%)

Epoch:  17
--------------


loss=0.012222692370414734 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 33.07it/s]



Test set: Average loss: 0.0234, Accuracy: 9913/10000 (99.13%)

Epoch:  18
--------------


loss=0.051050424575805664 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 32.60it/s]



Test set: Average loss: 0.0229, Accuracy: 9920/10000 (99.20%)

Epoch:  19
--------------


loss=0.018453702330589294 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 31.36it/s]



Test set: Average loss: 0.0247, Accuracy: 9914/10000 (99.14%)



In [23]:
### Iteration1: 

# Epochs: 15, BATCHSIZE = 128

# ----------------------------------------------------------------
#         Layer (type)               Output Shape         Param #
# ================================================================
#             Conv2d-1           [-1, 32, 28, 28]             320
#        BatchNorm2d-2           [-1, 32, 28, 28]              64
#          MaxPool2d-3           [-1, 32, 14, 14]               0
#             Conv2d-4           [-1, 64, 14, 14]          18,496
#        BatchNorm2d-5           [-1, 64, 14, 14]             128
#          Dropout2d-6           [-1, 64, 14, 14]               0
#          MaxPool2d-7             [-1, 64, 7, 7]               0
#             Linear-8                   [-1, 10]          31,370
# ================================================================
# Total params: 50,378
# Trainable params: 50,378
# Non-trainable params: 0

# loss=0.04137827828526497 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 31.52it/s]

# Test set: Average loss: 0.0502, Accuracy: 9828/10000 (98.28%)

# loss=0.05746664106845856 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 31.78it/s]

# Test set: Average loss: 0.0536, Accuracy: 9818/10000 (98.18%)

# loss=0.0651572197675705 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 31.59it/s]

# Test set: Average loss: 0.0390, Accuracy: 9866/10000 (98.66%)

# loss=0.02148214727640152 batch_id=468: 100%|██████████| 469/469 [00:15<00:00, 30.87it/s]

# Test set: Average loss: 0.0389, Accuracy: 9871/10000 (98.71%)

# loss=0.008850262500345707 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 31.39it/s]

# Test set: Average loss: 0.0337, Accuracy: 9891/10000 (98.91%)

# loss=0.01182077918201685 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 31.59it/s]

# Test set: Average loss: 0.0326, Accuracy: 9886/10000 (98.86%)

# loss=0.007367847952991724 batch_id=468: 100%|██████████| 469/469 [00:15<00:00, 31.19it/s]

# Test set: Average loss: 0.0277, Accuracy: 9899/10000 (98.99%)

# loss=0.005205400753766298 batch_id=468: 100%|██████████| 469/469 [00:15<00:00, 31.17it/s]

# Test set: Average loss: 0.0364, Accuracy: 9874/10000 (98.74%)

# loss=0.025386208668351173 batch_id=468: 100%|██████████| 469/469 [00:15<00:00, 31.17it/s]

# Test set: Average loss: 0.0294, Accuracy: 9901/10000 (99.01%)

# loss=0.016618864610791206 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 31.67it/s]

# Test set: Average loss: 0.0302, Accuracy: 9895/10000 (98.95%)

# loss=0.05144502595067024 batch_id=468: 100%|██████████| 469/469 [00:15<00:00, 31.22it/s]

# Test set: Average loss: 0.0330, Accuracy: 9896/10000 (98.96%)

# loss=0.01883302442729473 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 31.49it/s]

# Test set: Average loss: 0.0306, Accuracy: 9897/10000 (98.97%)

# loss=0.025095218792557716 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 31.62it/s]

# Test set: Average loss: 0.0289, Accuracy: 9902/10000 (99.02%)

# loss=0.001917584682814777 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 31.56it/s]

# Test set: Average loss: 0.0316, Accuracy: 9898/10000 (98.98%)

# loss=0.022400440648198128 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 31.33it/s]

# Test set: Average loss: 0.0276, Accuracy: 9909/10000 (99.09%)

###################################################################################################


### Iteration2:

# Epochs: 15, BATCHSIZE = 128

# ----------------------------------------------------------------
#         Layer (type)               Output Shape         Param #
# ================================================================
#             Conv2d-1           [-1, 32, 28, 28]             320
#        BatchNorm2d-2           [-1, 32, 28, 28]              64
#          MaxPool2d-3           [-1, 32, 14, 14]               0
#             Conv2d-4           [-1, 64, 14, 14]          18,496
#        BatchNorm2d-5           [-1, 64, 14, 14]             128
#          Dropout2d-6           [-1, 64, 14, 14]               0
#          MaxPool2d-7             [-1, 64, 7, 7]               0
#             Conv2d-8             [-1, 64, 5, 5]          36,928
#        BatchNorm2d-9             [-1, 64, 5, 5]             128
#         Dropout2d-10             [-1, 64, 5, 5]               0
#            Conv2d-11             [-1, 64, 3, 3]          36,928
#       BatchNorm2d-12             [-1, 64, 3, 3]             128
#            Conv2d-13             [-1, 10, 1, 1]           5,770
# ================================================================
# Total params: 98,890
# Trainable params: 98,890
# Non-trainable params: 0

# loss=0.06406193226575851 batch_id=468: 100%|██████████| 469/469 [00:20<00:00, 23.25it/s]Epoch:  1


#  Test set: Average loss: 0.0419, Accuracy: 9875/10000 (98.75%)

# loss=0.028208402916789055 batch_id=468: 100%|██████████| 469/469 [00:16<00:00, 28.16it/s]Epoch:  2


#  Test set: Average loss: 0.0322, Accuracy: 9898/10000 (98.98%)

# loss=0.088231660425663 batch_id=468: 100%|██████████| 469/469 [00:16<00:00, 28.38it/s]Epoch:  3


#  Test set: Average loss: 0.0288, Accuracy: 9901/10000 (99.01%)

# loss=0.04493309184908867 batch_id=468: 100%|██████████| 469/469 [00:16<00:00, 28.20it/s]Epoch:  4


#  Test set: Average loss: 0.0273, Accuracy: 9905/10000 (99.05%)

# loss=0.056821998208761215 batch_id=468: 100%|██████████| 469/469 [00:17<00:00, 27.30it/s]Epoch:  5


#  Test set: Average loss: 0.0247, Accuracy: 9920/10000 (99.20%)

# loss=0.01396572683006525 batch_id=468: 100%|██████████| 469/469 [00:16<00:00, 28.31it/s]Epoch:  6


#  Test set: Average loss: 0.0220, Accuracy: 9926/10000 (99.26%)

# loss=0.03296106681227684 batch_id=468: 100%|██████████| 469/469 [00:16<00:00, 28.49it/s]Epoch:  7


#  Test set: Average loss: 0.0221, Accuracy: 9932/10000 (99.32%)

# loss=0.026116609573364258 batch_id=468: 100%|██████████| 469/469 [00:16<00:00, 27.73it/s]Epoch:  8


#  Test set: Average loss: 0.0246, Accuracy: 9920/10000 (99.20%)

# loss=0.028498271480202675 batch_id=468: 100%|██████████| 469/469 [00:16<00:00, 29.01it/s]Epoch:  9


#  Test set: Average loss: 0.0212, Accuracy: 9934/10000 (99.34%)

# loss=0.00737975537776947 batch_id=468: 100%|██████████| 469/469 [00:16<00:00, 28.71it/s]Epoch:  10


#  Test set: Average loss: 0.0217, Accuracy: 9932/10000 (99.32%)

# loss=0.0027402618434280157 batch_id=468: 100%|██████████| 469/469 [00:16<00:00, 28.25it/s]Epoch:  11


#  Test set: Average loss: 0.0213, Accuracy: 9930/10000 (99.30%)

# loss=0.04822007939219475 batch_id=468: 100%|██████████| 469/469 [00:16<00:00, 28.48it/s]Epoch:  12


#  Test set: Average loss: 0.0204, Accuracy: 9941/10000 (99.41%)

# loss=0.0029580360278487206 batch_id=468: 100%|██████████| 469/469 [00:16<00:00, 28.25it/s]Epoch:  13


#  Test set: Average loss: 0.0228, Accuracy: 9925/10000 (99.25%)

# loss=0.017925599589943886 batch_id=468: 100%|██████████| 469/469 [00:16<00:00, 28.41it/s]Epoch:  14


#  Test set: Average loss: 0.0210, Accuracy: 9930/10000 (99.30%)

# loss=0.028340427204966545 batch_id=468: 100%|██████████| 469/469 [00:16<00:00, 28.27it/s]Epoch:  15


#  Test set: Average loss: 0.0212, Accuracy: 9936/10000 (99.36%)



### Iteration3: 

# Epochs: 17, BATCHSIZE = 128

#         Layer (type)               Output Shape         Param #
# ================================================================
#             Conv2d-1            [-1, 4, 28, 28]              40
#        BatchNorm2d-2            [-1, 4, 28, 28]               8
#          MaxPool2d-3            [-1, 4, 14, 14]               0
#             Conv2d-4            [-1, 8, 14, 14]             296
#        BatchNorm2d-5            [-1, 8, 14, 14]              16
#          Dropout2d-6            [-1, 8, 14, 14]               0
#          MaxPool2d-7              [-1, 8, 7, 7]               0
#             Conv2d-8             [-1, 16, 7, 7]           1,168
#        BatchNorm2d-9             [-1, 16, 7, 7]              32
#            Linear-10                  [-1, 240]         188,400
#         Dropout2d-11                  [-1, 240]               0
#            Linear-12                   [-1, 10]           2,410
# ================================================================
# Total params: 192,370
# Trainable params: 192,370
# Non-trainable params: 0


# Epoch:  1
# --------------
#   0%|          | 0/469 [00:00<?, ?it/s]<ipython-input-18-cd0f03b322d7>:35: UserWarning: Implicit dimension choice for log_softmax has been deprecated. Change the call to include dim=X as an argument.
#   return F.log_softmax(x)
# loss=0.11563298851251602 batch_id=468: 100%|██████████| 469/469 [00:23<00:00, 19.99it/s]

# Test set: Average loss: 0.0996, Accuracy: 9683/10000 (96.83%)

# Epoch:  2
# --------------
# loss=0.154499813914299 batch_id=468: 100%|██████████| 469/469 [00:17<00:00, 27.04it/s]

# Test set: Average loss: 0.0672, Accuracy: 9777/10000 (97.77%)

# Epoch:  3
# --------------
# loss=0.06712274998426437 batch_id=468: 100%|██████████| 469/469 [00:16<00:00, 27.96it/s]

# Test set: Average loss: 0.0563, Accuracy: 9808/10000 (98.08%)

# Epoch:  4
# --------------
# loss=0.18289309740066528 batch_id=468: 100%|██████████| 469/469 [00:16<00:00, 28.44it/s]

# Test set: Average loss: 0.0520, Accuracy: 9830/10000 (98.30%)

# Epoch:  5
# --------------
# loss=0.1344272941350937 batch_id=468: 100%|██████████| 469/469 [00:17<00:00, 27.12it/s]

# Test set: Average loss: 0.0507, Accuracy: 9836/10000 (98.36%)

# Epoch:  6
# --------------
# loss=0.11444560438394547 batch_id=468: 100%|██████████| 469/469 [00:16<00:00, 28.49it/s]

# Test set: Average loss: 0.0427, Accuracy: 9851/10000 (98.51%)

# Epoch:  7
# --------------
# loss=0.14254280924797058 batch_id=468: 100%|██████████| 469/469 [00:16<00:00, 28.80it/s]

# Test set: Average loss: 0.0409, Accuracy: 9865/10000 (98.65%)

# Epoch:  8
# --------------
# loss=0.021893447265028954 batch_id=468: 100%|██████████| 469/469 [00:16<00:00, 27.69it/s]

# Test set: Average loss: 0.0416, Accuracy: 9854/10000 (98.54%)

# Epoch:  9
# --------------
# loss=0.026272214949131012 batch_id=468: 100%|██████████| 469/469 [00:17<00:00, 27.03it/s]

# Test set: Average loss: 0.0404, Accuracy: 9863/10000 (98.63%)

# Epoch:  10
# --------------
# loss=0.060075994580984116 batch_id=468: 100%|██████████| 469/469 [00:15<00:00, 29.54it/s]

# Test set: Average loss: 0.0411, Accuracy: 9856/10000 (98.56%)

# Epoch:  11
# --------------
# loss=0.07377823442220688 batch_id=468: 100%|██████████| 469/469 [00:16<00:00, 29.16it/s]

# Test set: Average loss: 0.0372, Accuracy: 9878/10000 (98.78%)

# Epoch:  12
# --------------
# loss=0.0055307913571596146 batch_id=468: 100%|██████████| 469/469 [00:15<00:00, 29.83it/s]

# Test set: Average loss: 0.0395, Accuracy: 9879/10000 (98.79%)

# Epoch:  13
# --------------
# loss=0.04446995258331299 batch_id=468: 100%|██████████| 469/469 [00:15<00:00, 30.45it/s]

# Test set: Average loss: 0.0359, Accuracy: 9878/10000 (98.78%)

# Epoch:  14
# --------------
# loss=0.029250210151076317 batch_id=468: 100%|██████████| 469/469 [00:15<00:00, 30.07it/s]

# Test set: Average loss: 0.0437, Accuracy: 9863/10000 (98.63%)

# Epoch:  15
# --------------
# loss=0.0585387647151947 batch_id=468: 100%|██████████| 469/469 [00:16<00:00, 28.66it/s]

# Test set: Average loss: 0.0395, Accuracy: 9871/10000 (98.71%)

# Epoch:  16
# --------------
# loss=0.10638707876205444 batch_id=468: 100%|██████████| 469/469 [00:15<00:00, 30.56it/s]

# Test set: Average loss: 0.0358, Accuracy: 9881/10000 (98.81%)

# Epoch:  17
# --------------
# loss=0.028931356966495514 batch_id=468: 100%|██████████| 469/469 [00:15<00:00, 29.99it/s]

# Test set: Average loss: 0.0360, Accuracy: 9885/10000 (98.85%)
