# Target

1. Use Max Pooling and 1x1 convolutions to make the model lighter and reduce overfitting. Trim one or two convolution layers if necessary.

2. Retain the basic structure/architecture of the model as far as possible.

# Results

Total Parameters: 9702

Best Training Accuracy: 99.34

Best Test Accuracy: 98.82

# Analysis

1. The model is still overfitting the data to some extent. 

2. The model is light (only ~9.7k parameters), but training accuracy has dropped because the model's capacity was reduced.

3. In the next notebook, we will use Batch Normalization (BN) to make backpropagation more efficient by ensuring that the inputs to each layer are normalized (mean 0 and std dev 1) before they are fed to that layer.

In [0]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.nn import Linear, ReLU, CrossEntropyLoss, Sequential, Conv2d, MaxPool2d, Module, Softmax, BatchNorm2d, Dropout
from torch.optim import Adam, SGD
%matplotlib inline
import matplotlib.pyplot as plt

In [0]:
# Train-phase pipeline: convert each image to a tensor, then normalise it with
# mean 0.1307 and std 0.3081. Normalize expects sequences, so both values are
# written as one-element tuples -- (0.1307,) rather than (0.1307).
# Resize / ColorJitter augmentation is not applied.
train_transforms = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ]
)

# Test-phase pipeline: the same two steps with the same constants.
test_transforms = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ]
)


In [4]:
# MNIST training and test splits, downloaded into ./data when missing; each
# split gets its own transform pipeline.
# NOTE(review): the names `train` and `test` are rebound to functions by the
# later `def train` / `def test` cell, so the DataLoaders have to be created
# before that cell runs.
train = datasets.MNIST(
    './data',
    download=True,
    train=True,
    transform=train_transforms,
)
test = datasets.MNIST(
    './data',
    download=True,
    train=False,
    transform=test_transforms,
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz



HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw
Processing...
Done!


In [5]:
SEED = 1

# Is a CUDA device visible to PyTorch?
cuda = torch.cuda.is_available()
print("CUDA Available?", cuda)

# Seed the RNGs for reproducibility (the CUDA generator as well when present).
torch.manual_seed(SEED)
if cuda:
    torch.cuda.manual_seed(SEED)

# DataLoader keyword arguments: larger batches, worker processes and pinned
# memory on GPU; a smaller plain configuration on CPU.
if cuda:
    dataloader_args = {
        "shuffle": True,
        "batch_size": 128,
        "num_workers": 4,
        "pin_memory": True,
    }
else:
    dataloader_args = {"shuffle": True, "batch_size": 64}

# Both loaders share the same arguments.
train_loader = torch.utils.data.DataLoader(train, **dataloader_args)
test_loader = torch.utils.data.DataLoader(test, **dataloader_args)

CUDA Available? True


In [6]:
class Net(nn.Module):
    """Small all-convolutional classifier that outputs 10 log-probabilities.

    Layer stack (every convolution is unpadded):
      * three 3x3 convolutions, 1 -> 12 -> 12 -> 16 channels
      * 2x2 max-pooling
      * a 1x1 convolution squeezing 16 -> 12 channels
      * three 3x3 convolutions, 12 -> 12 -> 12 -> 10 channels
      * a 5x5 convolution and a final 1x1 convolution, both 10 -> 10

    Each convolution except the last is followed by a ReLU. For a 1x28x28
    input the spatial size goes 28 -> 26 -> 24 -> 22 -> 11 (pool) -> 11 -> 9
    -> 7 -> 5 -> 1 -> 1, so the stack ends in one 10-vector per image.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.cnn_layers = Sequential(
            # Block 1: 3x3 feature extraction at full resolution.
            Conv2d(1, 12, kernel_size=3, padding=0),
            ReLU(inplace=True),

            Conv2d(12, 12, kernel_size=3, padding=0),
            ReLU(inplace=True),

            Conv2d(12, 16, kernel_size=3, padding=0),
            ReLU(inplace=True),

            # Transition: halve the resolution, then reduce channels with a 1x1 conv.
            MaxPool2d(kernel_size=2),

            Conv2d(16, 12, kernel_size=1, padding=0),
            ReLU(inplace=True),

            # Block 2: 3x3 convolutions on the pooled feature map.
            Conv2d(12, 12, kernel_size=3, padding=0),
            ReLU(inplace=True),

            Conv2d(12, 12, kernel_size=3, padding=0),
            ReLU(inplace=True),

            Conv2d(12, 10, kernel_size=3, padding=0),
            ReLU(inplace=True),

            # Head: the 5x5 conv covers the remaining 5x5 map (for 28x28 input),
            # and the last 1x1 conv produces the class scores (no ReLU after it).
            Conv2d(10, 10, kernel_size=5, padding=0),
            ReLU(inplace=True),

            Conv2d(10, 10, kernel_size=1)

        )

    def forward(self, x):
        """Return log-probabilities over the 10 classes.

        Args:
            x: image batch with one channel; a (N, 1, 28, 28) batch produces a
                (N, 10, 1, 1) feature map.

        Returns:
            Tensor of shape (N, 10) for 28x28 inputs, log-softmax normalised
            along the class dimension.
        """
        x = self.cnn_layers(x)
        # Flatten the (N, 10, 1, 1) feature map into (N, 10) class scores.
        x = x.view(-1, 10)
        # Pass dim explicitly: relying on the implicit dimension is deprecated
        # and emits a UserWarning on every forward pass.
        return F.log_softmax(x, dim=1)




In [7]:
!pip install torchsummary
from torchsummary import summary
# Choose the GPU when PyTorch can see one, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

# Build the network on that device and print torchsummary's per-layer table
# for a single-channel 28x28 input.
model = Net()
model = model.to(device)
summary(model, input_size=(1, 28, 28))

cuda
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 12, 26, 26]             120
              ReLU-2           [-1, 12, 26, 26]               0
            Conv2d-3           [-1, 12, 24, 24]           1,308
              ReLU-4           [-1, 12, 24, 24]               0
            Conv2d-5           [-1, 16, 22, 22]           1,744
              ReLU-6           [-1, 16, 22, 22]               0
         MaxPool2d-7           [-1, 16, 11, 11]               0
            Conv2d-8           [-1, 12, 11, 11]             204
              ReLU-9           [-1, 12, 11, 11]               0
           Conv2d-10             [-1, 12, 9, 9]           1,308
             ReLU-11             [-1, 12, 9, 9]               0
           Conv2d-12             [-1, 12, 7, 7]           1,308
             ReLU-13             [-1, 12, 7, 7]               0
           Conv2d-14             [



In [0]:
from tqdm import tqdm

# Metric histories: train() appends to the train_* lists once per batch,
# test() appends to the test_* lists once per call.
train_losses, test_losses = [], []
train_acc, test_acc = [], []

def train(model, device, train_loader, optimizer, epoch):
    """Run one training epoch, recording per-batch loss and running accuracy.

    Args:
        model: network to optimise. It is called as ``model(data)`` and its
            output is passed to ``F.nll_loss``, so it should return
            log-probabilities.
        device: device each batch is moved to before the forward pass.
        train_loader: iterable yielding ``(data, target)`` batches.
        optimizer: optimizer whose ``zero_grad()`` / ``step()`` apply the update.
        epoch: index of the current epoch; accepted for the caller but not
            used inside this function.

    Side effects:
        Appends one Python float per batch to the module-level ``train_losses``
        and the running accuracy (in percent) to ``train_acc``; updates the
        tqdm progress-bar description after every batch.
    """
    model.train()
    pbar = tqdm(train_loader)
    correct = 0
    processed = 0
    for batch_idx, (data, target) in enumerate(pbar):
        # Move the batch to the target device.
        data, target = data.to(device), target.to(device)

        # PyTorch accumulates gradients across backward() calls, so they must
        # be cleared before each new backward pass.
        optimizer.zero_grad()

        # Forward pass and loss.
        y_pred = model(data)
        loss = F.nll_loss(y_pred, target)

        # Backpropagation and parameter update.
        loss.backward()
        optimizer.step()

        # Record the loss as a plain float. Appending the tensor itself would
        # keep every batch's autograd graph (and device memory) alive and leave
        # the history full of tensors that cannot be plotted directly.
        loss_value = loss.item()
        train_losses.append(loss_value)

        # Running accuracy over the samples seen so far in this epoch.
        pred = y_pred.argmax(dim=1, keepdim=True)  # index of the max log-probability
        correct += pred.eq(target.view_as(pred)).sum().item()
        processed += len(data)
        running_acc = 100*correct/processed

        pbar.set_description(desc= f'Loss={loss_value} Batch_id={batch_idx} Accuracy={running_acc:0.2f}')
        train_acc.append(running_acc)

def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    test_losses.append(test_loss)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    
    test_acc.append(100. * correct / len(test_loader.dataset))

In [45]:
# Number of passes over the training set.
EPOCHS = 16

# Start from a freshly initialised network and optimise it with SGD + momentum.
model = Net()
model = model.to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# One training pass followed by one evaluation pass per epoch.
for epoch in range(EPOCHS):
    print("EPOCH:", epoch)
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)


  0%|          | 0/469 [00:00<?, ?it/s][A

EPOCH: 0



Loss=2.320674419403076 Batch_id=0 Accuracy=10.94:   0%|          | 0/469 [00:00<?, ?it/s][A
Loss=2.320674419403076 Batch_id=0 Accuracy=10.94:   0%|          | 1/469 [00:00<02:00,  3.90it/s][A
Loss=2.304607391357422 Batch_id=1 Accuracy=9.77:   0%|          | 1/469 [00:00<02:00,  3.90it/s] [A
Loss=2.288050413131714 Batch_id=2 Accuracy=9.90:   0%|          | 1/469 [00:00<02:00,  3.90it/s][A
Loss=2.31396222114563 Batch_id=3 Accuracy=9.38:   0%|          | 1/469 [00:00<02:00,  3.90it/s] [A
Loss=2.30914568901062 Batch_id=4 Accuracy=9.69:   0%|          | 1/469 [00:00<02:00,  3.90it/s][A
Loss=2.3497519493103027 Batch_id=5 Accuracy=9.64:   0%|          | 1/469 [00:00<02:00,  3.90it/s][A
Loss=2.3338422775268555 Batch_id=6 Accuracy=10.16:   0%|          | 1/469 [00:00<02:00,  3.90it/s][A
Loss=2.3338422775268555 Batch_id=6 Accuracy=10.16:   1%|▏         | 7/469 [00:00<01:25,  5.39it/s][A
Loss=2.3192243576049805 Batch_id=7 Accuracy=9.96:   1%|▏         | 7/469 [00:00<01:25,  5.39it/s] [


Test set: Average loss: 2.3008, Accuracy: 1135/10000 (11.35%)

EPOCH: 1



Loss=2.2999062538146973 Batch_id=0 Accuracy=7.81:   0%|          | 0/469 [00:00<?, ?it/s][A
Loss=2.2999062538146973 Batch_id=0 Accuracy=7.81:   0%|          | 1/469 [00:00<01:36,  4.86it/s][A
Loss=2.3123087882995605 Batch_id=1 Accuracy=8.20:   0%|          | 1/469 [00:00<01:36,  4.86it/s][A
Loss=2.2979416847229004 Batch_id=2 Accuracy=10.42:   0%|          | 1/469 [00:00<01:36,  4.86it/s][A
Loss=2.3112692832946777 Batch_id=3 Accuracy=10.35:   0%|          | 1/469 [00:00<01:36,  4.86it/s][A
Loss=2.3031575679779053 Batch_id=4 Accuracy=9.69:   0%|          | 1/469 [00:00<01:36,  4.86it/s] [A
Loss=2.3038055896759033 Batch_id=5 Accuracy=9.90:   0%|          | 1/469 [00:00<01:36,  4.86it/s][A
Loss=2.3038055896759033 Batch_id=5 Accuracy=9.90:   1%|▏         | 6/469 [00:00<01:09,  6.64it/s][A
Loss=2.306124210357666 Batch_id=6 Accuracy=9.93:   1%|▏         | 6/469 [00:00<01:09,  6.64it/s] [A
Loss=2.3072245121002197 Batch_id=7 Accuracy=9.77:   1%|▏         | 6/469 [00:00<01:09,  6.64it/


Test set: Average loss: 0.3055, Accuracy: 9043/10000 (90.43%)

EPOCH: 2



Loss=0.29811325669288635 Batch_id=0 Accuracy=90.62:   0%|          | 0/469 [00:00<?, ?it/s][A
Loss=0.29811325669288635 Batch_id=0 Accuracy=90.62:   0%|          | 1/469 [00:00<01:32,  5.05it/s][A
Loss=0.2144494354724884 Batch_id=1 Accuracy=91.80:   0%|          | 1/469 [00:00<01:32,  5.05it/s] [A
Loss=0.48979634046554565 Batch_id=2 Accuracy=90.89:   0%|          | 1/469 [00:00<01:32,  5.05it/s][A
Loss=0.2618245482444763 Batch_id=3 Accuracy=91.60:   0%|          | 1/469 [00:00<01:32,  5.05it/s] [A
Loss=0.34481120109558105 Batch_id=4 Accuracy=91.09:   0%|          | 1/469 [00:00<01:32,  5.05it/s][A
Loss=0.19223013520240784 Batch_id=5 Accuracy=91.67:   0%|          | 1/469 [00:00<01:32,  5.05it/s][A
Loss=0.2857106328010559 Batch_id=6 Accuracy=90.96:   0%|          | 1/469 [00:00<01:32,  5.05it/s] [A
Loss=0.2857106328010559 Batch_id=6 Accuracy=90.96:   1%|▏         | 7/469 [00:00<01:06,  6.91it/s][A
Loss=0.38140878081321716 Batch_id=7 Accuracy=90.92:   1%|▏         | 7/469 [00:00


Test set: Average loss: 0.1067, Accuracy: 9679/10000 (96.79%)

EPOCH: 3



Loss=0.06967125833034515 Batch_id=0 Accuracy=97.66:   0%|          | 0/469 [00:00<?, ?it/s][A
Loss=0.06967125833034515 Batch_id=0 Accuracy=97.66:   0%|          | 1/469 [00:00<01:32,  5.07it/s][A
Loss=0.07980763912200928 Batch_id=1 Accuracy=97.27:   0%|          | 1/469 [00:00<01:32,  5.07it/s][A
Loss=0.10282804071903229 Batch_id=2 Accuracy=96.61:   0%|          | 1/469 [00:00<01:32,  5.07it/s][A
Loss=0.17793923616409302 Batch_id=3 Accuracy=96.29:   0%|          | 1/469 [00:00<01:32,  5.07it/s][A
Loss=0.11412159353494644 Batch_id=4 Accuracy=96.56:   0%|          | 1/469 [00:00<01:32,  5.07it/s][A
Loss=0.0922795981168747 Batch_id=5 Accuracy=96.48:   0%|          | 1/469 [00:00<01:32,  5.07it/s] [A
Loss=0.0922795981168747 Batch_id=5 Accuracy=96.48:   1%|▏         | 6/469 [00:00<01:06,  6.93it/s][A
Loss=0.07051258534193039 Batch_id=6 Accuracy=96.54:   1%|▏         | 6/469 [00:00<01:06,  6.93it/s][A
Loss=0.08445810526609421 Batch_id=7 Accuracy=96.58:   1%|▏         | 6/469 [00:00


Test set: Average loss: 0.0774, Accuracy: 9759/10000 (97.59%)

EPOCH: 4



Loss=0.032734133303165436 Batch_id=0 Accuracy=99.22:   0%|          | 0/469 [00:00<?, ?it/s][A
Loss=0.032734133303165436 Batch_id=0 Accuracy=99.22:   0%|          | 1/469 [00:00<01:18,  6.00it/s][A
Loss=0.09637035429477692 Batch_id=1 Accuracy=98.05:   0%|          | 1/469 [00:00<01:18,  6.00it/s] [A
Loss=0.13498744368553162 Batch_id=2 Accuracy=97.92:   0%|          | 1/469 [00:00<01:18,  6.00it/s][A
Loss=0.13498744368553162 Batch_id=2 Accuracy=97.92:   1%|          | 3/469 [00:00<01:01,  7.57it/s][A
Loss=0.3127261996269226 Batch_id=3 Accuracy=96.48:   1%|          | 3/469 [00:00<01:01,  7.57it/s] [A
Loss=0.04170522838830948 Batch_id=4 Accuracy=96.88:   1%|          | 3/469 [00:00<01:01,  7.57it/s][A
Loss=0.04124944284558296 Batch_id=5 Accuracy=97.27:   1%|          | 3/469 [00:00<01:01,  7.57it/s][A
Loss=0.03265538811683655 Batch_id=6 Accuracy=97.54:   1%|          | 3/469 [00:00<01:01,  7.57it/s][A
Loss=0.11529511213302612 Batch_id=7 Accuracy=97.46:   1%|          | 3/469 [0


Test set: Average loss: 0.0659, Accuracy: 9789/10000 (97.89%)

EPOCH: 5



Loss=0.102828249335289 Batch_id=0 Accuracy=96.09:   0%|          | 0/469 [00:00<?, ?it/s][A
Loss=0.102828249335289 Batch_id=0 Accuracy=96.09:   0%|          | 1/469 [00:00<01:39,  4.68it/s][A
Loss=0.018935494124889374 Batch_id=1 Accuracy=98.05:   0%|          | 1/469 [00:00<01:39,  4.68it/s][A
Loss=0.050927937030792236 Batch_id=2 Accuracy=97.92:   0%|          | 1/469 [00:00<01:39,  4.68it/s][A
Loss=0.03970654681324959 Batch_id=3 Accuracy=98.05:   0%|          | 1/469 [00:00<01:39,  4.68it/s] [A
Loss=0.06469114869832993 Batch_id=4 Accuracy=97.81:   0%|          | 1/469 [00:00<01:39,  4.68it/s][A
Loss=0.04959012195467949 Batch_id=5 Accuracy=97.92:   0%|          | 1/469 [00:00<01:39,  4.68it/s][A
Loss=0.04959012195467949 Batch_id=5 Accuracy=97.92:   1%|▏         | 6/469 [00:00<01:12,  6.41it/s][A
Loss=0.011589940637350082 Batch_id=6 Accuracy=98.21:   1%|▏         | 6/469 [00:00<01:12,  6.41it/s][A
Loss=0.1427420973777771 Batch_id=7 Accuracy=97.85:   1%|▏         | 6/469 [00:00


Test set: Average loss: 0.0560, Accuracy: 9827/10000 (98.27%)

EPOCH: 6



Loss=0.13757720589637756 Batch_id=0 Accuracy=95.31:   0%|          | 0/469 [00:00<?, ?it/s][A
Loss=0.13757720589637756 Batch_id=0 Accuracy=95.31:   0%|          | 1/469 [00:00<01:49,  4.28it/s][A
Loss=0.06492996215820312 Batch_id=1 Accuracy=96.88:   0%|          | 1/469 [00:00<01:49,  4.28it/s][A
Loss=0.008328258991241455 Batch_id=2 Accuracy=97.92:   0%|          | 1/469 [00:00<01:49,  4.28it/s][A
Loss=0.03531373664736748 Batch_id=3 Accuracy=98.44:   0%|          | 1/469 [00:00<01:49,  4.28it/s] [A
Loss=0.041655249893665314 Batch_id=4 Accuracy=98.59:   0%|          | 1/469 [00:00<01:49,  4.28it/s][A
Loss=0.03379683569073677 Batch_id=5 Accuracy=98.70:   0%|          | 1/469 [00:00<01:49,  4.28it/s] [A
Loss=0.08658552169799805 Batch_id=6 Accuracy=98.55:   0%|          | 1/469 [00:00<01:49,  4.28it/s][A
Loss=0.07680564373731613 Batch_id=7 Accuracy=98.34:   0%|          | 1/469 [00:00<01:49,  4.28it/s][A
Loss=0.07680564373731613 Batch_id=7 Accuracy=98.34:   2%|▏         | 8/469 [


Test set: Average loss: 0.0559, Accuracy: 9803/10000 (98.03%)

EPOCH: 7



Loss=0.05721643939614296 Batch_id=0 Accuracy=98.44:   0%|          | 0/469 [00:00<?, ?it/s][A
Loss=0.05721643939614296 Batch_id=0 Accuracy=98.44:   0%|          | 1/469 [00:00<01:30,  5.19it/s][A
Loss=0.059566278010606766 Batch_id=1 Accuracy=98.44:   0%|          | 1/469 [00:00<01:30,  5.19it/s][A
Loss=0.04171762615442276 Batch_id=2 Accuracy=98.70:   0%|          | 1/469 [00:00<01:30,  5.19it/s] [A
Loss=0.01604946330189705 Batch_id=3 Accuracy=98.83:   0%|          | 1/469 [00:00<01:30,  5.19it/s][A
Loss=0.031332869082689285 Batch_id=4 Accuracy=98.91:   0%|          | 1/469 [00:00<01:30,  5.19it/s][A
Loss=0.06770755350589752 Batch_id=5 Accuracy=98.57:   0%|          | 1/469 [00:00<01:30,  5.19it/s] [A
Loss=0.09128998219966888 Batch_id=6 Accuracy=98.33:   0%|          | 1/469 [00:00<01:30,  5.19it/s][A
Loss=0.09128998219966888 Batch_id=6 Accuracy=98.33:   1%|▏         | 7/469 [00:00<01:04,  7.13it/s][A
Loss=0.05674198642373085 Batch_id=7 Accuracy=98.34:   1%|▏         | 7/469 [


Test set: Average loss: 0.0411, Accuracy: 9866/10000 (98.66%)

EPOCH: 8



Loss=0.024952005594968796 Batch_id=0 Accuracy=99.22:   0%|          | 0/469 [00:00<?, ?it/s][A
Loss=0.024952005594968796 Batch_id=0 Accuracy=99.22:   0%|          | 1/469 [00:00<01:35,  4.89it/s][A
Loss=0.018219593912363052 Batch_id=1 Accuracy=99.61:   0%|          | 1/469 [00:00<01:35,  4.89it/s][A
Loss=0.02014371007680893 Batch_id=2 Accuracy=99.48:   0%|          | 1/469 [00:00<01:35,  4.89it/s] [A
Loss=0.09014741331338882 Batch_id=3 Accuracy=99.02:   0%|          | 1/469 [00:00<01:35,  4.89it/s][A
Loss=0.026379894465208054 Batch_id=4 Accuracy=98.91:   0%|          | 1/469 [00:00<01:35,  4.89it/s][A
Loss=0.043675195425748825 Batch_id=5 Accuracy=98.83:   0%|          | 1/469 [00:00<01:35,  4.89it/s][A
Loss=0.10563286393880844 Batch_id=6 Accuracy=98.66:   0%|          | 1/469 [00:00<01:35,  4.89it/s] [A
Loss=0.10563286393880844 Batch_id=6 Accuracy=98.66:   1%|▏         | 7/469 [00:00<01:08,  6.74it/s][A
Loss=0.017159417271614075 Batch_id=7 Accuracy=98.73:   1%|▏         | 7/4


Test set: Average loss: 0.0413, Accuracy: 9871/10000 (98.71%)

EPOCH: 9



Loss=0.04893290251493454 Batch_id=0 Accuracy=98.44:   0%|          | 0/469 [00:00<?, ?it/s][A
Loss=0.04893290251493454 Batch_id=0 Accuracy=98.44:   0%|          | 1/469 [00:00<01:30,  5.15it/s][A
Loss=0.027132298797369003 Batch_id=1 Accuracy=98.83:   0%|          | 1/469 [00:00<01:30,  5.15it/s][A
Loss=0.029180623590946198 Batch_id=2 Accuracy=98.96:   0%|          | 1/469 [00:00<01:30,  5.15it/s][A
Loss=0.010658703744411469 Batch_id=3 Accuracy=99.22:   0%|          | 1/469 [00:00<01:30,  5.15it/s][A
Loss=0.021364066749811172 Batch_id=4 Accuracy=99.38:   0%|          | 1/469 [00:00<01:30,  5.15it/s][A
Loss=0.021364066749811172 Batch_id=4 Accuracy=99.38:   1%|          | 5/469 [00:00<01:06,  6.97it/s][A
Loss=0.0352032333612442 Batch_id=5 Accuracy=99.22:   1%|          | 5/469 [00:00<01:06,  6.97it/s]  [A
Loss=0.026267535984516144 Batch_id=6 Accuracy=99.11:   1%|          | 5/469 [00:00<01:06,  6.97it/s][A
Loss=0.0063598379492759705 Batch_id=7 Accuracy=99.22:   1%|          | 5/


Test set: Average loss: 0.0448, Accuracy: 9856/10000 (98.56%)

EPOCH: 10



Loss=0.01601279526948929 Batch_id=0 Accuracy=99.22:   0%|          | 0/469 [00:00<?, ?it/s][A
Loss=0.01601279526948929 Batch_id=0 Accuracy=99.22:   0%|          | 1/469 [00:00<01:19,  5.90it/s][A
Loss=0.0302603617310524 Batch_id=1 Accuracy=99.22:   0%|          | 1/469 [00:00<01:19,  5.90it/s] [A
Loss=0.015981368720531464 Batch_id=2 Accuracy=99.48:   0%|          | 1/469 [00:00<01:19,  5.90it/s][A
Loss=0.015981368720531464 Batch_id=2 Accuracy=99.48:   1%|          | 3/469 [00:00<01:03,  7.35it/s][A
Loss=0.07141809165477753 Batch_id=3 Accuracy=99.02:   1%|          | 3/469 [00:00<01:03,  7.35it/s] [A
Loss=0.0873730331659317 Batch_id=4 Accuracy=98.59:   1%|          | 3/469 [00:00<01:03,  7.35it/s] [A
Loss=0.0017649084329605103 Batch_id=5 Accuracy=98.83:   1%|          | 3/469 [00:00<01:03,  7.35it/s][A
Loss=0.04594055563211441 Batch_id=6 Accuracy=98.66:   1%|          | 3/469 [00:00<01:03,  7.35it/s]  [A
Loss=0.003451809287071228 Batch_id=7 Accuracy=98.83:   1%|          | 3/4


Test set: Average loss: 0.0363, Accuracy: 9877/10000 (98.77%)

EPOCH: 11



Loss=0.015168983489274979 Batch_id=0 Accuracy=100.00:   0%|          | 0/469 [00:00<?, ?it/s][A
Loss=0.015168983489274979 Batch_id=0 Accuracy=100.00:   0%|          | 1/469 [00:00<01:36,  4.83it/s][A
Loss=0.08076989650726318 Batch_id=1 Accuracy=98.83:   0%|          | 1/469 [00:00<01:36,  4.83it/s]  [A
Loss=0.03282416984438896 Batch_id=2 Accuracy=98.70:   0%|          | 1/469 [00:00<01:36,  4.83it/s][A
Loss=0.020934604108333588 Batch_id=3 Accuracy=98.83:   0%|          | 1/469 [00:00<01:36,  4.83it/s][A
Loss=0.005781400948762894 Batch_id=4 Accuracy=99.06:   0%|          | 1/469 [00:00<01:36,  4.83it/s][A
Loss=0.00718432292342186 Batch_id=5 Accuracy=99.22:   0%|          | 1/469 [00:00<01:36,  4.83it/s] [A
Loss=0.00718432292342186 Batch_id=5 Accuracy=99.22:   1%|▏         | 6/469 [00:00<01:10,  6.60it/s][A
Loss=0.03112085908651352 Batch_id=6 Accuracy=99.22:   1%|▏         | 6/469 [00:00<01:10,  6.60it/s][A
Loss=0.008790761232376099 Batch_id=7 Accuracy=99.32:   1%|▏         | 6


Test set: Average loss: 0.0471, Accuracy: 9853/10000 (98.53%)

EPOCH: 12



Loss=0.021938923746347427 Batch_id=0 Accuracy=99.22:   0%|          | 0/469 [00:00<?, ?it/s][A
Loss=0.021938923746347427 Batch_id=0 Accuracy=99.22:   0%|          | 1/469 [00:00<01:34,  4.93it/s][A
Loss=0.01818368211388588 Batch_id=1 Accuracy=99.22:   0%|          | 1/469 [00:00<01:34,  4.93it/s] [A
Loss=0.03133011236786842 Batch_id=2 Accuracy=98.96:   0%|          | 1/469 [00:00<01:34,  4.93it/s][A
Loss=0.007703520357608795 Batch_id=3 Accuracy=99.22:   0%|          | 1/469 [00:00<01:34,  4.93it/s][A
Loss=0.04859821870923042 Batch_id=4 Accuracy=99.06:   0%|          | 1/469 [00:00<01:34,  4.93it/s] [A
Loss=0.05785708501935005 Batch_id=5 Accuracy=98.83:   0%|          | 1/469 [00:00<01:34,  4.93it/s][A
Loss=0.05785708501935005 Batch_id=5 Accuracy=98.83:   1%|▏         | 6/469 [00:00<01:08,  6.76it/s][A
Loss=0.07396037876605988 Batch_id=6 Accuracy=98.66:   1%|▏         | 6/469 [00:00<01:08,  6.76it/s][A
Loss=0.024106465280056 Batch_id=7 Accuracy=98.73:   1%|▏         | 6/469 [0


Test set: Average loss: 0.0374, Accuracy: 9869/10000 (98.69%)

EPOCH: 13



Loss=0.013070318847894669 Batch_id=0 Accuracy=100.00:   0%|          | 0/469 [00:00<?, ?it/s][A
Loss=0.013070318847894669 Batch_id=0 Accuracy=100.00:   0%|          | 1/469 [00:00<01:30,  5.19it/s][A
Loss=0.012456268072128296 Batch_id=1 Accuracy=99.61:   0%|          | 1/469 [00:00<01:30,  5.19it/s] [A
Loss=0.029965784400701523 Batch_id=2 Accuracy=98.96:   0%|          | 1/469 [00:00<01:30,  5.19it/s][A
Loss=0.03075578808784485 Batch_id=3 Accuracy=98.83:   0%|          | 1/469 [00:00<01:30,  5.19it/s] [A
Loss=0.050547026097774506 Batch_id=4 Accuracy=98.91:   0%|          | 1/469 [00:00<01:30,  5.19it/s][A
Loss=0.03348994255065918 Batch_id=5 Accuracy=98.83:   0%|          | 1/469 [00:00<01:30,  5.19it/s] [A
Loss=0.021883301436901093 Batch_id=6 Accuracy=98.88:   0%|          | 1/469 [00:00<01:30,  5.19it/s][A
Loss=0.021883301436901093 Batch_id=6 Accuracy=98.88:   1%|▏         | 7/469 [00:00<01:04,  7.12it/s][A
Loss=0.05008694529533386 Batch_id=7 Accuracy=98.83:   1%|▏         |


Test set: Average loss: 0.0336, Accuracy: 9881/10000 (98.81%)

EPOCH: 14



Loss=0.015659045428037643 Batch_id=0 Accuracy=100.00:   0%|          | 0/469 [00:00<?, ?it/s][A
Loss=0.015659045428037643 Batch_id=0 Accuracy=100.00:   0%|          | 1/469 [00:00<01:51,  4.21it/s][A
Loss=0.057638827711343765 Batch_id=1 Accuracy=99.22:   0%|          | 1/469 [00:00<01:51,  4.21it/s] [A
Loss=0.045028459280729294 Batch_id=2 Accuracy=99.22:   0%|          | 1/469 [00:00<01:51,  4.21it/s][A
Loss=0.005144640803337097 Batch_id=3 Accuracy=99.41:   0%|          | 1/469 [00:00<01:51,  4.21it/s][A
Loss=0.014458883553743362 Batch_id=4 Accuracy=99.53:   0%|          | 1/469 [00:00<01:51,  4.21it/s][A
Loss=0.006225027143955231 Batch_id=5 Accuracy=99.61:   0%|          | 1/469 [00:00<01:51,  4.21it/s][A
Loss=0.006225027143955231 Batch_id=5 Accuracy=99.61:   1%|▏         | 6/469 [00:00<01:19,  5.79it/s][A
Loss=0.017695866525173187 Batch_id=6 Accuracy=99.55:   1%|▏         | 6/469 [00:00<01:19,  5.79it/s][A
Loss=0.010129690170288086 Batch_id=7 Accuracy=99.61:   1%|▏         


Test set: Average loss: 0.0401, Accuracy: 9879/10000 (98.79%)

EPOCH: 15



Loss=0.006335049867630005 Batch_id=0 Accuracy=100.00:   0%|          | 0/469 [00:00<?, ?it/s][A
Loss=0.006335049867630005 Batch_id=0 Accuracy=100.00:   0%|          | 1/469 [00:00<01:43,  4.54it/s][A
Loss=0.015406109392642975 Batch_id=1 Accuracy=99.61:   0%|          | 1/469 [00:00<01:43,  4.54it/s] [A
Loss=0.046671491116285324 Batch_id=2 Accuracy=99.48:   0%|          | 1/469 [00:00<01:43,  4.54it/s][A
Loss=0.026129480451345444 Batch_id=3 Accuracy=99.61:   0%|          | 1/469 [00:00<01:43,  4.54it/s][A
Loss=0.004308417439460754 Batch_id=4 Accuracy=99.69:   0%|          | 1/469 [00:00<01:43,  4.54it/s][A
Loss=0.047538936138153076 Batch_id=5 Accuracy=99.61:   0%|          | 1/469 [00:00<01:43,  4.54it/s][A
Loss=0.047538936138153076 Batch_id=5 Accuracy=99.61:   1%|▏         | 6/469 [00:00<01:14,  6.24it/s][A
Loss=0.009505793452262878 Batch_id=6 Accuracy=99.67:   1%|▏         | 6/469 [00:00<01:14,  6.24it/s][A
Loss=0.03277595341205597 Batch_id=7 Accuracy=99.41:   1%|▏         |


Test set: Average loss: 0.0408, Accuracy: 9882/10000 (98.82%)



In [1]:
# Plot the recorded histories on a 2x2 grid: losses on the top row, accuracies
# on the bottom row; training on the left, test on the right.
# This cell needs the imports cell and the training cell to have run in the
# current kernel session (plt, train_losses, train_acc, test_losses, test_acc).
fig, axs = plt.subplots(2,2,figsize=(15,10))

# float() makes every recorded loss a plain Python number. This keeps the plot
# working even if the history holds 0-d tensors (possibly on the GPU or
# attached to an autograd graph), which matplotlib cannot convert itself.
axs[0, 0].plot([float(batch_loss) for batch_loss in train_losses])
axs[0, 0].set_title("Training Loss")
axs[0, 0].set_xlabel("Batch")
axs[0, 0].set_ylabel("Loss")

axs[1, 0].plot(train_acc)
axs[1, 0].set_title("Training Accuracy")
axs[1, 0].set_xlabel("Batch")
axs[1, 0].set_ylabel("Accuracy (%)")

axs[0, 1].plot([float(epoch_loss) for epoch_loss in test_losses])
axs[0, 1].set_title("Test Loss")
axs[0, 1].set_xlabel("Epoch")
axs[0, 1].set_ylabel("Loss")

axs[1, 1].plot(test_acc)
axs[1, 1].set_title("Test Accuracy")
axs[1, 1].set_xlabel("Epoch")
axs[1, 1].set_ylabel("Accuracy (%)")

NameError: ignored