<a href="https://colab.research.google.com/github/peeyushsinghal/ERA/blob/main/S7/ERA_S7_6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms



In [2]:
!pip install -U albumentations

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [3]:


import albumentations as A
import numpy as np
from albumentations.pytorch.transforms import ToTensorV2

def apply_transforms(mean, std_dev):
    """Build the training and evaluation image-preprocessing callables.

    Args:
        mean: Per-channel mean forwarded to ``A.Normalize``.
        std_dev: Per-channel standard deviation forwarded to ``A.Normalize``.

    Returns:
        A ``(train_fn, test_fn)`` tuple. Each callable takes a single image
        (anything ``np.array`` accepts), runs the corresponding Albumentations
        pipeline on it and returns the pipeline's ``"image"`` entry, i.e. the
        output of ``ToTensorV2``.
    """
    # Training pipeline: light random augmentation, then normalisation and
    # conversion to a tensor.
    train_transforms = A.Compose([
        A.ElasticTransform(p=0.1, alpha=120, sigma=12),
        A.Rotate(limit=8, p=0.99),
        A.GaussNoise(p=0.2),
        # p=1.0 applies the transform on every call; it replaces the deprecated
        # `always_apply=True` flag, which newer Albumentations releases reject.
        A.Normalize(mean=mean, std=std_dev, p=1.0),
        ToTensorV2(),
    ])

    # Evaluation pipeline: deterministic, normalisation and tensor conversion only.
    test_transforms = A.Compose([
        A.Normalize(mean=mean, std=std_dev, p=1.0),
        ToTensorV2(),
    ])

    def train_fn(img):
        # Albumentations works on NumPy arrays passed by keyword and returns a dict.
        return train_transforms(image=np.array(img))["image"]

    def test_fn(img):
        return test_transforms(image=np.array(img))["image"]

    return train_fn, test_fn


In [4]:
class Net(nn.Module):
    """Small all-convolutional classifier: 1x28x28 image in, 10 log-probabilities out.

    Pipeline: convblock1 -> transitionblock1 (stride 2) -> convblock2 ->
    transitionblock2 (stride 2) -> convblock3 -> 5x5 average pool -> log-softmax.
    Every convolution is bias-free; all but the last are followed by
    ReLU -> BatchNorm2d -> Dropout. The size / receptive-field (RF) / jump notes
    next to each layer assume a 28x28 input.
    """

    @staticmethod
    def _conv_unit(in_channels, out_channels, kernel_size, stride=1, padding=1, dropout=0.1):
        """Return the layers [Conv2d(bias=False), ReLU, BatchNorm2d, Dropout] as a list."""
        return [
            nn.Conv2d(in_channels, out_channels, kernel_size,
                      stride=stride, padding=padding, bias=False),
            nn.ReLU(),
            nn.BatchNorm2d(out_channels),
            nn.Dropout(dropout),
        ]

    def __init__(self):
        super(Net, self).__init__()

        dropout_value = 0.1
        unit = Net._conv_unit

        # Conv Block 1
        self.convblock1 = nn.Sequential(
            *unit(1, 8, 5, dropout=dropout_value),    # in 28, out 26, RF 5
            *unit(8, 16, 3, dropout=dropout_value),   # in 26, out 26, RF 7, jump 1 -> 1
        )

        # Transition Block 1: strided convolution halves the spatial size.
        self.transitionblock1 = nn.Sequential(
            *unit(16, 8, 3, stride=2, dropout=dropout_value),   # in 26, out 13, RF 9, jump 1 -> 2
        )

        # Conv Block 2
        self.convblock2 = nn.Sequential(
            *unit(8, 8, 3, dropout=dropout_value),    # in 13, out 13, RF 13, jump 2 -> 2
            *unit(8, 16, 3, dropout=dropout_value),   # in 13, out 13, RF 17, jump 2 -> 2
        )

        # Transition Block 2: second strided convolution.
        self.transitionblock2 = nn.Sequential(
            *unit(16, 16, 3, stride=2, dropout=dropout_value),  # in 13, out 7, RF 21, jump 2 -> 4
        )

        # Conv Block 3: un-padded conv mapping straight to the 10 class channels
        # (no activation / normalisation / dropout after it).
        self.convblock3 = nn.Sequential(
            nn.Conv2d(16, 10, 3, stride=1, bias=False),  # in 7, out 5, RF 29, jump 4 -> 4
        )
        self.gap = nn.AvgPool2d(5)  # 10x5x5 -> 10x1x1

    def forward(self, x):
        """Return per-class log-probabilities, one row of 10 per input image."""
        x = self.convblock1(x)
        x = self.transitionblock1(x)
        x = self.convblock2(x)
        x = self.transitionblock2(x)
        x = self.convblock3(x)
        x = self.gap(x)
        # Drop the 1x1 spatial dimensions so each row holds the 10 class scores.
        x = x.view(-1, 10)
        return F.log_softmax(x, dim=-1)

In [5]:
# Select the compute device: CUDA when PyTorch can see a GPU, otherwise the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")

# Build the network on that device; leaving `model` as the cell's last expression
# makes the notebook display the module tree.
model = Net()
model = model.to(device)
model

Net(
  (convblock1): Sequential(
    (0): Conv2d(1, 8, kernel_size=(5, 5), stride=(1, 1), padding=(1, 1), bias=False)
    (1): ReLU()
    (2): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Dropout(p=0.1, inplace=False)
    (4): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (5): ReLU()
    (6): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): Dropout(p=0.1, inplace=False)
  )
  (transitionblock1): Sequential(
    (0): Conv2d(16, 8, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): ReLU()
    (2): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Dropout(p=0.1, inplace=False)
  )
  (convblock2): Sequential(
    (0): Conv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): ReLU()
    (2): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Dropout(p=0.

In [6]:


torch.manual_seed(1)
batch_size = 128

# Normalisation constants handed to the Albumentations pipelines
# (presumably the usual MNIST pixel mean / std -- confirm if the dataset changes).
mean, std_dev = (0.1307,), (0.3081,)
train_transforms, test_transforms = apply_transforms(mean, std_dev)

# Worker process and pinned memory are only requested when a GPU is in use.
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=train_transforms),
    batch_size=batch_size, shuffle=True, **kwargs)

# Evaluation data is read in a fixed order: shuffling does not change the metrics
# and would only draw from the global torch RNG on every pass over the loader.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, download=True,
                   transform=test_transforms),
    batch_size=batch_size, shuffle=False, **kwargs)


In [7]:
# Sanity check: fetch one batch from the evaluation loader and display the shape
# of its first element (the image tensor).
next(iter(test_loader))[0].size()

torch.Size([128, 1, 28, 28])

In [8]:
from tqdm import tqdm

def train(model, device, train_loader, optimizer, epoch):
    """Train `model` for one pass over `train_loader`.

    Appends to the notebook-level history lists, which must exist before the call:
    `train_losses` receives one Python float per batch, and `train_acc` receives the
    running accuracy (percent) after each batch.

    Args:
        model: Network whose output is fed to ``F.nll_loss`` (i.e. log-probabilities).
        device: Device the batches are moved to before the forward pass.
        train_loader: Iterable yielding ``(data, target)`` batches.
        optimizer: Optimizer updating `model`'s parameters.
        epoch: Current epoch number; accepted for the caller's convenience, unused here.
    """
    model.train()
    pbar = tqdm(train_loader)
    correct = 0
    processed = 0
    for batch_idx, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)

        # PyTorch accumulates gradients across backward passes, so clear them
        # before computing this batch's gradients.
        optimizer.zero_grad()

        y_pred = model(data)

        loss = F.nll_loss(y_pred, target)
        # Record a plain float, not the tensor: keeping the tensor would hold on to
        # device memory for every batch and make the history awkward to plot.
        loss_value = loss.item()
        train_losses.append(loss_value)

        loss.backward()
        optimizer.step()

        # Running accuracy over the batches seen so far in this epoch.
        pred = y_pred.argmax(dim=1, keepdim=True)  # index of the max log-probability
        correct += pred.eq(target.view_as(pred)).sum().item()
        processed += len(data)

        pbar.set_description(desc= f'Loss={loss_value} Batch_id={batch_idx} Accuracy={100*correct/processed:0.2f}')
        train_acc.append(100*correct/processed)

def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    test_losses.append(test_loss)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

    test_acc.append(100. * correct / len(test_loader.dataset))
    return (correct / len(test_loader.dataset))

In [9]:
!pip install torchsummary
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)
summary(model, input_size=(1, 28, 28))

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 26, 26]             200
              ReLU-2            [-1, 8, 26, 26]               0
       BatchNorm2d-3            [-1, 8, 26, 26]              16
           Dropout-4            [-1, 8, 26, 26]               0
            Conv2d-5           [-1, 16, 26, 26]           1,152
              ReLU-6           [-1, 16, 26, 26]               0
       BatchNorm2d-7           [-1, 16, 26, 26]              32
           Dropout-8           [-1, 16, 26, 26]               0
            Conv2d-9            [-1, 8, 13, 13]           1,152
             ReLU-10            [-1, 8, 13, 13]               0
      BatchNorm2d-11            [-1, 8, 13, 13]              16
          Dropout-12            [-1, 8, 13, 13]               0
    

In [13]:

# Histories filled in by train() (per batch) and test() (per epoch); both functions
# append to these notebook-level lists, so they are reset here before every run.
train_losses = []
test_losses = []
train_acc = []
test_acc = []

# Start from a freshly initialised network on the available device.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

# Multiply the learning rate by 0.2 when the monitored accuracy (a fraction in
# [0, 1], hence mode='max') has not improved by at least 0.005 for more than one
# epoch. The deprecated `verbose` flag is not used; reductions are reported
# explicitly in the loop below.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                 mode = 'max',
                                                 factor = 0.2,
                                                 patience = 1,
                                                 threshold = 0.005,
                                                 threshold_mode = 'abs')
EPOCHS = 15
for epoch in range(1, EPOCHS+1):
    print("EPOCH: ", epoch)
    train(model, device, train_loader, optimizer, epoch)
    test_accuracy_epoch = test(model, device, test_loader)

    # Compare the learning rate before and after the scheduler step so a
    # reduction can be announced without relying on the scheduler's own logging.
    previous_lr = optimizer.param_groups[0]['lr']
    scheduler.step(test_accuracy_epoch)
    current_lr = optimizer.param_groups[0]['lr']
    if current_lr < previous_lr:
        print(f'Epoch {epoch:05d}: reducing learning rate of group 0 to {current_lr:.4e}.')
    print(f'test_accuracy_epoch = {test_accuracy_epoch}, current learning rate = {current_lr}' )

EPOCH:  1


Loss=0.36273857951164246 Batch_id=468 Accuracy=82.11: 100%|██████████| 469/469 [00:38<00:00, 12.14it/s]



Test set: Average loss: 0.0899, Accuracy: 9694/10000 (96.94%)

test_accuracy_epoch = 0.9694, current learning rate = 0.1
EPOCH:  2


Loss=0.3331053555011749 Batch_id=468 Accuracy=87.88: 100%|██████████| 469/469 [00:34<00:00, 13.52it/s]



Test set: Average loss: 0.0780, Accuracy: 9753/10000 (97.53%)

test_accuracy_epoch = 0.9753, current learning rate = 0.1
EPOCH:  3


Loss=0.33105167746543884 Batch_id=468 Accuracy=88.46: 100%|██████████| 469/469 [00:35<00:00, 13.38it/s]



Test set: Average loss: 0.0552, Accuracy: 9821/10000 (98.21%)

test_accuracy_epoch = 0.9821, current learning rate = 0.1
EPOCH:  4


Loss=0.26790541410446167 Batch_id=468 Accuracy=88.93: 100%|██████████| 469/469 [00:34<00:00, 13.52it/s]



Test set: Average loss: 0.0463, Accuracy: 9851/10000 (98.51%)

test_accuracy_epoch = 0.9851, current learning rate = 0.1
EPOCH:  5


Loss=0.33378925919532776 Batch_id=468 Accuracy=89.15: 100%|██████████| 469/469 [00:34<00:00, 13.45it/s]



Test set: Average loss: 0.0375, Accuracy: 9869/10000 (98.69%)

Epoch 00005: reducing learning rate of group 0 to 2.0000e-02.
test_accuracy_epoch = 0.9869, current learning rate = 0.020000000000000004
EPOCH:  6


Loss=0.1837727427482605 Batch_id=468 Accuracy=89.78: 100%|██████████| 469/469 [00:35<00:00, 13.23it/s]



Test set: Average loss: 0.0277, Accuracy: 9908/10000 (99.08%)

test_accuracy_epoch = 0.9908, current learning rate = 0.020000000000000004
EPOCH:  7


Loss=0.42365702986717224 Batch_id=468 Accuracy=89.79: 100%|██████████| 469/469 [00:36<00:00, 12.86it/s]



Test set: Average loss: 0.0247, Accuracy: 9919/10000 (99.19%)

test_accuracy_epoch = 0.9919, current learning rate = 0.020000000000000004
EPOCH:  8


Loss=0.15364612638950348 Batch_id=468 Accuracy=90.03: 100%|██████████| 469/469 [00:34<00:00, 13.46it/s]



Test set: Average loss: 0.0250, Accuracy: 9918/10000 (99.18%)

Epoch 00008: reducing learning rate of group 0 to 4.0000e-03.
test_accuracy_epoch = 0.9918, current learning rate = 0.004000000000000001
EPOCH:  9


Loss=0.24232561886310577 Batch_id=468 Accuracy=90.18: 100%|██████████| 469/469 [00:34<00:00, 13.44it/s]



Test set: Average loss: 0.0231, Accuracy: 9913/10000 (99.13%)

test_accuracy_epoch = 0.9913, current learning rate = 0.004000000000000001
EPOCH:  10


Loss=0.3703641891479492 Batch_id=468 Accuracy=90.06: 100%|██████████| 469/469 [00:35<00:00, 13.30it/s]



Test set: Average loss: 0.0242, Accuracy: 9918/10000 (99.18%)

Epoch 00010: reducing learning rate of group 0 to 8.0000e-04.
test_accuracy_epoch = 0.9918, current learning rate = 0.0008000000000000003
EPOCH:  11


Loss=0.22709111869335175 Batch_id=468 Accuracy=89.94: 100%|██████████| 469/469 [00:35<00:00, 13.33it/s]



Test set: Average loss: 0.0230, Accuracy: 9917/10000 (99.17%)

test_accuracy_epoch = 0.9917, current learning rate = 0.0008000000000000003
EPOCH:  12


Loss=0.2920962870121002 Batch_id=468 Accuracy=90.12: 100%|██████████| 469/469 [00:35<00:00, 13.16it/s]



Test set: Average loss: 0.0231, Accuracy: 9922/10000 (99.22%)

Epoch 00012: reducing learning rate of group 0 to 1.6000e-04.
test_accuracy_epoch = 0.9922, current learning rate = 0.00016000000000000007
EPOCH:  13


Loss=0.23366539180278778 Batch_id=468 Accuracy=90.09: 100%|██████████| 469/469 [00:36<00:00, 13.02it/s]



Test set: Average loss: 0.0230, Accuracy: 9921/10000 (99.21%)

test_accuracy_epoch = 0.9921, current learning rate = 0.00016000000000000007
EPOCH:  14


Loss=0.2610045075416565 Batch_id=468 Accuracy=89.92: 100%|██████████| 469/469 [00:35<00:00, 13.16it/s]



Test set: Average loss: 0.0236, Accuracy: 9919/10000 (99.19%)

Epoch 00014: reducing learning rate of group 0 to 3.2000e-05.
test_accuracy_epoch = 0.9919, current learning rate = 3.200000000000001e-05
EPOCH:  15


Loss=0.27751871943473816 Batch_id=468 Accuracy=90.11: 100%|██████████| 469/469 [00:35<00:00, 13.30it/s]



Test set: Average loss: 0.0230, Accuracy: 9923/10000 (99.23%)

test_accuracy_epoch = 0.9923, current learning rate = 3.200000000000001e-05
