<a href="https://colab.research.google.com/github/satyasundar/erav3-s8/blob/main/CIFAR10_model_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from prettytable import PrettyTable

import albumentations as A
from albumentations.pytorch import ToTensorV2
from torchvision.datasets import CIFAR10
import numpy as np
from torch.utils.data import DataLoader

In [4]:


# Per-channel CIFAR-10 statistics, expressed on the [0, 1] pixel scale.
# NOTE(review): `std` is currently unused -- normalisation below uses 0.5/0.5.
mean = [0.4914, 0.4822, 0.4465]
std = [0.2470, 0.2435, 0.2616]

# Every augmentation listed before A.Normalize operates on the raw image as
# stored by torchvision's CIFAR10 (uint8, 0-255). The cutout fill colour must
# therefore be given on the 0-255 scale; passing the [0, 1] means directly
# would paint the hole (almost) black instead of the dataset mean.
cutout_fill = tuple(int(round(channel_mean * 255)) for channel_mean in mean)

# Training pipeline: light geometric augmentation + one 16x16 mean-filled
# cutout, then scaling to roughly [-1, 1] and conversion to a CHW tensor.
train_transforms = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.1, rotate_limit=15, p=0.1),
    A.CoarseDropout(max_holes=1, max_height=16, max_width=16, min_holes=1, min_height=16, min_width=16, fill_value=cutout_fill, mask_fill_value=None, p=0.1),
    A.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ToTensorV2()
])

# Evaluation pipeline: no augmentation, same normalisation as training.
test_transforms = A.Compose([
    A.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ToTensorV2(),
])



class AlbumentationDataset(CIFAR10):
    """CIFAR10 variant that feeds samples through an Albumentations pipeline.

    Albumentations transforms are called with keyword arguments
    (``transform(image=...)``) and return a dict, so ``__getitem__`` is
    overridden to use that calling convention on the stored image array.

    Args:
        root: Directory holding (or receiving) the dataset files.
        train: Select the training split when True, the test split otherwise.
        download: Forwarded to ``CIFAR10``.
        transform: Albumentations callable, or None to return the stored
            image unchanged.
    """

    def __init__(self, root="./data", train=True, download=True, transform=None):
        super().__init__(root=root, train=train, download=download, transform=transform)

    def __getitem__(self, index):
        """Return ``(image, label)`` for ``index``, transformed if a pipeline is set."""
        sample = self.data[index]
        target = self.targets[index]

        # No pipeline configured: hand back the stored sample untouched.
        if self.transform is None:
            return sample, target

        # Albumentations returns a dict; the processed image lives under "image".
        return self.transform(image=sample)["image"], target

# Build both splits: the training split gets the augmenting pipeline, the
# held-out split only gets normalisation. The training split is built first.
# NOTE(review): the names `train` and `test` are rebound to functions later in
# this notebook, so the loaders must be created before that cell runs.
train, test = (
    AlbumentationDataset(root='./data', train=is_train, download=True, transform=pipeline)
    for is_train, pipeline in ((True, train_transforms), (False, test_transforms))
)

Files already downloaded and verified
Files already downloaded and verified


In [5]:
SEED = 2

# Pick the compute device: Apple MPS first, then CUDA, otherwise CPU.
device = torch.device("mps" if torch.backends.mps.is_available() else "cuda" if torch.cuda.is_available() else "cpu")
# Separate flag used only to decide the DataLoader worker / pinning options.
cuda = torch.cuda.is_available()
print("GPU Available?", device)

# For reproducibility: seed torch and NumPy.
torch.manual_seed(SEED)
np.random.seed(SEED)

# A torch.device never compares equal to a plain string, so the device kind
# has to be checked through `.type` for this branch to ever run.
if device.type == "cuda":
    torch.cuda.manual_seed(SEED)

# DataLoader arguments: worker processes and pinned memory only when CUDA is present.
dataloader_args = dict(shuffle=True, batch_size=128, num_workers=4, pin_memory=True) if cuda else dict(shuffle=True, batch_size=128)

# Training batches are reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(train, **dataloader_args)

# Evaluation iterates in a fixed order; the summed metrics do not depend on
# sample order, so shuffling is switched off here.
test_loader = torch.utils.data.DataLoader(test, **{**dataloader_args, "shuffle": False})

# Pretty table for collecting all the accuracy and loss parameters in a table
log_table = PrettyTable()

GPU Available? cuda




In [6]:
# Dropout probability shared by every nn.Dropout layer in the model.
dropout_value = 0.1


class CIFAR10Model(nn.Module):
    """Fully convolutional CIFAR-10 classifier (no max-pooling, no linear layer).

    Down-sampling is done with stride-2 3x3 convolutions and each conv block is
    followed by a 1x1 "transition" convolution that squeezes channels to 16.
    For a 3x32x32 input the feature maps evolve as follows (RF = theoretical
    receptive field at the end of the stage):

        convblock1   -> 64 x 16 x 16   RF  9   (includes a depthwise-separable conv)
        transition1  -> 16 x 16 x 16   RF  9
        convblock2   -> 64 x  8 x  8   RF 21
        transition2  -> 16 x  8 x  8   RF 21
        convblock3   -> 64 x  4 x  4   RF 45
        transition3  -> 16 x  4 x  4   RF 45
        convblock4   -> 64 x  4 x  4   RF 77  (3x3 conv, dilation 2)
        outputblock  -> 10 x  1 x  1          (global average pool + 1x1 conv)

    ``forward`` returns raw class scores of shape (N, 10); no softmax is applied.
    """

    @staticmethod
    def _conv_bn_relu(in_channels, out_channels, *, kernel_size=3, stride=1,
                      padding=1, dilation=1, dropout=True):
        """Return [Conv2d(bias=False), BatchNorm2d, ReLU] plus Dropout when requested."""
        layers = [
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                      kernel_size=(kernel_size, kernel_size), stride=stride,
                      padding=padding, dilation=dilation, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        ]
        if dropout:
            layers.append(nn.Dropout(dropout_value))
        return layers

    def __init__(self):
        super().__init__()
        unit = self._conv_bn_relu

        # Block 1: two plain convs, a depthwise-separable conv, then stride-2 down-sampling.
        self.convblock1 = nn.Sequential(
            *unit(3, 16),
            *unit(16, 32),
            # Depthwise 3x3 (one filter per channel) followed by pointwise 1x1;
            # these two convolutions keep the default bias term.
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=(3, 3), stride=1, padding=1, groups=32),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=(1, 1), stride=1, padding=0),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            *unit(64, 64, stride=2),
        )
        self.transition1 = nn.Sequential(
            *unit(64, 16, kernel_size=1, padding=0, dropout=False),
        )

        # Block 2: two 3x3 convs, then stride-2 down-sampling.
        self.convblock2 = nn.Sequential(
            *unit(16, 32),
            *unit(32, 32),
            *unit(32, 64, stride=2),
        )
        self.transition2 = nn.Sequential(
            *unit(64, 16, kernel_size=1, padding=0, dropout=False),
        )

        # Block 3: two 3x3 convs, then stride-2 down-sampling.
        self.convblock3 = nn.Sequential(
            *unit(16, 32),
            *unit(32, 64),
            *unit(64, 64, stride=2),
        )
        self.transition3 = nn.Sequential(
            *unit(64, 16, kernel_size=1, padding=0, dropout=False),
        )

        # Block 4: a single dilated 3x3 conv; padding=2 keeps the spatial size.
        self.convblock4 = nn.Sequential(
            *unit(16, 64, padding=2, dilation=2, dropout=False),
        )

        # Head: global average pooling, then a 1x1 conv producing the 10 class scores.
        self.outputblock = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(in_channels=64, out_channels=10, kernel_size=(1, 1), padding=0, bias=False),
        )

    def forward(self, x):
        """Run the stages in order and flatten the result to (N, 10)."""
        stages = (
            self.convblock1, self.transition1,
            self.convblock2, self.transition2,
            self.convblock3, self.transition3,
            self.convblock4, self.outputblock,
        )
        for stage in stages:
            x = stage(x)
        return x.view(-1, 10)


In [7]:
from torchsummary import summary

# Print a per-layer shape / parameter summary for a 3x32x32 input.
# NOTE(review): this cell rebinds `cuda` (a bool in the data-loader cell) to a
# torch.device, and ignores the `device` chosen earlier -- confirm intended.
use_cuda = torch.cuda.is_available()
cuda = torch.device("cuda") if use_cuda else torch.device("cpu")
print(cuda)
model = CIFAR10Model()
model = model.to(cuda)
summary(model, input_size=(3, 32, 32))

cuda
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 32, 32]             432
       BatchNorm2d-2           [-1, 16, 32, 32]              32
              ReLU-3           [-1, 16, 32, 32]               0
           Dropout-4           [-1, 16, 32, 32]               0
            Conv2d-5           [-1, 32, 32, 32]           4,608
       BatchNorm2d-6           [-1, 32, 32, 32]              64
              ReLU-7           [-1, 32, 32, 32]               0
           Dropout-8           [-1, 32, 32, 32]               0
            Conv2d-9           [-1, 32, 32, 32]             320
           Conv2d-10           [-1, 64, 32, 32]           2,112
      BatchNorm2d-11           [-1, 64, 32, 32]             128
             ReLU-12           [-1, 64, 32, 32]               0
           Conv2d-13           [-1, 64, 16, 16]          36,864
      BatchNorm2d-14           [-1

In [8]:
from tqdm import tqdm

# Metric histories shared with train() and test() below.
# train() appends one loss and one running-accuracy entry per batch;
# test() appends one average loss and one accuracy entry per call.
train_losses, test_losses = [], []
train_acc, test_acc = [], []

def train(model, device, train_loader, optimizer, epoch):
  """Train ``model`` for one pass over ``train_loader``.

  Side effects: appends one entry per batch to the module-level lists
  ``train_losses`` (the batch loss as a detached 0-dim tensor) and
  ``train_acc`` (running accuracy in percent since the start of this call).

  Args:
    model: Network to optimise; switched to training mode here.
    device: Device that each batch is moved to before the forward pass.
    train_loader: Iterable yielding ``(data, target)`` batches.
    optimizer: Optimizer that owns ``model``'s parameters.
    epoch: Accepted for call-site compatibility; not used in the body.
  """
  model.train()
  pbar = tqdm(train_loader)
  # The loss module holds no state, so build it once instead of per batch.
  criterion = nn.CrossEntropyLoss()
  correct = 0
  processed = 0
  for batch_idx, (data, target) in enumerate(pbar):
    # get samples
    data, target = data.to(device), target.to(device)

    # PyTorch accumulates gradients across backward passes, so clear them
    # before computing this batch's gradients.
    optimizer.zero_grad()

    # Predict
    y_pred = model(data)

    # Calculate loss
    loss = criterion(y_pred, target)
    # Store a detached copy: appending `loss` itself would keep every batch's
    # autograd graph alive for as long as the history list exists. The entry
    # stays a 0-dim tensor, so `.item()` and float formatting keep working.
    train_losses.append(loss.detach())

    # Backpropagation
    loss.backward()
    optimizer.step()

    # Running accuracy: index of the highest class score vs. the label.
    pred = y_pred.argmax(dim=1, keepdim=True)
    correct += pred.eq(target.view_as(pred)).sum().item()
    processed += len(data)

    # Update pbar-tqdm
    pbar.set_description(desc= f'Loss={loss.item()} Batch_id={batch_idx} Accuracy={100*correct/processed:0.2f}')
    train_acc.append(100*correct/processed)

def test(model, device, test_loader):
    """Evaluate ``model`` on ``test_loader`` and record the results.

    Prints the average per-sample cross-entropy loss and the accuracy, and
    appends them to the module-level lists ``test_losses`` and ``test_acc``.

    Args:
        model: Network to evaluate; switched to eval mode here.
        device: Device that each batch is moved to before the forward pass.
        test_loader: Loader yielding ``(data, target)`` batches; its
            ``dataset`` length is used as the number of samples.
    """
    model.eval()
    # Sum (not mean) per batch so the division below gives a per-sample average.
    summed_ce = nn.CrossEntropyLoss(reduction='sum')
    num_samples = len(test_loader.dataset)
    loss_total = 0
    hits = 0

    with torch.no_grad():
        for data, target in test_loader:
            data = data.to(device)
            target = target.to(device)
            scores = model(data)
            loss_total += summed_ce(scores, target).item()
            # Predicted class = index of the highest score.
            predicted = scores.argmax(dim=1, keepdim=True)
            hits += predicted.eq(target.view_as(predicted)).sum().item()

    avg_loss = loss_total / num_samples
    test_losses.append(avg_loss)

    accuracy_pct = 100. * hits / num_samples
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        avg_loss, hits, num_samples, accuracy_pct))

    test_acc.append(accuracy_pct)

In [9]:
from torch.optim.lr_scheduler import StepLR

print("model running on: ", device)

# Fresh results table: one row per epoch.
log_table = PrettyTable()
log_table.field_names = ["Epoch", "Training Accuracy", "Test Accuracy", "Diff", "Training Loss", "Test Loss"]

# Fresh model on the selected device, optimised with Adam.
model = CIFAR10Model().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.003)
# NOTE(review): the scheduler is constructed but never stepped in the loop
# below, so the learning rate stays at its initial value -- confirm intended.
scheduler = StepLR(optimizer, step_size=6, gamma=0.1)

EPOCHS = 50
for epoch in range(EPOCHS):
    print("EPOCH:", epoch+1)
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

    # The last history entries belong to the epoch that just finished.
    # "Training Loss" is the loss of the final mini-batch, not an epoch mean.
    epoch_train_acc = train_acc[-1]
    epoch_test_acc = test_acc[-1]
    log_table.add_row([
        epoch+1,
        f"{epoch_train_acc:.2f}%",
        f"{epoch_test_acc:.2f}%",
        f"{float(epoch_train_acc) - float(epoch_test_acc):.2f}",
        f"{train_losses[-1]:.4f}",
        f"{test_losses[-1]:.4f}",
    ])
print(log_table)

model running on:  cuda
EPOCH: 1


Loss=1.260455846786499 Batch_id=390 Accuracy=41.37: 100%|██████████| 391/391 [00:16<00:00, 23.52it/s]



Test set: Average loss: 1.6352, Accuracy: 4560/10000 (45.60%)

EPOCH: 2


Loss=1.2167580127716064 Batch_id=390 Accuracy=55.96: 100%|██████████| 391/391 [00:14<00:00, 26.09it/s]



Test set: Average loss: 1.0665, Accuracy: 6169/10000 (61.69%)

EPOCH: 3


Loss=1.2578181028366089 Batch_id=390 Accuracy=61.50: 100%|██████████| 391/391 [00:16<00:00, 24.11it/s]



Test set: Average loss: 1.0071, Accuracy: 6464/10000 (64.64%)

EPOCH: 4


Loss=0.8524954915046692 Batch_id=390 Accuracy=64.97: 100%|██████████| 391/391 [00:15<00:00, 25.73it/s]



Test set: Average loss: 1.0250, Accuracy: 6453/10000 (64.53%)

EPOCH: 5


Loss=0.8324249982833862 Batch_id=390 Accuracy=67.39: 100%|██████████| 391/391 [00:14<00:00, 26.16it/s]



Test set: Average loss: 0.9250, Accuracy: 6725/10000 (67.25%)

EPOCH: 6


Loss=0.9456140398979187 Batch_id=390 Accuracy=69.79: 100%|██████████| 391/391 [00:15<00:00, 25.55it/s]



Test set: Average loss: 0.7415, Accuracy: 7384/10000 (73.84%)

EPOCH: 7


Loss=0.9349117279052734 Batch_id=390 Accuracy=71.49: 100%|██████████| 391/391 [00:14<00:00, 26.09it/s]



Test set: Average loss: 0.8566, Accuracy: 7000/10000 (70.00%)

EPOCH: 8


Loss=0.9847301244735718 Batch_id=390 Accuracy=72.49: 100%|██████████| 391/391 [00:15<00:00, 25.93it/s]



Test set: Average loss: 0.7214, Accuracy: 7488/10000 (74.88%)

EPOCH: 9


Loss=0.6765730977058411 Batch_id=390 Accuracy=74.05: 100%|██████████| 391/391 [00:15<00:00, 25.83it/s]



Test set: Average loss: 0.6638, Accuracy: 7685/10000 (76.85%)

EPOCH: 10


Loss=0.6368850469589233 Batch_id=390 Accuracy=74.81: 100%|██████████| 391/391 [00:14<00:00, 26.26it/s]



Test set: Average loss: 0.6398, Accuracy: 7771/10000 (77.71%)

EPOCH: 11


Loss=0.545637309551239 Batch_id=390 Accuracy=75.50: 100%|██████████| 391/391 [00:16<00:00, 23.33it/s]



Test set: Average loss: 0.6251, Accuracy: 7819/10000 (78.19%)

EPOCH: 12


Loss=0.7309141755104065 Batch_id=390 Accuracy=76.53: 100%|██████████| 391/391 [00:14<00:00, 26.19it/s]



Test set: Average loss: 0.6467, Accuracy: 7789/10000 (77.89%)

EPOCH: 13


Loss=0.5841480493545532 Batch_id=390 Accuracy=77.06: 100%|██████████| 391/391 [00:15<00:00, 25.95it/s]



Test set: Average loss: 0.6176, Accuracy: 7876/10000 (78.76%)

EPOCH: 14


Loss=0.610268235206604 Batch_id=390 Accuracy=77.70: 100%|██████████| 391/391 [00:14<00:00, 26.15it/s]



Test set: Average loss: 0.6018, Accuracy: 7909/10000 (79.09%)

EPOCH: 15


Loss=0.6814438104629517 Batch_id=390 Accuracy=78.17: 100%|██████████| 391/391 [00:14<00:00, 26.10it/s]



Test set: Average loss: 0.5509, Accuracy: 8113/10000 (81.13%)

EPOCH: 16


Loss=0.6868442296981812 Batch_id=390 Accuracy=78.39: 100%|██████████| 391/391 [00:14<00:00, 26.22it/s]



Test set: Average loss: 0.5833, Accuracy: 8035/10000 (80.35%)

EPOCH: 17


Loss=0.7898532152175903 Batch_id=390 Accuracy=79.08: 100%|██████████| 391/391 [00:14<00:00, 26.21it/s]



Test set: Average loss: 0.5843, Accuracy: 8040/10000 (80.40%)

EPOCH: 18


Loss=0.5668607354164124 Batch_id=390 Accuracy=79.05: 100%|██████████| 391/391 [00:14<00:00, 26.41it/s]



Test set: Average loss: 0.5589, Accuracy: 8131/10000 (81.31%)

EPOCH: 19


Loss=0.48928794264793396 Batch_id=390 Accuracy=79.66: 100%|██████████| 391/391 [00:15<00:00, 26.03it/s]



Test set: Average loss: 0.5395, Accuracy: 8188/10000 (81.88%)

EPOCH: 20


Loss=0.5538708567619324 Batch_id=390 Accuracy=80.09: 100%|██████████| 391/391 [00:14<00:00, 26.19it/s]



Test set: Average loss: 0.5978, Accuracy: 7986/10000 (79.86%)

EPOCH: 21


Loss=0.6138526201248169 Batch_id=390 Accuracy=80.39: 100%|██████████| 391/391 [00:14<00:00, 26.19it/s]



Test set: Average loss: 0.5429, Accuracy: 8092/10000 (80.92%)

EPOCH: 22


Loss=0.6080974340438843 Batch_id=390 Accuracy=80.37: 100%|██████████| 391/391 [00:15<00:00, 25.99it/s]



Test set: Average loss: 0.5224, Accuracy: 8239/10000 (82.39%)

EPOCH: 23


Loss=0.5727637410163879 Batch_id=390 Accuracy=80.63: 100%|██████████| 391/391 [00:14<00:00, 26.38it/s]



Test set: Average loss: 0.5109, Accuracy: 8276/10000 (82.76%)

EPOCH: 24


Loss=0.47472482919692993 Batch_id=390 Accuracy=81.04: 100%|██████████| 391/391 [00:15<00:00, 25.88it/s]



Test set: Average loss: 0.5217, Accuracy: 8226/10000 (82.26%)

EPOCH: 25


Loss=0.5662104487419128 Batch_id=390 Accuracy=81.33: 100%|██████████| 391/391 [00:14<00:00, 26.31it/s]



Test set: Average loss: 0.4765, Accuracy: 8363/10000 (83.63%)

EPOCH: 26


Loss=0.42380914092063904 Batch_id=390 Accuracy=81.58: 100%|██████████| 391/391 [00:16<00:00, 24.31it/s]



Test set: Average loss: 0.5138, Accuracy: 8274/10000 (82.74%)

EPOCH: 27


Loss=0.45632320642471313 Batch_id=390 Accuracy=81.69: 100%|██████████| 391/391 [00:14<00:00, 26.16it/s]



Test set: Average loss: 0.4855, Accuracy: 8376/10000 (83.76%)

EPOCH: 28


Loss=0.45893198251724243 Batch_id=390 Accuracy=81.95: 100%|██████████| 391/391 [00:15<00:00, 26.00it/s]



Test set: Average loss: 0.4971, Accuracy: 8341/10000 (83.41%)

EPOCH: 29


Loss=0.577580988407135 Batch_id=390 Accuracy=82.07: 100%|██████████| 391/391 [00:14<00:00, 26.08it/s]



Test set: Average loss: 0.4789, Accuracy: 8349/10000 (83.49%)

EPOCH: 30


Loss=0.5034649968147278 Batch_id=390 Accuracy=82.23: 100%|██████████| 391/391 [00:15<00:00, 26.01it/s]



Test set: Average loss: 0.4842, Accuracy: 8366/10000 (83.66%)

EPOCH: 31


Loss=0.6198814511299133 Batch_id=390 Accuracy=82.18: 100%|██████████| 391/391 [00:14<00:00, 26.47it/s]



Test set: Average loss: 0.5106, Accuracy: 8268/10000 (82.68%)

EPOCH: 32


Loss=0.48808568716049194 Batch_id=390 Accuracy=82.54: 100%|██████████| 391/391 [00:14<00:00, 26.19it/s]



Test set: Average loss: 0.4603, Accuracy: 8422/10000 (84.22%)

EPOCH: 33


Loss=0.3705518841743469 Batch_id=390 Accuracy=82.85: 100%|██████████| 391/391 [00:14<00:00, 26.16it/s]



Test set: Average loss: 0.4497, Accuracy: 8466/10000 (84.66%)

EPOCH: 34


Loss=0.40688222646713257 Batch_id=390 Accuracy=82.79: 100%|██████████| 391/391 [00:14<00:00, 26.28it/s]



Test set: Average loss: 0.4865, Accuracy: 8368/10000 (83.68%)

EPOCH: 35


Loss=0.42879390716552734 Batch_id=390 Accuracy=82.99: 100%|██████████| 391/391 [00:15<00:00, 26.05it/s]



Test set: Average loss: 0.4702, Accuracy: 8416/10000 (84.16%)

EPOCH: 36


Loss=0.2693183720111847 Batch_id=390 Accuracy=83.30: 100%|██████████| 391/391 [00:14<00:00, 26.36it/s]



Test set: Average loss: 0.4868, Accuracy: 8377/10000 (83.77%)

EPOCH: 37


Loss=0.452004998922348 Batch_id=390 Accuracy=83.32: 100%|██████████| 391/391 [00:14<00:00, 26.21it/s]



Test set: Average loss: 0.4521, Accuracy: 8489/10000 (84.89%)

EPOCH: 38


Loss=0.603645384311676 Batch_id=390 Accuracy=83.37: 100%|██████████| 391/391 [00:14<00:00, 26.11it/s]



Test set: Average loss: 0.4501, Accuracy: 8479/10000 (84.79%)

EPOCH: 39


Loss=0.552642285823822 Batch_id=390 Accuracy=83.46: 100%|██████████| 391/391 [00:15<00:00, 25.97it/s]



Test set: Average loss: 0.4470, Accuracy: 8474/10000 (84.74%)

EPOCH: 40


Loss=0.39833930134773254 Batch_id=390 Accuracy=83.77: 100%|██████████| 391/391 [00:16<00:00, 23.65it/s]



Test set: Average loss: 0.4671, Accuracy: 8453/10000 (84.53%)

EPOCH: 41


Loss=0.4198873043060303 Batch_id=390 Accuracy=83.72: 100%|██████████| 391/391 [00:14<00:00, 26.13it/s]



Test set: Average loss: 0.4467, Accuracy: 8477/10000 (84.77%)

EPOCH: 42


Loss=0.4761854112148285 Batch_id=390 Accuracy=83.71: 100%|██████████| 391/391 [00:14<00:00, 26.28it/s]



Test set: Average loss: 0.4487, Accuracy: 8491/10000 (84.91%)

EPOCH: 43


Loss=0.28720229864120483 Batch_id=390 Accuracy=84.20: 100%|██████████| 391/391 [00:14<00:00, 26.28it/s]



Test set: Average loss: 0.4406, Accuracy: 8504/10000 (85.04%)

EPOCH: 44


Loss=0.466238409280777 Batch_id=390 Accuracy=84.15: 100%|██████████| 391/391 [00:14<00:00, 26.32it/s]



Test set: Average loss: 0.4404, Accuracy: 8535/10000 (85.35%)

EPOCH: 45


Loss=0.37305712699890137 Batch_id=390 Accuracy=84.16: 100%|██████████| 391/391 [00:14<00:00, 26.60it/s]



Test set: Average loss: 0.4427, Accuracy: 8495/10000 (84.95%)

EPOCH: 46


Loss=0.5065498948097229 Batch_id=390 Accuracy=84.06: 100%|██████████| 391/391 [00:15<00:00, 26.03it/s]



Test set: Average loss: 0.4446, Accuracy: 8513/10000 (85.13%)

EPOCH: 47


Loss=0.4016569256782532 Batch_id=390 Accuracy=84.44: 100%|██████████| 391/391 [00:15<00:00, 25.87it/s]



Test set: Average loss: 0.4517, Accuracy: 8481/10000 (84.81%)

EPOCH: 48


Loss=0.2598184645175934 Batch_id=390 Accuracy=84.40: 100%|██████████| 391/391 [00:14<00:00, 26.47it/s]



Test set: Average loss: 0.4251, Accuracy: 8580/10000 (85.80%)

EPOCH: 49


Loss=0.36957162618637085 Batch_id=390 Accuracy=84.43: 100%|██████████| 391/391 [00:14<00:00, 26.35it/s]



Test set: Average loss: 0.4303, Accuracy: 8567/10000 (85.67%)

EPOCH: 50


Loss=0.46526843309402466 Batch_id=390 Accuracy=84.66: 100%|██████████| 391/391 [00:14<00:00, 26.24it/s]



Test set: Average loss: 0.4422, Accuracy: 8543/10000 (85.43%)

+-------+-------------------+---------------+-------+---------------+-----------+
| Epoch | Training Accuracy | Test Accuracy |  Diff | Training Loss | Test Loss |
+-------+-------------------+---------------+-------+---------------+-----------+
|   1   |       41.37%      |     45.60%    | -4.23 |     1.2605    |   1.6352  |
|   2   |       55.96%      |     61.69%    | -5.73 |     1.2168    |   1.0665  |
|   3   |       61.50%      |     64.64%    | -3.14 |     1.2578    |   1.0071  |
|   4   |       64.97%      |     64.53%    |  0.44 |     0.8525    |   1.0250  |
|   5   |       67.39%      |     67.25%    |  0.14 |     0.8324    |   0.9250  |
|   6   |       69.79%      |     73.84%    | -4.05 |     0.9456    |   0.7415  |
|   7   |       71.49%      |     70.00%    |  1.49 |     0.9349    |   0.8566  |
|   8   |       72.49%      |     74.88%    | -2.39 |     0.9847    |   0.7214  |
|   9   |       74.05%      |     