In [1]:
%matplotlib inline
from matplotlib import pyplot as plt
import numpy as np
import collections

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
class_names = ['airplane','automobile','bird','cat','deer',
               'dog','frog','horse','ship','truck']

In [3]:
from torchvision import datasets, transforms
data_path = '/data-unversioned/p1ch6/'
cifar10 = datasets.CIFAR10(
    data_path, train=True, download=True,
    transform=transforms.Compose([
        transforms.RandomCrop(size=[32,32], padding=4),
        transforms.RandomHorizontalFlip(),                       
        transforms.ToTensor(),
        transforms.Normalize((0.4915, 0.4823, 0.4468),                # priorly calculated mean and standard deviations.
                             (0.2470, 0.2435, 0.2616))
    ]))

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /data-unversioned/p1ch6/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=0.0, max=170498071.0), HTML(value='')))


Extracting /data-unversioned/p1ch6/cifar-10-python.tar.gz to /data-unversioned/p1ch6/


In [4]:
cifar10_val = datasets.CIFAR10(                      #loading the val set
    data_path, train=False, download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4915, 0.4823, 0.4468),
                             (0.2470, 0.2435, 0.2616))                      
    ]))

Files already downloaded and verified


In [5]:
class resblock(nn.Module):                    #creating a resblock which would be used iterativealy.
    def __init__(self, n):
        super(resblock, self).__init__()
        self.conv = nn.Conv2d(n, n, kernel_size=3,
                              padding=1, bias=False)                 # defining layers for forward
        self.batch_norm = nn.BatchNorm2d(num_features=n)

        torch.nn.init.kaiming_normal_(self.conv.weight,
                                      nonlinearity='relu')  
        torch.nn.init.constant_(self.batch_norm.weight, 0.5)
        torch.nn.init.zeros_(self.batch_norm.bias)

    def forward(self, x):
        out = self.conv(x)                                 #Actual resblock with the skip connection over 1 layer
        out = self.batch_norm(out)
        out = torch.relu(out)
        return out + x

In [6]:
class deep(nn.Module):                # the model which uses resblock iteratively and some other layers
    def __init__(self, n1=32, n_blocks=14):
        super().__init__()
        self.n1 = n1
        self.conv1 = nn.Conv2d(3, n1, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(n1, 2*n1, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(2*n1, 4*n1, kernel_size=3, padding=1)       # defing layers for forward
        self.resblocks = nn.Sequential(
            *(n_blocks * [resblock(n=n1)]))
        self.resblocks2 = nn.Sequential(
            *(n_blocks * [resblock(n=n1*2)]))
        self.resblocks3 = nn.Sequential(
            *(n_blocks * [resblock(n=n1*4)]))
        self.fc1 = nn.Linear(8 * 8 * n1*4, 32)
        self.fc2 = nn.Linear(32, 16)
        self.fc3 = nn.Linear(16, 10)
        
    def forward(self, x):                    # using the resblocks 3 times, each with twice the number of channels as before
        out = torch.relu(self.conv1(x))
        out = self.resblocks(out)                          
        out = F.max_pool2d(out, 2) 
        out = self.conv2(out)
        out = self.resblocks2(out)
        out = F.max_pool2d(out, 2) 
        out = self.conv3(out)
        out = self.resblocks3(out)
        out = out.view(-1, 8 * 8 * self.n1*4)
        out = torch.relu(self.fc1(out))
        out = torch.relu(self.fc2(out))
        out = self.fc3(out)
        return out

In [7]:
device = (torch.device('cuda') if torch.cuda.is_available()
          else torch.device('cpu'))                             # shifting to gpu if available
print(f"Training on device {device}.")

Training on device cuda.


In [8]:
import datetime

def training_loop(n_epochs, optimizer, model, loss_fn, train_loader):           # defining the training loop
    for epoch in range(1, n_epochs + 1):                                        # these inputs would be defined in next cell
        loss_train = 0.0
        for imgs, labels in train_loader:
            imgs = imgs.to(device=device)  # <1>
            labels = labels.to(device=device)
            outputs = model(imgs)
            loss = loss_fn(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loss_train += loss.item()

        
        print('{} Epoch {}, Training loss {}'.format(
            datetime.datetime.now(), epoch,
            loss_train / len(train_loader)))

In [9]:
train_loader = torch.utils.data.DataLoader(cifar10, batch_size=64,
                                           shuffle=True)                   # defining mini batch size and shuffling

model =  deep().to(device=device)# <1>
optimizer = optim.SGD(model.parameters(), lr=1e-2, weight_decay=0.01)
loss_fn = nn.CrossEntropyLoss()

In [None]:
training_loop(                   #calling the training loop finally
    n_epochs = 80,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

In [15]:
train_loader = torch.utils.data.DataLoader(cifar10, batch_size=64,
                                           shuffle=False)
val_loader = torch.utils.data.DataLoader(cifar10_val, batch_size=64,
                                         shuffle=False)

def validate(model, train_loader, val_loader):
    for name, loader in [("train", train_loader), ("val", val_loader)]:
        correct = 0
        total = 0

        with torch.no_grad():   # we do no need backpropagation for this hence the nograd
            for imgs, labels in loader:
                imgs = imgs.to(device=device)
                labels = labels.to(device=device)
                outputs = model(imgs)
                _, predicted = torch.max(outputs, dim=1) # taking the class at maximum probability as prediction
                total += labels.shape[0]  
                correct += int((predicted == labels).sum())   # calculating the no. of correct predictions.

        print("Accuracy {}: {:.2f}".format(name , correct / total))

validate(model, train_loader, val_loader)

Accuracy train: 0.89
Accuracy val: 0.86


In [13]:
optimizer = optim.SGD(model.parameters(), lr=0.01, weight_decay=0.01)      #lowering LR

In [None]:
training_loop(   # 10 iterations with new LR
    n_epochs = 10,                      
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

In [16]:
optimizer = optim.SGD(model.parameters(), lr=0.001, weight_decay=0.001)     #further lowering LR

In [17]:
training_loop(                    # another 10 iterations
    n_epochs = 10,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

2021-07-29 13:19:36.676366 Epoch 1, Training loss 0.21547675989282405
2021-07-29 13:20:47.210372 Epoch 2, Training loss 0.18545380728247832
2021-07-29 13:21:57.443589 Epoch 3, Training loss 0.1689264615735663
2021-07-29 13:23:07.791370 Epoch 4, Training loss 0.15852573410610257
2021-07-29 13:24:18.132344 Epoch 5, Training loss 0.15325718430463997
2021-07-29 13:25:29.122913 Epoch 6, Training loss 0.14544702793859765
2021-07-29 13:26:39.644823 Epoch 7, Training loss 0.14116498423011406
2021-07-29 13:27:50.090788 Epoch 8, Training loss 0.1331365378811727
2021-07-29 13:29:00.388697 Epoch 9, Training loss 0.12908455516066392
2021-07-29 13:30:11.258031 Epoch 10, Training loss 0.12649944155474604


In [18]:
validate(model, train_loader, val_loader)           #now checking acc

Accuracy train: 0.96
Accuracy val: 0.90


In [19]:
optimizer = optim.SGD(model.parameters(), lr=0.0005, weight_decay=0.005)  # trying to inc acc a bit more, 
#further lowering LR

In [20]:
training_loop(             #another 10 iterations
    n_epochs = 10,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

2021-07-29 13:35:15.584836 Epoch 1, Training loss 0.1171610201839977
2021-07-29 13:36:25.968531 Epoch 2, Training loss 0.11383380780896872
2021-07-29 13:37:35.910039 Epoch 3, Training loss 0.11141061599430678
2021-07-29 13:38:46.136000 Epoch 4, Training loss 0.10897924863588056
2021-07-29 13:39:56.394967 Epoch 5, Training loss 0.10631841458642231
2021-07-29 13:41:07.116456 Epoch 6, Training loss 0.106746020078983
2021-07-29 13:42:17.761997 Epoch 7, Training loss 0.10239207398389345
2021-07-29 13:43:28.322475 Epoch 8, Training loss 0.10060076590370187
2021-07-29 13:44:38.761763 Epoch 9, Training loss 0.09959321902574175
2021-07-29 13:45:49.203933 Epoch 10, Training loss 0.09747522666840755


In [21]:
validate(model, train_loader, val_loader)

Accuracy train: 0.97
Accuracy val: 0.90


In [27]:
from torchsummary import summary           #summary, according to it, it is a 132 layer model with 3M params.
summary(model, (3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 32, 32]             896
            Conv2d-2           [-1, 32, 32, 32]           9,216
       BatchNorm2d-3           [-1, 32, 32, 32]              64
          resblock-4           [-1, 32, 32, 32]               0
            Conv2d-5           [-1, 32, 32, 32]           9,216
       BatchNorm2d-6           [-1, 32, 32, 32]              64
          resblock-7           [-1, 32, 32, 32]               0
            Conv2d-8           [-1, 32, 32, 32]           9,216
       BatchNorm2d-9           [-1, 32, 32, 32]              64
         resblock-10           [-1, 32, 32, 32]               0
           Conv2d-11           [-1, 32, 32, 32]           9,216
      BatchNorm2d-12           [-1, 32, 32, 32]              64
         resblock-13           [-1, 32, 32, 32]               0
           Conv2d-14           [-1, 32,