# ResNet

Papers:

[Deep Residual Learning for Image Recognition](https://arxiv.org/pdf/1512.03385.pdf)

[Aggregated Residual Transformations for Deep Neural Networks](https://arxiv.org/pdf/1611.05431.pdf)

## Degradation of Training Accuracy in Deep Neural Networks

Researchers have observed that adding more layers to an already deep network can result in higher training error, not just higher test error.

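The authors of the first paper distinguish this issue from vanishing gradients by considering a deeper network built by stacking identity layers on top of a shallower one: such a network should train at least as well as the shallower one, yet in practice deeper plain networks end up with higher training error. This issue is termed *degradation*, and the paper aims to address it.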

## Learning Residuals

The degradation problem highlighted difficulties in fitting optimal complex functions in Deep Neural Networks.

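To tackle this, the authors reformulated the optimal function $H(x)$ into 2 parts - the original inputs $x$ and the residuals $F(x)$ - so that $H(x) = F(x) + x$ and the stacked layers only need to learn the residual $F(x) = H(x) - x$.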

## Two Types of Residual Connections

There are 2 types of residual units proposed in the paper:

<div>
<img src="./assets/ResNetUnit.png" width = 800px>
</div>

| Non-bottleneck | Bottleneck |
|----------------|------------|
| Effectively solves the degradation issue | Degradation still observed |
| Requires more computation resource | More economical |

In [3]:
import torch
import torch.nn as nn

In [4]:
def projection(inChannel, outChannel, stride=1):
    """Build the 1x1 convolutional shortcut that reshapes a skip connection.

    Args:
        inChannel: number of channels of the incoming feature map.
        outChannel: number of channels the shortcut must produce.
        stride: stride of the 1x1 convolution (default 1).

    Returns:
        An ``nn.Sequential`` holding the 1x1 ``nn.Conv2d`` followed by an
        ``nn.BatchNorm2d`` over ``outChannel`` channels.
    """
    pointwise = nn.Conv2d(inChannel, outChannel, kernel_size=1, stride=stride)
    norm = nn.BatchNorm2d(outChannel)
    return nn.Sequential(pointwise, norm)

In [5]:
class ResBasic(torch.nn.Module):
    """Non-bottleneck residual block: two same-size convolutions plus a shortcut.

    The block computes ``relu(residual(x) + shortcut(x))``. The shortcut is the
    identity when the residual branch preserves the input shape, and a 1x1
    projection (see ``projection``) whenever the stride or the channel count
    changes, so that both operands of the addition always have the same shape.

    Note: the convolutions pad by ``filterSize // 2``, which preserves the
    spatial size only for odd ``filterSize``.
    """

    @staticmethod
    def unit(inChannel, outChannel, filterSize=3, stride=1):
        """Return the residual branch: conv-BN-ReLU-conv-BN.

        Only the first convolution applies ``stride``; the second one keeps the
        resolution and the channel count produced by the first.
        """
        return nn.Sequential(
            nn.Conv2d(inChannel, outChannel, filterSize, stride=stride, padding=filterSize//2),
            nn.BatchNorm2d(outChannel),
            nn.ReLU(inplace=True),
            nn.Conv2d(outChannel, outChannel, filterSize, padding=filterSize//2),
            nn.BatchNorm2d(outChannel)
        )

    def __init__(self, inChannel, outChannel, filterSize=3, stride=1):
        """
        Args:
            inChannel: channels of the input feature map.
            outChannel: channels produced by the block.
            filterSize: kernel size of both convolutions (default 3).
            stride: stride of the first convolution (default 1).
        """
        super().__init__()

        self.residual = ResBasic.unit(inChannel, outChannel, filterSize, stride)
        # The identity shortcut is only valid when the residual branch keeps
        # both the resolution and the channel count; otherwise `out += eye`
        # would fail on mismatched shapes, so project the input instead.
        needsProjection = stride != 1 or inChannel != outChannel
        self.projection = projection(inChannel, outChannel, stride) if needsProjection else None
        self.relu = nn.ReLU(inplace=True)

    def forward(self, input):
        """Apply the residual branch, add the shortcut and run the final ReLU."""
        out = self.residual(input)
        # Explicit None test: the shortcut is optional, and module truthiness
        # would depend on the container's length rather than on its presence.
        eye = input if self.projection is None else self.projection(input)

        out += eye
        out = self.relu(out)

        return out

In [6]:
class ResBottleneck(torch.nn.Module):
    """Bottleneck residual block: 1x1 reduce, kxk (optionally grouped), 1x1 expand.

    The block computes ``relu(residual(x) + shortcut(x))``. The shortcut is the
    identity when the residual branch preserves the input shape, and a 1x1
    projection (see ``projection``) whenever the stride or the channel count
    changes, so that both operands of the addition always have the same shape.

    Note: the middle convolution pads by ``filterSize // 2``, which preserves
    the spatial size only for odd ``filterSize``.
    """

    @staticmethod
    def unit(inChannel, embedDim, outChannel, filterSize=3, stride=1, groups=1):
        """Return the residual branch: 1x1 conv-BN-ReLU, kxk conv-BN-ReLU, 1x1 conv-BN.

        ``stride`` and ``groups`` apply to the middle convolution only, which
        works on ``embedDim`` channels.
        """
        return nn.Sequential(
            nn.Conv2d(inChannel, embedDim, 1),
            nn.BatchNorm2d(embedDim),
            nn.ReLU(inplace=True),
            nn.Conv2d(embedDim, embedDim, filterSize, stride=stride, padding=filterSize//2, groups = groups),
            nn.BatchNorm2d(embedDim),
            nn.ReLU(inplace=True),
            nn.Conv2d(embedDim, outChannel, 1),
            nn.BatchNorm2d(outChannel)
        )

    def __init__(self, inChannel, embedDim, outChannel, filterSize=3, stride=1, groups=1):
        """
        Args:
            inChannel: channels of the input feature map.
            embedDim: channels inside the bottleneck (after the first 1x1 conv).
            outChannel: channels produced by the block.
            filterSize: kernel size of the middle convolution (default 3).
            stride: stride of the middle convolution (default 1).
            groups: number of groups of the middle convolution (default 1).
        """
        super().__init__()

        self.residual = ResBottleneck.unit(inChannel, embedDim, outChannel, filterSize, stride, groups)
        # The identity shortcut is only valid when the residual branch keeps
        # both the resolution and the channel count; otherwise `out += eye`
        # would fail on mismatched shapes, so project the input instead.
        needsProjection = stride != 1 or inChannel != outChannel
        self.projection = projection(inChannel, outChannel, stride) if needsProjection else None
        self.relu = nn.ReLU(inplace=True)

    def forward(self, input):
        """Apply the residual branch, add the shortcut and run the final ReLU."""
        out = self.residual(input)
        # Explicit None test: the shortcut is optional, and module truthiness
        # would depend on the container's length rather than on its presence.
        eye = input if self.projection is None else self.projection(input)

        out += eye
        out = self.relu(out)

        return out

In [7]:
class ResNet20(torch.nn.Module):
    """Residual image classifier made of three stages of ``ResBasic`` blocks.

    Layout: a 3x3 stem convolution (3 -> 16 channels), then three stages with
    16, 32 and 64 channels. The first block of the second and third stage uses
    stride 2, halving the resolution. The classifier applies dropout, global
    average pooling and a linear layer.

    Note on depth: every ``ResBasic`` holds two convolutions, so with the
    default ``blocksPerStage=6`` the network has 3 * 6 * 2 + 2 = 38 weighted
    layers; ``blocksPerStage=3`` gives the 20-layer configuration the class is
    named after. The default is kept at 6 so existing callers build the same
    network as before.
    """

    def __init__(self, nClass, blocksPerStage=6):
        """
        Args:
            nClass: number of output classes (size of the final linear layer).
            blocksPerStage: number of ``ResBasic`` blocks in each of the three
                stages (default 6).

        Raises:
            ValueError: if ``blocksPerStage`` is smaller than 1.
        """
        super().__init__()
        if blocksPerStage < 1:
            raise ValueError("blocksPerStage must be at least 1")

        self.conv1 = nn.Conv2d(3, 16, 3, stride=1, padding=1)
        self.conv2 = ResNet20._stage(16, 16, blocksPerStage, stride=1)
        self.conv3 = ResNet20._stage(16, 32, blocksPerStage, stride=2)
        self.conv4 = ResNet20._stage(32, 64, blocksPerStage, stride=2)
        self.classifier = nn.Sequential(
            nn.Dropout(0.4),
            # Global average pooling: identical to the former AvgPool2d(8) on
            # the 8x8 maps produced by 32x32 inputs, but independent of the
            # input resolution so the linear layer always receives 64 features.
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Linear(64, nClass)
        )

    @staticmethod
    def _stage(inChannel, outChannel, nBlock, stride):
        """Stack ``nBlock`` 3x3 ``ResBasic`` blocks; only the first changes shape."""
        blocks = [ResBasic(inChannel, outChannel, 3, stride=stride)]
        blocks.extend(
            ResBasic(outChannel, outChannel, 3, stride=1) for _ in range(nBlock - 1)
        )
        return nn.Sequential(*blocks)

    def forward(self, input):
        """Return the class scores (one row of ``nClass`` values per sample)."""
        out = self.conv1(input)
        out = self.conv2(out)
        out = self.conv3(out)
        out = self.conv4(out)
        out = self.classifier(out)

        return out

In [8]:
def resNetInit(l):
    """Initialise a single layer in place; meant for ``Module.apply``.

    ``nn.Conv2d`` weights are drawn with Kaiming-normal initialisation
    (``fan_out`` mode, ReLU gain); the convolution bias is not touched.
    ``nn.BatchNorm2d`` layers get weight 1 and bias 0. Any other layer type
    is left unchanged.
    """
    if isinstance(l, nn.Conv2d):
        nn.init.kaiming_normal_(l.weight, mode='fan_out', nonlinearity='relu')
        return

    if not isinstance(l, nn.BatchNorm2d):
        return

    for param, value in ((l.weight, 1), (l.bias, 0)):
        nn.init.constant_(param, value)

In [11]:
from torch.optim import Adam
from torch.nn.init import kaiming_normal_, normal_
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor
from torchvision.datasets import CIFAR10

import training

In [12]:
import os
import random

# Seed the random number generators and force deterministic cuDNN kernels so
# repeated runs of the notebook are as reproducible as possible.
seed = 2020
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
# Python's own generator is seeded too, so any library code relying on the
# `random` module is covered as well.
random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
# NOTE: hash randomisation is fixed when the interpreter starts, so this
# assignment cannot change hashing in the running kernel; it only reaches
# processes started from here that inherit the environment.
os.environ['PYTHONHASHSEED']=str(seed)

In [13]:
# CIFAR-10 is used for both training and evaluation. The two splits are
# created with the same settings (stored under ../data, downloaded when
# missing, images converted with ToTensor) and differ only in the `train`
# flag; the training split is created first.
trainset, testset = (
    CIFAR10(root = "../data", train = isTrain, download = True, transform = ToTensor())
    for isTrain in (True, False)
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ../data/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ../data/cifar-10-python.tar.gz to ../data
Files already downloaded and verified


In [14]:
# Training harness from the project-local `training` module. Judging by the
# recorded log further down, nEpoch is the number of epochs (30) and
# logInterval the number of batches between loss printouts (50).
trainer = training.Trainer(nEpoch=30, logInterval=50)

In [15]:
# Register one DataLoader per split with identical settings (batches of 256,
# shuffled, loaded in the main process), training split first.
# NOTE(review): the test loader is shuffled as well; evaluation normally does
# not need shuffling - confirm whether this is intentional.
for splitName, splitData in (('train', trainset), ('test', testset)):
    trainer.addDataloader(
        dataloader = DataLoader(
            splitData, batch_size=256,
            shuffle=True, num_workers=0),
        loaderType = splitName)

# Cross-entropy over the class scores is the loss handed to the trainer.
trainer.addLossFn(nn.CrossEntropyLoss())

In [16]:
# Build the 10-class network and re-initialise its layers with resNetInit;
# Module.apply visits every sub-module and returns the module itself, so the
# two steps can be chained.
model = ResNet20(nClass=10).apply(resNetInit)

# Adam with its library-default hyper-parameters.
optimizer = Adam(model.parameters())

In [17]:
# Run the training loop on the registered loaders; the progress lines recorded
# below (epoch, batch, loss) are printed by this call.
trainer.train(model, optimizer)

Epoch [ 1 /30 ]  Batch [ 50  / 196 ]  Loss: 1.6961
Epoch [ 1 /30 ]  Batch [ 100 / 196 ]  Loss: 1.4852
Epoch [ 1 /30 ]  Batch [ 150 / 196 ]  Loss: 1.3472
Epoch [ 1 /30 ]  Batch [ 196 / 196 ]  Loss: 1.1108
Epoch [ 2 /30 ]  Batch [ 50  / 196 ]  Loss: 1.1717
Epoch [ 2 /30 ]  Batch [ 100 / 196 ]  Loss: 1.0661
Epoch [ 2 /30 ]  Batch [ 150 / 196 ]  Loss: 1.1081
Epoch [ 2 /30 ]  Batch [ 196 / 196 ]  Loss: 1.1111
Epoch [ 3 /30 ]  Batch [ 50  / 196 ]  Loss: 0.9284
Epoch [ 3 /30 ]  Batch [ 100 / 196 ]  Loss: 0.8890
Epoch [ 3 /30 ]  Batch [ 150 / 196 ]  Loss: 0.9364
Epoch [ 3 /30 ]  Batch [ 196 / 196 ]  Loss: 0.9380
Epoch [ 4 /30 ]  Batch [ 50  / 196 ]  Loss: 0.8116
Epoch [ 4 /30 ]  Batch [ 100 / 196 ]  Loss: 0.8093
Epoch [ 4 /30 ]  Batch [ 150 / 196 ]  Loss: 0.8441
Epoch [ 4 /30 ]  Batch [ 196 / 196 ]  Loss: 0.7622
Epoch [ 5 /30 ]  Batch [ 50  / 196 ]  Loss: 0.7218
Epoch [ 5 /30 ]  Batch [ 100 / 196 ]  Loss: 0.6327
Epoch [ 5 /30 ]  Batch [ 150 / 196 ]  Loss: 0.6332
Epoch [ 5 /30 ]  Batch [ 196 / 

In [18]:
# Evaluate the trained model. Presumably the second argument is the number of
# classes and the call fills trainer.confMatrix, which the following cells
# read - confirm against training.Trainer.test.
trainer.test(model, 10)

In [19]:
# Accuracy = correctly classified samples (diagonal of the confusion matrix)
# divided by all samples. true_divide forces floating-point division of the
# two integer sums; .item() converts the 0-dim tensor to a Python float.
torch.true_divide(torch.diagonal(trainer.confMatrix).sum(), trainer.confMatrix.sum()).item()

0.7479000091552734

In [20]:
# Display the raw confusion matrix. In the recorded output every column sums
# to 1000, which suggests columns index the true class and rows the predicted
# class - confirm against training.Trainer.
trainer.confMatrix

tensor([[867,  18,  90,  24,  43,  10,  12,  27, 113,  57],
        [  9, 881,   2,   2,   0,   3,   4,   2,  17,  79],
        [ 36,   0, 630,  45,  71,  42,  50,  23,   8,   7],
        [ 32,  27, 112, 691,  92, 276, 102,  70,  20,  29],
        [  7,   3,  54,  48, 677,  32,  11,  53,   2,   2],
        [  6,   1,  42, 104,  31, 584,  22,  47,   4,   6],
        [  5,   9,  38,  52,  46,  21, 788,  10,   4,   5],
        [  4,   5,  18,  22,  38,  29,   3, 760,   7,   7],
        [ 23,  11,   6,   6,   1,   1,   6,   0, 807,  14],
        [ 11,  45,   8,   6,   1,   2,   2,   8,  18, 794]])