**ResNet** is the name of a CNN which introduced the idea of Residual connections. Residual connections allow for training of much deeper Neural Networks. And is one of the key advances in Artificial Neural Network architecture. In this notebook we will explore **ResNets**.

https://arxiv.org/pdf/1512.03385.pdf

In [1]:
#Perform imports
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.utils import make_grid

import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
#Import models
from torchvision import models

In [5]:
#Lets download resnet18
resnet18 = models.resnet18(pretrained=True)

In [6]:
resnet18

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [8]:
#Lets count the number of parameters in ResNet18
#lets count the number of parameters in the model
def count_parameters(model):
    params = [p.numel() for p in model.parameters() if p.requires_grad]
    for item in params:
        print(f'{item:>6}')
    print(f'______\n{sum(params):>6}')
count_parameters(resnet18)

  9408
    64
    64
 36864
    64
    64
 36864
    64
    64
 36864
    64
    64
 36864
    64
    64
 73728
   128
   128
147456
   128
   128
  8192
   128
   128
147456
   128
   128
147456
   128
   128
294912
   256
   256
589824
   256
   256
 32768
   256
   256
589824
   256
   256
589824
   256
   256
1179648
   512
   512
2359296
   512
   512
131072
   512
   512
2359296
   512
   512
2359296
   512
   512
512000
  1000
______
11689512


There are 11 million parameters in ResNet18.

Lets apply **ResNet18** on CIFAR-10 dataset.

In [9]:
#Download the dataset and transform them to the format for ResNet
train_transform = transforms.Compose([
        transforms.Resize(224),             # resize shortest side to 224 pixels
        transforms.CenterCrop(224),         # crop longest side to 224 pixels at center
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ])

test_transform = transforms.Compose([
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ])

train_data = datasets.CIFAR10(root='./', train=True, download=True, transform=train_transform)
test_data = datasets.CIFAR10(root='./', train=False, download=True, transform=test_transform)

Files already downloaded and verified
Files already downloaded and verified


In [10]:
#Create dataloaders
torch.manual_seed(101)  # for reproducible results

train_loader = DataLoader(train_data, batch_size=10, shuffle=True)
test_loader = DataLoader(test_data, batch_size=10, shuffle=False)

In [12]:
#Lets update the classification layer of AlexNet to take in CIFAR-10 dataset
for param in resnet18.parameters():
    param.requires_grad = False

In [14]:
torch.manual_seed(42)
resnet18.fc = nn.Sequential(nn.Linear(512, out_features=10, bias=True),
                                 nn.LogSoftmax(dim=1))

In [15]:
#Count parameters
count_parameters(resnet18)

  5120
    10
______
  5130


In [17]:
#Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(resnet18.fc.parameters(), lr=0.001)

In [28]:
def trainResNet(resnetModel):
    #Lets train the model
    import time
    start_time = time.time()

    epochs = 1

    max_trn_batch = 800
    max_tst_batch = 300

    train_losses = []
    test_losses = []
    train_correct = []
    test_correct = []

    for i in range(epochs):
        trn_corr = 0
        tst_corr = 0
    
        # Run the training batches
        for b, (X_train, y_train) in enumerate(train_loader):
            if b == max_trn_batch:
                break
            b+=1
        
            # Apply the model
            y_pred = resnetModel(X_train)
            loss = criterion(y_pred, y_train)
 
            # Tally the number of correct predictions
            predicted = torch.max(y_pred.data, 1)[1]
            batch_corr = (predicted == y_train).sum()
            trn_corr += batch_corr
        
            # Update parameters
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Print interim results
            if b%200 == 0:
                print(f'epoch: {i:2}  batch: {b:4} [{10*b:6}/8000]  loss: {loss.item():10.8f}  \
accuracy: {trn_corr.item()*100/(10*b):7.3f}%')

        train_losses.append(loss)
        train_correct.append(trn_corr)

        # Run the testing batches
        with torch.no_grad():
            for b, (X_test, y_test) in enumerate(test_loader):
                if b == max_tst_batch:
                    break

                # Apply the model
                y_val = resnetModel(X_test)

                # Tally the number of correct predictions
                predicted = torch.max(y_val.data, 1)[1] 
                tst_corr += (predicted == y_test).sum()

        loss = criterion(y_val, y_test)
        test_losses.append(loss)
        test_correct.append(tst_corr)
        return test_correct

    print(f'\nDuration: {time.time() - start_time:.0f} seconds') # print the time elapsed

In [29]:
test_correct = trainResNet(resnet18)

epoch:  0  batch:  200 [  2000/8000]  loss: 0.58395964  accuracy:  69.400%
epoch:  0  batch:  400 [  4000/8000]  loss: 0.45118099  accuracy:  70.050%
epoch:  0  batch:  600 [  6000/8000]  loss: 0.37145323  accuracy:  70.467%
epoch:  0  batch:  800 [  8000/8000]  loss: 0.58636081  accuracy:  70.713%


In [30]:
print(test_correct)
print(f'Test accuracy: {test_correct[-1].item()*100/3000:.3f}%')

[tensor(2147)]
Test accuracy: 71.567%


Lets try other larger **ResNet** models

**ResNet34**

In [33]:
#Lets download resnet34
resnet34 = models.resnet34(pretrained=True)
#take a look at the model
resnet34
#count parameters
print("Parameters in full model")
count_parameters(resnet34)
#reset grad
for param in resnet34.parameters():
    param.requires_grad = False
torch.manual_seed(42)
resnet34.fc = nn.Sequential(nn.Linear(512, out_features=10, bias=True),
                                 nn.LogSoftmax(dim=1))
print("Parameters to optimize")
count_parameters(resnet34)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(resnet34.fc.parameters(), lr=0.001)
print("Training")
test_correct = trainResNet(resnet34)
print(test_correct)
print(f'Test accuracy: {test_correct[-1].item()*100/3000:.3f}%')

Parameters in full model
  9408
    64
    64
 36864
    64
    64
 36864
    64
    64
 36864
    64
    64
 36864
    64
    64
 36864
    64
    64
 36864
    64
    64
 73728
   128
   128
147456
   128
   128
  8192
   128
   128
147456
   128
   128
147456
   128
   128
147456
   128
   128
147456
   128
   128
147456
   128
   128
147456
   128
   128
294912
   256
   256
589824
   256
   256
 32768
   256
   256
589824
   256
   256
589824
   256
   256
589824
   256
   256
589824
   256
   256
589824
   256
   256
589824
   256
   256
589824
   256
   256
589824
   256
   256
589824
   256
   256
589824
   256
   256
1179648
   512
   512
2359296
   512
   512
131072
   512
   512
2359296
   512
   512
2359296
   512
   512
2359296
   512
   512
2359296
   512
   512
512000
  1000
______
21797672
Parameters to optimize
  5120
    10
______
  5130
Training
epoch:  0  batch:  200 [  2000/8000]  loss: 1.08344638  accuracy:  46.850%
epoch:  0  batch:  400 [  4000/8000]  loss: 1.12

**ResNet50**

In [37]:
#Lets download resnet50
resnet50 = models.resnet50(pretrained=True)
#take a look at the model
print(resnet50)
#count parameters
print("Parameters in full model")
count_parameters(resnet50)
#reset grad
for param in resnet50.parameters():
    param.requires_grad = False
torch.manual_seed(42)
resnet50.fc = nn.Sequential(nn.Linear(2048, out_features=10, bias=True),
                                 nn.LogSoftmax(dim=1))
print("Parameters to optimize")
count_parameters(resnet50)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(resnet50.fc.parameters(), lr=0.001)
print("Training")
test_correct = trainResNet(resnet50)
print(test_correct)
print(f'Test accuracy: {test_correct[-1].item()*100/3000:.3f}%')

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

epoch:  0  batch:  200 [  2000/8000]  loss: 1.09733474  accuracy:  46.350%
epoch:  0  batch:  400 [  4000/8000]  loss: 0.89698684  accuracy:  55.200%
epoch:  0  batch:  600 [  6000/8000]  loss: 0.95046967  accuracy:  58.850%
epoch:  0  batch:  800 [  8000/8000]  loss: 0.53787303  accuracy:  61.650%
[tensor(1942)]
Test accuracy: 64.733%


**Resnet101**

In [41]:
#Lets download resnet101
resnet101 = models.resnet101(pretrained=True)
#take a look at the model
print(resnet101)
#count parameters
print("Parameters in full model")
count_parameters(resnet101)
#reset grad
for param in resnet101.parameters():
    param.requires_grad = False
torch.manual_seed(42)
resnet101.fc = nn.Sequential(nn.Linear(2048, out_features=10, bias=True),
                                 nn.LogSoftmax(dim=1))
print("Parameters to optimize")
count_parameters(resnet101)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(resnet101.fc.parameters(), lr=0.001)
print("Training")
test_correct = trainResNet(resnet101)
print(test_correct)
print(f'Test accuracy: {test_correct[-1].item()*100/3000:.3f}%')

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

epoch:  0  batch:  200 [  2000/8000]  loss: 1.08625031  accuracy:  51.250%
epoch:  0  batch:  400 [  4000/8000]  loss: 1.18377185  accuracy:  59.575%
epoch:  0  batch:  600 [  6000/8000]  loss: 0.59641773  accuracy:  63.267%
epoch:  0  batch:  800 [  8000/8000]  loss: 0.42787209  accuracy:  65.700%
[tensor(2070)]
Test accuracy: 69.000%


**Resnet152**

In [40]:
#Lets download resnet152
resnet152 = models.resnet152(pretrained=True)
#take a look at the model
print(resnet152)
#count parameters
print("Parameters in full model")
count_parameters(resnet152)
#reset grad
for param in resnet152.parameters():
    param.requires_grad = False
torch.manual_seed(42)
resnet152.fc = nn.Sequential(nn.Linear(2048, out_features=10, bias=True),
                                 nn.LogSoftmax(dim=1))
print("Parameters to optimize")
count_parameters(resnet152)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(resnet152.fc.parameters(), lr=0.001)
print("Training")
test_correct = trainResNet(resnet152)
print(test_correct)
print(f'Test accuracy: {test_correct[-1].item()*100/3000:.3f}%')

Downloading: "https://download.pytorch.org/models/resnet152-b121ed2d.pth" to /Users/jamieott/.cache/torch/hub/checkpoints/resnet152-b121ed2d.pth


HBox(children=(FloatProgress(value=0.0, max=241530880.0), HTML(value='')))


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1,

epoch:  0  batch:  200 [  2000/8000]  loss: 0.93982857  accuracy:  51.350%
epoch:  0  batch:  400 [  4000/8000]  loss: 1.30545247  accuracy:  59.650%
epoch:  0  batch:  600 [  6000/8000]  loss: 0.59740496  accuracy:  63.100%
epoch:  0  batch:  800 [  8000/8000]  loss: 0.30629829  accuracy:  65.688%
[tensor(2096)]
Test accuracy: 69.867%


Larger **Resnets** need more training accuracy is still growing. To save compute resources we will leave it here.