### ResNet

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch as th
import torch.nn as nn
from dataset import mnist
from util import (
    get_torch_size_string,
    print_model_parameters,
    print_model_layers,
    model_train
)
# Print NumPy arrays and torch tensors with 3 digits of precision.
np.set_printoptions(precision=3)
th.set_printoptions(precision=3)
# IPython magics: draw matplotlib figures inline in the notebook and request
# the 'retina' figure format for the inline backend.
%matplotlib inline
%config InlineBackend.figure_format='retina'
# Report the PyTorch version in use when the notebook is executed.
print ("PyTorch version:[%s]."%(th.__version__))

PyTorch version:[2.0.1].


In [2]:
# Device string used when creating input tensors later in the notebook.
# NOTE: the model itself is never moved with `.to(device)` in the visible cells,
# so this must stay in sync with wherever the model lives (CPU here).
device = "cpu"

### Residual Block

In [3]:
class Residual(nn.Module):
    """The Residual block of ResNet models.

    Main path: 3x3 conv -> batch norm -> ReLU -> 3x3 conv -> batch norm.
    The (optionally projected) input is then added and a final ReLU is applied.

    Args:
        num_channels: Number of output channels of every convolution in the block.
        use_1x1conv: If True, the shortcut passes through a 1x1 convolution with
            `num_channels` outputs and stride `strides`; otherwise the input is
            added to the main path unchanged.
        strides: Stride of the first 3x3 convolution (and of the 1x1 shortcut
            convolution when it is enabled).

    Notes:
        All convolutions and batch norms are lazy modules, so the number of input
        channels is inferred on the first forward pass.
        With `use_1x1conv=False` the input must already be shape-compatible with
        the main-path output (same channel count, `strides=1`), because it is
        added to it directly.
    """
    def __init__(self, num_channels, use_1x1conv=False, strides=1):
        super().__init__()
        self.conv1 = nn.LazyConv2d(
            num_channels, kernel_size=3, padding=1, stride=strides)
        self.conv2 = nn.LazyConv2d(
            num_channels, kernel_size=3, padding=1)
        if use_1x1conv:
            # Projection shortcut: matches channel count and spatial stride
            # of the main path.
            self.conv3 = nn.LazyConv2d(
                num_channels, kernel_size=1, stride=strides)
        else:
            # Identity shortcut.
            self.conv3 = None
        self.bn1 = nn.LazyBatchNorm2d()
        self.bn2 = nn.LazyBatchNorm2d()
        self.actv = nn.ReLU()

    def forward(self, x):
        """Apply the block to `x` and return the activated sum of main path and shortcut."""
        y = self.actv(self.bn1(self.conv1(x)))
        y = self.bn2(self.conv2(y))
        # Test for the optional projection explicitly with `is not None` instead of
        # relying on the truth value of an nn.Module instance.
        if self.conv3 is not None:
            x = self.conv3(x)
        # In-place add on the freshly computed main-path tensor; `x` is not modified.
        y += x
        return self.actv(y)
print("Ready.")

Ready.


In [4]:
# Shape check 1: identity shortcut (no 1x1 conv), stride 1.
# Channel count and spatial size are expected to be unchanged.
resblock = Residual(num_channels=3, use_1x1conv=False, strides=1)
x_torch = th.randn(16, 3, 224, 224)
y_torch = resblock(x_torch)
print(
    "x_torch:[%s] => y_torch:[%s]"
    % (get_torch_size_string(x_torch), get_torch_size_string(y_torch))
)

x_torch:[16x3x224x224] => y_torch:[16x3x224x224]




In [5]:
# Shape check 2: 1x1-conv shortcut with stride 1.
# The shortcut is now a learned projection, but the output shape is unchanged.
resblock = Residual(num_channels=3, use_1x1conv=True, strides=1)
x_torch = th.randn(16, 3, 224, 224)
y_torch = resblock(x_torch)
print(
    "x_torch:[%s] => y_torch:[%s]"
    % (get_torch_size_string(x_torch), get_torch_size_string(y_torch))
)

x_torch:[16x3x224x224] => y_torch:[16x3x224x224]


In [6]:
# Shape check 3: 1x1-conv shortcut with stride 2.
# Both the main path and the shortcut are strided, halving height and width.
resblock = Residual(num_channels=3, use_1x1conv=True, strides=2)
x_torch = th.randn(16, 3, 224, 224)
y_torch = resblock(x_torch)
print(
    "x_torch:[%s] => y_torch:[%s]"
    % (get_torch_size_string(x_torch), get_torch_size_string(y_torch))
)

x_torch:[16x3x224x224] => y_torch:[16x3x112x112]


### ResNet

In [7]:
class ResNetClass(nn.Module):
    """ResNet-style classifier: a stem, a sequence of residual stages, and a linear head.

    Args:
        arch: Iterable of `(num_residuals, num_channels)` pairs, one per stage.
        lr: Accepted by the constructor but not used anywhere in this class.
        num_classes: Output size of the final linear layer.

    Attributes:
        net: `nn.Sequential` holding the stem (name '0'), the stages
            ('b2', 'b3', ...) and the head ('last').
        layer_names: One "<lowercased class name>_<2-digit index>" label per
            top-level child of `net`.
    """

    def __init__(self, arch, lr=0.1, num_classes=10):
        super().__init__()

        # The stem is passed positionally, so it is registered under the name '0'.
        self.net = nn.Sequential(self.b1())
        # Stages are named b2, b3, ...; only the first stage skips downsampling.
        for i, spec in enumerate(arch):
            stage = self.block(*spec, first_block=(i == 0))
            self.net.add_module(f'b{i+2}', stage)
        # Head: global average pool -> flatten -> linear classifier.
        head = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
            nn.LazyLinear(num_classes),
        )
        self.net.add_module('last', head)

        self.layer_names = [
            "%s_%02d" % (type(child).__name__.lower(), pos)
            for pos, child in enumerate(self.net)
        ]

    def b1(self):
        """Build the stem: 7x7 stride-2 conv, batch norm, ReLU, 3x3 stride-2 max pool."""
        stem_layers = [
            nn.LazyConv2d(64, kernel_size=7, stride=2, padding=3),
            nn.LazyBatchNorm2d(),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        ]
        return nn.Sequential(*stem_layers)

    def block(self, num_residuals, num_channels, first_block=False):
        """Build one stage made of `num_residuals` Residual blocks.

        The first block of a stage uses a stride-2 1x1-conv shortcut unless
        `first_block` is True; every other block is a plain Residual.
        """
        def make_residual(idx):
            downsample = (idx == 0) and (not first_block)
            if downsample:
                return Residual(num_channels, use_1x1conv=True, strides=2)
            return Residual(num_channels)

        return nn.Sequential(*[make_residual(idx) for idx in range(num_residuals)])

    def forward(self, x):
        """
            Forward propagate `x` through every top-level child of `net` in order.

            Returns a tuple `(final_output, intermediate_output_list)` where the
            list holds the output of each top-level child (the last entry is the
            final output itself).
        """
        stage_outputs = []
        out = x
        for stage in self.net:
            out = stage(out)
            stage_outputs.append(out)
        return out, stage_outputs

print("Ready.")

Ready.


In [8]:
# Four stages of two residual blocks each, given as (num_residuals, num_channels)
# pairs with 64 -> 128 -> 256 -> 512 channels.
resnet_arch = ((2, 64), (2, 128), (2, 256), (2, 512))
resnet = ResNetClass(arch=resnet_arch)
print("Ready.")

Ready.


### Print model layers

In [9]:
# Random batch of 16 three-channel 224x224 inputs, placed on `device`,
# used to trace the output size of each top-level layer of the model.
x_torch = th.randn(16, 3, 224, 224).to(device)
print_model_layers(resnet, x_torch)

batch_size:[16]
[  ] layer:[          input] size:[  16x3x224x224]
[ 0] layer:[  sequential_00] size:[   16x64x56x56] numel:[   3211264]
[ 1] layer:[  sequential_01] size:[   16x64x56x56] numel:[   3211264]
[ 2] layer:[  sequential_02] size:[  16x128x28x28] numel:[   1605632]
[ 3] layer:[  sequential_03] size:[  16x256x14x14] numel:[    802816]
[ 4] layer:[  sequential_04] size:[    16x512x7x7] numel:[    401408]
[ 5] layer:[  sequential_05] size:[         16x10] numel:[       160]


### Print model parameters

In [10]:
# List every named parameter of the model with its shape and element count.
# NOTE(review): the concrete shapes in the output imply the lazy modules were already
# materialized, presumably by the forward pass in the previous cell; confirm before reordering cells.
print_model_parameters(resnet)

[ 0] parameter:[             net.0.0.weight] shape:[    64x3x7x7] numel:[      9408]
[ 1] parameter:[               net.0.0.bias] shape:[          64] numel:[        64]
[ 2] parameter:[             net.0.1.weight] shape:[          64] numel:[        64]
[ 3] parameter:[               net.0.1.bias] shape:[          64] numel:[        64]
[ 4] parameter:[      net.b2.0.conv1.weight] shape:[   64x64x3x3] numel:[     36864]
[ 5] parameter:[        net.b2.0.conv1.bias] shape:[          64] numel:[        64]
[ 6] parameter:[      net.b2.0.conv2.weight] shape:[   64x64x3x3] numel:[     36864]
[ 7] parameter:[        net.b2.0.conv2.bias] shape:[          64] numel:[        64]
[ 8] parameter:[        net.b2.0.bn1.weight] shape:[          64] numel:[        64]
[ 9] parameter:[          net.b2.0.bn1.bias] shape:[          64] numel:[        64]
[10] parameter:[        net.b2.0.bn2.weight] shape:[          64] numel:[        64]
[11] parameter:[          net.b2.0.bn2.bias] shape:[          64]