# ResNet

> Residual block (`ResBlock`), a configurable `ResNet` backbone, and the `ResNetX` classifier wrapper

In [None]:
#| default_exp models.resnet

In [None]:
#| hide
%load_ext autoreload
%autoreload 2
from nbdev.showdoc import *

In [None]:
#| export
import torch.nn as nn

import torch
from torchinfo import summary
from torchvision.transforms import transforms
import torch.nn.functional as F

from omegaconf import OmegaConf
from hydra.utils import instantiate

from matplotlib import pyplot as plt
import math

from nimrod.models.conv import ConvBlock, PreActivationConvBlock
from nimrod.models.core import Classifier, weight_init
from nimrod.utils import get_device, set_seed
from nimrod.image.datasets import ImageDataModule

from typing import List, Optional, Callable, Any, Type
import logging
from functools import partial


In [None]:
#| export
# module-level logger named after this module; used by the ResNet / ResNetX constructors below
logger = logging.getLogger(__name__)
# The recorded cell output is "Seed set to 42".
# NOTE(review): this cell is exported, so set_seed() presumably also runs whenever the
# generated module is imported — confirm this import-time side effect is intended.
set_seed()

Seed set to 42


## Res Block

In [None]:
#| export 
class ResBlock(nn.Module):
    """Residual block: two stacked conv blocks summed with a skip connection.

    Main path: a stride-1 conv block (`in_channels` -> `out_channels`) followed by a
    conv block using the requested `stride` and no activation.
    Skip path: the input is average-pooled when `stride != 1` and projected with a
    1x1 conv block (no activation) when the channel counts differ.
    The two paths are added and the activation is applied once, on the sum.
    """
    def __init__(
            self,
            in_channels:int, # Number of input channels
            out_channels:int, # Number of output channels
            stride:int=2, # Stride
            kernel_size:int=3, # Kernel size
            activation:Optional[Type[nn.Module]]=nn.ReLU, # Activation class; None means no activation (nn.Identity)
            normalization:Optional[Type[nn.Module]]=nn.BatchNorm2d, # Normalization class
            pre_activation:bool=False # replace conv block by pre-act block. used in unets e.g.
        ):

        super().__init__()
        # `activation` is Optional: calling None() would raise a TypeError, so fall back to a no-op
        self.activation = nn.Identity() if activation is None else activation()

        # same keyword defaults for every conv block of this residual block
        block_cls = PreActivationConvBlock if pre_activation else ConvBlock
        conv_ = partial(block_cls, stride=1, activation=activation, normalization=normalization)

        # conv stride 1 to be able to go deeper while keeping the same spatial resolution
        c1 = conv_(in_channels, out_channels, stride=1, kernel_size=kernel_size)
        # second conv carries the requested stride; its activation is disabled because
        # the activation is applied to the whole layer at the very end, c.f. forward
        c2 = conv_(out_channels, out_channels, stride=stride, activation=None, kernel_size=kernel_size)
        self.conv_layer = nn.Sequential(c1, c2)

        if in_channels == out_channels:
            self.id = nn.Identity()
        else:
            # 1x1 conv block so the skip path matches the number of output channels
            self.id = conv_(in_channels, out_channels, kernel_size=1, stride=1, activation=None)

        if stride == 1:
            self.pooling = nn.Identity()
        else:
            # downsample the skip path by the same stride; ceil_mode rounds odd sizes up
            self.pooling = nn.AvgPool2d(stride, ceil_mode=True)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return activation(conv_layer(x) + id(pooling(x)))."""
        return self.activation(self.conv_layer(x) + self.id(self.pooling(x)))

### Usage

In [None]:
# Downsampling block: 3 -> 8 channels, stride 2; the LeakyReLU slope is bound through a partial
model = ResBlock(3, 8, stride=2, activation=partial(nn.LeakyReLU, negative_slope=0.1), normalization=nn.BatchNorm2d)
x = torch.randn(1, 3, 32, 32)
y = model(x)
# stride 2 halves the 32x32 input: the recorded output is torch.Size([1, 8, 16, 16])
print(y.shape)
# per-layer output shapes and parameter counts, two levels deep
summary(model=model, input_size=(1, 3, 32, 32), depth=2)



torch.Size([1, 8, 16, 16])


Layer (type:depth-idx)                   Output Shape              Param #
ResBlock                                 [1, 8, 16, 16]            --
├─Sequential: 1-1                        [1, 8, 16, 16]            --
│    └─ConvBlock: 2-1                    [1, 8, 32, 32]            232
│    └─ConvBlock: 2-2                    [1, 8, 16, 16]            592
├─AvgPool2d: 1-2                         [1, 3, 16, 16]            --
├─ConvBlock: 1-3                         [1, 8, 16, 16]            --
│    └─Sequential: 2-3                   [1, 8, 16, 16]            40
├─LeakyReLU: 1-4                         [1, 8, 16, 16]            --
Total params: 864
Trainable params: 864
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 0.37
Input size (MB): 0.01
Forward/backward pass size (MB): 0.20
Params size (MB): 0.00
Estimated Total Size (MB): 0.21

## ResNet

In [None]:
#| export
class ResNet(nn.Module):
    """Sequential stack of `ResBlock`s followed by `nn.Flatten`.

    The first block maps `n_features[0]` -> `n_features[1]` with stride 1; each following
    pair of entries in `n_features` adds a stride-2 block, and a final stride-2 block maps
    `n_features[-1]` -> `num_classes` channels before flattening.
    """
    def __init__(
            self,
            n_features: List[int]=[1, 8, 16, 32, 64, 32], # Number of input & output channels
            num_classes: int=10, # Number of classes
            activation:Optional[Type[nn.Module]]=nn.ReLU, # Activation function if None set to nn.Identity
            normalization:Optional[Type[nn.Module]]=nn.BatchNorm2d, # Normalization function if None set to nn.Identity
            weight_initialization: bool = False, # weight init with kaiming
            pre_activation: bool = False # pre-activation block for deep nets
        ):

        super().__init__()
        logger.info("ResNet: init")
        res_ = partial(ResBlock, stride=2, activation=activation, normalization=normalization, pre_activation=pre_activation)

        # first block keeps the spatial resolution (stride 1)
        layers = [res_(in_channels=n_features[0], out_channels=n_features[1], stride=1)]

        # remaining feature pairs use the partial's default stride of 2
        layers += [
            res_(in_channels=n_features[i], out_channels=n_features[i+1])
            for i in range(1, len(n_features)-1)
        ]

        # last layer back to n_classes and flatten
        # NOTE: Flatten keeps any remaining spatial positions, so the output width is
        # num_classes * H * W of the last block's output (num_classes only when H = W = 1)
        layers.append(res_(in_channels=n_features[-1], out_channels=num_classes))
        layers.append(nn.Flatten())

        self.layers = nn.Sequential(*layers)
        if weight_initialization:
            logger.info("Init conv & linear with kaiming")
            if isinstance(activation, partial) and activation.func is nn.LeakyReLU:
                logger.info("LeakyRelu layers weight init")
                # forward the slope bound in the partial to the initializer
                init_fn = partial(weight_init, leaky=activation.keywords.get('negative_slope'))
            else:
                # Also covers partials of other activations: previously no initializer was
                # bound in that case and self.apply(...) raised UnboundLocalError.
                logger.info("ReLU layers weight init")
                init_fn = weight_init
            self.apply(init_fn)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run `x` through the block stack and return the flattened result."""
        return self.layers(x)

### Usage

In [None]:
# Batch of 64 three-channel 28x28 inputs; n_features[0] is the first block's in_channels, so it is 3 here
x = torch.randn(64, 3, 28, 28)
model = ResNet(
    n_features=[3, 8, 16, 32, 64, 32],
    num_classes=10,
    # ResNet reads negative_slope from this partial's keywords and forwards it to weight_init
    activation=partial(nn.LeakyReLU, negative_slope=0.1),
    # activation=nn.ReLU,
    normalization=torch.nn.BatchNorm2d,
    weight_initialization=True,
    pre_activation=True
    )
y = model(x)
# recorded output: torch.Size([64, 10])
print(y.shape)
# summary(model=model, input_size=(64, 3, 28, 28), depth=10)

[13:12:27] INFO - ResNet: init


[13:12:27] INFO - Init conv & linear with kaiming
[13:12:27] INFO - LeakyRelu layers weight init


torch.Size([64, 10])


### config

In [None]:
# Build the network from the YAML config instead of by hand
cfg = OmegaConf.load('../config/model/image/resnetx.yaml')
B, C, H, W = 64, 1, 28, 28
x = torch.randn(B, C, H, W)
# only the `nnet` sub-config is instantiated here
nnet = instantiate(cfg.nnet)
y = nnet(x)
# recorded output: torch.Size([64, 40])
# NOTE(review): 40 is presumably num_classes times the spatial positions left before
# Flatten — confirm against the config that this is the intended output width.
print(y.shape)
summary(nnet, input_size=(B, C, H, W), depth=10)

[14:43:51] INFO - ResNet: init


torch.Size([64, 40])


Layer (type:depth-idx)                             Output Shape              Param #
ResNet                                             [64, 40]                  --
├─Sequential: 1-1                                  [64, 40]                  --
│    └─ResBlock: 2-1                               [64, 8, 28, 28]           --
│    │    └─Sequential: 3-1                        [64, 8, 28, 28]           --
│    │    │    └─ConvBlock: 4-1                    [64, 8, 28, 28]           --
│    │    │    │    └─Sequential: 5-1              [64, 8, 28, 28]           --
│    │    │    │    │    └─Conv2d: 6-1             [64, 8, 28, 28]           72
│    │    │    │    │    └─BatchNorm2d: 6-2        [64, 8, 28, 28]           16
│    │    │    │    │    └─ReLU: 6-3               [64, 8, 28, 28]           --
│    │    │    └─ConvBlock: 4-2                    [64, 8, 28, 28]           --
│    │    │    │    └─Sequential: 5-2              [64, 8, 28, 28]           --
│    │    │    │    │    └─Conv2d: 

## ResNetX


In [None]:
#| export

class ResNetX(Classifier):
    """`Classifier` subclass wrapping a `ResNet`; predictions are the argmax over dim 1."""
    def __init__(
        self,
        nnet:ResNet,
        num_classes:int,
        optimizer:Callable[...,torch.optim.Optimizer], # optimizer,
        scheduler: Optional[Callable[...,Any]]=None, # scheduler
        ):

        logger.info("ResNetX: init")
        # all arguments are handed unchanged to the Classifier base class
        super().__init__(nnet=nnet, num_classes=num_classes, optimizer=optimizer, scheduler=scheduler)

    def _step(self, batch, batch_idx):
        """Return (loss, predicted class indices, targets) for one (inputs, targets) batch."""
        inputs, targets = batch
        logits = self.forward(inputs)
        # self.loss is not defined in this class; expected to come from the base class
        step_loss = self.loss(logits, targets)
        return step_loss, logits.argmax(dim=1), targets

    def predict_step(self, batch, batch_idx, dataloader_idx=0):
        """Return predicted class indices; the batch must still unpack into two items."""
        inputs, _ = batch
        return self.forward(inputs).argmax(dim=1)

### Usage

- An optimizer (and optionally a scheduler) must be instantiated first and passed in when building an `X` model such as `ResNetX`

In [None]:
# optimizer and scheduler are instantiated from their own configs first;
# both are handed to the model factory further down
cfg = OmegaConf.load('../config/optimizer/adam_w.yaml')
optimizer = instantiate(cfg)

cfg = OmegaConf.load('../config/scheduler/step_lr.yaml')
scheduler = instantiate(cfg)

cfg = OmegaConf.load('../config/model/image/resnetx.yaml')

B, C, H, W = 64, 1, 28, 28
x = torch.randn(B, C, H, W)

# instantiate(cfg) returns a callable here (presumably the config sets `_partial_: true` — TODO confirm);
# calling it with optimizer and scheduler finishes building the model
nnet = instantiate(cfg)(optimizer=optimizer, scheduler=scheduler)
y = nnet(x)
# recorded output: torch.Size([64, 40])
print(y.shape)

[14:57:43] INFO - ResNet: init
[14:57:43] INFO - ResNetX: init
[14:57:43] INFO - Classifier: init


torch.Size([64, 40])


/user/s/slegroux/miniconda3/envs/nimrod/lib/python3.11/site-packages/lightning/pytorch/utilities/parsing.py:209: Attribute 'nnet' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['nnet'])`.


In [None]:
# reuses `nnet` and B, C, H, W defined in the previous cell
summary(nnet, input_size=(B, C, H, W), depth=5)

Layer (type:depth-idx)                                  Output Shape              Param #
ResNetX                                                 [64, 40]                  --
├─ResNet: 1-1                                           [64, 40]                  --
│    └─Sequential: 2-1                                  [64, 40]                  --
│    │    └─ResBlock: 3-1                               [64, 8, 28, 28]           --
│    │    │    └─Sequential: 4-1                        [64, 8, 28, 28]           --
│    │    │    │    └─ConvBlock: 5-1                    [64, 8, 28, 28]           88
│    │    │    │    └─ConvBlock: 5-2                    [64, 8, 28, 28]           592
│    │    │    └─Identity: 4-2                          [64, 1, 28, 28]           --
│    │    │    └─ConvBlock: 4-3                         [64, 8, 28, 28]           --
│    │    │    │    └─Sequential: 5-3                   [64, 8, 28, 28]           24
│    │    │    └─LeakyReLU: 4-4                         [64

In [None]:
#| hide
# regenerate the library modules from the notebooks' `#| export` cells
import nbdev; nbdev.nbdev_export()