# ResNet

## Sequential API 

### Basic Convolution Unit

In [1]:
import tensorflow as tf


class ConvBlock(tf.keras.layers.Layer):
    """A Conv2D + BatchNorm block with optional down-sampling.

    The block applies a bias-free convolution followed by batch
    normalization. No activation is applied inside the block.
    """

    def __init__(
        self,
        filters: int,
        *,
        shortcut: bool = False,
        downsample: bool = False,
    ):
        """Initialize the ConvBlock.

        Args:
            filters (int): number of output channels.
            shortcut (bool): kernel selection flag. If True, use a 1x1
                convolution with "valid" padding (the form used for
                projection shortcuts); otherwise use a 3x3 convolution
                with "same" padding.
            downsample (bool): down-sampling flag. If True the convolution
                uses stride 2, otherwise stride 1.
        """
        super().__init__()
        self.filters = filters
        self.downsample = downsample

        # stride 2 down-samples the spatial dimensions, stride 1 keeps them
        strides = 2 if downsample else 1
        if shortcut:
            # a 1x1 kernel never reaches past the input border, so no
            # padding is required
            kernel_size = 1
            padding = "valid"
        else:
            # a 3x3 kernel needs zero padding to keep the spatial dimensions
            kernel_size = 3
            padding = "same"

        self._model = tf.keras.Sequential()
        self._model.add(
            tf.keras.layers.Conv2D(
                filters=filters,
                kernel_size=kernel_size,
                strides=strides,
                use_bias=False,
                padding=padding,
            ))
        self._model.add(tf.keras.layers.BatchNormalization())

    def call(self, x: tf.Tensor, *, training: bool = False) -> tf.Tensor:
        """Forward pass of the ConvBlock.

        Args:
            x (tf.Tensor): input tensor
            training (bool): training flag i.e. batch norm uses different
                behaviour for training and inference

        Returns:
            tf.Tensor: output tensor
        """
        return self._model(x, training=training)

In [2]:
# Batch size, height, width and channels of the random test input.
N, H, W, C = (2, 56, 56, 64)
x = tf.random.normal(shape=(N, H, W, C))

# Stride-1 variants: 3x3 kernel, then 1x1 kernel.
m1 = ConvBlock(filters=C, shortcut=False, downsample=False)
m2 = ConvBlock(filters=C, shortcut=True, downsample=False)
# Stride-2 variants: 3x3 kernel, then 1x1 kernel.
m3 = ConvBlock(filters=C, shortcut=False, downsample=True)
m4 = ConvBlock(filters=C, shortcut=True, downsample=True)

# Each block must keep the channel count and divide the spatial size by
# its stride (1 keeps 56x56, 2 gives 28x28).
assert all(
    block(x, training=True).shape == (N, H // stride, W // stride, C)
    for block, stride in ((m1, 1), (m2, 1), (m3, 2), (m4, 2))
)

2023-08-18 00:58:01.282349: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1


### Bottleneck Block

In [3]:
class BottleNeck(tf.keras.layers.Layer):
    """Bottleneck residual block: 1x1 -> 3x3 -> 1x1 convolutions plus a skip."""

    def __init__(self, filters: int, *, ds: bool = False, lead: bool = False):
        """Build the shortcut and residual branches.

        Args:
            filters (int): channel width of the first two convolutions; the
                block outputs ``filters * 4`` channels.
            ds (bool): if True, the 3x3 convolution and the projection
                shortcut (when present) use stride 2.
            lead (bool): if True, the shortcut is a 1x1 ConvBlock
                projection; otherwise the input is passed through unchanged.
        """
        super().__init__()
        expanded = filters * 4

        # Only a leading block projects its input; the others reuse it as-is.
        self._shortcut = (
            ConvBlock(expanded, shortcut=True, downsample=ds) if lead else None
        )

        # Residual branch. The last ConvBlock has no ReLU after it because
        # the activation is applied after the addition in `call`.
        self._body = tf.keras.Sequential()
        for layer in (
            ConvBlock(filters, shortcut=True, downsample=False),
            tf.keras.layers.ReLU(),
            ConvBlock(filters, shortcut=False, downsample=ds),
            tf.keras.layers.ReLU(),
            ConvBlock(expanded, shortcut=True, downsample=False),
        ):
            self._body.add(layer)

    def call(self, x: tf.Tensor, *, training: bool = False) -> tf.Tensor:
        """Run both branches, add them, and apply ReLU.

        Args:
            x (tf.Tensor): input tensor
            training (bool): training flag forwarded to both branches

        Returns:
            tf.Tensor: ReLU of (shortcut + residual)
        """
        if self._shortcut is not None:
            skip = self._shortcut(x, training=training)
        else:
            skip = x
        branch = self._body(x, training=training)
        return tf.nn.relu(skip + branch)

In [4]:
# Shape (batch, height, width, channels) of the input (1) and of the
# expected output of conv2_x .. conv5_x (2 .. 5).
N1, H1, W1, C1 = (2, 56, 56, 64)
N2, H2, W2, C2 = (2, 56, 56, 256)
N3, H3, W3, C3 = (2, 28, 28, 512)
N4, H4, W4, C4 = (2, 14, 14, 1024)
N5, H5, W5, C5 = (2, 7, 7, 2048)
y1 = tf.random.normal(shape=(N1, H1, W1, C1))

# conv2_x: one leading block without down-sampling, then two plain blocks
y2 = BottleNeck(C1, ds=False, lead=True)(y1)
for _repeat in range(2):
    y2 = BottleNeck(C1, ds=False)(y2)
assert y2.shape == (N2, H2, W2, C2)

# conv3_x: the leading block down-samples, then two plain blocks
y3 = BottleNeck(C2 // 2, ds=True, lead=True)(y2)
for _repeat in range(2):
    y3 = BottleNeck(C2 // 2, ds=False)(y3)
assert y3.shape == (N3, H3, W3, C3)

# conv4_x: the leading block down-samples, then two plain blocks
y4 = BottleNeck(C3 // 2, ds=True, lead=True)(y3)
for _repeat in range(2):
    y4 = BottleNeck(C3 // 2, ds=False)(y4)
assert y4.shape == (N4, H4, W4, C4)

# conv5_x: the leading block down-samples, then two plain blocks
y5 = BottleNeck(C4 // 2, ds=True, lead=True)(y4)
for _repeat in range(2):
    y5 = BottleNeck(C4 // 2, ds=False)(y5)
assert y5.shape == (N5, H5, W5, C5)

### ResNet-50

In [5]:
def bottleneck_stack(
    filter: int,
    num: int,
    *,
    ds: bool = False,
) -> tf.keras.Sequential:
    """Chain BottleNeck blocks into a single Sequential model.

    The first block is always a leading block (projection shortcut) and is
    the only one that receives the down-sampling flag. The remaining
    ``num - 1`` blocks are plain blocks without down-sampling.

    Args:
        filter (int): ``filters`` argument passed to every BottleNeck
        num (int): total number of BottleNeck blocks in the stack
        ds (bool): down-sampling flag for the leading block

    Returns:
        tf.keras.Sequential: the assembled stack
    """
    blocks = tf.keras.Sequential()
    blocks.add(BottleNeck(filter, ds=ds, lead=True))

    followers = num - 1
    for _ in range(followers):
        blocks.add(BottleNeck(filter, ds=False, lead=False))

    return blocks


class DeepResNet(tf.keras.Model):
    """Bottleneck style ResNet with stage depths 3, 4, 6 and 3."""

    def __init__(self, num_classes: int):
        """Assemble stem, bottleneck stages and classification head.

        Args:
            num_classes (int): number of units of the final Dense layer
        """
        super().__init__()
        self._model = tf.keras.Sequential()
        add = self._model.add

        # stem: 7x7 stride-2 convolution, batch norm, ReLU, 3x3 max pooling
        add(
            tf.keras.layers.Conv2D(
                filters=64,
                kernel_size=7,
                strides=2,
                padding="same",
                use_bias=False,
            ))
        add(tf.keras.layers.BatchNormalization())
        add(tf.keras.layers.ReLU())
        add(tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding="same"))

        # (filters, number of blocks, down-sample) for each stage
        stages = (
            (64, 3, False),
            (128, 4, True),
            (256, 6, True),
            (512, 3, True),
        )
        for width, depth, shrink in stages:
            add(bottleneck_stack(width, depth, ds=shrink))

        # head: global average pooling, then a Dense layer with no activation
        add(tf.keras.layers.GlobalAvgPool2D())
        add(tf.keras.layers.Dense(num_classes))

    def call(self, x: tf.Tensor, *, training: bool = False) -> tf.Tensor:
        """Forward pass through the wrapped Sequential model.

        Args:
            x (tf.Tensor): input tensor
            training (bool): training flag forwarded to the inner model

        Returns:
            tf.Tensor: output of the final Dense layer
        """
        return self._model(x, training=training)

In [6]:
# Per-sample input shape; the batch below holds two random samples.
in_shape = (224, 224, 3)
x = tf.random.normal(shape=(2,) + in_shape)
model = DeepResNet(1000)
y = model(x, training=False)
# one row of 1000 class scores per sample
assert y.shape == (2, 1000)
model.summary()

Model: "deep_res_net"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 sequential_56 (Sequential)  (2, 1000)                 25610152  
                                                                 
Total params: 25610152 (97.69 MB)
Trainable params: 25557032 (97.49 MB)
Non-trainable params: 53120 (207.50 KB)
_________________________________________________________________


## Functional API

In [7]:
import tensorflow as tf


def conv_norm(
    x: tf.Tensor,
    filters: int,
    ksize: int,
    *,
    downsample: bool = False,
    act: bool = True,
):
    """Apply a bias-free Conv2D, BatchNormalization and an optional ReLU.

    Args:
        x (tf.Tensor): Input tensor.
        filters (int): Number of output channels.
        ksize (int): Kernel size. A kernel size of 1 uses "valid" padding;
            any other size uses "same" padding.
        downsample (bool): Down-sampling flag. The convolution uses stride 2
            if True and stride 1 otherwise.
        act (bool): If True, apply ReLU after the batch normalization.

    Returns:
        tf.Tensor: Output of the batch normalization, passed through ReLU
            when ``act`` is True.
    """
    conv = tf.keras.layers.Conv2D(
        filters=filters,
        kernel_size=ksize,
        strides=2 if downsample else 1,
        use_bias=False,
        padding="valid" if ksize == 1 else "same",
    )
    features = conv(x)
    features = tf.keras.layers.BatchNormalization()(features)

    return tf.nn.relu(features) if act else features


def bottleneck(
    x: tf.Tensor,
    filters: int,
    *,
    ds: bool = False,
    lead: bool = False,
):
    """Bottleneck residual block built with the functional API.

    The residual branch is three conv_norm calls with kernel sizes 1, 3
    and 1. The first two produce ``filters`` channels and the last one
    produces ``filters * 4`` channels without an activation. The shortcut
    branch is either the unchanged input or, for a leading block, a 1x1
    conv_norm projection to ``filters * 4`` channels.

    Args:
        x (tf.Tensor): Input tensor.
        filters (int): Channel width of the first two convolutions.
        ds (bool, optional): Down-sampling flag, applied to the 3x3
            convolution and to the projection shortcut. Defaults to False.
        lead (bool, optional): If True, use the projection shortcut.
            Defaults to False.

    Returns:
        tf.Tensor: ReLU of the sum of the shortcut and residual branches.
    """
    expanded = filters * 4

    # shortcut branch (built first, before the residual branch)
    if not lead:
        skip = x
    else:
        skip = conv_norm(x, expanded, ksize=1, downsample=ds, act=False)

    # residual branch: squeeze (1x1), transform (3x3), expand (1x1)
    branch = conv_norm(x, filters, ksize=1, downsample=False, act=True)
    branch = conv_norm(branch, filters, ksize=3, downsample=ds, act=True)
    branch = conv_norm(branch, expanded, ksize=1, downsample=False, act=False)

    return tf.nn.relu(skip + branch)


def res_stack(
    x: tf.Tensor,
    filter: int,
    num: int,
    *,
    ds: bool = False,
) -> tf.Tensor:
    """Apply a stack of bottleneck residual blocks to a tensor.

    The first block is a leading block (projection shortcut) and is the
    only one that receives the down-sampling flag. It is followed by
    ``num - 1`` plain blocks.

    Args:
        x (tf.Tensor): Input tensor.
        filter (int): ``filters`` argument passed to every bottleneck block.
        num (int): total number of bottleneck blocks
        ds (bool): down-sampling flag for the leading block.

    Returns:
        tf.Tensor: output of the last bottleneck block (a tensor, not a
            ``tf.keras.Sequential`` model)
    """
    x = bottleneck(x, filter, ds=ds, lead=True)
    for _ in range(1, num):
        x = bottleneck(x, filter, ds=False, lead=False)

    return x


def deep_resnet(input_shape, num_classes):
    """Build the bottleneck ResNet as a functional-API Keras model.

    Args:
        input_shape (tuple): Per-sample shape passed to the Input layer.
        num_classes (int): Number of units of the final Dense layer.

    Returns:
        tf.keras.Model: Model mapping the input to the Dense layer output.
    """
    inputs = tf.keras.layers.Input(shape=input_shape)

    # stem: 7x7 stride-2 conv + BN + ReLU, then 3x3 stride-2 max pooling
    features = conv_norm(inputs, 64, ksize=7, downsample=True, act=True)
    features = tf.nn.max_pool2d(features, ksize=3, strides=2, padding="SAME")

    # (filters, number of blocks, down-sample) for each residual stage
    stages = (
        (64, 3, False),
        (128, 4, True),
        (256, 6, True),
        (512, 3, True),
    )
    for width, depth, shrink in stages:
        features = res_stack(features, width, depth, ds=shrink)

    # head: global average pooling followed by a Dense layer
    pooled = tf.keras.layers.GlobalAvgPool2D()(features)
    outputs = tf.keras.layers.Dense(num_classes)(pooled)

    return tf.keras.Model(inputs=inputs, outputs=outputs)

In [8]:
# Per-sample input shape; the batch below holds two random samples.
in_shape = (224, 224, 3)
x = tf.random.normal(shape=(2,) + in_shape)
model = deep_resnet(input_shape=in_shape, num_classes=1000)
model.summary()
y = model(x, training=False)
# one row of 1000 class scores per sample
assert y.shape == (2, 1000)

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 224, 224, 3)]        0         []                            
                                                                                                  
 conv2d_97 (Conv2D)          (None, 112, 112, 64)         9408      ['input_1[0][0]']             
                                                                                                  
 batch_normalization_97 (Ba  (None, 112, 112, 64)         256       ['conv2d_97[0][0]']           
 tchNormalization)                                                                                
                                                                                                  
 tf.nn.relu (TFOpLambda)     (None, 112, 112, 64)         0         ['batch_normalization_97[0

## Reference

- [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385)
- [ResNet Implementation using TensorFlow-2.0](https://github.com/calmiLovesAI/TensorFlow2.0_ResNet)
- [PyTorch ResNet](https://github.com/pytorch/vision/blob/main/torchvision/models/resnet.py)