# ResNet

## Basic Convolution Unit

In [1]:
import tensorflow as tf


class ConvBlock(tf.keras.layers.Layer):
    """A bias-free Conv2D followed by BatchNormalization.

    No activation is applied inside the block; callers add their own
    non-linearity where needed.
    """

    def __init__(
        self,
        filters: int,
        *,
        shortcut: bool = False,
        downsample: bool = False,
        **kwargs,
    ):
        """Initialize the ConvBlock.

        Args:
            filters (int): number of output channels.
            shortcut (bool): kernel selection flag. If True, use a 1x1
                convolution with "valid" padding (only the number of
                channels is changed); otherwise use a 3x3 convolution
                with "same" (zero) padding.
            downsample (bool): if True the convolution uses stride 2,
                otherwise stride 1.
            **kwargs: standard base-layer arguments (e.g. ``name``)
                forwarded to ``tf.keras.layers.Layer``.
        """
        super().__init__(**kwargs)
        self.filters = filters
        self.shortcut = shortcut
        self.downsample = downsample

        # Stride 2 halves the spatial dimensions; stride 1 keeps them.
        strides = 2 if downsample else 1
        if shortcut:
            # A 1x1 kernel never reaches outside the input, so no padding
            # is required.
            kernel_size = 1
            padding = "valid"
        else:
            # A 3x3 kernel needs zero padding so that only the stride
            # (and not the kernel size) affects the output resolution.
            kernel_size = 3
            padding = "same"

        self._model = tf.keras.Sequential()
        self._model.add(
            tf.keras.layers.Conv2D(
                filters=filters,
                kernel_size=kernel_size,
                strides=strides,
                # The convolution is bias-free; BatchNormalization follows.
                use_bias=False,
                padding=padding,
            ))
        self._model.add(tf.keras.layers.BatchNormalization())

    def call(self, x: tf.Tensor, *, training: bool = False) -> tf.Tensor:
        """Forward pass of the ConvBlock.

        Args:
            x (tf.Tensor): input tensor
            training (bool): training flag i.e. batch norm uses different
                behaviour for training and inference

        Returns:
            tf.Tensor: output of the convolution followed by batch norm
        """
        return self._model(x, training=training)

In [2]:
# Sanity-check ConvBlock output shapes for every kernel / stride combination.
N, H, W, C = 2, 56, 56, 64
x = tf.random.normal(shape=(N, H, W, C))

# m1: 3x3 kernel, stride 1    m2: 1x1 kernel, stride 1
# m3: 3x3 kernel, stride 2    m4: 1x1 kernel, stride 2
m1, m2, m3, m4 = (
    ConvBlock(C, shortcut=use_1x1, downsample=strided)
    for strided in (False, True)
    for use_1x1 in (False, True)
)

# Stride 1 keeps the spatial size, stride 2 halves it; channels stay at C.
full_res = (N, H, W, C)
half_res = (N, H // 2, W // 2, C)
for block, expected in (
    (m1, full_res),
    (m2, full_res),
    (m3, half_res),
    (m4, half_res),
):
    assert block(x, training=True).shape == expected

2023-08-17 01:46:11.503479: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1


## Bottleneck Block

In [3]:
class BottleNeck(tf.keras.layers.Layer):
    """A bottleneck residual block of the ResNet framework.

    The residual branch is a 1x1 -> 3x3 -> 1x1 stack of ConvBlocks with a
    ReLU after the first two; its output has ``4 * filters`` channels. The
    shortcut branch is added to it and a final ReLU is applied.
    """

    def __init__(
        self,
        filters: int,
        *,
        ds: bool = False,
        lead: bool = False,
        **kwargs,
    ):
        """Initialize the BottleNeck.

        Args:
            filters (int): number of channels of the two inner
                convolutions; the block outputs ``4 * filters`` channels.
            ds (bool): down-sampling flag. If True, the 3x3 convolution of
                the residual branch (and the projection shortcut, when
                present) uses stride 2. Without a projection shortcut the
                input is added unchanged, so ``ds=True`` normally needs
                ``lead=True`` for the shapes to match.
            lead (bool): leading block flag. If True, the shortcut is a
                1x1 projection ConvBlock to ``4 * filters`` channels;
                otherwise the input itself is used, so it must already be
                shape-compatible with the residual output.
            **kwargs: standard base-layer arguments (e.g. ``name``)
                forwarded to ``tf.keras.layers.Layer``.
        """
        super().__init__(**kwargs)
        # expanded output channels
        outs = filters * 4
        # projection shortcut (None means identity shortcut)
        if lead:
            self._shortcut = ConvBlock(outs, shortcut=True, downsample=ds)
        else:
            self._shortcut = None
        # a stack of 1x1, 3x3, 1x1 convolutional layers; only the 3x3
        # convolution is strided when down-sampling
        self._body = tf.keras.Sequential()
        self._body.add(ConvBlock(filters, shortcut=True, downsample=False))
        self._body.add(tf.keras.layers.ReLU())
        self._body.add(ConvBlock(filters, shortcut=False, downsample=ds))
        self._body.add(tf.keras.layers.ReLU())
        self._body.add(ConvBlock(outs, shortcut=True, downsample=False))

    def call(self, x: tf.Tensor, *, training: bool = False) -> tf.Tensor:
        """Forward pass of the BottleNeck.

        Args:
            x (tf.Tensor): input tensor
            training (bool): training flag forwarded to both branches
                (batch norm behaves differently for training and inference)

        Returns:
            tf.Tensor: ReLU of the sum of the shortcut and residual branches
        """
        # shortcut branch
        if self._shortcut is None:
            identity = x
        else:
            identity = self._shortcut(x, training=training)
        # residual branch
        residual = self._body(x, training=training)
        # the last ConvBlock has no activation: add first, then apply ReLU
        return tf.nn.relu(identity + residual)

In [4]:
# Expected tensor shapes: the stage input (1) and the outputs of the
# conv2_x .. conv5_x stages (2-5).
N1, H1, W1, C1 = 2, 56, 56, 64
N2, H2, W2, C2 = 2, 56, 56, 256
N3, H3, W3, C3 = 2, 28, 28, 512
N4, H4, W4, C4 = 2, 14, 14, 1024
N5, H5, W5, C5 = 2, 7, 7, 2048


def _run_stage(inputs, filters, *, downsample):
    """Pass `inputs` through three new BottleNecks; only the first leads."""
    # The leading block carries the projection shortcut and the stride.
    out = BottleNeck(filters, ds=downsample, lead=True)(inputs)
    # The remaining two blocks use the identity shortcut and stride 1.
    for _ in range(2):
        out = BottleNeck(filters, ds=False)(out)
    return out


y1 = tf.random.normal(shape=(N1, H1, W1, C1))

# conv2_x: spatial size kept, channels expanded to 4 * C1
y2 = _run_stage(y1, C1, downsample=False)
assert y2.shape == (N2, H2, W2, C2)

# conv3_x: spatial size halved
y3 = _run_stage(y2, C2 // 2, downsample=True)
assert y3.shape == (N3, H3, W3, C3)

# conv4_x: spatial size halved
y4 = _run_stage(y3, C3 // 2, downsample=True)
assert y4.shape == (N4, H4, W4, C4)

# conv5_x: spatial size halved
y5 = _run_stage(y4, C4 // 2, downsample=True)
assert y5.shape == (N5, H5, W5, C5)