In [1]:
import torch
import torch.nn as nn

import numpy as np

In [2]:
from resnet import conv3x3

# Documentation

## Conv2d

This is PyTorch's implementation of a 2D convolutional layer. Note that the layer exists as its own object. This seems counterintuitive, but it makes chaining layers together with different effects much easier. This is commonly done by creating a subclass which extends nn.Conv2d (for example) or nn.Module and implements a forward(self, x) method, indicating how an input to the layer (or layer block, etc.) is passed through.

### This code excerpt does the following:
- Generates a random tensor of size (batch_size, channel_size, h, w)
- Pads tensor with (left, right, top, bot)
- Convolves tensor with a convolutional layer of shape (in_channels, out_channels, (h, w)) (This means that the number of filters in the convolutional layer is equal to out_channels)

### Note:
- Calling nn.Conv2d(...) constructs a layer object that is itself callable. Calling that object on an input tensor passes the tensor through the filters, i.e. performs the convolution
- Conv2d defaults to performing "valid" convolution (padding=0). One way to get "same"-sized output is to define a custom "Conv2dAuto" class which subclasses Conv2d and implements the desired padding. (See resnet_test.ipynb) Newer PyTorch releases also accept `padding='same'` directly for stride-1 convolutions.

In [3]:
# Batch of 64 random 3-channel 64x64 inputs, laid out as (batch, channels, h, w).
x = torch.randn(64, 3, 64, 64)

# No padding with a 3x3 kernel: the displayed shape loses 2 pixels per spatial dimension.
nn.Conv2d(in_channels=3, out_channels=3, kernel_size=(3, 3), stride=1, padding=0)(x).shape

torch.Size([64, 3, 62, 62])

In [4]:
# Convolution block with auto padding (half of the kernel size, scaled by the dilation)
class Conv2dAuto(nn.Conv2d):
    """``nn.Conv2d`` that chooses its own padding from the kernel size.

    Accepts exactly the same arguments as ``nn.Conv2d``. Any ``padding`` value
    passed by the caller is replaced after construction with
    ``dilation * (kernel_size // 2)`` per spatial dimension, so that an
    odd-sized kernel with stride 1 leaves the spatial size of the input
    unchanged, including when ``dilation`` is greater than 1.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Overrides the padding parameter. ``kernel_size`` and ``dilation`` are
        # read back from the parent so that int and tuple arguments are both
        # handled; with the default dilation of 1 this is simply kernel // 2.
        # NOTE(review): for a padding_mode other than 'zeros' the parent may
        # precompute its padding during __init__ -- confirm this override is
        # honoured in that case.
        self.padding = tuple(
            dilation * (kernel // 2)
            for kernel, dilation in zip(self.kernel_size, self.dilation)
        )
        
# padding=0 is passed on purpose: Conv2dAuto replaces it, so the displayed shape matches the input.
Conv2dAuto(3, 3, kernel_size=(3, 3), stride=1, padding=0)(x).shape

torch.Size([64, 3, 64, 64])

## Residual Mapping (Skip Connections)

- ResidualBlock extends nn.Module, which is meant to represent extensions or combinations of layers
- When you initialize a ResidualBlock now, it also returns a callable layer object, just like Conv2d
    - For this to work, ResidualBlock must implement `__init__()` and `forward()`
    - the `@property` decorator here defines a getter: the method is read like an attribute (`block.should_apply_shortcut`, no parentheses)

In [29]:
def activation_func(activation):
    """Return a newly constructed activation module selected by name.

    Args:
        activation: One of ``'relu'``, ``'leaky_relu'``, ``'selu'`` or
            ``'none'`` (``'none'`` yields ``nn.Identity``).

    Returns:
        The matching ``nn.Module``; the non-identity activations are created
        with ``inplace=True``.

    Raises:
        KeyError: If ``activation`` is not one of the names above.
    """
    # Name -> constructor table; only the requested activation is built.
    factories = {
        'relu': lambda: nn.ReLU(inplace=True),
        'leaky_relu': lambda: nn.LeakyReLU(negative_slope=0.01, inplace=True),
        'selu': lambda: nn.SELU(inplace=True),
        'none': lambda: nn.Identity(),
    }
    build = factories[activation]
    return build()

def conv_bn(in_channels, out_channels, conv, *args, **kwargs):
    """Build a convolution followed by 2D batch normalisation.

    Args:
        in_channels: Passed to ``conv`` as its first argument.
        out_channels: Passed to ``conv`` as its second argument and used as
            the feature count of the ``nn.BatchNorm2d``.
        conv: Callable returning the convolution layer.
        *args, **kwargs: Forwarded unchanged to ``conv``.

    Returns:
        ``nn.Sequential`` holding the convolution and then the batch norm.
    """
    conv_layer = conv(in_channels, out_channels, *args, **kwargs)
    batch_norm = nn.BatchNorm2d(out_channels)
    return nn.Sequential(conv_layer, batch_norm)

# Tried to simplify some things, idk if this works fully
class ResidualBlock(nn.Module):
    """Two conv + batch-norm layers wrapped in a skip connection.

    ``forward`` sends the input through ``blocks`` (conv-bn, activation,
    conv-bn), adds the input back (projected by ``shortcut`` when its shape
    would not otherwise match), and applies the activation to the sum.

    Args:
        in_channels: Number of channels of the incoming tensor.
        out_channels: Base channel width of the block.
        activation: Name understood by ``activation_func``.
        expansion: Multiplier applied to ``out_channels`` to obtain the
            block's output width (see ``expanded_channels``).
        downsampling: Stride used by the first convolution and by the
            shortcut projection.
        conv: Layer factory, invoked through ``conv_bn`` as
            ``conv(in_channels, out_channels, bias=False[, stride=...])``.
            Presumably a 3x3, padding-1 convolution so that the main path and
            the 1x1 shortcut produce the same spatial size -- verify against
            the ``resnet`` module.
        *args, **kwargs: Accepted for signature compatibility; not used.
    """

    def __init__(
            self,
            in_channels,
            out_channels,
            activation='relu',
            expansion=1,
            downsampling=1,
            conv=conv3x3,
            *args,
            **kwargs
        ):
        super().__init__()
        self.in_channels, self.out_channels, self.activation = in_channels, out_channels, activation
        self.activate = activation_func(activation)
        self.expansion, self.downsampling, self.conv = expansion, downsampling, conv
        # 1x1 projection that brings the input to the main path's channel
        # count and stride; left as None when the input can be added as-is.
        self.shortcut = nn.Sequential(
            nn.Conv2d(
                self.in_channels,
                self.expanded_channels,
                kernel_size=1,
                stride=self.downsampling,
                bias=False
            ),
            nn.BatchNorm2d(self.expanded_channels)
        ) if self.should_apply_shortcut else None
        # Main path. The last convolution emits expanded_channels so that its
        # output always matches the shortcut (identical to out_channels for
        # the default expansion of 1).
        self.blocks = nn.Sequential(
            conv_bn(self.in_channels, self.out_channels, conv=self.conv, bias=False, stride=self.downsampling),
            activation_func(self.activation),
            conv_bn(self.out_channels, self.expanded_channels, conv=self.conv, bias=False),
        )

    def forward(self, x):
        """Return ``activate(blocks(x) + residual)``."""
        residual = x
        if self.should_apply_shortcut:
            residual = self.shortcut(x)
        x = self.blocks(x)
        # In-place add on the freshly computed block output, not on the input.
        x += residual
        x = self.activate(x)
        return x

    @property
    def expanded_channels(self):
        """Channel count of the block's output: ``out_channels * expansion``."""
        return self.out_channels * self.expansion

    @property
    def should_apply_shortcut(self):
        """True when the raw input cannot be added to the main path's output.

        That is the case when the channel count changes or when the main path
        is strided (``downsampling != 1``).
        """
        return self.in_channels != self.expanded_channels or self.downsampling != 1

In [32]:
# Fresh random batch laid out as (batch, channels, h, w).
x = torch.randn(64, 3, 64, 64)

# Equal in/out channel counts and default stride: the displayed shape matches the input.
y = ResidualBlock(in_channels=3, out_channels=3)(x)
y.shape

torch.Size([64, 3, 64, 64])