### ResNet and DenseNet

In [1]:
import os
import time
import copy
import numpy as np
import matplotlib.pyplot as plt

import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.optim import lr_scheduler
from torchvision import datasets, models, transforms

# Ask PyTorch to use deterministic algorithm implementations so repeated runs are reproducible.
# NOTE(review): per the PyTorch docs, operations without a deterministic implementation raise a
# RuntimeError under this setting — confirm every op used later in the notebook is supported.
torch.use_deterministic_algorithms(True)

In [3]:
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions plus a shortcut connection.

    The main path is conv3x3 -> BN -> ReLU -> conv3x3 -> BN. The shortcut output
    is added to the main path and a final ReLU is applied to the sum.

    Args:
        input_num_planes: number of channels of the incoming tensor.
        num_planes: number of channels produced by both 3x3 convolutions.
        strd: stride of the first 3x3 convolution (and of the projection
            shortcut, when one is needed). Defaults to 1.
    """

    # Ratio between the block's output channels and ``num_planes``.
    multiplier = 1

    def __init__(self, input_num_planes, num_planes, strd=1):
        super(BasicBlock, self).__init__()
        self.conv_layer1 = nn.Conv2d(
            in_channels=input_num_planes,
            out_channels=num_planes,
            kernel_size=3,
            stride=strd,
            padding=1,
            bias=False,
        )
        self.batch_norm1 = nn.BatchNorm2d(num_planes)
        self.conv_layer2 = nn.Conv2d(
            in_channels=num_planes,
            out_channels=num_planes,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=False,
        )
        self.batch_norm2 = nn.BatchNorm2d(num_planes)

        # An empty Sequential returns its input unchanged, i.e. an identity shortcut.
        self.res_connection = nn.Sequential()
        # The identity shortcut cannot be added to the main path when the block
        # downsamples (stride > 1) or changes the channel count, so in those cases
        # the input is projected with a 1x1 convolution followed by BatchNorm.
        if strd > 1 or input_num_planes != self.multiplier * num_planes:
            self.res_connection = nn.Sequential(
                nn.Conv2d(
                    in_channels=input_num_planes,
                    out_channels=self.multiplier * num_planes,
                    kernel_size=1,
                    stride=strd,
                    bias=False,
                ),
                nn.BatchNorm2d(self.multiplier * num_planes),
            )

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        op = F.relu(self.batch_norm1(self.conv_layer1(x)))
        op = self.batch_norm2(self.conv_layer2(op))
        op += self.res_connection(x)
        op = F.relu(op)
        return op

In [4]:
# Instantiate torchvision's ResNet-50 with pretrained weights.
# NOTE(review): recent torchvision releases deprecate `pretrained=` in favour of the `weights=`
# argument — confirm against the installed torchvision version before switching.
model = models.resnet50(pretrained=True)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /Users/lyteatnyte/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:47<00:00, 2.16MB/s]


***DenseNet, or dense networks***, introduced the idea of connecting every convolutional layer with every
other layer within what is called a dense block. In addition, every dense block is connected to every other
dense block in the overall DenseNet. In the implementation below, a dense block is a module that applies a
1x1 convolution followed by a 3x3 convolution and then concatenates the result with its own input, which is
what makes the connections dense.

In [5]:
class DenseBlock(nn.Module):
    """Bottleneck unit whose output is its new feature maps stacked on top of its input.

    Pipeline: BN -> ReLU -> 1x1 conv (to ``4 * rate_inc`` channels) -> BN -> ReLU
    -> 3x3 conv (to ``rate_inc`` channels), followed by a channel-wise concat
    with the original input.

    Args:
        input_num_planes: number of channels of the incoming tensor.
        rate_inc: number of channels the block appends to its input.
    """

    def __init__(self, input_num_planes, rate_inc):
        super().__init__()
        bottleneck_planes = 4 * rate_inc

        # Layers are created in the same order as they are applied.
        self.batch_norm1 = nn.BatchNorm2d(input_num_planes)
        self.conv_layer1 = nn.Conv2d(
            input_num_planes, bottleneck_planes,
            kernel_size=1, stride=1, padding=0, bias=False,
        )
        self.batch_norm2 = nn.BatchNorm2d(bottleneck_planes)
        self.conv_layer2 = nn.Conv2d(
            bottleneck_planes, rate_inc,
            kernel_size=3, stride=1, padding=1, bias=False,
        )

    def forward(self, x):
        """Return ``x`` with ``rate_inc`` freshly computed channels prepended on dim 1."""
        normed_in = F.relu(self.batch_norm1(x))
        bottleneck = self.conv_layer1(normed_in)
        normed_mid = F.relu(self.batch_norm2(bottleneck))
        new_features = self.conv_layer2(normed_mid)
        # New feature maps come first, the untouched input after them.
        return torch.cat([new_features, x], dim=1)
    
class TransBlock(nn.Module):
    def __init__(self, input_num_planes, output_num_planes):
        super(TransBlock, self).__init__()
        self.batch_norm1 = nn.BatchNorm2d(input_num_planes)
        self.conv_layer = nn.Conv2d(in_channels=input_num_planes, out_channels=output_num_planes, kernel_size=1, stride=1, padding=0, bias=False)

    def forward(self, x):
        op = self.conv_layer(F.relu(self.batch_norm1(x)))
        op = F.avg_pool2d(op, stride=2)
        return op