# Dependencies
   - torchvision models:
      - import the class
         - This is the actual implementation of the model architecture
         - You can create an instance of the model directly using the class
         - You have more control over the initialization parameters if needed
      - import the function
         - This is a convenience function that typically wraps around the class
         - It provides a simplified way to create a model instance with predefined or default settings
         - It often has additional logic for loading pretrained weights

In [35]:
import torch
from torch import nn
from torch.functional import F
from torchsummary import summary
from torchvision.models import DenseNet, densenet121, densenet161, densenet169, densenet201

# DenseNet
   - Densely Connected Convolutional Network (DenseNet), developed in 2017 by [Gao Huang](https://scholar.google.com.hk/citations?user=-P9LwcgAAAAJ&hl) and collaborators from [Cornell University](https://www.cornell.edu/) and [Tsinghua University](https://www.tsinghua.edu.cn/en/)
   - It is based on the [Densely Connected Convolutional Networks](https://openaccess.thecvf.com/content_cvpr_2017/papers/Huang_Densely_Connected_Convolutional_CVPR_2017_paper.pdf) paper
   - It was trained on the [ImageNet](https://www.image-net.org/) dataset (first resized to 256x256 then center cropped to 224x224) [[ImageNet viewer](https://navigu.net/#imagenet)]
   - Known for its innovative use of `dense connections` where each layer receives the feature maps of all preceding layers, enhancing gradient flow and feature reuse
   - It comes in several variants, primarily `DenseNet-121`, `DenseNet-161`, `DenseNet-169` and `DenseNet-201`, indicating the depth and complexity of the network
   - Achieved high performance in various benchmarks and demonstrated significant parameter efficiency and feature reuse due to its dense connectivity

<figure style="text-align: center;">
    <img src="../resources/images/SVGs/densenet-architecture.svg" alt="densenet-architecture.svg" style="width: 100%;">
    <figcaption>DenseNet Architecture</figcaption>
</figure>

## Custom DenseNet
   - `Softmax` is intentionally omitted from the model output: `CrossEntropyLoss` applies `LogSoftmax` internally, so the network returns raw logits.

In [36]:
class DenseLayer(nn.Module):
    """Bottleneck layer of a dense block: BN-ReLU-Conv(1x1), then BN-ReLU-Conv(3x3).

    The 1x1 convolution maps the input to ``4 * growth_rate`` channels and the
    3x3 convolution (padding 1) reduces that to ``growth_rate`` new channels.
    The new channels are concatenated onto the input along dimension 1.
    """

    def __init__(self, in_channels, growth_rate) -> None:
        super().__init__()
        bottleneck_channels = 4 * growth_rate
        # Sub-modules are created in the same order in which forward() applies them.
        self.bn1 = nn.BatchNorm2d(in_channels)
        self.conv1 = nn.Conv2d(in_channels, bottleneck_channels, kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(bottleneck_channels)
        self.conv2 = nn.Conv2d(
            bottleneck_channels, growth_rate, kernel_size=3, padding=1, bias=False
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``x`` with the ``growth_rate`` newly computed channels appended."""
        bottleneck = self.conv1(F.relu(self.bn1(x)))
        new_features = self.conv2(F.relu(self.bn2(bottleneck)))
        # The input is passed through untouched; only the new channels are added.
        return torch.cat((x, new_features), dim=1)

In [37]:
class DenseBlock(nn.Module):
    """Stack of ``num_layers`` DenseLayer modules applied one after another.

    Every DenseLayer appends ``growth_rate`` channels to its input, so the
    layer at position ``k`` (0-based) is built for
    ``in_channels + k * growth_rate`` input channels.
    """

    def __init__(self, num_layers, in_channels, growth_rate) -> None:
        super().__init__()
        self.block = nn.Sequential(
            *(
                DenseLayer(in_channels + k * growth_rate, growth_rate)
                for k in range(num_layers)
            )
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run ``x`` through all dense layers in order."""
        return self.block(x)

In [38]:
class TransitionLayer(nn.Module):
    """Transition between dense blocks: BN-ReLU-Conv(1x1), then 2x2 average pooling.

    The 1x1 convolution changes the channel count from ``in_channels`` to
    ``out_channels``; the stride-2 average pool halves height and width.
    """

    def __init__(self, in_channels, out_channels) -> None:
        super().__init__()
        self.bn = nn.BatchNorm2d(in_channels)
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
        self.pool = nn.AvgPool2d(kernel_size=2, stride=2)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Normalise, activate, project the channels, then downsample."""
        activated = F.relu(self.bn(x))
        return self.pool(self.conv(activated))

In [39]:
class CustomDenseNet(nn.Module):
    """DenseNet classifier assembled from DenseBlock and TransitionLayer modules.

    Args:
        num_layers_per_block: number of DenseLayers in each dense block, in
            order (e.g. ``[6, 12, 24, 16]``).
        growth_rate: channels added by every DenseLayer; the stem convolution
            outputs ``2 * growth_rate`` channels.
        num_classes: number of outputs of the final linear layer.

    ``forward`` returns the raw output of the linear classifier; no softmax
    is applied.
    """

    def __init__(self, num_layers_per_block, growth_rate, num_classes=1000) -> None:
        super().__init__()
        self.growth_rate = growth_rate
        num_channels = 2 * growth_rate

        # densenet-121, densenet-169, densenet-201 : 3x224x224 -> 64x112x112
        # densenet-161                             : 3x224x224 -> 96x112x112
        self.conv1 = nn.Conv2d(3, num_channels, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(num_channels)
        self.relu = nn.ReLU(inplace=True)

        # densenet-121, densenet-169, densenet-201 : 64x112x112 -> 64x56x56
        # densenet-161                             : 96x112x112 -> 96x56x56
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # densenet-121 : 64x56x56 -> 1024x7x7
        # densenet-161 : 96x56x56 -> 2208x7x7
        # densenet-169 : 64x56x56 -> 1664x7x7
        # densenet-201 : 64x56x56 -> 1920x7x7
        last_block = len(num_layers_per_block) - 1
        blocks = []
        for i, num_layers in enumerate(num_layers_per_block):
            blocks.append(DenseBlock(num_layers, num_channels, growth_rate))
            # every DenseLayer in the block appended growth_rate channels
            num_channels += num_layers * growth_rate
            if i != last_block:
                # a transition follows every dense block except the last one;
                # it halves the channel count (and pools the spatial size)
                blocks.append(TransitionLayer(num_channels, num_channels // 2))
                num_channels //= 2

        self.blocks = nn.Sequential(*blocks)
        self.bn2 = nn.BatchNorm2d(num_channels)

        # densenet-121 : 1024x7x7 -> 1024x1x1
        # densenet-161 : 2208x7x7 -> 2208x1x1
        # densenet-169 : 1664x7x7 -> 1664x1x1
        # densenet-201 : 1920x7x7 -> 1920x1x1
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))

        # densenet-121 : 1024 -> 1000
        # densenet-161 : 2208 -> 1000
        # densenet-169 : 1664 -> 1000
        # densenet-201 : 1920 -> 1000
        self.fc = nn.Linear(num_channels, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the stem, dense blocks, global pooling and the linear classifier."""
        # feature extractor
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.pool1(out)
        out = self.blocks(out)
        out = self.bn2(out)
        out = self.relu(out)

        # adaptive average pooling
        out = self.avgpool(out)

        # flatten:
        # densenet-121 : 1024x1x1 -> 1024
        # densenet-161 : 2208x1x1 -> 2208
        # densenet-169 : 1664x1x1 -> 1664
        # densenet-201 : 1920x1x1 -> 1920
        out = torch.flatten(out, 1)

        # classifier
        out = self.fc(out)
        return out

### DenseNet-121

In [40]:
# DenseNet-121 configuration: dense blocks of 6, 12, 24 and 16 layers, growth rate 32.
densenet_121_1 = CustomDenseNet(num_layers_per_block=[6, 12, 24, 16], growth_rate=32)
# Bare expression as the last line of the cell so the notebook displays the module tree.
densenet_121_1

CustomDenseNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (pool1): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (blocks): Sequential(
    (0): DenseBlock(
      (block): Sequential(
        (0): DenseLayer(
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        )
        (1): DenseLayer(
          (bn1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv1): Conv2d(96, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      

In [41]:
# Print per-layer output shapes and parameter counts for a 3x224x224 input size, on CPU.
summary(densenet_121_1, (3, 224, 224), device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
       BatchNorm2d-5           [-1, 64, 56, 56]             128
            Conv2d-6          [-1, 128, 56, 56]           8,192
       BatchNorm2d-7          [-1, 128, 56, 56]             256
            Conv2d-8           [-1, 32, 56, 56]          36,864
        DenseLayer-9           [-1, 96, 56, 56]               0
      BatchNorm2d-10           [-1, 96, 56, 56]             192
           Conv2d-11          [-1, 128, 56, 56]          12,288
      BatchNorm2d-12          [-1, 128, 56, 56]             256
           Conv2d-13           [-1, 32, 56, 56]          36,864
       DenseLayer-14          [-1, 128,

### DenseNet-161

In [42]:
# DenseNet-161 configuration: dense blocks of 6, 12, 36 and 24 layers, growth rate 48.
densenet_161_1 = CustomDenseNet(num_layers_per_block=[6, 12, 36, 24], growth_rate=48)
# Bare expression as the last line of the cell so the notebook displays the module tree.
densenet_161_1

CustomDenseNet(
  (conv1): Conv2d(3, 96, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (pool1): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (blocks): Sequential(
    (0): DenseBlock(
      (block): Sequential(
        (0): DenseLayer(
          (bn1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv1): Conv2d(96, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn2): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(192, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        )
        (1): DenseLayer(
          (bn1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv1): Conv2d(144, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
    

In [43]:
# Print per-layer output shapes and parameter counts for a 3x224x224 input size, on CPU.
summary(densenet_161_1, (3, 224, 224), device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 96, 112, 112]          14,112
       BatchNorm2d-2         [-1, 96, 112, 112]             192
              ReLU-3         [-1, 96, 112, 112]               0
         MaxPool2d-4           [-1, 96, 56, 56]               0
       BatchNorm2d-5           [-1, 96, 56, 56]             192
            Conv2d-6          [-1, 192, 56, 56]          18,432
       BatchNorm2d-7          [-1, 192, 56, 56]             384
            Conv2d-8           [-1, 48, 56, 56]          82,944
        DenseLayer-9          [-1, 144, 56, 56]               0
      BatchNorm2d-10          [-1, 144, 56, 56]             288
           Conv2d-11          [-1, 192, 56, 56]          27,648
      BatchNorm2d-12          [-1, 192, 56, 56]             384
           Conv2d-13           [-1, 48, 56, 56]          82,944
       DenseLayer-14          [-1, 192,

### DenseNet-169

In [44]:
# DenseNet-169 configuration: dense blocks of 6, 12, 32 and 32 layers, growth rate 32.
densenet_169_1 = CustomDenseNet(num_layers_per_block=[6, 12, 32, 32], growth_rate=32)
# Bare expression as the last line of the cell so the notebook displays the module tree.
densenet_169_1

CustomDenseNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (pool1): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (blocks): Sequential(
    (0): DenseBlock(
      (block): Sequential(
        (0): DenseLayer(
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        )
        (1): DenseLayer(
          (bn1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv1): Conv2d(96, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      

In [45]:
# Print per-layer output shapes and parameter counts for a 3x224x224 input size, on CPU.
summary(densenet_169_1, (3, 224, 224), device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
       BatchNorm2d-5           [-1, 64, 56, 56]             128
            Conv2d-6          [-1, 128, 56, 56]           8,192
       BatchNorm2d-7          [-1, 128, 56, 56]             256
            Conv2d-8           [-1, 32, 56, 56]          36,864
        DenseLayer-9           [-1, 96, 56, 56]               0
      BatchNorm2d-10           [-1, 96, 56, 56]             192
           Conv2d-11          [-1, 128, 56, 56]          12,288
      BatchNorm2d-12          [-1, 128, 56, 56]             256
           Conv2d-13           [-1, 32, 56, 56]          36,864
       DenseLayer-14          [-1, 128,

### DenseNet-201

In [46]:
# DenseNet-201 configuration: dense blocks of 6, 12, 48 and 32 layers, growth rate 32.
densenet_201_1 = CustomDenseNet(num_layers_per_block=[6, 12, 48, 32], growth_rate=32)
# Bare expression as the last line of the cell so the notebook displays the module tree.
densenet_201_1

CustomDenseNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (pool1): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (blocks): Sequential(
    (0): DenseBlock(
      (block): Sequential(
        (0): DenseLayer(
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        )
        (1): DenseLayer(
          (bn1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv1): Conv2d(96, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      

In [47]:
# Print per-layer output shapes and parameter counts for a 3x224x224 input size, on CPU.
summary(densenet_201_1, (3, 224, 224), device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
       BatchNorm2d-5           [-1, 64, 56, 56]             128
            Conv2d-6          [-1, 128, 56, 56]           8,192
       BatchNorm2d-7          [-1, 128, 56, 56]             256
            Conv2d-8           [-1, 32, 56, 56]          36,864
        DenseLayer-9           [-1, 96, 56, 56]               0
      BatchNorm2d-10           [-1, 96, 56, 56]             192
           Conv2d-11          [-1, 128, 56, 56]          12,288
      BatchNorm2d-12          [-1, 128, 56, 56]             256
           Conv2d-13           [-1, 32, 56, 56]          36,864
       DenseLayer-14          [-1, 128,

## PyTorch DenseNet
   - DenseNet is available in PyTorch: [pytorch.org/vision/main/models/densenet.html](https://pytorch.org/vision/main/models/densenet.html)
   - There is a bug with executing `torchsummary` for DenseNet [[details](https://github.com/sksq96/pytorch-summary/issues/2)] (`torch v2.3.1+cu121`, `torchsummary v1.5.1`)
      ```python
      summary(densenet_121_2, (3, 224, 224), device= 'cpu')
      AttributeError: 'list' object has no attribute 'size'
      ```

### DenseNet-121

In [48]:
# torchvision's DenseNet-121 built through the convenience function; weights=None requests no pretrained weights.
densenet_121_2 = densenet121(weights=None)
# Bare expression as the last line of the cell so the notebook displays the module tree.
densenet_121_2

DenseNet(
  (features): Sequential(
    (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (denselayer2): _DenseLayer(
        (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu

### DenseNet-161

In [49]:
# torchvision's DenseNet-161 built through the convenience function; weights=None requests no pretrained weights.
densenet_161_2 = densenet161(weights=None)
# Bare expression as the last line of the cell so the notebook displays the module tree.
densenet_161_2

DenseNet(
  (features): Sequential(
    (conv0): Conv2d(3, 96, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(96, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(192, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (denselayer2): _DenseLayer(
        (norm1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (rel

### DenseNet-169

In [50]:
# torchvision's DenseNet-169 built through the convenience function; weights=None requests no pretrained weights.
densenet_169_2 = densenet169(weights=None)
# Bare expression as the last line of the cell so the notebook displays the module tree.
densenet_169_2

DenseNet(
  (features): Sequential(
    (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (denselayer2): _DenseLayer(
        (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu

### DenseNet-201

In [51]:
# torchvision's DenseNet-201 built through the convenience function; weights=None requests no pretrained weights.
densenet_201_2 = densenet201(weights=None)
# Bare expression as the last line of the cell so the notebook displays the module tree.
densenet_201_2

DenseNet(
  (features): Sequential(
    (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (denselayer2): _DenseLayer(
        (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu