In [1]:
!pip install timm



In [2]:
from functools import partial
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
from timm.models.layers import trunc_normal_, DropPath
from timm.models.registry import register_model
import tarfile

In [5]:
from numpy import identity
import torch
import torch.nn as  nn
import torch.nn.functional as F
from timm.models.layers import DropPath

In [25]:
from torchsummary import summary

In [34]:
class LayerNorm(nn.Module):
    r""" LayerNorm that supports two data formats: channels_last (default) or channels_first. 
    The ordering of the dimensions in the inputs. channels_last corresponds to inputs with 
    shape (batch_size, height, width, channels) while channels_first corresponds to inputs 
    with shape (batch_size, channels, height, width).
    """
    def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(normalized_shape))
        self.bias = nn.Parameter(torch.zeros(normalized_shape))
        self.eps = eps
        self.data_format = data_format
        if self.data_format not in ["channels_last", "channels_first"]:
            raise NotImplementedError 
        self.normalized_shape = (normalized_shape, )
    
    def forward(self, x):
        if self.data_format == "channels_last":
            return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps)
        elif self.data_format == "channels_first":
            u = x.mean(1, keepdim=True)
            s = (x - u).pow(2).mean(1, keepdim=True)
            x = (x - u) / torch.sqrt(s + self.eps)
            x = self.weight[:, None, None] * x + self.bias[:, None, None]
            return x

In [71]:
# class Block(nn.Module):
#     r""" ConvNeXt Block. There are two equivalent implementations:
#     (1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W)
#     (2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back
#     We use (2) as we find it slightly faster in PyTorch
    
#     Args:
#         dim (int): Number of input channels.
#         drop_path (float): Stochastic depth rate. Default: 0.0
#         layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
#     """
#     def __init__(self, dim, drop_path=0., layer_scale_init_value=1e-6):
#         super().__init__()
        
#         self.conv1 = nn.Conv2d(dim, dim, kernel_size=1, stride=1, padding=0, bias=False)
#         self.conv2 = nn.Conv2d(dim, dim, kernel_size=3, stride=1, padding=1, bias=False)
#         self.conv3 = nn.Conv2d(dim, dim, kernel_size=1, stride=1, padding=0, bias=False)
        
#         self.bn = nn.BatchNorm2d(dim)

#         self.relu = nn.ReLU()

#         self.drop_path = DropPath(drop_path) if drop_path > 0 else nn.Identity()
        

#     def forward(self, x):
#         identity = x.clone()
        
#         x = self.conv1(x)
#         x = self.bn(x)
#         x = self.relu(x)
        
#         x = self.conv2(x)
#         x = self.bn(x)
#         x = self.relu(x)
        
#         x = self.conv3(x)
#         x = self.bn(x)
#         x = self.relu(x)
        

#         x = self.drop_path(x) + identity
#         x = self.relu(x)
        
#         return x

In [170]:
class Bottleneck(nn.Module):
    def __init__(self, in_channels, out_channels, drop_path=0, stride=1): # , increase_channels=False
        super().__init__()

        self.expansion = 4
        
#         self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0, bias=False)
#         self.bn1 = nn.BatchNorm2d(out_channels)
#         self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
#         self.bn2 = nn.BatchNorm2d(out_channels)
#         self.conv3 = nn.Conv2d(out_channels, self.expansion*out_channels, kernel_size=1, stride=1, padding=0, bias=False)
#         self.bn3 = nn.BatchNorm2d(self.expansion*out_channels)
        
        self.conv1 = nn.Conv2d(in_channels,out_channels, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.conv3 = nn.Conv2d(out_channels, self.expansion*out_channels, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*out_channels)

        self.relu = nn.ReLU()
        
        self.drop_path = DropPath(drop_path) if drop_path > 0 else nn.Identity()
        
    def forward(self, x):

        identity = x.clone()

        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)

        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu(x)

        x = self.conv3(x)
        x = self.bn3(x)
        
        x = self.drop_path(x) #+ identity
        x = self.relu(x)
        
        return 

In [171]:
class Resnet_C1(nn.Module):
    r""" ConvNeXt
        A PyTorch impl of : `A ConvNet for the 2020s`  -
          https://arxiv.org/pdf/2201.03545.pdf
    Args:
        in_chans (int): Number of input image channels. Default: 3
        num_classes (int): Number of classes for classification head. Default: 1000
        depths (tuple(int)): Number of blocks at each stage. Default: [3, 3, 9, 3]
        dims (int): Feature dimension at each stage. Default: [96, 192, 384, 768]
        drop_path_rate (float): Stochastic depth rate. Default: 0.
        layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
        head_init_scale (float): Init scaling value for classifier weights and biases. Default: 1.
    """
    def __init__(self, block = Bottleneck,
                 in_chans=3, num_classes=120, 
                 depths=[3, 4, 6, 3],  # num_layers in resnet
                 dims=[64, 128, 256, 512], 
                 drop_path_rate=0., 
                 ):
        super().__init__()
        self.expansion = 4
        self.inplanes = 64
        
        # downsample layers
        self.downsample_layers = nn.ModuleList() # stem and 3 intermediate downsampling conv layers
        stem = nn.Sequential(
            nn.Conv2d(in_chans, dims[0], kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        )
        self.downsample_layers.append(stem)
        
        for i in range(3):
            downsample_layer = nn.Sequential(
                    LayerNorm(dims[i] * self.expansion, eps=1e-6, data_format="channels_first"),
                    nn.Conv2d(dims[i] * self.expansion, dims[i] * self.expansion, kernel_size=2, stride=2),
            )
            self.downsample_layers.append(downsample_layer)

                
        # bottleneck stages
        self.stages = nn.ModuleList() # 4 feature resolution stages, each consisting of multiple residual blocks
        drop_path_rates=[x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]  #drop_path_rates
        
        layer1 = self.make_layers(block, depths[0], dims[0], drop_path=drop_path_rates[0:depths[0]], stride=1)
#         layer2 = self.make_layers(block, depths[1], dims[1], drop_path=drop_path_rates[depths[0]:sum(depths[:2])], stride=1)
#         layer3 = self.make_layers(block, depths[2], dims[2], drop_path=drop_path_rates[sum(depths[:2]):sum(depths[:3])], stride=1)
#         layer4 = self.make_layers(block, depths[3], dims[3], drop_path=drop_path_rates[sum(depths[:3]):sum(depths[:4])], stride=1)
        self.stages.append(layer1)
#         self.stages.append(layer2)
#         self.stages.append(layer3)
#         self.stages.append(layer4)

        self.norm = nn.LayerNorm(dims[-1], eps=1e-6) # final norm layer
        self.fc = nn.Linear(512, num_classes)

    def forward(self, x):

        x = self.downsample_layers[0](x)
        x = self.stages[0](x)

        return x    
    
    def make_layers(self, block, depth, dim, drop_path=[], stride=1):

        layers = []
    
        #first layer of each stage
        layers.append(block(self.inplanes, dim, drop_path=drop_path[0], stride=stride))
        self.inplanes = dim*self.expansion

        for i in range(depth-1):
            layers.append(block(self.inplanes, dim, drop_path=drop_path[i+1], stride=stride))

        return nn.Sequential(*layers)


In [172]:
model = Resnet_C1()

In [173]:
# model

In [174]:
# pytorch_total_params = sum(p.numel() for p in model.parameters())
# pytorch_total_params 

In [175]:
summary(model, (3, 224, 224));

RuntimeError: Failed to run torchsummary. See above stack traces for more details. Executed layers up to: [Sequential: 2-1, Conv2d: 3-1, BatchNorm2d: 3-2, ReLU: 3-3, MaxPool2d: 3-4, Conv2d: 4-1, BatchNorm2d: 4-2, ReLU: 4-3, Conv2d: 4-4, BatchNorm2d: 4-5, ReLU: 4-6, Conv2d: 4-7, BatchNorm2d: 4-8, Identity: 4-9, ReLU: 4-10]

In [169]:
[64, 56, 56]    

[64, 56, 56]