# 参考资料
- https://www.jb51.net/article/213731.htm

In [1]:
import torch
from torch import nn
from torchinfo import summary
import torchvision


# Demo

## LeNet

In [49]:
# Define the network model class.
class LeNet5(nn.Module):
    """LeNet-5 style convolutional classifier.

    Three convolutions (the first two each followed by a sigmoid and a 2x2
    average pooling) feed two linear layers that emit 10 scores per sample.
    """

    # Build the layers.
    def __init__(self):
        super(LeNet5, self).__init__()
        self.c1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, padding=2)
        self.Sigmoid = nn.Sigmoid()
        self.s2 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.c3 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.s4 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.c5 = nn.Conv2d(in_channels=16, out_channels=120, kernel_size=5)

        self.flatten = nn.Flatten()
        self.f6 = nn.Linear(120, 84)
        self.output = nn.Linear(84, 10)

    def forward(self, x):
        """Return the 10 output scores for a batch of 1x28x28 inputs."""
        # x is expected to be 1*28*28 per sample.
        x = self.Sigmoid(self.c1(x))
        x = self.s2(x)
        x = self.Sigmoid(self.c3(x))
        x = self.s4(x)
        # NOTE(review): no activation follows c5 or f6, so c5 -> f6 -> output
        # compose into a single affine map. Left as is so that outputs of
        # existing weights do not change; confirm whether this is intended.
        x = self.c5(x)
        x = self.flatten(x)
        x = self.f6(x)
        x = self.output(x)
        return x

    def count_params(self):
        """Print the parameter count in millions and return the exact count.

        Returns:
            int: Total number of elements over all parameters.
        """
        total = sum(param.nelement() for param in self.parameters())
        print("Number of parameter: %.2fM" % (total / 1e6))
        return total

    def summary(self, input_size=(1, 1, 28, 28)):
        """Print the torchinfo summary of this model.

        Args:
            input_size (tuple[int]): Input shape handed to torchinfo's
                ``summary``. Defaults to one 1x28x28 sample.
        """
        print(summary(self, input_size))
        

In [46]:
# Instantiate the LeNet-5 model defined above.
lenet = LeNet5()

In [47]:
# Print the torchinfo layer table for the LeNet-5 instance.
lenet.summary()

Layer (type:depth-idx)                   Output Shape              Param #
LeNet5                                   [1, 10]                   --
├─Conv2d: 1-1                            [1, 6, 28, 28]            156
├─Sigmoid: 1-2                           [1, 6, 28, 28]            --
├─AvgPool2d: 1-3                         [1, 6, 14, 14]            --
├─Conv2d: 1-4                            [1, 16, 10, 10]           2,416
├─Sigmoid: 1-5                           [1, 16, 10, 10]           --
├─AvgPool2d: 1-6                         [1, 16, 5, 5]             --
├─Conv2d: 1-7                            [1, 120, 1, 1]            48,120
├─Flatten: 1-8                           [1, 120]                  --
├─Linear: 1-9                            [1, 84]                   10,164
├─Linear: 1-10                           [1, 10]                   850
Total params: 61,706
Trainable params: 61,706
Non-trainable params: 0
Total mult-adds (M): 0.42
Input size (MB): 0.00
Forward/backward pass si

# ResNet

## ResNet18

In [54]:
# Build torchvision's ResNet-18 with its default arguments and summarise it
# for a single 3-channel 1024x1756 input.
model = torchvision.models.resnet18()
summary(model, (1, 3, 1024, 1756))  # a depth=3 argument was tried here and left disabled

Layer (type:depth-idx)                   Output Shape              Param #
ResNet                                   [1, 1000]                 --
├─Conv2d: 1-1                            [1, 64, 512, 878]         9,408
├─BatchNorm2d: 1-2                       [1, 64, 512, 878]         128
├─ReLU: 1-3                              [1, 64, 512, 878]         --
├─MaxPool2d: 1-4                         [1, 64, 256, 439]         --
├─Sequential: 1-5                        [1, 64, 256, 439]         --
│    └─BasicBlock: 2-1                   [1, 64, 256, 439]         --
│    │    └─Conv2d: 3-1                  [1, 64, 256, 439]         36,864
│    │    └─BatchNorm2d: 3-2             [1, 64, 256, 439]         128
│    │    └─ReLU: 3-3                    [1, 64, 256, 439]         --
│    │    └─Conv2d: 3-4                  [1, 64, 256, 439]         36,864
│    │    └─BatchNorm2d: 3-5             [1, 64, 256, 439]         128
│    │    └─ReLU: 3-6                    [1, 64, 256, 439]         --
│

## ResNet34

In [55]:
# Build torchvision's ResNet-34 with its default arguments and summarise it
# for a single 3-channel 1024x1756 input.
model = torchvision.models.resnet34()
summary(model, (1, 3, 1024, 1756))  # a depth=3 argument was tried here and left disabled

Layer (type:depth-idx)                   Output Shape              Param #
ResNet                                   [1, 1000]                 --
├─Conv2d: 1-1                            [1, 64, 512, 878]         9,408
├─BatchNorm2d: 1-2                       [1, 64, 512, 878]         128
├─ReLU: 1-3                              [1, 64, 512, 878]         --
├─MaxPool2d: 1-4                         [1, 64, 256, 439]         --
├─Sequential: 1-5                        [1, 64, 256, 439]         --
│    └─BasicBlock: 2-1                   [1, 64, 256, 439]         --
│    │    └─Conv2d: 3-1                  [1, 64, 256, 439]         36,864
│    │    └─BatchNorm2d: 3-2             [1, 64, 256, 439]         128
│    │    └─ReLU: 3-3                    [1, 64, 256, 439]         --
│    │    └─Conv2d: 3-4                  [1, 64, 256, 439]         36,864
│    │    └─BatchNorm2d: 3-5             [1, 64, 256, 439]         128
│    │    └─ReLU: 3-6                    [1, 64, 256, 439]         --
│

## ResNet50

In [56]:
# Build torchvision's ResNet-50 with its default arguments and summarise it
# for a single 3-channel 1024x1756 input.
model = torchvision.models.resnet50()
summary(model, (1, 3, 1024, 1756))  # a depth=3 argument was tried here and left disabled

Layer (type:depth-idx)                   Output Shape              Param #
ResNet                                   [1, 1000]                 --
├─Conv2d: 1-1                            [1, 64, 512, 878]         9,408
├─BatchNorm2d: 1-2                       [1, 64, 512, 878]         128
├─ReLU: 1-3                              [1, 64, 512, 878]         --
├─MaxPool2d: 1-4                         [1, 64, 256, 439]         --
├─Sequential: 1-5                        [1, 256, 256, 439]        --
│    └─Bottleneck: 2-1                   [1, 256, 256, 439]        --
│    │    └─Conv2d: 3-1                  [1, 64, 256, 439]         4,096
│    │    └─BatchNorm2d: 3-2             [1, 64, 256, 439]         128
│    │    └─ReLU: 3-3                    [1, 64, 256, 439]         --
│    │    └─Conv2d: 3-4                  [1, 64, 256, 439]         36,864
│    │    └─BatchNorm2d: 3-5             [1, 64, 256, 439]         128
│    │    └─ReLU: 3-6                    [1, 64, 256, 439]         --
│ 

## ResNet101

In [57]:
# Build torchvision's ResNet-101 with its default arguments and summarise it
# for a single 3-channel 1024x1756 input.
model = torchvision.models.resnet101()
summary(model, (1, 3, 1024, 1756))  # a depth=3 argument was tried here and left disabled

Layer (type:depth-idx)                   Output Shape              Param #
ResNet                                   [1, 1000]                 --
├─Conv2d: 1-1                            [1, 64, 512, 878]         9,408
├─BatchNorm2d: 1-2                       [1, 64, 512, 878]         128
├─ReLU: 1-3                              [1, 64, 512, 878]         --
├─MaxPool2d: 1-4                         [1, 64, 256, 439]         --
├─Sequential: 1-5                        [1, 256, 256, 439]        --
│    └─Bottleneck: 2-1                   [1, 256, 256, 439]        --
│    │    └─Conv2d: 3-1                  [1, 64, 256, 439]         4,096
│    │    └─BatchNorm2d: 3-2             [1, 64, 256, 439]         128
│    │    └─ReLU: 3-3                    [1, 64, 256, 439]         --
│    │    └─Conv2d: 3-4                  [1, 64, 256, 439]         36,864
│    │    └─BatchNorm2d: 3-5             [1, 64, 256, 439]         128
│    │    └─ReLU: 3-6                    [1, 64, 256, 439]         --
│ 

## ResNet152

In [2]:
# Build torchvision's ResNet-152 with its default arguments and summarise it
# for a single 3-channel 1024x1756 input.
model = torchvision.models.resnet152()
summary(model, (1, 3, 1024, 1756))

Layer (type:depth-idx)                   Output Shape              Param #
ResNet                                   [1, 1000]                 --
├─Conv2d: 1-1                            [1, 64, 512, 878]         9,408
├─BatchNorm2d: 1-2                       [1, 64, 512, 878]         128
├─ReLU: 1-3                              [1, 64, 512, 878]         --
├─MaxPool2d: 1-4                         [1, 64, 256, 439]         --
├─Sequential: 1-5                        [1, 256, 256, 439]        --
│    └─Bottleneck: 2-1                   [1, 256, 256, 439]        --
│    │    └─Conv2d: 3-1                  [1, 64, 256, 439]         4,096
│    │    └─BatchNorm2d: 3-2             [1, 64, 256, 439]         128
│    │    └─ReLU: 3-3                    [1, 64, 256, 439]         --
│    │    └─Conv2d: 3-4                  [1, 64, 256, 439]         36,864
│    │    └─BatchNorm2d: 3-5             [1, 64, 256, 439]         128
│    │    └─ReLU: 3-6                    [1, 64, 256, 439]         --
│ 

# ResNeXt

## resnext50_32x4d

In [52]:
# Build torchvision's ResNeXt-50 (32x4d) with its default arguments and
# summarise it for a single 3-channel 1024x1756 input.
model = torchvision.models.resnext50_32x4d()
summary(model, (1, 3, 1024, 1756))  # a depth=3 argument was tried here and left disabled

Layer (type:depth-idx)                   Output Shape              Param #
ResNet                                   [1, 1000]                 --
├─Conv2d: 1-1                            [1, 64, 512, 878]         9,408
├─BatchNorm2d: 1-2                       [1, 64, 512, 878]         128
├─ReLU: 1-3                              [1, 64, 512, 878]         --
├─MaxPool2d: 1-4                         [1, 64, 256, 439]         --
├─Sequential: 1-5                        [1, 256, 256, 439]        --
│    └─Bottleneck: 2-1                   [1, 256, 256, 439]        --
│    │    └─Conv2d: 3-1                  [1, 128, 256, 439]        8,192
│    │    └─BatchNorm2d: 3-2             [1, 128, 256, 439]        256
│    │    └─ReLU: 3-3                    [1, 128, 256, 439]        --
│    │    └─Conv2d: 3-4                  [1, 128, 256, 439]        4,608
│    │    └─BatchNorm2d: 3-5             [1, 128, 256, 439]        256
│    │    └─ReLU: 3-6                    [1, 128, 256, 439]        --
│  

## resnext101_32x8d

In [58]:
# Build torchvision's ResNeXt-101 (32x8d) with its default arguments and
# summarise it for a single 3-channel 1024x1756 input.
model = torchvision.models.resnext101_32x8d()
summary(model, (1, 3, 1024, 1756))  # a depth=3 argument was tried here and left disabled

Layer (type:depth-idx)                   Output Shape              Param #
ResNet                                   [1, 1000]                 --
├─Conv2d: 1-1                            [1, 64, 512, 878]         9,408
├─BatchNorm2d: 1-2                       [1, 64, 512, 878]         128
├─ReLU: 1-3                              [1, 64, 512, 878]         --
├─MaxPool2d: 1-4                         [1, 64, 256, 439]         --
├─Sequential: 1-5                        [1, 256, 256, 439]        --
│    └─Bottleneck: 2-1                   [1, 256, 256, 439]        --
│    │    └─Conv2d: 3-1                  [1, 256, 256, 439]        16,384
│    │    └─BatchNorm2d: 3-2             [1, 256, 256, 439]        512
│    │    └─ReLU: 3-3                    [1, 256, 256, 439]        --
│    │    └─Conv2d: 3-4                  [1, 256, 256, 439]        18,432
│    │    └─BatchNorm2d: 3-5             [1, 256, 256, 439]        512
│    │    └─ReLU: 3-6                    [1, 256, 256, 439]        --
│

## resnext101_64x4d

# CenterNet

## ResNet18

## DLA

## Hourglass

# DCNv2

In [6]:
import torch.nn as nn
import math

from mmcv.cnn import ConvModule
from mmcv.runner import BaseModule, auto_fp16

In [7]:
class CTResNetNeck(BaseModule):
    """Upsampling neck from `CenterNet <https://arxiv.org/abs/1904.07850>`_,
    used for object classification and box regression.

    Every stage is a 3x3 conv (DCNv2 when ``use_dcn`` is set) followed by a
    stride-2 transposed conv; both are built with batch norm.

    Args:
         in_channel (int): Number of input channels.
         num_deconv_filters (tuple[int]): Number of filters per stage.
         num_deconv_kernels (tuple[int]): Transposed-conv kernel size per
             stage; must have the same length as ``num_deconv_filters``.
         use_dcn (bool): If True, use DCNv2. Default: True.
         init_cfg (dict or list[dict], optional): Initialization config dict.
    """

    def __init__(self,
                 in_channel,
                 num_deconv_filters,
                 num_deconv_kernels,
                 use_dcn=True,
                 init_cfg=None):
        super().__init__(init_cfg)
        assert len(num_deconv_filters) == len(num_deconv_kernels)
        self.fp16_enabled = False
        self.use_dcn = use_dcn
        self.in_channel = in_channel
        self.deconv_layers = self._make_deconv_layer(
            num_deconv_filters, num_deconv_kernels)

    def _make_deconv_layer(self, num_deconv_filters, num_deconv_kernels):
        """Stack the conv + transposed-conv stages into one ``nn.Sequential``.

        ``self.in_channel`` is advanced to each stage's width, so after the
        call it holds the channel count of the last stage.
        """
        stages = []
        for stage_idx, stage_width in enumerate(num_deconv_filters):
            # Channel-changing 3x3 conv (deformable when requested).
            stages.append(
                ConvModule(
                    self.in_channel,
                    stage_width,
                    3,
                    padding=1,
                    conv_cfg=dict(type='DCNv2') if self.use_dcn else None,
                    norm_cfg=dict(type='BN')))
            # Stride-2 transposed conv that keeps the channel count.
            stages.append(
                ConvModule(
                    stage_width,
                    stage_width,
                    num_deconv_kernels[stage_idx],
                    stride=2,
                    padding=1,
                    conv_cfg=dict(type='deconv'),
                    norm_cfg=dict(type='BN')))
            self.in_channel = stage_width

        return nn.Sequential(*stages)

    def init_weights(self):
        """Re-initialise transposed convs, batch norms and (without DCN)
        plain convs."""
        for module in self.modules():
            if isinstance(module, nn.ConvTranspose2d):
                # In order to be consistent with the source code,
                # reset the ConvTranspose2d initialization parameters
                module.reset_parameters()
                # Simulated bilinear upsampling kernel
                kernel = module.weight.data
                half = math.ceil(kernel.size(2) / 2)
                center = (2 * half - 1 - half % 2) / (2. * half)
                row_profile = [
                    1 - math.fabs(row / half - center)
                    for row in range(kernel.size(2))
                ]
                col_profile = [
                    1 - math.fabs(col / half - center)
                    for col in range(kernel.size(3))
                ]
                for row, row_value in enumerate(row_profile):
                    for col, col_value in enumerate(col_profile):
                        kernel[0, 0, row, col] = row_value * col_value
                # Copy the filter at [0, 0] to index 0 of every other
                # slice along the first weight dimension.
                for channel in range(1, kernel.size(0)):
                    kernel[channel, 0, :, :] = kernel[0, 0, :, :]
                continue

            if isinstance(module, nn.BatchNorm2d):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)
                continue

            # Only reached for plain convs when self.use_dcn is False.
            if isinstance(module, nn.Conv2d) and not self.use_dcn:
                # In order to be consistent with the source code,
                # reset the Conv2d initialization parameters
                module.reset_parameters()

    @auto_fp16()
    def forward(self, inputs):
        """Upsample the last feature map in ``inputs``; returns a 1-tuple."""
        assert isinstance(inputs, (list, tuple))
        upsampled = self.deconv_layers(inputs[-1])
        return (upsampled,)

In [10]:
# Build the neck for 512 input channels with three upsampling stages
# (256, 128 and 64 filters, each with a 4x4 deconv kernel), with DCNv2 disabled.
neck = CTResNetNeck(in_channel=512,
                    num_deconv_filters=(256, 128, 64),
                    num_deconv_kernels=(4, 4, 4),
                    use_dcn=False)

In [11]:
# Switch the neck to evaluation mode; the module repr is shown as the cell output.
neck.eval()

CTResNetNeck(
  (deconv_layers): Sequential(
    (0): ConvModule(
      (conv): Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (activate): ReLU(inplace=True)
    )
    (1): ConvModule(
      (conv): ConvTranspose2d(256, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (activate): ReLU(inplace=True)
    )
    (2): ConvModule(
      (conv): Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (activate): ReLU(inplace=True)
    )
    (3): ConvModule(
      (conv): ConvTranspose2d(128, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, tr

In [16]:
import math
from typing import Optional, Tuple, Union

import torch
import torch.nn as nn
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from torch.nn.modules.utils import _pair, _single

class ModulatedDeformConv2d(nn.Module):
    """Modulated deformable 2D convolution whose offsets and mask are supplied
    by the caller.

    Args:
        in_channels (int): Number of input channels.
        out_channels (int): Number of output channels.
        kernel_size (int or tuple[int]): Kernel size; expanded with ``_pair``.
        stride (int): Stride; expanded with ``_pair``. Default: 1.
        padding (int): Padding; expanded with ``_pair``. Default: 0.
        dilation (int): Dilation; expanded with ``_pair``. Default: 1.
        groups (int): The weight holds ``in_channels // groups`` input
            channels per filter. Default: 1.
        deform_groups (int): Passed through to the deformable op. Default: 1.
        bias (bool or str): If truthy, a learnable bias is created;
            otherwise ``bias`` is registered as ``None``. Default: True.
    """

    def __init__(self,
                 in_channels: int,
                 out_channels: int,
                 kernel_size: Union[int, Tuple[int]],
                 stride: int = 1,
                 padding: int = 0,
                 dilation: int = 1,
                 groups: int = 1,
                 deform_groups: int = 1,
                 bias: Union[bool, str] = True):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = _pair(kernel_size)
        self.stride = _pair(stride)
        self.padding = _pair(padding)
        self.dilation = _pair(dilation)
        self.groups = groups
        self.deform_groups = deform_groups
        # enable compatibility with nn.Conv2d
        self.transposed = False
        self.output_padding = _single(0)

        # Allocated uninitialised; init_weights() below fills the values.
        self.weight = nn.Parameter(
            torch.empty(out_channels, in_channels // groups,
                        *self.kernel_size))
        if bias:
            self.bias = nn.Parameter(torch.empty(out_channels))
        else:
            self.register_parameter('bias', None)
        self.init_weights()

    def init_weights(self):
        """Draw the weight uniformly from ``[-stdv, stdv]`` with
        ``stdv = 1 / sqrt(in_channels * prod(kernel_size))`` and zero the
        bias when present."""
        n = self.in_channels
        for k in self.kernel_size:
            n *= k
        stdv = 1. / math.sqrt(n)
        self.weight.data.uniform_(-stdv, stdv)
        if self.bias is not None:
            self.bias.data.zero_()

    def forward(self, x: torch.Tensor, offset: torch.Tensor,
                mask: torch.Tensor) -> torch.Tensor:
        """Apply the deformable op to ``x`` with the given ``offset`` and
        ``mask``."""
        # No definition or import of the op is visible in this notebook, so
        # the bare name would raise NameError. Resolve it from mmcv (already a
        # dependency of this file) at call time.
        from mmcv.ops import modulated_deform_conv2d
        return modulated_deform_conv2d(x, offset, mask, self.weight, self.bias,
                                       self.stride, self.padding,
                                       self.dilation, self.groups,
                                       self.deform_groups)

class ModulatedDeformConv2dPack(ModulatedDeformConv2d):
    """A ModulatedDeformable Conv Encapsulation that acts as normal Conv
    layers.

    The offsets and mask are predicted from the input by an internal
    ``conv_offset`` convolution, so ``forward`` takes only ``x``.

    Args:
        in_channels (int): Same as nn.Conv2d.
        out_channels (int): Same as nn.Conv2d.
        kernel_size (int or tuple[int]): Same as nn.Conv2d.
        stride (int): Same as nn.Conv2d, while tuple is not supported.
        padding (int): Same as nn.Conv2d, while tuple is not supported.
        dilation (int): Same as nn.Conv2d, while tuple is not supported.
        groups (int): Same as nn.Conv2d.
        deform_groups (int): Passed through to the deformable op; also scales
            the number of channels produced by ``conv_offset``.
        bias (bool or str): If specified as `auto`, it will be decided by the
            norm_cfg. Bias will be set as True if norm_cfg is None, otherwise
            False.
    """

    _version = 2

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Three values per kernel position and deform group: two of them
        # become the offset and the third the mask (see forward()).
        self.conv_offset = nn.Conv2d(
            self.in_channels,
            self.deform_groups * 3 * self.kernel_size[0] * self.kernel_size[1],
            kernel_size=self.kernel_size,
            stride=self.stride,
            padding=self.padding,
            dilation=self.dilation,
            bias=True)
        self.init_weights()

    def init_weights(self) -> None:
        """Initialise the main weight via the parent and zero ``conv_offset``."""
        super().init_weights()
        # The parent constructor calls this method before conv_offset exists,
        # hence the guard.
        if hasattr(self, 'conv_offset'):
            self.conv_offset.weight.data.zero_()
            self.conv_offset.bias.data.zero_()

    def forward(self, x: torch.Tensor) -> torch.Tensor:  # type: ignore
        """Predict offset and mask from ``x`` and apply the deformable op."""
        # No definition or import of the op is visible in this notebook, so
        # the bare name would raise NameError. Resolve it from mmcv (already a
        # dependency of this file) at call time.
        from mmcv.ops import modulated_deform_conv2d

        out = self.conv_offset(x)
        # Split the prediction into three equal channel chunks.
        o1, o2, mask = torch.chunk(out, 3, dim=1)
        offset = torch.cat((o1, o2), dim=1)
        mask = torch.sigmoid(mask)
        return modulated_deform_conv2d(x, offset, mask, self.weight, self.bias,
                                       self.stride, self.padding,
                                       self.dilation, self.groups,
                                       self.deform_groups)

    def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
                              missing_keys, unexpected_keys, error_msgs):
        """Rename legacy ``<name>_offset.*`` keys to ``<name>.conv_offset.*``
        for checkpoints with no version or a version below 2, then defer to
        the parent loader."""
        version = local_metadata.get('version', None)

        if version is None or version < 2:
            # the key is different in early versions
            # In version < 2, ModulatedDeformConvPack
            # loads previous benchmark models.
            if (prefix + 'conv_offset.weight' not in state_dict
                    and prefix[:-1] + '_offset.weight' in state_dict):
                state_dict[prefix + 'conv_offset.weight'] = state_dict.pop(
                    prefix[:-1] + '_offset.weight')
            if (prefix + 'conv_offset.bias' not in state_dict
                    and prefix[:-1] + '_offset.bias' in state_dict):
                state_dict[prefix +
                           'conv_offset.bias'] = state_dict.pop(prefix[:-1] +
                                                                '_offset.bias')

        if version is not None and version > 1:
            # print_log is not imported anywhere visible in this notebook;
            # bring it in from mmcv here so this branch does not raise
            # NameError.
            from mmcv.utils import print_log
            print_log(
                f'ModulatedDeformConvPack {prefix.rstrip(".")} is upgraded to '
                'version 2.',
                logger='root')

        super()._load_from_state_dict(state_dict, prefix, local_metadata,
                                      strict, missing_keys, unexpected_keys,
                                      error_msgs)


In [17]:
# 512 input channels, 256 output channels, 4x4 kernel; all other arguments keep their defaults.
test = ModulatedDeformConv2dPack(512, 256, 4)

In [18]:
# Switch the layer to evaluation mode; the module repr is shown as the cell output.
test.eval()

ModulatedDeformConv2dPack(
  (conv_offset): Conv2d(512, 48, kernel_size=(4, 4), stride=(1, 1))
)