# 一、图片处理

# 二、神经网络

## (一)、主干网络

In [1]:
import torch
from torch import nn

### 1、ResNet

#### Residual block
<center class="half">
    <img src='./Resnet_img/bottleneck.png' height="420"><img src='./Resnet_img/bottleneck_extend.png' height="420">
</center>

In [6]:
class Bottleneck(nn.Module):
    """Residual block: 1x1 conv -> norm -> ReLU -> 3x3 conv -> BN -> ReLU, plus a skip.

    The block keeps the spatial size (stride 1 everywhere, padding 1 on the 3x3
    convolution) and produces ``out_channel`` channels.

    NOTE: the third 1x1 "expansion" convolution of a full bottleneck is not
    built here, so ``expansion`` is kept only as a class attribute and is not
    applied to the output width.

    Args:
        in_channel: number of channels of the incoming feature map.
        out_channel: number of channels produced by both convolutions.
        ibn: when True, the first normalisation layer is ``IBN(out_channel,
            type='a')`` instead of ``nn.BatchNorm2d``.
    """

    expansion = 4

    def __init__(self, in_channel: int, out_channel: int, ibn: bool = False):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=1, bias=False)
        if ibn:
            self.bn1 = IBN(out_channel, type='a')
        else:
            self.bn1 = nn.BatchNorm2d(out_channel)
        self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3, bias=False, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channel)
        self.relu = nn.ReLU(inplace=True)

        # An identity skip only type-checks when the channel counts agree.
        # When they differ, project the input with a 1x1 conv + BN so the
        # addition is well defined. Built last so the initialisation order of
        # the layers above is unchanged.
        if in_channel != out_channel:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channel, out_channel, kernel_size=1, bias=False),
                nn.BatchNorm2d(out_channel),
            )
        else:
            self.shortcut = None

    def forward(self, input):
        """Apply the block to a ``(N, in_channel, H, W)`` tensor.

        Returns:
            Tensor of shape ``(N, out_channel, H, W)``.
        """
        residual = input if self.shortcut is None else self.shortcut(input)

        out = self.conv1(input)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        # Out-of-place add: ReLU keeps its output for the backward pass, so an
        # in-place ``out += residual`` here would invalidate it and make
        # ``backward()`` fail.
        out = out + residual
        out = self.relu(out)
        return out


In [7]:
class IBN(nn.Module):
    """Normalisation layer mixing InstanceNorm and BatchNorm over channels.

    The behaviour is selected by ``type``:

    * ``'a'``: InstanceNorm on the first ``in_channel // 2`` channels,
      BatchNorm on the remaining channels, concatenated back together.
    * ``'b'``: InstanceNorm on all channels.
    * ``'c'``: sum of InstanceNorm and BatchNorm, both over all channels.
    * ``'d'``: InstanceNorm on the first ``in_channel // 2`` channels, the
      remaining channels are passed through unchanged.

    All four sub-layers are always created, whatever ``type`` is, so the
    module's parameter names do not depend on the chosen variant.

    Args:
        in_channel: number of channels of the tensor to normalise.
        type: one of ``'a'``, ``'b'``, ``'c'``, ``'d'``.

    Raises:
        ValueError: if ``type`` is not one of the supported variants.
    """

    _TYPES = ('a', 'b', 'c', 'd')

    def __init__(self, in_channel: int, type: str = 'a'):
        super(IBN, self).__init__()
        if type not in self._TYPES:
            raise ValueError(
                "IBN type must be one of %s, got %r" % (self._TYPES, type)
            )
        self.type = type
        self.in_channel = in_channel
        self.half = in_channel // 2
        self.half_IN = nn.InstanceNorm2d(self.half, affine=True)
        self.half_BN = nn.BatchNorm2d(self.in_channel - self.half)
        self.IN = nn.InstanceNorm2d(self.in_channel, affine=True)
        self.BN = nn.BatchNorm2d(self.in_channel)

    def _split_halves(self, input):
        """Split along the channel axis into exactly two chunks."""
        # Explicit sizes guarantee two chunks; a single chunk size of
        # ``self.half`` would yield three chunks for an odd channel count.
        return torch.split(input, [self.half, self.in_channel - self.half], 1)

    def forward(self, input):
        """Normalise a ``(N, in_channel, H, W)`` tensor; the shape is preserved."""
        if self.type == 'a':
            first, second = self._split_halves(input)
            out1 = self.half_IN(first.contiguous())
            out2 = self.half_BN(second.contiguous())
            return torch.cat((out1, out2), 1)
        if self.type == 'b':
            return self.IN(input)
        if self.type == 'c':
            return self.IN(input) + self.BN(input)
        if self.type == 'd':
            first, second = self._split_halves(input)
            # The half-width layer must be used here: the full-width ``IN``
            # expects ``in_channel`` channels and rejects the half-sized chunk.
            out1 = self.half_IN(first.contiguous())
            return torch.cat((out1, second.contiguous()), 1)
        # Only reachable if ``self.type`` was changed after construction.
        raise ValueError(
            "IBN type must be one of %s, got %r" % (self._TYPES, self.type)
        )

In [8]:
# Smoke test: push one random 224x224 RGB image through a 7x7 stride-2 stem
# convolution, then through a 64-channel Bottleneck with IBN enabled.
# NOTE: `input` shadows the Python builtin of the same name in this notebook.
input = torch.randn(1, 3, 224, 224)
conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
out = conv1(input)
b = Bottleneck(64, 64, True)
# Call the module itself instead of .forward() so registered hooks also run.
out1 = b(out)

#### Model
<center class="half">
<img src="./Resnet_img/layer.jpg" width="520"><img src='./Resnet_img/resnet.jpg' width="920">
</center>

### 2、YOLO

#### YOLOv1
<center class="half">
<img src="./YOLO_img/layer.jpg">
</center>

In [54]:
class YOLO(nn.Module):
    """YOLOv1-style fully convolutional detector head built from plain layers.

    The network is a fixed stack of ``nn.Conv2d`` and ``nn.MaxPool2d`` layers
    with no activation or normalisation between them. The last 1x1
    convolution emits ``cell * cell * (cls + boxes * 5)`` channels, which are
    reshaped into a ``cell x cell`` grid of per-cell predictions.

    With the kernel sizes and paddings used here, a 448x448 input is reduced
    to a 1x1 map before the final reshape. Other input sizes that do not
    collapse to 1x1 make the reshape fail.

    Args:
        cell: grid size per side (stored as ``self.C``).
        cls: number of class scores per cell (stored as ``self.CLS``).
        boxes: number of boxes per cell, five values each (stored as ``self.B``).
    """

    def __init__(self, cell: int = 7, cls: int = 20, boxes: int = 2):
        super(YOLO, self).__init__()
        self.C = cell
        self.CLS = cls
        self.B = boxes

        # Stage 1
        self.conv1 = nn.Conv2d(3, 192, kernel_size=7, stride=2, padding=1)
        self.max_pool1 = nn.MaxPool2d(kernel_size=2, stride=2, padding=1)

        # Stage 2
        self.conv2 = nn.Conv2d(192, 256, kernel_size=3)
        self.max_pool2 = nn.MaxPool2d(kernel_size=2, stride=2, padding=1)

        # Stage 3
        self.conv3_1 = nn.Conv2d(256, 128, kernel_size=1)
        self.conv3_2 = nn.Conv2d(128, 256, kernel_size=3)
        self.conv3_3 = nn.Conv2d(256, 256, kernel_size=1)
        self.conv3_4 = nn.Conv2d(256, 512, kernel_size=3, padding=1)
        self.max_pool3 = nn.MaxPool2d(kernel_size=2, stride=2, padding=1)

        # Stage 4
        self.conv4_1 = nn.Conv2d(512, 256, kernel_size=1)
        self.conv4_2 = nn.Conv2d(256, 512, kernel_size=3, padding=2)
        self.conv4_3 = nn.Conv2d(512, 512, kernel_size=1)
        self.conv4_4 = nn.Conv2d(512, 1024, kernel_size=3)
        self.max_pool4 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 5
        self.conv5_1 = nn.Conv2d(1024, 512, kernel_size=1)
        self.conv5_2 = nn.Conv2d(512, 1024, kernel_size=3)
        self.conv5_3 = nn.Conv2d(1024, 1024, kernel_size=3, padding=2)
        self.max_pool5 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Stage 6
        self.conv6_1 = nn.Conv2d(1024, 1024, kernel_size=3)
        self.conv6_2 = nn.Conv2d(1024, 1024, kernel_size=3, padding=2)

        # A 7x7 convolution collapses the remaining map; the 1x1 convolution
        # then produces the flattened per-cell predictions.
        self.conv7 = nn.Conv2d(1024, 4096, kernel_size=7)
        self.conv8 = nn.Conv2d(4096, self.C*self.C*(self.CLS+self.B*5), kernel_size=1)

    def forward(self, input: torch.Tensor):
        """Run the network on an ``(N, 3, H, W)`` batch.

        Returns:
            For a single image (``N == 1``) a tensor of shape
            ``(C, C, CLS + B * 5)``; for ``N > 1`` a tensor of shape
            ``(N, C, C, CLS + B * 5)``.

        Raises:
            RuntimeError: if the feature map is not 1x1 after the last
                convolution, so the reshape cannot be performed.
        """
        out = self.conv1(input)
        out = self.max_pool1(out)

        out = self.conv2(out)
        out = self.max_pool2(out)

        out = self.conv3_1(out)
        out = self.conv3_2(out)
        out = self.conv3_3(out)
        out = self.conv3_4(out)
        out = self.max_pool3(out)

        # This pair used to sit in `for i in range(4)`, but every pass
        # restarted from the same pooled input, so all iterations produced the
        # identical tensor. One pass gives the same output at a quarter of
        # the cost.
        out = self.conv4_1(out)
        out = self.conv4_2(out)
        out = self.conv4_3(out)
        out = self.conv4_4(out)
        out = self.max_pool4(out)

        # Same situation as above (previously `for i in range(2)`).
        out = self.conv5_1(out)
        out = self.conv5_2(out)
        out = self.conv5_3(out)
        out = self.max_pool5(out)

        out = self.conv6_1(out)
        out = self.conv6_2(out)

        out = self.conv7(out)
        out = self.conv8(out)

        # Keep the batch axis while reshaping so batches larger than one work.
        grid = out.reshape(out.shape[0], self.C, self.C, self.CLS+self.B*5)
        # A single image keeps the historical (C, C, D) return shape.
        if grid.shape[0] == 1:
            return grid[0]
        return grid

In [55]:
# Smoke test: run one random 448x448 RGB image through a default YOLO model.
# NOTE: `input` shadows the Python builtin of the same name in this notebook.
input = torch.randn(1, 3, 448, 448)
yolo = YOLO()
# Call the module itself instead of .forward() so registered hooks also run.
out = yolo(input)

### 3、HRNet

#### Backbone
#### Model
<center class="half">
<img src="./HRNet_img/backbone.jpg">
</center>