In [2]:
import time
import torch
from torch import nn, optim
import torch.nn.functional as F
# import sys
# sys.path.append("..") 
# import d2lzh_pytorch as d2l
# Select the CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
from torchsnooper import snoop
class Residual(nn.Module):  # This class is also saved in the d2lzh_pytorch package for convenient reuse
    """Residual block: two 3x3 conv + batch-norm stages added to a skip path.

    Args:
        in_channels: channel count of the input passed to ``conv1``.
        out_channels: channel count produced by ``conv1`` and ``conv2``.
        use_1x1conv: when True, the skip path goes through a 1x1 convolution
            (``conv3``) that maps ``in_channels`` to ``out_channels`` with the
            same ``stride`` as ``conv1``; when False, ``conv3`` is None and the
            input is added unchanged.
        stride: stride of ``conv1`` and, if present, of ``conv3``.
    """
    def __init__(self, in_channels, out_channels, use_1x1conv=False, stride=1):
        super(Residual, self).__init__()
        # Only conv1 carries the stride; conv2 uses the default stride.
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, stride=stride)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        if use_1x1conv:
            # Projection for the skip path: same channel change and stride as conv1.
            self.conv3 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride)
        else:
            self.conv3 = None
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)
    # torchsnooper decorator: every call prints a trace of the executed lines and
    # the tensors assigned in them (see the recorded cell outputs).
    @snoop()
    def forward(self, X):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(X))))) + skip)``.

        ``skip`` is ``conv3(X)`` when the 1x1 convolution exists, else ``X`` itself.
        """
        Y = F.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        # conv3 is None unless use_1x1conv was requested in __init__.
        if self.conv3:
            X = self.conv3(X)
        return F.relu(Y + X)

* residual 有改变通道数和不改变通道数两种；如果改变了通道数，那么输入进来的 x 就需要经过一个 1×1 卷积（`conv3`），来保证它与 F(x) = H(x) - x 的输出维度一样，这样两者才能相加
* kernel = 3, padding = 1, stride = 1 刚好可以保证各个都不变化
* 输出尺寸公式：$\lfloor (dim + 2 \times pad - kernel\_size) / stride \rfloor + 1$（除法向下取整）
* conv3 直接减kernel_size = 1, 与conv1 先减kernel_size = 3 再 + padding = 1 * 2，两个是一样的，都是减1，除的话也是一样都是除以stride，所以它们的维度是一样的

In [4]:
blk = Residual(3, 3)
X = torch.rand((4, 3, 8, 6))
blk(X).shape # torch.Size([4, 3, 8, 6]) -- same shape as X (see the recorded output)
# output size per spatial dim: floor((dim + pad*2 - kernel_size)/stride) + 1

Starting var:.. self = Residual(  (conv1): Conv2d(3, 3, kernel_size=(3,...ntum=0.1, affine=True, track_running_stats=True))
Starting var:.. X = tensor<(4, 3, 8, 6), float32, cpu>
14:23:04.201640 call        22     def forward(self, X):
14:23:04.203635 line        23         Y = F.relu(self.bn1(self.conv1(X)))
New var:....... Y = tensor<(4, 3, 8, 6), float32, cpu, grad>
14:23:04.216602 line        24         Y = self.bn2(self.conv2(Y))
14:23:04.223581 line        25         if self.conv3:
14:23:04.224578 line        27         return F.relu(Y + X)
14:23:04.226573 return      27         return F.relu(Y + X)
Return value:.. tensor<(4, 3, 8, 6), float32, cpu, grad>
Elapsed time: 00:00:00.025930


torch.Size([4, 3, 8, 6])

In [5]:
blk = Residual(3, 6, use_1x1conv=True, stride=2)
blk(X).shape # torch.Size([4, 6, 4, 3]) -- channels 3 -> 6, spatial dims 8x6 -> 4x3 (see the recorded output)


Starting var:.. self = Residual(  (conv1): Conv2d(3, 6, kernel_size=(3,...ntum=0.1, affine=True, track_running_stats=True))
Starting var:.. X = tensor<(4, 3, 8, 6), float32, cpu>
15:10:32.647186 call        22     def forward(self, X):
15:10:32.651175 line        23         Y = F.relu(self.bn1(self.conv1(X)))
New var:....... Y = tensor<(4, 6, 4, 3), float32, cpu, grad>
15:10:32.722983 line        24         Y = self.bn2(self.conv2(Y))
15:10:32.725974 line        25         if self.conv3:
15:10:32.726971 line        26             X = self.conv3(X)
Modified var:.. X = tensor<(4, 6, 4, 3), float32, cpu, grad>
15:10:32.784833 line        27         return F.relu(Y + X)
15:10:32.786812 return      27         return F.relu(Y + X)
Return value:.. tensor<(4, 6, 4, 3), float32, cpu, grad>
Elapsed time: 00:00:00.141620


torch.Size([4, 6, 4, 3])

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F


# Residual block used by ResNet-18 and ResNet-34: two stacked 3x3 convolutions.
class BasicBlock(nn.Module):
    """Two 3x3 conv + batch-norm stages whose output is added to a shortcut.

    Args:
        in_planes: number of input channels.
        planes: number of channels produced by both convolutions.
        stride: stride of the first convolution (and of the shortcut
            projection, when one is needed).
    """

    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        # The main path's output must match the shortcut in both spatial size
        # and depth. When it would not, the shortcut becomes a 1x1 conv + BN
        # projection; otherwise it is an empty Sequential (identity).
        shape_preserved = stride == 1 and in_planes == out_planes
        if shape_preserved:
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

    def forward(self, x):
        """Apply both conv stages, add the shortcut of ``x``, then ReLU."""
        hidden = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(hidden))
        out += self.shortcut(x)
        return F.relu(out)


# Residual block used by ResNet-50, -101 and -152: 1x1 -> 3x3 -> 1x1 convolutions.
class Bottleneck(nn.Module):
    """Bottleneck residual block (1x1, 3x3, 1x1 convolutions plus shortcut).

    Args:
        in_planes: number of input channels.
        planes: number of channels of the first 1x1 and the 3x3 convolution;
            the block outputs ``expansion * planes`` channels.
        stride: stride of the 3x3 convolution (and of the shortcut
            projection, when one is needed).
    """

    # The first 1x1 and the 3x3 conv use the same number of filters; the final
    # 1x1 conv uses `expansion` times as many.
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        # Identity shortcut when the block keeps both resolution and channel
        # count; otherwise a 1x1 conv + BN projection.
        shape_preserved = stride == 1 and in_planes == out_planes
        if shape_preserved:
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

    def forward(self, x):
        """Apply the three conv stages, add the shortcut of ``x``, then ReLU."""
        reduced = F.relu(self.bn1(self.conv1(x)))
        spatial = F.relu(self.bn2(self.conv2(reduced)))
        out = self.bn3(self.conv3(spatial))
        out += self.shortcut(x)
        return F.relu(out)


class ResNet(nn.Module):
    """ResNet classifier: a 3x3 stem, four residual stages, and a linear head.

    Args:
        block: residual block class. It is called as
            ``block(in_planes, planes, stride)`` and must expose an integer
            class attribute ``expansion`` (output channels = expansion * planes).
        num_blocks: sequence of four ints, the number of blocks per stage.
        num_classes: size of the output of the final linear layer.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        # Running channel count fed to the next block; updated by _make_layer.
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)

        # num_blocks[i] is how many blocks stage i+1 contains.
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage: the first block uses ``stride``, the rest use 1."""
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        # A distinct loop name keeps the `stride` argument from being shadowed.
        for block_stride in strides:
            layers.append(block(self.in_planes, planes, block_stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return class scores of shape ``(batch, num_classes)``."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        # Global average pooling. For a 32x32 input the final map is 4x4, so
        # this matches a fixed 4x4 average pool; unlike the fixed window it
        # also yields one value per channel for other input resolutions, so
        # the flattened features always match the linear layer's input size.
        out = F.adaptive_avg_pool2d(out, 1)
        out = torch.flatten(out, 1)
        out = self.linear(out)
        return out


def ResNet18():
    """Build a ResNet from BasicBlock units with a [2, 2, 2, 2] stage layout."""
    stage_depths = [2, 2, 2, 2]
    return ResNet(block=BasicBlock, num_blocks=stage_depths)

def ResNet34():
    """Build a ResNet from BasicBlock units with a [3, 4, 6, 3] stage layout."""
    stage_depths = [3, 4, 6, 3]
    return ResNet(block=BasicBlock, num_blocks=stage_depths)

def ResNet50():
    """Build a ResNet from Bottleneck units with a [3, 4, 6, 3] stage layout."""
    stage_depths = [3, 4, 6, 3]
    return ResNet(block=Bottleneck, num_blocks=stage_depths)

def ResNet101():
    """Build a ResNet from Bottleneck units with a [3, 4, 23, 3] stage layout."""
    stage_depths = [3, 4, 23, 3]
    return ResNet(block=Bottleneck, num_blocks=stage_depths)

def ResNet152():
    """Build a ResNet from Bottleneck units with a [3, 8, 36, 3] stage layout."""
    stage_depths = [3, 8, 36, 3]
    return ResNet(block=Bottleneck, num_blocks=stage_depths)


def test():
    """Smoke test: run ResNet18 on one random 1x3x32x32 batch and print the output size."""
    model = ResNet18()
    dummy_batch = torch.randn(1, 3, 32, 32)
    scores = model(dummy_batch)
    print(scores.size())

# test()


