In [14]:
import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.nn import functional as F
from d2l import torch as d2l
from torchsummary import summary

torch.__version__, np.__version__, pd.__version__

('2.0.1+cu117', '1.21.5', '1.2.4')

In [15]:
focue_CPU = False
#focue_CPU = True

def get_device():
    curr_dev = torch.device("cpu")
    if focue_CPU == True:
        curr_dev = torch.device("cpu")
    else:
        curr_dev = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    return curr_dev

curr_device = get_device()
print("current device : ", curr_device)
#currr_device = torch.device("cpu")

current device :  cuda:0


残差块， 此代码生成两种类型的网络： 一种是当use_1x1conv=False时，应用ReLU非线性函数之前，将输入添加到输出。 另一种是当use_1x1conv=True时，添加通过
卷积调整通道和分辨率。

In [16]:
class Residual(nn.Module):
    def __init__(self, input_channels, num_channels, use_1x1conv = False, strides =1):
        super().__init__()
        self.conv1 = nn.Conv2d(input_channels,num_channels,kernel_size=3, padding=1,stride=strides)
        self.bn1 = nn.BatchNorm2d(num_channels)
        self.conv2 = nn.Conv2d(num_channels,num_channels,kernel_size=3, padding=1,)
        self.bn2 = nn.BatchNorm2d(num_channels)

        if use_1x1conv:
            self.conv3 = nn.Conv2d(input_channels,num_channels, kernel_size=1, stride=strides)
        else:
            self.conv3 = None
    
    def forward(self, X):
        Y = self.conv1(X)
        Y = self.bn1(Y)
        Y = F.relu(Y)
        Y = self.conv2(Y)
        Y = self.bn2(Y)

        if self.conv3:
            X = self.conv3(X)
        Y += X
        
        Y = F.relu(Y)
        return Y
    

In [17]:
X = torch.rand(4,3,6,6)
X.shape

torch.Size([4, 3, 6, 6])

In [18]:
X

tensor([[[[0.7416, 0.7291, 0.9835, 0.7133, 0.1973, 0.2975],
          [0.0790, 0.5636, 0.8449, 0.2952, 0.3250, 0.0426],
          [0.6782, 0.0596, 0.6338, 0.8064, 0.0425, 0.7615],
          [0.1581, 0.8710, 0.2313, 0.4694, 0.0579, 0.4571],
          [0.3981, 0.0747, 0.7654, 0.4609, 0.1881, 0.8554],
          [0.6245, 0.8420, 0.1658, 0.8097, 0.2662, 0.0670]],

         [[0.6067, 0.3294, 0.4620, 0.6909, 0.4530, 0.5104],
          [0.7359, 0.7488, 0.4599, 0.5737, 0.1995, 0.8910],
          [0.6521, 0.5742, 0.0155, 0.1807, 0.1966, 0.0634],
          [0.6160, 0.9850, 0.3736, 0.7348, 0.6956, 0.9504],
          [0.2790, 0.2419, 0.7334, 0.7819, 0.9091, 0.4606],
          [0.9278, 0.7353, 0.4115, 0.1779, 0.3135, 0.9706]],

         [[0.5418, 0.0868, 0.1191, 0.8510, 0.6750, 0.2867],
          [0.9639, 0.9688, 0.0916, 0.5213, 0.3677, 0.9330],
          [0.9594, 0.4953, 0.9981, 0.3980, 0.2613, 0.9494],
          [0.4724, 0.1637, 0.0898, 0.0025, 0.3053, 0.3971],
          [0.0795, 0.5246, 0.6940, 0

In [19]:
blk = Residual(3,9,use_1x1conv=True, strides=2)
Y = blk(X)
Y.shape

torch.Size([4, 9, 3, 3])

ResNet 模块

In [21]:
def resnet_block(input_channels, num_channels, num_residuals, first_block = False):
    blk = []
    for i in range(num_residuals):
        if i ==0 and not first_block:
            blk.append(Residual(input_channels, num_channels, use_1x1conv=True, strides=2))
        else:
            blk.append(Residual(num_channels, num_channels))
    return blk

Residual Net

In [35]:
def residualNet( in_channels=1, num_classes =10):
    
    b1 =nn.Sequential(nn.Conv2d(in_channels,64, kernel_size=7, stride=2, padding=3),
                  nn.BatchNorm2d(64), nn.ReLU(),
                  nn.MaxPool2d(kernel_size=3, stride= 2, padding=1)                  
                  )
    b2 = nn.Sequential(*resnet_block(64,64,2,first_block=True))
    b3 = nn.Sequential(*resnet_block(64,128,2))
    b4 = nn.Sequential(*resnet_block(128,256,2))
    b5 = nn.Sequential(*resnet_block(256,512,2))
    net = nn.Sequential(b1, b2,b3,b4,b5, 
                        nn.AdaptiveAvgPool2d((1,1)),
                        nn.Flatten(),
                        nn.Linear(512,num_classes))
    return net

In [41]:
net = residualNet(3,10)

In [48]:
X = torch.rand(1,3,244,244)

In [49]:
Y = net(X)
Y

tensor([[ 0.0781, -1.1674,  1.0902,  0.3165,  0.3017, -0.0799,  0.1147, -0.1893,
         -0.1398, -0.1650]], grad_fn=<AddmmBackward0>)

In [39]:
summary(net,(1,244,244),device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 122, 122]           3,200
       BatchNorm2d-2         [-1, 64, 122, 122]             128
              ReLU-3         [-1, 64, 122, 122]               0
         MaxPool2d-4           [-1, 64, 61, 61]               0
            Conv2d-5           [-1, 64, 61, 61]          36,928
       BatchNorm2d-6           [-1, 64, 61, 61]             128
            Conv2d-7           [-1, 64, 61, 61]          36,928
       BatchNorm2d-8           [-1, 64, 61, 61]             128
          Residual-9           [-1, 64, 61, 61]               0
           Conv2d-10           [-1, 64, 61, 61]          36,928
      BatchNorm2d-11           [-1, 64, 61, 61]             128
           Conv2d-12           [-1, 64, 61, 61]          36,928
      BatchNorm2d-13           [-1, 64, 61, 61]             128
         Residual-14           [-1, 64,

In [40]:
X = torch.rand(size=(1, 1, 224, 224))
for layer in net:
    X = layer(X)
    print(layer.__class__.__name__,'output shape:\t', X.shape)

Sequential output shape:	 torch.Size([1, 64, 56, 56])
Sequential output shape:	 torch.Size([1, 64, 56, 56])
Sequential output shape:	 torch.Size([1, 128, 28, 28])
Sequential output shape:	 torch.Size([1, 256, 14, 14])
Sequential output shape:	 torch.Size([1, 512, 7, 7])
AdaptiveAvgPool2d output shape:	 torch.Size([1, 512, 1, 1])
Flatten output shape:	 torch.Size([1, 512])
Linear output shape:	 torch.Size([1, 10])
