In [4]:
import torch
import torch.nn as nn
from d2l import torch as d2l

def corr2d(X, K):
    """Compute the 2D cross-correlation of input ``X`` with kernel ``K``.

    Args:
        X: 2D input tensor.
        K: 2D kernel tensor, no larger than ``X`` along either dimension.

    Returns:
        A tensor of shape ``(X.shape[0] - h + 1, X.shape[1] - w + 1)``, where
        ``(h, w)`` is the kernel shape. Entry ``(i, j)`` is the sum of the
        element-wise product of ``K`` with the ``h x w`` window of ``X`` whose
        top-left corner is ``(i, j)``. The result lives on ``X``'s device.
        Floating-point inputs keep their promoted dtype; any other dtype falls
        back to the default float dtype.
    """
    h, w = K.shape
    # Follow the inputs' precision instead of silently downcasting (e.g.
    # float64 -> float32); non-float inputs keep producing a default-float output.
    out_dtype = torch.result_type(X, K)
    if not (out_dtype.is_floating_point or out_dtype.is_complex):
        out_dtype = torch.get_default_dtype()
    # Output size is the input size minus the kernel size plus 1.
    Y = torch.zeros((X.shape[0] - h + 1, X.shape[1] - w + 1),
                    dtype=out_dtype, device=X.device)
    # Slide the kernel over every valid position of the input.
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            # Multiply the current window by the kernel element-wise, then sum.
            Y[i, j] = (X[i: i + h, j: j + w] * K).sum()
    return Y

# Worked example: a 3x3 input holding 0..8 and a 2x2 kernel holding 0..3.
x = torch.arange(9.0).reshape(3, 3)  # input tensor
k = torch.arange(4.0).reshape(2, 2)  # kernel
print(corr2d(x, k))  # output tensor


tensor([[19., 25.],
        [37., 43.]])


## 二维卷积层

In [5]:
# 2D convolutional layer
class Conv2D(nn.Module):
    """2D layer that cross-correlates its input with a learnable kernel and adds a bias."""

    def __init__(self, kernel_size):
        """Create a randomly initialised kernel of shape ``kernel_size`` and a 1-element bias."""
        super().__init__()
        # Wrapping a tensor in nn.Parameter registers it as a model parameter, so it
        # is returned by model.parameters() and can be updated during training.
        self.weight = nn.Parameter(torch.randn(kernel_size))  # kernel
        self.bias = nn.Parameter(torch.randn(1))  # bias

    def forward(self, x):
        """Return the cross-correlation of ``x`` with the kernel, plus the bias."""
        response = corr2d(x, self.weight)
        return response + self.bias

In [6]:
# Simple application: detect the edges between regions of different colour.
# Build a 6x8 "image" whose columns 2-5 are 0 and whose other columns are 1.
X = torch.cat((torch.ones(6, 2), torch.zeros(6, 4), torch.ones(6, 2)), dim=1)
print(X)

# A 1x2 kernel that subtracts each pixel's right-hand neighbour from the pixel.
K = torch.tensor([1.0, -1.0]).reshape(1, 2)
Y = corr2d(X, K)
print(Y)

tensor([[1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.]])
tensor([[ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.]])


In [7]:
# Learn the edge-detection kernel from the (X, Y) pair with a built-in layer:
# 1 input channel, 1 output channel, a 1x2 kernel and no bias.
conv2d = nn.Conv2d(1, 1, kernel_size=(1, 2), bias=False)
print(conv2d.weight.detach())
# nn.Conv2d works on 4D tensors: (batch size, channels, height, width)
X = X.reshape(1, 1, 6, 8)
Y = Y.reshape(1, 1, 6, 7)

lr = 3e-2  # learning rate

# Gradient descent
for i in range(50):
    Y_hat = conv2d(X)
    l = (Y_hat - Y) ** 2  # element-wise squared error
    loss = l.sum()        # reduce once; reused for backward() and for logging
    conv2d.zero_grad()
    loss.backward()  # backpropagation computes the gradient of every parameter
    # Update the weights under no_grad() rather than through `.data`, so the
    # in-place step is excluded from the graph without bypassing autograd's checks.
    with torch.no_grad():
        conv2d.weight -= lr * conv2d.weight.grad
    if (i + 1) % 2 == 0:
        print(f'batch {i + 1}, loss {loss:.3f}')

print(conv2d.weight.detach())

tensor([[[[-0.1576,  0.6141]]]])
batch 2, loss 11.441
batch 4, loss 2.403
batch 6, loss 0.601
batch 8, loss 0.182
batch 10, loss 0.064
batch 12, loss 0.024
batch 14, loss 0.010
batch 16, loss 0.004
batch 18, loss 0.002
batch 20, loss 0.001
batch 22, loss 0.000
batch 24, loss 0.000
batch 26, loss 0.000
batch 28, loss 0.000
batch 30, loss 0.000
batch 32, loss 0.000
batch 34, loss 0.000
batch 36, loss 0.000
batch 38, loss 0.000
batch 40, loss 0.000
batch 42, loss 0.000
batch 44, loss 0.000
batch 46, loss 0.000
batch 48, loss 0.000
batch 50, loss 0.000
tensor([[[[ 1.0000, -1.0000]]]])


## 填充和步幅

In [8]:
# Padding and stride
def comp_conv2d(conv2d, X):
    """Apply the layer ``conv2d`` to the tensor ``X`` and return the result
    without its batch and channel dimensions.

    ``X`` is given two leading dimensions of size 1 (batch size and channel
    count) before the call; the first two dimensions of the output are dropped
    again afterwards.
    """
    # Prepend the (1, 1) batch and channel dimensions.
    batched = X.reshape((1, 1, *X.shape))
    out = conv2d(batched)
    # Drop the two leading dimensions we are not interested in: batch and channel.
    spatial_shape = out.shape[2:]
    return out.reshape(spatial_shape)

# A 3x3 kernel with padding 1 applied to an 8x8 input; show the output shape.
conv2d = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, padding=1)
X = torch.rand(8, 8)
comp_conv2d(conv2d, X).shape

torch.Size([8, 8])

In [9]:
# Use different padding for height and width: a 5x3 kernel padded by (2, 1).
conv2d = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(5, 3), padding=(2, 1))
comp_conv2d(conv2d, X).shape


torch.Size([8, 8])

In [10]:
# Set the stride to 2 for both height and width.
conv2d = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, padding=1, stride=2)
comp_conv2d(conv2d, X).shape

torch.Size([4, 4])

In [11]:
# A slightly more involved example: kernel, padding and stride all differ
# between the height and the width dimension.
conv2d = nn.Conv2d(in_channels=1, out_channels=1,
                   kernel_size=(3, 5), padding=(0, 1), stride=(3, 4))
comp_conv2d(conv2d, X).shape

torch.Size([2, 2])

## 多输入多输出通道

In [12]:
def corr2d_multi_in(X, K):
    """Cross-correlation for an input with multiple channels.

    Args:
        X: input tensor whose dimension 0 is the channel dimension.
        K: kernel tensor whose dimension 0 is the channel dimension, paired
            channel by channel with ``X``.

    Returns:
        The sum, over all channels, of the 2D cross-correlation of each input
        channel with its matching kernel channel.
    """
    # zip pairs up the slices of X and K along dimension 0 (the channel
    # dimension); each pair is cross-correlated with this notebook's corr2d
    # and the per-channel results are added together.
    return sum(corr2d(x, k) for x, k in zip(X, K))

In [13]:
# Verify the multi-input-channel cross-correlation.
# Two 3x3 input channels (0..8 and 1..9) and two 2x2 kernel channels (0..3 and 1..4).
X = torch.stack((torch.arange(9.0).reshape(3, 3),
                 torch.arange(1.0, 10.0).reshape(3, 3)))
K = torch.stack((torch.arange(4.0).reshape(2, 2),
                 torch.arange(1.0, 5.0).reshape(2, 2)))

corr2d_multi_in(X, K)


tensor([[ 56.,  72.],
        [104., 120.]])

In [14]:
# Cross-correlation with multiple output channels
def corr2d_multi_in_out(X, K):
    """Cross-correlate ``X`` with every slice of ``K`` along dimension 0.

    Each slice of ``K`` is passed to ``corr2d_multi_in`` together with ``X``;
    the results are stacked along a new leading dimension.
    """
    per_output_channel = []
    # Iterate over dimension 0 of K, cross-correlating each slice with the input X.
    for k in K:
        per_output_channel.append(corr2d_multi_in(X, k))
    # Merge all results into one tensor.
    return torch.stack(per_output_channel, dim=0)


# Build a kernel with 3 output channels by stacking K, K + 1 and K + 2.
# NOTE: this rebinds K, so re-running the cell stacks the already-stacked kernel.
K = torch.stack([K + offset for offset in range(3)], dim=0)
K.shape

torch.Size([3, 2, 2, 2])

In [15]:
# Verify the multi-output-channel cross-correlation
corr2d_multi_in_out(X, K)

tensor([[[ 56.,  72.],
         [104., 120.]],

        [[ 76., 100.],
         [148., 172.]],

        [[ 96., 128.],
         [192., 224.]]])

In [16]:
# 1x1 convolution implemented as a fully connected layer
def corr2d_multi_in_out_1x1(X, K):
    """Multi-input, multi-output cross-correlation for a 1x1 kernel, computed
    as a single matrix multiplication.

    Args:
        X: input tensor of shape ``(c_i, h, w)``.
        K: kernel tensor with ``c_o`` entries along dimension 0 that can be
            reshaped to ``(c_o, c_i)``.

    Returns:
        A tensor of shape ``(c_o, h, w)``.
    """
    c_i, h, w = X.shape
    c_o = K.shape[0]
    # One column per pixel, one row per input channel.
    pixels = X.reshape(c_i, h * w)
    # One row of channel weights per output channel.
    weights = K.reshape(c_o, c_i)
    # Matrix multiplication of the fully connected layer.
    return (weights @ pixels).reshape(c_o, h, w)

# Check that the matrix-multiplication version matches the generic
# multi-channel cross-correlation on random data.
X = torch.normal(0, 1, (3, 3, 3))
K = torch.normal(0, 1, (2, 3, 1, 1))

Y1 = corr2d_multi_in_out_1x1(X, K)
Y2 = corr2d_multi_in_out(X, K)

# Compare element-wise with a tolerance: the two implementations add the same
# products in a different order, so float32 results may differ by rounding, and
# a summed absolute error would grow with the number of elements.
assert torch.allclose(Y1, Y2, atol=1e-6), "1x1 convolution results differ"

## 池化层

In [17]:
# Pooling layer
def pool2d(X, pool_size, mode='max'):
    """Apply 2D max or average pooling with stride 1 and no padding.

    Args:
        X: 2D input tensor.
        pool_size: ``(p_h, p_w)`` height and width of the pooling window.
        mode: ``'max'`` for max pooling or ``'avg'`` for average pooling.

    Returns:
        A tensor of shape ``(X.shape[0] - p_h + 1, X.shape[1] - p_w + 1)``
        holding the maximum (or mean) of each window. The result lives on
        ``X``'s device; a floating-point ``X`` keeps its dtype, any other dtype
        falls back to the default float dtype.

    Raises:
        ValueError: if ``mode`` is neither ``'max'`` nor ``'avg'``.
    """
    # Reject unknown modes up front instead of silently returning all zeros.
    if mode not in ('max', 'avg'):
        raise ValueError(f"unsupported pooling mode {mode!r}; expected 'max' or 'avg'")
    p_h, p_w = pool_size
    # Keep the input's float precision rather than always using the default dtype.
    out_dtype = X.dtype if X.dtype.is_floating_point else torch.get_default_dtype()
    Y = torch.zeros((X.shape[0] - p_h + 1, X.shape[1] - p_w + 1),
                    dtype=out_dtype, device=X.device)
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            window = X[i: i + p_h, j: j + p_w]
            if mode == 'max':
                Y[i, j] = window.max()
            else:
                Y[i, j] = window.mean()
    return Y

# Max pooling with a 2x2 window on a 3x3 input holding 0..8.
X = torch.arange(9.0).reshape(3, 3)
pool2d(X, pool_size=(2, 2))

tensor([[4., 5.],
        [7., 8.]])

In [18]:
# Average pooling with the same 2x2 window.
pool2d(X, pool_size=(2, 2), mode='avg')

tensor([[2., 3.],
        [5., 6.]])

In [19]:
# Padding and stride: a 4x4 input holding 0..15, with leading batch and
# channel dimensions of size 1.
X = torch.arange(16, dtype=torch.float32).view(1, 1, 4, 4)
X

tensor([[[[ 0.,  1.,  2.,  3.],
          [ 4.,  5.,  6.,  7.],
          [ 8.,  9., 10., 11.],
          [12., 13., 14., 15.]]]])

In [20]:
# Built-in max pooling with a 2x2 window; the stride defaults to the window size.
# A dedicated name keeps the hand-written pool2d function from being shadowed.
max_pool = nn.MaxPool2d(2)
max_pool(X)

tensor([[[[ 5.,  7.],
          [13., 15.]]]])

In [21]:
# The pooling layer operates on every input channel separately.
# Append a second channel (X + 1) along the channel dimension (dim 1).
X = torch.cat([X, X + 1], dim=1)
X

tensor([[[[ 0.,  1.,  2.,  3.],
          [ 4.,  5.,  6.,  7.],
          [ 8.,  9., 10., 11.],
          [12., 13., 14., 15.]],

         [[ 1.,  2.,  3.,  4.],
          [ 5.,  6.,  7.,  8.],
          [ 9., 10., 11., 12.],
          [13., 14., 15., 16.]]]])

In [22]:
# Max pooling with a 3x3 window, padding 1 and stride 2.
# A dedicated name keeps the hand-written pool2d function from being shadowed.
max_pool_padded = nn.MaxPool2d(3, padding=1, stride=2)
max_pool_padded(X)

tensor([[[[ 5.,  7.],
          [13., 15.]],

         [[ 6.,  8.],
          [14., 16.]]]])