# 图像卷积
互相关运算

In [3]:
import torch
from torch import nn
from d2l import torch as d2l

def corr2d(X, K):
    """Compute the 2-D cross-correlation of input ``X`` with kernel ``K``.

    Args:
        X: 2-D tensor to slide the kernel over.
        K: 2-D kernel tensor.

    Returns:
        A tensor of shape ``(X.shape[0] - K.shape[0] + 1,
        X.shape[1] - K.shape[1] + 1)`` whose entry ``[r, c]`` is the sum of
        the element-wise product of ``K`` with the window of ``X`` whose
        top-left corner is at ``(r, c)``.
    """
    kernel_rows, kernel_cols = K.shape
    out_rows = X.shape[0] - kernel_rows + 1
    out_cols = X.shape[1] - kernel_cols + 1
    # Output buffer, filled one position at a time below.
    Y = torch.zeros((out_rows, out_cols))
    for r in range(out_rows):
        for c in range(out_cols):
            # Window of X currently covered by the kernel.
            window = X[r:r + kernel_rows, c:c + kernel_cols]
            Y[r, c] = (window * K).sum()
    return Y

验证上述二维互相关运算的输出

In [4]:
# Worked example: a 3x3 input holding 0..8 and a 2x2 kernel holding 0..3.
X = torch.arange(9.0).reshape(3, 3)
K = torch.arange(4.0).reshape(2, 2)
corr2d(X, K)

tensor([[19., 25.],
        [37., 43.]])

实现二维卷积层

In [5]:
class Conv2d(nn.Module):
    """Single-channel 2-D convolutional layer built on ``corr2d``.

    The layer holds a learnable kernel ``weight`` and a learnable scalar
    ``bias``; its output is the cross-correlation of the input with the
    kernel, plus the bias.
    """

    def __init__(self, kernel_size):
        """Create the layer's parameters.

        Args:
            kernel_size: shape of the kernel, passed to ``torch.rand``
                (e.g. ``(2, 2)``).
        """
        super().__init__()
        # Kernel entries start as uniform random values in [0, 1).
        self.weight = nn.Parameter(torch.rand(kernel_size))
        # Bias starts at zero (the original `torch.zeors` raised AttributeError).
        self.bias = nn.Parameter(torch.zeros(1))

    def forward(self, x):
        """Return ``corr2d(x, self.weight) + self.bias``."""
        return corr2d(x, self.weight) + self.bias

卷积的一个简单应用：检测图像中不同颜色的边缘

In [6]:
# 6x8 "image": two columns of ones, four columns of zeros, two columns of ones.
X = torch.cat((torch.ones(6, 2), torch.zeros(6, 4), torch.ones(6, 2)), dim=1)
X

tensor([[1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.]])

In [7]:
# 1x2 kernel that takes the difference between horizontally adjacent elements.
K = torch.tensor([1.0, -1.0]).reshape(1, 2)

Y中的1代表从白色到黑色的边缘，-1代表从黑色到白色的边缘

In [8]:
# Cross-correlate the image X with the kernel K and display the result.
Y = corr2d(X, K)
Y

tensor([[ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.]])

卷积核K只可以检测垂直边缘：将X转置后边缘变为水平方向，此时互相关运算的输出全为0

In [9]:
# Apply the same kernel to the transposed image.
corr2d(X.T, K)

tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]])

学习由X生成Y的卷积核

In [19]:
# Arguments: input channels, output channels, kernel size; no bias term.
conv2d = nn.Conv2d(1, 1, kernel_size=(1, 2), bias=False)

# nn.Conv2d works on 4-D tensors (batch, channel, height, width), so add
# leading batch and channel dimensions of size 1.
X = X.reshape((1, 1, 6, 8))  # input
Y = Y.reshape((1, 1, 6, 7))  # target output

lr = 3e-2        # learning rate
num_epochs = 30  # number of gradient-descent steps

for i in range(num_epochs):
    Y_hat = conv2d(X)
    l = (Y_hat - Y)**2  # element-wise squared error
    loss = l.sum()      # summed once, reused for backward() and logging
    conv2d.zero_grad()
    loss.backward()
    # Plain gradient-descent step: weight -= lr * grad. Done under no_grad so
    # the in-place update of the parameter is not recorded by autograd.
    with torch.no_grad():
        conv2d.weight.sub_(lr * conv2d.weight.grad)
    if (i + 1) % 2 == 0:
        print(f'epoch {i+1}, loss {loss:.3f}')

epoch 2, loss 8.345
epoch 4, loss 2.836
epoch 6, loss 1.064
epoch 8, loss 0.419
epoch 10, loss 0.169
epoch 12, loss 0.069
epoch 14, loss 0.028
epoch 16, loss 0.011
epoch 18, loss 0.005
epoch 20, loss 0.002
epoch 22, loss 0.001
epoch 24, loss 0.000
epoch 26, loss 0.000
epoch 28, loss 0.000
epoch 30, loss 0.000


所学的卷积核的权重张量

In [20]:
# Show the learned kernel as a 1x2 tensor, detached from the autograd graph.
conv2d.weight.detach().reshape(1, 2)

tensor([[ 0.9995, -1.0005]])