In [38]:
import torch
from torch import nn
import d2l_self as d2l

In [23]:
def corr2d(X,K):
    h,w=K.shape
    Y=torch.zeros((X.shape[0]-h+1),(X.shape[1]-w+1))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            Y[i,j]=(X[i:i+h,j:j+w]*K).sum()
    return Y

In [24]:
X=torch.tensor([[1.0,2.0,3.0],[2.0,3.0,4.0],[5.0,6.0,7.0]])
Y=torch.tensor([[2.0,3.0],[2.0,4.0]])
corr2d(X,Y)

tensor([[24., 35.],
        [47., 58.]])

In [25]:
class Conv2D(nn.Module):
    def __init__(self,kernel_size):
        super().__init__()
        self.weight=nn.Parameter(torch.rand(kernel_size))
        self.bias=nn.Parameter(torch.zeros(1))
    def forward(self,x):
        return corr2d(x,self.weight)+self.bias

In [26]:
#图像目标检测
X=torch.ones(6,8)
X[:,2:6]=0
X

tensor([[1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.]])

In [27]:
K=torch.tensor([[1.0,-1.0]])#因为是自定义二维互相关函数，所有期待输入的卷积核至少是二维张量，必须写成[[]]
Y=corr2d(X,K)

In [28]:
#学习卷积核
conv2d=nn.Conv2d(1,1,kernel_size=(1,2),bias=False)
X=X.reshape((1,1,6,8))
Y=Y.reshape((1,1,6,7))
for i in range(10):
    Y_hat=corr2d(X[0,0],conv2d.weight[0,0])
    l=(Y-Y_hat)**2
    conv2d.zero_grad()
    l.sum().backward()
    conv2d.weight.data[:]-=3e-2*conv2d.weight.grad
    if (i+1)%2==0:
        print(f'epoch{i+1},loss{l.sum():.3f}')

epoch2,loss8.741
epoch4,loss1.969
epoch6,loss0.536
epoch8,loss0.174
epoch10,loss0.064


In [29]:
#padding和stride
def comp_conv2d(conv2d,X):
    X=X.reshape((1,1)+X.shape)
    Y=conv2d(X)
    return Y.reshape(Y.shape[2:])
conv2d=nn.Conv2d(1,1,kernel_size=3,padding=1)
X=torch.rand((8,8))
comp_conv2d(conv2d,X).shape

torch.Size([8, 8])

In [30]:
conv2d=nn.Conv2d(1,1,kernel_size=(5,3),padding=(2,1))
comp_conv2d(conv2d,X).shape

torch.Size([8, 8])

In [31]:
conv2d=nn.Conv2d(1,1,kernel_size=(3,5),padding=(0,1),stride=(3,4))
comp_conv2d(conv2d,X).shape

torch.Size([2, 2])

In [32]:
#多输入通道
#X(C_in,H,w),K(C_in,kh,kw)

def corr2d_multi_in(X,K):
#zip(X,K):把每个通道x,k进行一一匹配
    return sum(d2l.corr2d(x,k) for x,k in zip(X,K))

In [33]:
#多输出通道:希望得到多个不同的特征图，每个输出通道有独立的多通道卷积核
#K.shape=(C_out,C_in,kh,kw)
#X.shape=(C_in,H,w)
#output.shape=(C_out,H_out,W_out)
#stack :把C_out个2D 卷积结果堆叠起来形成输出张量，shape=(C_out,H_out,W_out)
def corr2d_multi_in_out(X,K):
    return torch.stack([corr2d_multi_in(X,k)for k in K],0)

In [34]:
K=torch.tensor([[[0.0,1.0],[2.0,3.0]],[[3.0,4.0],[5.0,6.0]]])
K=torch.stack((K,K+1,K+2),dim=0)
K.shape

torch.Size([3, 2, 2, 2])

In [39]:
X=torch.tensor([[[1.0,2.0,3.0],[3.0,4.0,5.0]],[[1.0,3.0,5.0],[9.0,1.0,2.0]]])
corr2d_multi_in_out(X,K)

tensor([[[ 86.,  72.]],

        [[110.,  97.]],

        [[134., 122.]]])

In [40]:
#1*1卷积层
#在空间维度上不做滑动窗口的加权计算，只在通道方向上做加权融合
def corr2d_multi_in_out_1x1(X,K):
    c_i,h,w=X.shape
    c_o=K.shape[0]
    X=X.reshape((c_i,h*w))
    K=K.reshape((c_o,c_i))
    Y=torch.matmul(K,X)
    return Y.reshape(c_o,c_i,h,w)

In [None]:
X=torch.normal(0,1,(3,3,3))
K=torch.normal(0,1,(2,3,1,1))
#把K的维度设置成kh=1,kw=1,则也是1x1卷积
Y1=corr2d_multi_in_out_1x1(X,K)
Y2=corr2d_multi_in_out(X,K)
#assert语句：验证两个是否基本等价
assert float(torch.abs(Y1-Y2).sum())<1e-6

In [41]:
#池化层
def pool2d(X,pool_size,mode='max'):
    p_h,p_w=pool_size
    Y=torch.zeros((X.shape[0]-p_h+1),X.shape[1]-p_w+1)
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            if mode=='max':
                Y[i,j]=X[i:i+p_h,j:j+p_w].max()
            if mode=='avg':
                Y[i,j]=X[i:i+p_h,j:j+p_w].mean()
    return Y

In [45]:
X=torch.tensor([[2.0,3.0,4.0],[2.0,3.0,5.0],[2.0,4.0,7.0]])
pool2d(X,(2,2))

tensor([[3., 5.],
        [4., 7.]])

In [46]:
pool2d(X,(2,2),'avg')

tensor([[2.5000, 3.7500],
        [2.7500, 4.7500]])

In [47]:
X=torch.arange(16).reshape(1,1,4,4)
X

tensor([[[[ 0,  1,  2,  3],
          [ 4,  5,  6,  7],
          [ 8,  9, 10, 11],
          [12, 13, 14, 15]]]])

In [48]:
pool2d=nn.MaxPool2d(3)
pool2d(X)

tensor([[[[10]]]])

In [49]:
pool2d=nn.MaxPool2d((2,3),stride=(2,3),padding=(0,1))
pool2d(X)

tensor([[[[ 5,  7],
          [13, 15]]]])

In [None]:
#多个通道
#是在每个输入通道上单独计算，而不是在通道上对输入进行汇总
X=torch.cat((X,X+1),1)
X

tensor([[[[ 0,  1,  2,  3],
          [ 4,  5,  6,  7],
          [ 8,  9, 10, 11],
          [12, 13, 14, 15]],

         [[ 1,  2,  3,  4],
          [ 5,  6,  7,  8],
          [ 9, 10, 11, 12],
          [13, 14, 15, 16]],

         [[ 1,  2,  3,  4],
          [ 5,  6,  7,  8],
          [ 9, 10, 11, 12],
          [13, 14, 15, 16]],

         [[ 2,  3,  4,  5],
          [ 6,  7,  8,  9],
          [10, 11, 12, 13],
          [14, 15, 16, 17]]]])

In [53]:
pool2d=nn.MaxPool2d((2,3),stride=(1,2),padding=1)
pool2d(X)

tensor([[[[ 1,  3],
          [ 5,  7],
          [ 9, 11],
          [13, 15],
          [13, 15]],

         [[ 2,  4],
          [ 6,  8],
          [10, 12],
          [14, 16],
          [14, 16]],

         [[ 2,  4],
          [ 6,  8],
          [10, 12],
          [14, 16],
          [14, 16]],

         [[ 3,  5],
          [ 7,  9],
          [11, 13],
          [15, 17],
          [15, 17]]]])