## Part 1 - 卷积基本知识

In [19]:
import torch 
from torch import nn

#手动实现一个二维卷积算子，stride为1
def corr2d(X, K):  # 本函数已保存在d2lzh_pytorch包中方便以后使用
    h, w = K.shape    
    Y = torch.zeros((X.shape[0] - h + 1, X.shape[1] - w + 1))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            Y[i, j] = (X[i: i + h, j: j + w] * K).sum()
    return Y


# 注意点乘和叉乘的用法
a = torch.tensor([[1,1],[1,1]])
b = torch.tensor([[1,1],[1,1]])

print(a*b)

print(a.mm(b))

tensor([[1, 1],
        [1, 1]])
tensor([[2, 2],
        [2, 2]])


In [20]:
X = torch.tensor([[0, 1, 2], [3, 4, 5], [6, 7, 8]])
K = torch.tensor([[0, 1], [2, 3]])
corr2d(X, K)


tensor([[19., 25.],
        [37., 43.]])

In [43]:
class Conv2D(nn.Module):
    def __init__(self, kernel_size):
        super(Conv2D, self).__init__()
        self.weight = nn.Parameter(torch.randn(kernel_size))
        self.bias = nn.Parameter(torch.randn(1))
        #print(self.weight.shape)
        print(self.weight)
        print(self.bias)

    def forward(self, x):
        return corr2d(x, self.weight) + self.bias


In [37]:
Y = corr2d(X, K)
Y

tensor([[-1., -1.],
        [-1., -1.],
        [-1., -1.]])

In [74]:
#注意：此处使用的X与原文不一样， 元素并非0或1值， bias的求解结果并不趋于0，除非我们在
#     损失函数中主动加上

X = torch.tensor([[0, 1, 2], [3, 4, 5], [6, 7, 8]])
#X = torch.ones(6, 8)
#X[:, 2:6] = 0
K = torch.tensor([[1, -1]])
Y = corr2d(X, K)

print(Y)

# 构造一个核数组形状是(1, 2)的二维卷积层
conv2d = Conv2D(kernel_size=(1, 2))

step = 5000
lr = 0.001
for i in range(step):
    Y_hat = conv2d(X)
    l = ((Y_hat - Y) ** 2).sum()
    l += (conv2d.bias ** 2).sum() * 100
    l.backward()

    # 梯度下降
    conv2d.weight.data -= lr * conv2d.weight.grad
    conv2d.bias.data -= lr * conv2d.bias.grad

    # 梯度清0
    conv2d.weight.grad.fill_(0)
    conv2d.bias.grad.fill_(0)
    if (i + 1) % 200 == 0:
        print('Step %d, loss %.3f' % (i + 1, l.item()))


print(conv2d.weight, conv2d.bias)

tensor([[-1., -1.],
        [-1., -1.],
        [-1., -1.]])
Parameter containing:
tensor([[-0.5968, -0.7940]], requires_grad=True)
Parameter containing:
tensor([0.7691], requires_grad=True)
Step 200, loss 0.776
Step 400, loss 0.402
Step 600, loss 0.208
Step 800, loss 0.108
Step 1000, loss 0.056
Step 1200, loss 0.029
Step 1400, loss 0.015
Step 1600, loss 0.008
Step 1800, loss 0.004
Step 2000, loss 0.002
Step 2200, loss 0.001
Step 2400, loss 0.001
Step 2600, loss 0.000
Step 2800, loss 0.000
Step 3000, loss 0.000
Step 3200, loss 0.000
Step 3400, loss 0.000
Step 3600, loss 0.000
Step 3800, loss 0.000
Step 4000, loss 0.000
Step 4200, loss 0.000
Step 4400, loss 0.000
Step 4600, loss 0.000
Step 4800, loss 0.000
Step 5000, loss 0.000
Parameter containing:
tensor([[ 0.9997, -0.9998]], requires_grad=True) Parameter containing:
tensor([-4.2163e-06], requires_grad=True)


## Part 2 - 填充与步长

In [82]:
import torch
from torch import nn

# 定义一个函数来计算卷积层。它对输入和输出做相应的升维和降维
def comp_conv2d(conv2d, X):
    # (1, 1)代表批量大小和通道数（“多输入通道和多输出通道”一节将介绍）均为1
    X = X.view((1, 1) + X.shape)
    Y = conv2d(X)
    return Y.view(Y.shape[2:])  # 排除不关心的前两维：批量和通道

# 注意这里是两侧分别填充1行或列，所以在两侧一共填充2行或列
conv2d = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, padding=1)

X = torch.rand(8, 8)
comp_conv2d(conv2d, X).shape

torch.Size([8, 8])

In [79]:
# 使用高为5、宽为3的卷积核。在高和宽两侧的填充数分别为2和1
conv2d = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(5, 3), padding=(2, 1))
comp_conv2d(conv2d, X).shape

torch.Size([8, 8])

In [80]:
conv2d = nn.Conv2d(1, 1, kernel_size=3, padding=1, stride=2)
comp_conv2d(conv2d, X).shape

torch.Size([4, 4])

In [81]:
conv2d = nn.Conv2d(1, 1, kernel_size=(3, 5), padding=(0, 1), stride=(3, 4))
comp_conv2d(conv2d, X).shape

torch.Size([2, 2])

## Part 3 - 多输入通道、多输出通道

In [94]:
import torch
from torch import nn
import sys
sys.path.append("..") 
import d2lzh_pytorch as d2l

def corr2d_multi_in(X, K):
    # 沿着X和K的第0维（通道维）分别计算再相加
    res = d2l.corr2d(X[0, :, :], K[0, :, :])
    for i in range(1, X.shape[0]):
        res += d2l.corr2d(X[i, :, :], K[i, :, :])
    return res


X = torch.tensor([[[0, 1, 2], [3, 4, 5], [6, 7, 8]],
              [[1, 2, 3], [4, 5, 6], [7, 8, 9]]])
K = torch.tensor([[[0, 1], [2, 3]], [[1, 2], [3, 4]]])

out = corr2d_multi_in(X, K)

## 2通道输入，原始图像为3x3
print('X.shape: ', X.size())
## 2通道卷积核，尺寸是2x2
print('K.shape: ', K.size())
## 输出为1通道2x2，因为运算中将不同的输出通道进行了叠加
print('out.shape: ', out.size())

X.shape:  torch.Size([2, 3, 3])
K.shape:  torch.Size([2, 2, 2])
out.shape:  torch.Size([2, 2])


In [95]:
def corr2d_multi_in_out(X, K):
    # 对K的第0维遍历，每次同输入X做互相关计算。所有结果使用stack函数合并在一起
    return torch.stack([corr2d_multi_in(X, k) for k in K])

## conv kernel原为2x2x2的tensor，此处扩展为3x2x2x2的tensor，从而使得
## 输出也变为3通道
K = torch.stack([K, K + 1, K + 2])
print(K.shape) # torch.Size([3, 2, 2, 2])

out = corr2d_multi_in_out(X, K)
print(out.shape)

torch.Size([3, 2, 2, 2])
torch.Size([3, 2, 2])


In [98]:
def corr2d_multi_in_out_1x1(X, K):
    c_i, h, w = X.shape
    c_o = K.shape[0]
    X = X.view(c_i, h * w)
    K = K.view(c_o, c_i)
    Y = torch.mm(K, X)  # 全连接层的矩阵乘法
    return Y.view(c_o, h, w)

X = torch.rand(3, 3, 3)
K = torch.rand(2, 3, 1, 1)

Y1 = corr2d_multi_in_out_1x1(X, K)
Y2 = corr2d_multi_in_out(X, K)

print(Y1)
print(Y2)

(Y1 - Y2).norm().item() < 1e-6

tensor([[[0.5403, 0.5927, 0.8335],
         [1.0067, 0.1544, 1.0499],
         [0.7364, 0.7604, 0.7290]],

        [[0.4948, 0.4038, 0.5090],
         [0.9144, 0.2914, 1.0752],
         [0.9712, 0.4192, 0.7137]]])
tensor([[[0.5403, 0.5927, 0.8335],
         [1.0067, 0.1544, 1.0499],
         [0.7364, 0.7604, 0.7290]],

        [[0.4948, 0.4038, 0.5090],
         [0.9144, 0.2914, 1.0752],
         [0.9712, 0.4192, 0.7137]]])


True

## Part 4 - 池化

In [100]:
import torch
from torch import nn

def pool2d(X, pool_size, mode='max'):
    X = X.float()
    p_h, p_w = pool_size
    Y = torch.zeros(X.shape[0] - p_h + 1, X.shape[1] - p_w + 1)
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            if mode == 'max':
                Y[i, j] = X[i: i + p_h, j: j + p_w].max()
            elif mode == 'avg':
                Y[i, j] = X[i: i + p_h, j: j + p_w].mean()       
    return Y

X = torch.tensor([[0, 1, 2], [3, 4, 5], [6, 7, 8]])
print(X)
print(pool2d(X, (2, 2)))
print(pool2d(X, (2, 2), 'avg'))

tensor([[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]])
tensor([[4., 5.],
        [7., 8.]])
tensor([[2., 3.],
        [5., 6.]])


In [102]:
X = torch.arange(16, dtype=torch.float).view((1, 1, 4, 4))

X = torch.cat((X, X + 1), dim=1)
X


tensor([[[[ 0.,  1.,  2.,  3.],
          [ 4.,  5.,  6.,  7.],
          [ 8.,  9., 10., 11.],
          [12., 13., 14., 15.]],

         [[ 1.,  2.,  3.,  4.],
          [ 5.,  6.,  7.,  8.],
          [ 9., 10., 11., 12.],
          [13., 14., 15., 16.]]]])

In [103]:
pool2d = nn.MaxPool2d(3, padding=1, stride=2)
pool2d(X)


tensor([[[[ 5.,  7.],
          [13., 15.]],

         [[ 6.,  8.],
          [14., 16.]]]])

## Part 5 - LeNet

![LeNet](https://engmrk.com/wp-content/uploads/2018/09/LeNet_Original_Image.jpg)

In [7]:
import time
import torch
from torch import nn, optim

import sys
sys.path.append("..") 
import d2lzh_pytorch as d2l
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class LeNet_orig(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(1, 6, 5), # in_channels, out_channels, kernel_size
            nn.Sigmoid(),
            nn.MaxPool2d(2, 2), # kernel_size, stride
            nn.Conv2d(6, 16, 5),
            nn.Sigmoid(),
            nn.MaxPool2d(2, 2)
        )
        self.fc = nn.Sequential(
            nn.Linear(16*4*4, 120),
            nn.Sigmoid(),
            nn.Linear(120, 84),
            nn.Sigmoid(),
            nn.Linear(84, 10)
        )

    def forward(self, img):
        feature = self.conv(img)
        output = self.fc(feature.view(img.shape[0], -1))
        return output

LeNet = nn.Sequential(
        nn.Conv2d(1, 6, 5), # in_channels, out_channels, kernel_size
        nn.Sigmoid(),
        nn.MaxPool2d(2, 2), # kernel_size, stride
        nn.Conv2d(6, 16, 5),
        nn.Sigmoid(),
        nn.MaxPool2d(2, 2),
        #below is fc
        nn.Linear(16*4*4, 120),
        nn.Sigmoid(),
        nn.Linear(120, 84),
        nn.Sigmoid(),
        nn.Linear(84, 10), 
        )


print(net)

Sequential(
  (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (1): Sigmoid()
  (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (4): Sigmoid()
  (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (6): Linear(in_features=256, out_features=120, bias=True)
  (7): Sigmoid()
  (8): Linear(in_features=120, out_features=84, bias=True)
  (9): Sigmoid()
  (10): Linear(in_features=84, out_features=10, bias=True)
)


In [9]:
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size=batch_size)

# 本函数已保存在d2lzh_pytorch包中方便以后使用。该函数将被逐步改进。
def evaluate_accuracy(data_iter, net, device=None):
    if device is None and isinstance(net, torch.nn.Module):
        # 如果没指定device就使用net的device
        device = list(net.parameters())[0].device
    acc_sum, n = 0.0, 0
    with torch.no_grad():
        for X, y in data_iter:
            if isinstance(net, torch.nn.Module):
                net.eval() # 评估模式, 这会关闭dropout
                acc_sum += (net(X.to(device)).argmax(dim=1) == y.to(device)).float().sum().cpu().item()
                net.train() # 改回训练模式
            else: # 自定义的模型, 3.13节之后不会用到, 不考虑GPU
                if('is_training' in net.__code__.co_varnames): # 如果有is_training这个参数
                    # 将is_training设置成False
                    acc_sum += (net(X, is_training=False).argmax(dim=1) == y).float().sum().item() 
                else:
                    acc_sum += (net(X).argmax(dim=1) == y).float().sum().item() 
            n += y.shape[0]
    return acc_sum / n

# 本函数已保存在d2lzh_pytorch包中方便以后使用
def train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs):
    net = net.to(device)
    print("training on ", device)
    loss = torch.nn.CrossEntropyLoss()
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, batch_count, start = 0.0, 0.0, 0, 0, time.time()
        for X, y in train_iter:
            X = X.to(device)
            print(X.shape)
            y = y.to(device)
            #X = X.view()
            y_hat = net(X)
            l = loss(y_hat, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            train_l_sum += l.cpu().item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
            n += y.shape[0]
            batch_count += 1
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
              % (epoch + 1, train_l_sum / batch_count, train_acc_sum / n, test_acc, time.time() - start))

lr, num_epochs = 0.001, 5
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)

training on  cuda
torch.Size([256, 1, 28, 28])


RuntimeError: mat1 dim 1 must match mat2 dim 0