In [64]:
import torch
import torchvision # torchvision 用来处理图像
import torch.utils.data as Data
from torch import nn

In [65]:
EPOCH = 1  # train for a single epoch to keep the run short
BATCH_SIZE = 50  # number of samples per training mini-batch
LR = 0.001  # learning rate
# Always request the download: torchvision skips it when the files are already
# present under `root`, so True costs nothing on re-runs, while False makes a
# fresh checkout (Restart Kernel -> Run All) fail because the dataset is missing.
DOWNLOAD_MNIST = True
TEST_SIZE = 2000  # number of test samples to keep

In [66]:
train_data = torchvision.datasets.MNIST(
    root='./data',
    train=True,  # True selects the training split, False the test split
    # ToTensor converts an image (0-255) into a float tensor (0-1). It is applied
    # when a sample is fetched through the dataset (indexing / iteration), not to
    # the raw `.data` tensor.
    transform=torchvision.transforms.ToTensor(),
    download=DOWNLOAD_MNIST,
)

# The dataset exposes two raw tensors: the images (`data`) and the labels (`targets`).
print(f"datasize:{train_data.data.shape},labelsize:{train_data.targets.shape}")
print(f"train_data:{train_data.data[0]}")
# Inspect the first transformed sample only. Indexing fetches it directly instead
# of iterating over (and transforming) the whole dataset just to look at item 0.
i = 0
data, target = train_data[i]
print(f"{i}:{data.shape},{target}")
print(f"{i}:{data}")
train_loader = Data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)

datasize:torch.Size([60000, 28, 28]),labelsize:torch.Size([60000])
train_data:tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   3,  18,
          18,  18, 126, 136, 175,  26, 166, 255,

In [67]:
# Held-out split (train=False); expected to already be present under ./data.
test_data = torchvision.datasets.MNIST(
    './data',
    train=False,
    transform=torchvision.transforms.ToTensor(),
)
print(f"datasize:{test_data.data.shape},labelsize:{test_data.targets.shape}")

# Keep the first TEST_SIZE raw images, cast to float and insert a channel axis
# at dim 1 so the batch matches the single-channel input of the network.
# (The tutorial this follows used `test_data.test_data` and divided by 255 inline.)
test_x = test_data.data[:TEST_SIZE].float().unsqueeze(dim=1)
print(f"test_x:{test_x.data[0]}")
print(test_x.shape)

# Reading `.data` directly bypasses the ToTensor transform, so the 0-255 pixel
# values have to be scaled to 0-1 by hand.
test_x = test_x / 255
test_y = test_data.targets[:TEST_SIZE]

datasize:torch.Size([10000, 28, 28]),labelsize:torch.Size([10000])
test_x:tensor([[[  0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
            0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
            0.,   0.,   0.,   0.,   0.,   0.],
         [  0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
            0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
            0.,   0.,   0.,   0.,   0.,   0.],
         [  0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
            0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
            0.,   0.,   0.,   0.,   0.,   0.],
         [  0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
            0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
            0.,   0.,   0.,   0.,   0.,   0.],
         [  0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
            0.,   0.,   0.,   0.,   0.,   0.,   0.,   

In [68]:
class CNN(nn.Module):
    """Two-stage convolutional classifier for 1x28x28 images with 10 outputs.

    Each stage is Conv2d(5x5, stride 1, padding 2) -> ReLU -> MaxPool2d(2).
    The resulting 32x7x7 feature map is flattened and passed to one linear
    layer that produces 10 scores per image.
    """

    def __init__(self):
        super().__init__()
        # Stage 1: (1, 28, 28) -> conv -> (16, 28, 28) -> pool -> (16, 14, 14).
        # With stride 1, padding = (kernel_size - 1) / 2 = 2 keeps the spatial
        # size unchanged: every border pixel gets two extra cells on each side
        # so a full 5x5 window can be centred on it.
        first_conv = nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2)
        self.conv1 = nn.Sequential(first_conv, nn.ReLU(), nn.MaxPool2d(2))

        # Stage 2: (16, 14, 14) -> conv -> (32, 14, 14) -> pool -> (32, 7, 7).
        second_conv = nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2)
        self.conv2 = nn.Sequential(second_conv, nn.ReLU(), nn.MaxPool2d(2))

        # Classifier head over the flattened stage-2 feature map.
        self.out = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        """Return the 10 class scores for a batch `x` of shape (batch, 1, 28, 28)."""
        features = self.conv2(self.conv1(x))  # (batch, 32, 7, 7)
        flat = features.view(features.size(0), -1)  # (batch, 32*7*7)
        return self.out(flat)

上面的代码就是“CNN”，即卷积神经网络。

输入一张 1×28×28 的图片，依次经过两个“卷积 → ReLU → 最大池化”模块，得到 32×7×7 的特征图；将其展平后送入全连接层，输出 10 个类别的得分。

In [69]:
cnn = CNN()

optimizer = torch.optim.Adam(cnn.parameters(), lr=LR)
loss_fn = nn.CrossEntropyLoss()
# Number of batches between evaluations. Kept separate from BATCH_SIZE (samples
# per batch): the two are unrelated and only happened to share the value 50.
LOG_INTERVAL = 50
for epoch in range(EPOCH):
    for batch_idx, (data, target) in enumerate(train_loader):
        output = cnn(data)
        loss = loss_fn(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if batch_idx % LOG_INTERVAL == 0:
            # Evaluate without autograd: no graph is built for the test batch,
            # which saves memory and time. eval()/train() change nothing for
            # the current layers but keep the loop correct if dropout or
            # batch-norm layers are added later.
            cnn.eval()
            with torch.no_grad():
                test_output = cnn(test_x)
            cnn.train()
            pred_y = test_output.argmax(dim=1)  # index of the highest score per image
            # Tensor-level sum instead of Python's builtin sum, which would loop
            # over every element in the interpreter.
            accuracy = (pred_y == test_y).sum() / len(test_y)
            print(f"batch_idx:{batch_idx}|train loss:{round(loss.item(),4)}|test accuracy:{round(accuracy.item(),4)}")

batch_idx:0|train loss:2.303|test accuracy:0.071
batch_idx:50|train loss:0.4036|test accuracy:0.8115
batch_idx:100|train loss:0.4622|test accuracy:0.8515
batch_idx:150|train loss:0.2297|test accuracy:0.9175
batch_idx:200|train loss:0.1077|test accuracy:0.9365
batch_idx:250|train loss:0.5461|test accuracy:0.9375
batch_idx:300|train loss:0.1482|test accuracy:0.9575
batch_idx:350|train loss:0.0821|test accuracy:0.9595
batch_idx:400|train loss:0.1539|test accuracy:0.9655
batch_idx:450|train loss:0.0309|test accuracy:0.9675
batch_idx:500|train loss:0.0688|test accuracy:0.966
batch_idx:550|train loss:0.1055|test accuracy:0.9715
batch_idx:600|train loss:0.1204|test accuracy:0.9725
batch_idx:650|train loss:0.0452|test accuracy:0.97
batch_idx:700|train loss:0.1255|test accuracy:0.959
batch_idx:750|train loss:0.0707|test accuracy:0.97
batch_idx:800|train loss:0.0616|test accuracy:0.974
batch_idx:850|train loss:0.0857|test accuracy:0.978
batch_idx:900|train loss:0.0296|test accuracy:0.974
batch_i

In [70]:
# Compare predictions with the true labels for the first 10 test images.
# Inference only, so autograd is disabled to avoid building an unused graph.
with torch.no_grad():
    test_output = cnn(test_x[:10])
pred_y = test_output.argmax(dim=1)  # index of the highest score per image
print(pred_y, "pred number")
print(test_y[:10], "real number")

tensor([7, 2, 1, 0, 4, 1, 4, 9, 5, 9]) pred number
tensor([7, 2, 1, 0, 4, 1, 4, 9, 5, 9]) real number
