In [1]:
import numpy as np
from torch import nn, optim
import torch
import random
import time

🤔思考：如何加载数据？

In [2]:
with open("labels.txt", "r") as f:
    raw = f.readlines()

tags = []
data = []
for l in raw:
    tags.append(int(l[0]))#每行的第一个字符是标签
    d = l[1:-1]#去掉标签和换行符
    d = map(float, tuple(d)) #将字符串转换为tuple，数字转换为float，方便后续转为tensor
    #tuple相对于list更省内存，因为tuple是不可变的，对象所含method更少
    data.append(tuple(d))

#将标签和数据转为tensor，方便后续切分训练集和测试集
data = torch.tensor(data)
tags = torch.tensor(tags)

#划分训练集和测试集
train_test_ratio = 0.8
train_size = int(train_test_ratio * len(data))
test_size = len(data) - train_size
data_train = data[:train_size]
data_test  = data[train_size:]
tags_train = tags[:train_size]
tags_test  = tags[train_size:]

In [3]:
# 直接套用d2l网站上的代码，没有改动
def data_iter(batch_size, features, labels):
    num_examples = len(features)
    indices = list(range(num_examples))
    # 这些样本是随机读取的，没有特定的顺序
    random.shuffle(indices)
    for i in range(0, num_examples, batch_size):
        batch_indices = torch.tensor(indices[i: min(i + batch_size, num_examples)])
        yield features[batch_indices], labels[batch_indices]


batch_size = 10

w = torch.normal(0, 0.01, size=(len(data[0]), 1), requires_grad=True, dtype=torch.float32) #对每个像素都有一个权重
b = torch.zeros(1, requires_grad=True)

👇方案1：Linear Regression

In [4]:
# 训练模型
def linreg(X, w, b):
    return torch.matmul(X, w) + b

# 损失函数
def mse_loss(y_pred, y_true):
    return torch.mean((y_pred - y_true)**2)

# 优化器
def sgd(params, lr, batch_size):
    """小批量随机梯度下降"""
    with torch.no_grad():
        for param in params:    
            param -= lr * param.grad / batch_size
            param.grad.zero_()

sgd优化器数学原理：
- 计算梯度：$\mathbf{g} \leftarrow \partial_{(\mathbf{w},b)} \frac{1}{|\mathcal{B}|} \sum_{i \in \mathcal{B}} l(\mathbf{x}^{(i)}, y^{(i)}, \mathbf{w}, b)$
- 更新参数：$\mathbf{w} \leftarrow \mathbf{w} - \eta \cdot \mathbf{g}$
- $\eta$ 是学习率，控制更新步长


In [5]:
lr = 0.02
num_epochs = 16
net = linreg
loss = mse_loss

for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, data_train, tags_train):
        l = loss(net(X, w, b), y)  # X和y的小批量损失
        # 因为l形状是(batch_size,1)，而不是一个标量。l中的所有元素被加到一起，
        # 并以此计算关于[w,b]的梯度
        l.sum().backward() # 求梯度
        sgd([w, b], lr, batch_size)  # 使用参数的梯度更新参数
    with torch.no_grad():
        train_l = torch.round(net(data_train, w, b)).reshape(tags_train.shape) == tags_train  # 训练集上的损失
        print(f'epoch {epoch + 1:02d}, correct rate on training set: {train_l.sum()/len(data_train)*100:.02f}%')

epoch 01, correct rate on training set: 1.47%
epoch 02, correct rate on training set: 12.61%
epoch 03, correct rate on training set: 1.17%
epoch 04, correct rate on training set: 21.99%
epoch 05, correct rate on training set: 21.70%
epoch 06, correct rate on training set: 21.99%
epoch 07, correct rate on training set: 10.26%
epoch 08, correct rate on training set: 1.17%
epoch 09, correct rate on training set: 10.26%
epoch 10, correct rate on training set: 21.99%
epoch 11, correct rate on training set: 10.26%
epoch 12, correct rate on training set: 12.02%
epoch 13, correct rate on training set: 10.26%
epoch 14, correct rate on training set: 1.17%
epoch 15, correct rate on training set: 0.00%
epoch 16, correct rate on training set: 9.97%


👆可以看到，正确率上不去，说明可能不存在线性可分的情况。

In [6]:
with torch.no_grad():
    train_l = torch.round(net(data_test, w, b)).reshape(tags_test.shape) == tags_test  # 训练集上的损失
    print(f'correct rate on test set: {train_l.sum()/len(tags_test)*100:.02f}%')

correct rate on test set: 6.98%


👇方案2：softmax回归

In [7]:
def init_weights(m):
    if type(m) == nn.Linear:
        nn.init.normal_(m.weight, std=0.01)
def accuracy(y_hat, y):  
    """计算预测正确的数量"""
    if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
        y_hat = y_hat.argmax(axis=1)
    cmp = y_hat.type(y.dtype) == y
    return float(cmp.type(y.dtype).sum()/len(y))


# 定义数据集
batch_size = 32
net = nn.Sequential(nn.Flatten(), nn.Linear(384, 10))
net.apply(init_weights)
loss = nn.CrossEntropyLoss(reduction='none')
trainer = torch.optim.SGD(net.parameters(), lr=0.015)
num_epochs = 32


for epoch in range(num_epochs):
    net.train()  # 切换到训练模式
    for X, y in data_iter(batch_size, data_train, tags_train):
        # 计算梯度并更新参数
        y_hat = net(X)
        l = loss(y_hat, y)

        # 使用PyTorch内置的优化器和损失函数
        trainer.zero_grad()
        l.mean().backward()
        trainer.step()

    # 计算训练准确度
    with torch.no_grad():
        acc = accuracy(net(data_train), tags_train)
    print(f'epoch {epoch+1:2d}, loss: {l.mean().item():.3f}, acc: {acc:.3f}')
    # net.eval()
    # acc = accuracy(net(data_train), tags_train)
    # print(f'epoch {epoch+1:2d}, loss: {l.mean().item():.3f}, acc: {acc:.3f}')


epoch  1, loss: 1.636, acc: 0.974
epoch  2, loss: 1.229, acc: 0.974
epoch  3, loss: 0.769, acc: 0.974
epoch  4, loss: 0.691, acc: 0.974
epoch  5, loss: 0.443, acc: 0.974
epoch  6, loss: 0.343, acc: 0.974
epoch  7, loss: 0.268, acc: 0.974
epoch  8, loss: 0.253, acc: 0.974
epoch  9, loss: 0.204, acc: 0.974
epoch 10, loss: 0.359, acc: 0.974
epoch 11, loss: 0.191, acc: 0.974
epoch 12, loss: 0.152, acc: 0.974
epoch 13, loss: 0.307, acc: 0.974
epoch 14, loss: 0.140, acc: 0.974
epoch 15, loss: 0.616, acc: 0.974
epoch 16, loss: 0.259, acc: 0.974
epoch 17, loss: 0.411, acc: 0.974
epoch 18, loss: 0.241, acc: 0.974
epoch 19, loss: 0.283, acc: 0.974
epoch 20, loss: 0.097, acc: 0.974
epoch 21, loss: 0.267, acc: 0.974
epoch 22, loss: 0.234, acc: 0.974
epoch 23, loss: 0.091, acc: 0.974
epoch 24, loss: 0.256, acc: 0.974
epoch 25, loss: 0.226, acc: 0.974
epoch 26, loss: 0.081, acc: 0.974
epoch 27, loss: 0.491, acc: 0.974
epoch 28, loss: 0.220, acc: 0.974
epoch 29, loss: 0.078, acc: 0.974
epoch 30, loss

In [8]:
with torch.no_grad():
    y_hat = net(data_test)
    y = tags_test  # 标签转为LongTensor类型
    acc = accuracy(y_hat, y)
    print(f'loss: {l.mean().item():.3f}, acc: {acc:.3f}')

loss: 0.319, acc: 0.965


👇方案3：Multi-Layer Perceptron (MLP)

In [17]:
net = nn.Sequential(nn.Flatten(),
                    nn.Linear(384, 128),
                    nn.ReLU(),
                    nn.Linear(128, 10))
def init_weights(m):
    if type(m) == nn.Linear:
        nn.init.normal_(m.weight, std=0.01)

net.apply(init_weights)
batch_size, lr, num_epochs = 32, 0.1, 16
loss = nn.CrossEntropyLoss(reduction='none')
trainer = torch.optim.SGD(net.parameters(), lr=lr)

In [18]:
for epoch in range(num_epochs):
    train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
    for X, y in data_iter(batch_size, data_train, tags_train):
        y_hat = net(X)
        l = loss(y_hat, y)
        l = l.mean()
        trainer.zero_grad()
        l.backward()
        trainer.step()
        train_l_sum += l.item()
        train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
        n += y.shape[0]
    acc_sum, n = 0.0, 0
    with torch.no_grad():
        for X, y in data_iter(batch_size, data_test, tags_test):
            y_hat = net(X)
            acc_sum += (y_hat.argmax(dim=1) == y).float().sum().item()
            n += y.shape[0]
    # n = test_size
    test_acc = acc_sum / n
    print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
          % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))

epoch 1, loss 0.2860, train acc 0.814, test acc 0.256
epoch 2, loss 0.2464, train acc 1.593, test acc 0.523
epoch 3, loss 0.1567, train acc 2.977, test acc 0.837
epoch 4, loss 0.0746, train acc 3.663, test acc 0.837
epoch 5, loss 0.0421, train acc 3.837, test acc 0.965
epoch 6, loss 0.0299, train acc 3.860, test acc 0.965
epoch 7, loss 0.0235, train acc 3.860, test acc 0.965
epoch 8, loss 0.0210, train acc 3.860, test acc 0.965
epoch 9, loss 0.0191, train acc 3.860, test acc 0.965
epoch 10, loss 0.0171, train acc 3.860, test acc 0.965
epoch 11, loss 0.0183, train acc 3.860, test acc 0.965
epoch 12, loss 0.0158, train acc 3.860, test acc 0.965
epoch 13, loss 0.0160, train acc 3.860, test acc 0.965
epoch 14, loss 0.0155, train acc 3.860, test acc 0.965
epoch 15, loss 0.0148, train acc 3.860, test acc 0.965
epoch 16, loss 0.0155, train acc 3.860, test acc 0.965


👇方案4：Convolutional Neural Network (CNN)