In [1]:
## 3.10 多层感知机的简洁实现
# start at 01-26 on mac

In [2]:
import torch
from torch import nn
from torch.nn import init
import numpy as np
import sys
sys.path.append("..")
import d2lzh_pytorch as d2l

# 1. 定义模型

## 1.1 定义一个FlattenLayer

In [4]:
# Each batch X produced by the data iterator has shape (batch_size, 1, 28, 28),
# so it has to be reshaped with view() to (batch_size, 784) before it is fed
# into the fully connected layers.
class FlattenLayer(nn.Module):
    """Module that collapses every dimension after the first into a single one."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Return ``x`` viewed as ``(x.shape[0], -1)``."""
        batch_dim = x.size(0)
        return x.view(batch_dim, -1)

## 1.2 定义模型

In [6]:
# Layer widths: 784 flattened inputs, 256 hidden units, 10 outputs.
num_inputs = 784
num_outputs = 10
num_hidden = 256

# Flatten -> Linear -> ReLU -> Linear: an MLP with a single hidden layer.
net = nn.Sequential(
    FlattenLayer(),
    nn.Linear(in_features=num_inputs, out_features=num_hidden),
    nn.ReLU(),
    nn.Linear(in_features=num_hidden, out_features=num_outputs),
)

# Every parameter returned by net.parameters() is re-drawn from a normal
# distribution with mean 0 and standard deviation 0.01.
for params in net.parameters():
    init.normal_(params, mean=0, std=0.01)

# 2. 读取数据并训练模型

## 2.1 读取数据

In [8]:
# Mini-batch size handed to the data loader (and later to train()).
batch_size = 256
# The d2l helper's return value is unpacked into a training iterator and a
# test iterator; presumably these iterate over Fashion-MNIST, as the helper's
# name suggests -- confirm against d2lzh_pytorch.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

## 2.2 损失函数

In [9]:
# Cross-entropy criterion with PyTorch's default settings.
loss = nn.CrossEntropyLoss()

## 2.3 优化器

In [11]:
# Plain SGD over all of the network's parameters, learning rate 0.5.
optimizer = torch.optim.SGD(params=net.parameters(), lr=0.5)

## 2.4 训练模型

In [21]:
def evaluate_accuracy(data_iter, net):
    """Return the fraction of samples in ``data_iter`` that ``net`` classifies correctly.

    Evaluation runs under ``torch.no_grad()`` so no autograd graph is built
    for the forward passes. If ``net`` is a ``torch.nn.Module`` it is put into
    evaluation mode for the duration of the call, and every sub-module's
    previous ``training`` flag is restored afterwards.

    Args:
        data_iter: Iterable yielding ``(X, y)`` batches. ``net(X)`` is
            arg-maxed over ``dim=1`` and compared element-wise with ``y``.
        net: Callable mapping a batch ``X`` to per-class scores.

    Returns:
        float: number of correct predictions divided by the number of samples.

    Raises:
        ZeroDivisionError: if ``data_iter`` yields no samples.
    """
    # Remember the mode of every sub-module so it can be restored exactly.
    if isinstance(net, torch.nn.Module):
        saved_modes = [(module, module.training) for module in net.modules()]
        net.eval()
    else:
        saved_modes = []

    acc_sum, n = 0.0, 0
    try:
        with torch.no_grad():
            for X, y in data_iter:
                acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
                n += y.shape[0]
    finally:
        for module, was_training in saved_modes:
            module.training = was_training

    return acc_sum / n

# Number of epochs passed to train() below.
num_epochs = 5

def train(net, train_iter, test_iter, loss, num_eophs, batch_size, params=None, lr=None, optimizer=None):
    """Train ``net`` for a number of epochs and print per-epoch metrics.

    Args:
        net: Callable mapping a batch ``X`` to per-class scores.
        train_iter: Iterable yielding ``(X, y)`` training batches.
        test_iter: Iterable yielding ``(X, y)`` batches, passed to
            ``evaluate_accuracy`` after every epoch.
        loss: Callable ``loss(y_hat, y)``; its result is reduced with
            ``.sum()`` before back-propagation.
        num_eophs: Number of epochs to run. The misspelled name is kept so
            existing keyword callers keep working.
        batch_size: Forwarded to ``d2l.sgd`` when no optimizer is given.
        params: Parameters updated by ``d2l.sgd``; only used when
            ``optimizer`` is ``None``.
        lr: Learning rate forwarded to ``d2l.sgd``; only used when
            ``optimizer`` is ``None``.
        optimizer: Optional optimizer object. When given, its ``zero_grad()``
            and ``step()`` are used instead of the manual update.

    Returns:
        None. One summary line is printed per epoch.
    """
    # The original looped over the undefined name ``num_epoch`` and raised
    # NameError on a fresh kernel; iterate over the actual parameter instead.
    for epoch in range(num_eophs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y).sum()

            # Clear gradients left over from the previous step.
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None:
                for param in params:
                    # A parameter has no .grad before its first backward pass.
                    if param.grad is not None:
                        param.grad.data.zero_()

            l.backward()

            if optimizer is None:
                d2l.sgd(params, lr, batch_size)
            else:
                optimizer.step()

            # NOTE: if `loss` already averages over the batch (the default for
            # torch.nn.CrossEntropyLoss), this accumulates per-batch means, so
            # the printed "loss" is roughly the mean loss divided by the
            # batch size rather than a true per-sample average.
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]

        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f' % (epoch + 1, train_l_sum/n, train_acc_sum/n, test_acc))
        
        
# params and lr keep their None defaults; updates are done by the optimizer.
train(net, train_iter, test_iter, loss, num_epochs, batch_size, optimizer=optimizer)

epoch 1, loss 0.0015, train acc 0.854, test acc 0.845
epoch 2, loss 0.0014, train acc 0.866, test acc 0.810
epoch 3, loss 0.0014, train acc 0.870, test acc 0.843
epoch 4, loss 0.0013, train acc 0.876, test acc 0.866
epoch 5, loss 0.0013, train acc 0.880, test acc 0.844


In [None]:
# finished at 0:26