In [None]:
### 3.6 softmax回归的简洁实现
# start coding at 01-23 13:33 on Mac
# target1: 加入一个FlattenLayer把（batch_size, 1, 28, 28）的小批量图片转换成（batch_size, 784）

In [1]:
import torch
from torch import nn
from torch.nn import init
import numpy as np
import sys
sys.path.append("..") 
import d2lzh_pytorch as d2l

# 1. 获取和读取数据

In [2]:
# Keep using the Fashion-MNIST dataset and the batch size from the previous section.
# NOTE(review): the comment says the batch size matches the previous section; 356 looks like it
# may be a typo for 256 — confirm against that section before changing it.
batch_size = 356
# d2l helper that returns the training and test iterators for the given batch size.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

# 2. 定义模型和初始化模型

In [3]:
# The output layer of softmax regression is a fully connected layer, so a single linear module is enough.
# Each batch x produced by the data iterator is described as having shape (batch, 1, 28, 28), so x must be
# reshaped to (batch_size, 784) before it is fed to the fully connected layer.
num_inputs = 784  # 1 * 28 * 28 values per flattened image
num_outputs = 10  # width of the linear layer's output (presumably one unit per Fashion-MNIST class)


class FlattenLayer(nn.Module):
    """Collapse every dimension after the first into one, keeping the batch axis.

    An input of shape ``(batch, d1, d2, ...)`` becomes ``(batch, d1 * d2 * ...)``,
    e.g. ``(batch_size, 1, 28, 28)`` -> ``(batch_size, 784)``. The layer has no
    parameters.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):  # x shape: (batch, *, *, ...)
        """Return ``x`` flattened to two dimensions: ``(x.shape[0], -1)``."""
        # reshape() returns a view when the memory layout allows it (same result as
        # view() for contiguous input) and falls back to a copy otherwise, so
        # transposed / permuted / sliced inputs no longer raise a RuntimeError.
        return x.reshape(x.shape[0], -1)
    

from collections import OrderedDict

# Two named stages, applied in order: 'flatten' turns each image batch into row
# vectors, 'linear' maps those rows to the output scores. Keyword arguments keep
# their order in an OrderedDict, so the sub-modules are registered as
# net.flatten followed by net.linear.
net = nn.Sequential(OrderedDict(
    flatten=FlattenLayer(),
    linear=nn.Linear(num_inputs, num_outputs),
))

In [4]:
# In-place initialisation of the linear layer: weights drawn from a normal
# distribution with mean 0 and std 0.01.
init.normal_(net.linear.weight, mean=0, std=0.01)
# Bias filled with 0. This is the cell's last expression, so the notebook displays
# the value it returns (the bias Parameter).
init.constant_(net.linear.bias, val=0)

Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True)

# 3. softmax和交叉熵损失函数 

In [9]:
# Cross-entropy criterion. Per the PyTorch documentation it applies log-softmax to its
# input itself, which is why `net` ends in a plain linear layer with no softmax module.
loss = nn.CrossEntropyLoss()

# 4. 定义优化算法

In [6]:
# SGD over every parameter of `net`, with learning rate 0.1.
optimizer = torch.optim.SGD(net.parameters(), lr=0.1)

# 5. 训练模型

In [7]:
def _evaluate_accuracy(data_iter, net):
    """Return the fraction of samples in ``data_iter`` whose argmax prediction equals the label.

    ``data_iter`` yields ``(X, y)`` pairs. When ``net`` is an ``nn.Module`` it is put
    in eval mode for the duration of the pass and its previous mode is restored
    afterwards. Returns 0.0 when ``data_iter`` yields no samples.
    """
    is_module = isinstance(net, torch.nn.Module)
    was_training = net.training if is_module else False
    if is_module:
        net.eval()
    correct, total = 0.0, 0
    try:
        # Evaluation needs no gradients; skip building the autograd graph.
        with torch.no_grad():
            for X, y in data_iter:
                correct += (net(X).argmax(dim=1) == y).float().sum().item()
                total += y.shape[0]
    finally:
        if is_module:
            net.train(was_training)
    return correct / total if total else 0.0


def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params=None, lr=None, optimizer=None):
    """Train ``net`` for ``num_epochs`` epochs and print one summary line per epoch.

    Args:
        net: callable mapping a batch ``X`` to a 2-D tensor of class scores.
        train_iter: iterable of ``(X, y)`` training batches.
        test_iter: iterable of ``(X, y)`` batches used for the per-epoch test accuracy.
        loss: callable ``loss(y_hat, y)`` returning a tensor; it is reduced with ``.sum()``.
        num_epochs: number of passes over ``train_iter``.
        batch_size: forwarded to ``d2l.sgd`` when no optimizer is supplied.
        params: parameter tensors for the manual path (used only when ``optimizer`` is None).
        lr: learning rate for the manual path (used only when ``optimizer`` is None).
        optimizer: optional ``torch.optim`` optimizer; takes precedence over ``params``/``lr``.

    Returns:
        None. Each epoch prints the accumulated loss divided by the number of training
        samples, the training accuracy and the test accuracy.

    Note:
        With a loss that already averages over the batch (the default reduction of
        ``nn.CrossEntropyLoss``), the printed loss is the sum of per-batch means
        divided by the sample count, i.e. roughly mean loss / batch size.
    """
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y).sum()

            # Clear gradients left over from the previous step.
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None and params[0].grad is not None:
                for param in params:
                    param.grad.data.zero_()

            # Back-propagate the batch loss.
            l.backward()

            # Update the parameters.
            if optimizer is None:
                # Manual path; d2l.sgd is defined outside this notebook
                # (presumably a mini-batch SGD step — confirm in d2lzh_pytorch).
                d2l.sgd(params, lr, batch_size)
            else:
                optimizer.step()

            # Running totals for the epoch summary.
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).float().sum().item()
            n += y.shape[0]

        test_acc = _evaluate_accuracy(test_iter, net)
        # Guard against an empty train_iter so the summary line does not divide by zero.
        denom = max(n, 1)
        print("epoch %d, loss %.4f, train acc %.3f, test acc %.3f"
              % (epoch + 1, train_l_sum / denom, train_acc_sum / denom, test_acc))

In [10]:
num_epochs = 5  # number of training epochs requested
# NOTE(review): this calls the packaged d2l.train_ch3, not the train_ch3 defined in this notebook.
# The two None positionals presumably fill `params` and `lr`, which are unused when an
# optimizer is supplied — confirm against d2l.train_ch3's signature.
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None, None, optimizer)

epoch 1, loss 0.0024, train acc 0.732, test acc 0.772
epoch 2, loss 0.0017, train acc 0.806, test acc 0.801
epoch 3, loss 0.0016, train acc 0.820, test acc 0.812
epoch 4, loss 0.0015, train acc 0.827, test acc 0.810
epoch 5, loss 0.0014, train acc 0.831, test acc 0.818


In [None]:
# Finished at 14:00