# 3.7 softmax回归的简洁实现

In [1]:
import torch
from torch import nn
from torch.nn import init
import numpy as np
import sys
sys.path.append("..")
import d2lzh_pytorch as d2l

## 3.7.1 获取和读取数据

In [2]:
# Mini-batch size; also passed to the training call later in the notebook.
batch_size = 256
# The d2l helper returns a (train, test) pair of data iterators.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

## 3.7.2 定义和初始化模型

softmax回归的输出层是一个全连接层，所以我们用一个线性模块就可以了。因为前面数据迭代器返回的每个batch样本x的形状为(batch_size, 1, 28, 28)，所以我们要先用view()（或等价的reshape()）将x的形状转换成(batch_size, 784)，再送入全连接层。

In [3]:
# A (1, 28, 28) sample flattens to 28 * 28 = 784 input features; the model emits 10 outputs.
num_inputs, num_outputs = 28 * 28, 10

class LinearNet(nn.Module):
    """Softmax-regression model: flatten each sample, then apply one linear layer.

    The module returns the raw linear scores; no softmax is applied inside it.

    Args:
        num_inputs: number of features per sample after flattening.
        num_outputs: number of output scores per sample.
    """

    def __init__(self, num_inputs, num_outputs):
        super(LinearNet, self).__init__()
        self.linear = nn.Linear(num_inputs, num_outputs)

    def forward(self, x):
        """Return scores of shape (batch, num_outputs).

        ``x`` keeps its first dimension as the batch dimension and has all
        remaining dimensions flattened, e.g. (batch, 1, 28, 28) -> (batch, 784).
        """
        # reshape() instead of view(): view() raises a RuntimeError on
        # non-contiguous inputs (e.g. after transpose/permute), while reshape()
        # still returns a view when the memory layout allows it.
        flat = x.reshape(x.shape[0], -1)
        return self.linear(flat)
# Instantiate the hand-written model; a later cell rebinds `net` to an nn.Sequential model.
net = LinearNet(num_inputs=num_inputs, num_outputs=num_outputs)

我们将对x做形状转换的这个功能定义为一个FlattenLayer，并记录在d2lzh_pytorch中，方便后面使用。

In [4]:
class FlattenLayer(nn.Module):
    """Flatten every sample of a batch into a 1-D feature vector.

    Maps an input of shape (batch, d1, d2, ...) to (batch, d1 * d2 * ...).
    The layer defines no parameters of its own.
    """

    def __init__(self):
        super(FlattenLayer, self).__init__()

    def forward(self, x):
        """Return ``x`` with all non-batch dimensions merged into one."""
        # reshape() instead of view(): view() raises a RuntimeError on
        # non-contiguous inputs (e.g. after transpose/permute), while reshape()
        # still returns a view when the memory layout allows it.
        return x.reshape(x.shape[0], -1)

使用nn.Sequential搭建网络。Sequential是一个有序容器，网络层将按照传入Sequential的顺序依次被添加到计算图中。

In [5]:
from collections import OrderedDict  # ordered mapping: layer name -> module

# Conceptually the model is softmax(torch.mm(X.view(-1, num_inputs), W) + b).
# The container below holds only the flatten step and the linear layer; it has
# no softmax layer of its own.
net = nn.Sequential(OrderedDict((
    ('flatten', FlattenLayer()),
    ('linear', nn.Linear(num_inputs, num_outputs)),
)))

# Show the layer structure, then every parameter tensor followed by the shapes.
print(net)
print(list(net.parameters()), [p.shape for p in net.parameters()])

Sequential(
  (flatten): FlattenLayer()
  (linear): Linear(in_features=784, out_features=10, bias=True)
)
[Parameter containing:
tensor([[ 0.0010,  0.0273, -0.0051,  ..., -0.0028,  0.0262,  0.0083],
        [-0.0255, -0.0110,  0.0096,  ..., -0.0072, -0.0285, -0.0322],
        [-0.0191,  0.0177, -0.0349,  ..., -0.0231, -0.0057,  0.0244],
        ...,
        [ 0.0150,  0.0349, -0.0279,  ..., -0.0260, -0.0208, -0.0297],
        [ 0.0017, -0.0107, -0.0057,  ...,  0.0300,  0.0073,  0.0058],
        [-0.0175, -0.0054, -0.0231,  ..., -0.0009, -0.0231, -0.0198]],
       requires_grad=True), Parameter containing:
tensor([-0.0341, -0.0022, -0.0195,  0.0226,  0.0188, -0.0326, -0.0317,  0.0012,
        -0.0049,  0.0255], requires_grad=True)] [torch.Size([10, 784]), torch.Size([10])]


In [6]:
# Initialise the linear layer: bias set to the constant 0, weights drawn from a
# normal distribution with mean 0 and standard deviation 0.01.
init.constant_(net.linear.bias, val=0)
init.normal_(net.linear.weight, mean=0, std=0.01)

print(net.linear.weight.shape)

torch.Size([10, 784])


## 3.7.3 softmax和交叉熵损失函数

In [7]:
# Defining the softmax operation and the cross-entropy loss as two separate
# steps can be numerically unstable, so use the single function that performs
# both the softmax operation and the cross-entropy computation.
loss = nn.CrossEntropyLoss()

## 3.7.4 定义优化算法

In [8]:
# Mini-batch stochastic gradient descent over all of net's parameters, learning rate 0.1.
optimizer = torch.optim.SGD(params=net.parameters(), lr=0.1)
print(optimizer)

SGD (
Parameter Group 0
    dampening: 0
    lr: 0.1
    momentum: 0
    nesterov: False
    weight_decay: 0
)


## 3.7.5 训练模型

In [9]:
num_epochs = 5
# The two None arguments fill the positional slots between batch_size and the
# optimizer; presumably train_ch3's manual-update inputs (params / lr), which
# are not needed when an optimizer is supplied — TODO confirm against d2l.
d2l.train_ch3(
    net, train_iter, test_iter, loss,
    num_epochs, batch_size,
    None, None, optimizer,
)

epoch 1, loss 0.0031, train acc 0.747, test acc 0.787
epoch 2, loss 0.0022, train acc 0.811, test acc 0.806
epoch 3, loss 0.0021, train acc 0.826, test acc 0.812
epoch 4, loss 0.0020, train acc 0.833, test acc 0.818
epoch 5, loss 0.0019, train acc 0.837, test acc 0.754
