<a href="https://colab.research.google.com/github/sakurasakura1996/Pytorch-start-learning/blob/master/Dive_into_DL_pytorch_3_8_3_9_%E5%A4%9A%E5%B1%82%E6%84%9F%E7%9F%A5%E6%9C%BA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# 深度学习还是多层的神经网络，所以从多层感知机开始了解多层神经网络
# MLP多层感知机 本质上全连接层只是对数据做仿射变换，多个仿射变换的叠加还是仿射变换。解决办法是引入一个非线性变换，被称为激活函数
# relu   sigmod   tanh等等
# MLP多层感知机就是含有至少一个隐藏层的由全连接层组成的神经网络，且每个隐藏层的输出通过激活函数进行变换。

In [0]:
# 3.9 多层感知机的从零开始实现
import torch
import numpy as np
import torchvision
import torchvision.transforms as transforms


In [4]:
# 获取和读取数据
# 这里继续使用之前的fashion-mnist数据集，我们将使用多层感知机多图像进行分类
# 不妨再来记忆一下数据部分

mnist_train = torchvision.datasets.FashionMNIST(root='~/Datasets/FashionMNIST',train=True,download=True,transform=transforms.ToTensor())
mnist_test = torchvision.datasets.FashionMNIST(root='~/Datasets/FashionMNIST',train=False,download=True,transform=transforms.ToTensor())

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to /root/Datasets/FashionMNIST/FashionMNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Extracting /root/Datasets/FashionMNIST/FashionMNIST/raw/train-images-idx3-ubyte.gz to /root/Datasets/FashionMNIST/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to /root/Datasets/FashionMNIST/FashionMNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Extracting /root/Datasets/FashionMNIST/FashionMNIST/raw/train-labels-idx1-ubyte.gz to /root/Datasets/FashionMNIST/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to /root/Datasets/FashionMNIST/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Extracting /root/Datasets/FashionMNIST/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to /root/Datasets/FashionMNIST/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to /root/Datasets/FashionMNIST/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Extracting /root/Datasets/FashionMNIST/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to /root/Datasets/FashionMNIST/FashionMNIST/raw
Processing...
Done!


In [0]:
batch_size = 256
train_iter = torch.utils.data.DataLoader(mnist_train,batch_size=batch_size,shuffle=True,num_workers=4)
test_iter = torch.utils.data.DataLoader(mnist_test,batch_size=batch_size,shuffle=True,num_workers=4)

In [0]:
# 定义模型参数
num_inputs=28 *28
num_hiddens = 256
num_outputs = 10
W1 = torch.tensor(np.random.normal(0, 0.01,(num_inputs, num_hiddens)),dtype=torch.float)
b1 = torch.zeros(num_hiddens,dtype=torch.float)

W2 = torch.tensor(np.random.normal(0, 0.01, (num_hiddens, num_outputs)),dtype=torch.float)
b2 = torch.zeros(num_outputs,dtype=torch.float)

params = [W1, b1, W2, b2]    # 一定要记得给这些带训练的参数附上梯度属性
for param in params:
  param.requires_grad_(requires_grad=True)

In [0]:
# 定义激活函数
def relu(x):
  return torch.max(input=x, other=torch.tensor(0.0))   # 一定要记住这里面的运算都是以tensor张量进行的，所以不要用max，而且0也是tensor(0.0)

In [0]:
# 定义模型 同softmax一样，我们通过view函数将每张原始图像改成长度为 num_inputs的向量，然后我们实现多层感知机的计算
def net(X):
  X = X.view(-1, num_inputs)
  H = relu(torch.mm(X,W1)+b1)
  return torch.mm(H,W2)+b2

In [0]:
# 定义损失函数 由于是分类问题，那么还是用交叉熵损失函数吧
# 教程中说pytorch提供的包括softmax和交叉熵损失函数的函数可以得到更好的数值稳定性
loss = torch.nn.CrossEntropyLoss()     # 一定要熟悉nn包啊

In [0]:
def sgd(params, lr, batch_size):
  # params也就是网络中带训练的参数，是一个列表性质的
  for param in params:
    param.data -= lr * param.grad / batch_size

In [0]:
def evaluate_accuracy(data_iter, net):
  acc_sum = 0.0
  n = 0
  for X, y in data_iter:
    acc_sum += (net(X).argmax(dim=1)==y).float().sum().item()
    n += y.shape[0]
  return acc_sum / n

In [0]:
# 训练模型
num_epochs = 20
lr = 128
# 由于原书的mxnet中的损失函数再反向传播的时候中相对于沿batch维求和了，而pytorch默认的是求平均，所以用pytorch计算得到的loss比mxnet小很多
# 所以反向传播得到的梯度小很多，所以我们把学习率提高batch_size倍，。之所以这么大，是因为sgd函数里函数在更新的时候除以了batch_size，其实pytorch在计算loss时已经
# 除过一次了，sgd里卖弄应该不用除了
# 因为训练多层感知机和之前训练softmax回归的步骤一样，这里还是再写一遍，之前都是照着写，自己还是掌握不住
def train_net(net, train_iter, test_iter, loss, num_epochs, batch_size, params, lr, optimizer=None):
  for epoch in range(num_epochs):
    train_l_sum = 0.0
    train_acc_sum = 0.0
    n = 0
    for X,y in train_iter:
      y_hat=net(X)
      l = loss(y_hat, y).sum()

      #梯度清零 这里写的是通用的模板，因为可能使用了optimizer。
      if optimizer is not None:
        optimizer.zero_grad()
      elif params is not None and params[0].grad is not None:
        for param in params:
          param.grad.data.zero_()
      
      # 反向传播
      l.backward()
      if optimizer is None:
        sgd(params, lr, batch_size)
      else:
        optimizer.step()
      
      train_l_sum += l.item()
      train_acc_sum += (y_hat.argmax(dim=1)==y).float().sum().item()
      n += y.shape[0]
    test_acc = evaluate_accuracy(test_iter, net)
    print("epoch : %d , loss : %.4f, train acc : %.3f, test acc : %.3f" % (epoch+1, train_l_sum / n, train_acc_sum / n, test_acc ))

In [29]:
train_net(net, train_iter, test_iter, loss, num_epochs, batch_size, params, lr)

epoch : 1 , loss : 0.0012, train acc : 0.888, test acc : 0.855
epoch : 2 , loss : 0.0011, train acc : 0.892, test acc : 0.840
epoch : 3 , loss : 0.0011, train acc : 0.895, test acc : 0.859
epoch : 4 , loss : 0.0011, train acc : 0.898, test acc : 0.848
epoch : 5 , loss : 0.0011, train acc : 0.900, test acc : 0.869
epoch : 6 , loss : 0.0010, train acc : 0.903, test acc : 0.876
epoch : 7 , loss : 0.0010, train acc : 0.904, test acc : 0.873
epoch : 8 , loss : 0.0010, train acc : 0.907, test acc : 0.869
epoch : 9 , loss : 0.0010, train acc : 0.910, test acc : 0.883
epoch : 10 , loss : 0.0009, train acc : 0.910, test acc : 0.856
epoch : 11 , loss : 0.0009, train acc : 0.912, test acc : 0.852
epoch : 12 , loss : 0.0009, train acc : 0.915, test acc : 0.885
epoch : 13 , loss : 0.0009, train acc : 0.916, test acc : 0.883
epoch : 14 , loss : 0.0009, train acc : 0.917, test acc : 0.875
epoch : 15 , loss : 0.0008, train acc : 0.919, test acc : 0.889
epoch : 16 , loss : 0.0008, train acc : 0.921, te

In [0]:
# 3.10 多层感知机的简洁实现
from torch import nn
from torch.nn import init


In [0]:
class FlattenLayer(nn.Module):
  def __init__(self):
    super(FlattenLayer, self).__init__()
  def forward(self, x):
    return x.view(x.shape[0], -1)

In [0]:
# 定义模型
net = nn.Sequential(
    FlattenLayer(),
    nn.Linear(num_inputs, num_hiddens),
    nn.ReLU(),
    nn.Linear(num_hiddens, num_outputs),
)
for params in net.parameters():
  init.normal_(params, mean=0, std=0.01)

In [35]:
# 读取数据并训练模型  上面已经有了
# batch_size = 256
# train_iter, test_iter 
# loss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.5)

train_net(net, train_iter, test_iter, loss, num_epochs,
batch_size, None, None, optimizer)

epoch : 1 , loss : 0.0031, train acc : 0.697, test acc : 0.769
epoch : 2 , loss : 0.0019, train acc : 0.821, test acc : 0.817
epoch : 3 , loss : 0.0016, train acc : 0.844, test acc : 0.825
epoch : 4 , loss : 0.0015, train acc : 0.854, test acc : 0.844
epoch : 5 , loss : 0.0014, train acc : 0.863, test acc : 0.852
epoch : 6 , loss : 0.0014, train acc : 0.873, test acc : 0.857
epoch : 7 , loss : 0.0013, train acc : 0.875, test acc : 0.833
epoch : 8 , loss : 0.0013, train acc : 0.880, test acc : 0.868
epoch : 9 , loss : 0.0012, train acc : 0.883, test acc : 0.848
epoch : 10 , loss : 0.0012, train acc : 0.886, test acc : 0.856
epoch : 11 , loss : 0.0012, train acc : 0.891, test acc : 0.867
epoch : 12 , loss : 0.0011, train acc : 0.894, test acc : 0.867
epoch : 13 , loss : 0.0011, train acc : 0.895, test acc : 0.875
epoch : 14 , loss : 0.0011, train acc : 0.898, test acc : 0.862
epoch : 15 , loss : 0.0011, train acc : 0.900, test acc : 0.853
epoch : 16 , loss : 0.0010, train acc : 0.902, te