In [1]:
import numpy as np
from torch import nn,optim
from torch.autograd import Variable
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch

In [2]:
# Training split of MNIST; ToTensor() converts every image to a tensor on access.
train_dataset = datasets.MNIST(
    root='./',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
# Test split, stored under the same root directory and transformed the same way.
test_dataset = datasets.MNIST(
    root='./',
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)

In [3]:
# Mini-batch size shared by both loaders
# (exercise for the reader: think about why the data is processed in batches).
batch_size = 64

# Training loader: samples are reshuffled on every pass so that consecutive
# epochs do not see the batches in the same order.
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=batch_size,
                          shuffle=True)

# Evaluation loader: accuracy does not depend on sample order, so shuffling is
# switched off. This keeps the evaluation order reproducible and avoids
# drawing random numbers for no benefit.
test_loader = DataLoader(dataset=test_dataset,
                         batch_size=batch_size,
                         shuffle=False)

In [4]:
# Sample counts of both datasets, then batch counts of both loaders, one per line.
print(
    len(train_dataset),
    len(test_dataset),
    len(train_loader),
    len(test_loader),
    sep="\n",
)

60000
10000
938
157


In [5]:
# Look at the first training batch only, to check the tensor shapes.
for i, data in enumerate(train_loader):
    inputs, lables = data
    # Image batch shape first, label batch shape second.
    print(inputs.shape, lables.shape, sep="\n")
    break

torch.Size([64, 1, 28, 28])
torch.Size([64])


In [6]:
# Number of mini-batches the training loader yields per epoch.
len(train_loader)

938

In [7]:
# Network definition
class LSTM(nn.Module):
    """Single-layer LSTM classifier that reads an image row by row.

    The input is reshaped to ``(batch, 28, 28)``: 28 time steps, each carrying
    a 28-value feature vector. The hidden state after the last step is mapped
    to 10 scores and passed through softmax, so ``forward`` returns one row of
    10 probabilities per sample.
    """

    def __init__(self):
        super().__init__()
        # input_size:  number of features per time step
        # hidden_size: width of the LSTM hidden state
        # num_layers:  number of stacked LSTM layers
        # By default nn.LSTM works with (seq_len, batch, feature) tensors;
        # batch_first=True switches input and output to (batch, seq_len, feature).
        self.lstm = nn.LSTM(
            input_size=28,
            hidden_size=64,
            num_layers=1,
            batch_first=True,
        )
        # Projects the 64-dimensional hidden state to one score per class.
        self.out = nn.Linear(in_features=64, out_features=10)
        # Normalises the scores over the class dimension.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return softmax class probabilities of shape ``(batch, 10)`` for ``x``."""
        # (batch, seq_len, feature)
        rows = x.view(-1, 28, 28)
        # self.lstm returns (output, (h_n, c_n)):
        #   output: (batch, seq_len, hidden_size), the result at every time step
        #   h_n:    (num_layers, batch, hidden_size), hidden state after the last step
        #   c_n:    (num_layers, batch, hidden_size), cell state after the last step
        # Only h_n is needed here.
        _, (h_n, _) = self.lstm(rows)
        # Hidden state of the last layer: (batch, hidden_size).
        last_hidden = h_n[-1]
        scores = self.out(last_hidden)
        return self.softmax(scores)

In [8]:
# Learning rate handed to the Adam optimizer below (Adam is usually run with a small one).
LR = 3e-4

# Cost function: mean squared error.
mes_loss = nn.MSELoss()

# The model has to exist before the optimizer, which is built from its parameters.
model = LSTM()
optimizer = optim.Adam(model.parameters(), lr=LR)

In [9]:
# Train the model
def train():
    """Run one pass over ``train_loader``, taking one optimizer step per batch.

    Works on the module-level ``model``, ``mes_loss`` and ``optimizer``.
    Integer labels are expanded to one-hot rows before being compared with the
    model output by the loss.
    """
    for inputs, lables in train_loader:
        # Drop gradients left over from the previous step.
        optimizer.zero_grad()
        # Model prediction for this batch: (batch, 10).
        out = model(inputs)
        # One-hot encode the labels.
        # scatter_(dim, index, value):
        #   dim   - dimension along which values are written (1 = class axis)
        #   index - position to write in each row; labels reshaped [N] -> [N, 1]
        #   value - what is written at those positions
        label_column = lables.reshape(-1, 1)
        one_hot = torch.zeros(inputs.shape[0], 10).scatter_(1, label_column, 1)
        # Loss, backward pass, weight update.
        loss = mes_loss(out, one_hot)
        loss.backward()
        optimizer.step()

In [10]:
def test():
    """Evaluate ``model`` on ``test_loader`` and print accuracy and hit count.

    Works on the module-level ``model``, ``test_loader`` and ``test_dataset``.
    Prints ``Test acc:<fraction>`` followed by the tensor holding the number of
    correctly classified samples. Returns nothing.

    Evaluation runs in eval mode with autograd disabled, so no computation
    graph is built for the test batches. The model's previous train/eval mode
    is restored before returning, leaving later training unaffected.
    """
    correct = 0
    # Remember the current mode so it can be restored after evaluation.
    was_training = model.training
    model.eval()
    try:
        # No gradients are needed to count correct predictions.
        with torch.no_grad():
            for inputs, lables in test_loader:
                # Model prediction for this batch: (batch, 10).
                out = model(inputs)
                # torch.max over dim 1 returns (max value, index of max);
                # the index is the predicted class.
                _, predicted = torch.max(out, 1)
                correct += (predicted == lables).sum()
    finally:
        model.train(was_training)

    print("Test acc:{0}".format(correct.item()/len(test_dataset)))
    print(correct)
        

In [11]:
# Run 10 epochs; each epoch is one call to train() followed by one call to test().
for epoch in range(10):
    print("epoch:",epoch)
    train()
    test()

epoch: 0
Test acc:0.8899
tensor(8899)
epoch: 1
Test acc:0.9211
tensor(9211)
epoch: 2


KeyboardInterrupt: 