In [1]:
#导入包
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets,transforms
from torch.utils.data import DataLoader

In [2]:
#构建transform

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,),(0.5,))
])

In [3]:
#下载，加载数据集

trainset = datasets.MNIST('data',train = True, download = True, transform = transform)
testset = datasets.MNIST('data',train = False, download = True, transform = transform)

device = 'cuda'
BATCH_SIZE = 128

train_loader = DataLoader(trainset, batch_size = BATCH_SIZE, shuffle = True, num_workers = 16, pin_memory = True)
test_loader = DataLoader(testset, batch_size = BATCH_SIZE, shuffle = True, num_workers = 16, pin_memory = True)

  cpuset_checked))


In [4]:
#构建网络模型

class RNN_Net(nn.Module):
    def __init__(self):
        super(RNN_Net,self).__init__()
        #图片 1*28*28
        self.hidden_dim = 128
        self.layer_dim = 3
        #(input_dim, hidden_dim, layer_dim)
        self.rnn = nn.RNN(28, 128, 3, batch_first=True, nonlinearity='relu')
        self.fc = nn.Linear(128, 10)
    def forward(self,x):
        # （layer_dim, batch_size, hidden_dim)
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).requires_grad_().to(device)
        # 分离隐藏状态，避免梯度爆炸
        out, hn = self.rnn(x, h0.detach().cuda())
        out = self.fc(out[:, -1, :])
        return out
#创建模型
net = RNN_Net().to(device)

In [5]:
#定义优化器和损失函数

criterion = nn.CrossEntropyLoss() # 交叉式损失函数
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9) # 优化器

In [None]:
#训练模型

EPOCH = 15

for epoch in range(EPOCH):
    train_loss = 0.0
    for i,(datas,labels) in enumerate(train_loader):
        #数据处理
        datas = datas.view(-1, 28, 28).requires_grad_().to(device)
        labels = labels.to(device)
        #梯度置零
        optimizer.zero_grad()
        #训练
        outputs = net(datas)
        #计算损失
        loss = criterion(outputs,labels)
        #反向传播
        loss.backward()
        #参数更新
        optimizer.step()
        
        train_loss += loss.item()
    
    print("Epoch :%d , Loss : %.3f" %(epoch+1, train_loss/len(train_loader.dataset)))

Epoch :1 , Loss : 0.018
Epoch :2 , Loss : 0.018
Epoch :3 , Loss : 0.017
Epoch :4 , Loss : 0.010
Epoch :5 , Loss : 0.005
Epoch :6 , Loss : 0.003
Epoch :7 , Loss : 0.002
Epoch :8 , Loss : 0.002
Epoch :9 , Loss : 0.002
Epoch :10 , Loss : 0.001
Epoch :11 , Loss : 0.001


In [None]:
# 测试

correct = 0
total = 0
with torch.no_grad():
    for i , (datas, labels) in enumerate(test_loader):
        datas = datas.view(-1, 28, 28).to(device)
        outputs = net(datas)
        _, predicted = torch.max(outputs.data, dim=1) # 第一个是值的张量，第二个是序号的张量
        total += labels.size(0)  
        correct += (predicted.cuda() == labels.cuda()).sum()
    print("Accuracy：{:.3f}%".format(correct / total * 100))