<a href="https://colab.research.google.com/github/kennycaiguo/-LearnPytorchWithColab/blob/main/torch_tutorial_lesson3_LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import torchvision.transforms as transforms

# class NN(nn.Module):
#     def __init__(self, input_size, num_classes):
#         super(NN, self).__init__()
#         self.fc1 = nn.Linear(input_size, 50)
#         self.fc2 = nn.Linear(50, num_classes)

#     def forward(self,x):
#       x = F.relu(self.fc1(x))
#       x = self.fc2(x)
#       return x




# Pick the compute device: the GPU when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Hyperparameters.
# Each image is fed to the LSTM as a sequence of rows: `sequence_length`
# time steps with `input_size` features per step.
input_size, sequence_length = 28, 28
num_layers, hidden_size = 2, 256  # stacked LSTM layers / hidden units per direction
num_classes = 10
# NOTE: the name is misspelled ("learnng") but the optimizer setup refers to it
# under this exact name, so it is kept as is.
learnng_rate = 1e-3

batch_size, num_epochs = 64, 2

# Bidirectional LSTM classifier
class BRNN(nn.Module):
    """Bidirectional multi-layer LSTM followed by a linear classification head.

    Args:
        input_size: Number of features in each time step of the input.
        hidden_size: Number of hidden units per LSTM direction.
        num_layers: Number of stacked LSTM layers.
        num_classes: Number of output scores produced per sample.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            bidirectional=True,
        )
        # The LSTM output concatenates both directions, hence 2 * hidden_size.
        self.fc = nn.Linear(hidden_size * 2, num_classes)

    def forward(self, x):
        """Compute class scores for a batch of sequences.

        Args:
            x: Tensor of shape (batch, seq_len, input_size) (batch_first layout).

        Returns:
            Tensor of shape (batch, num_classes) with unnormalized scores.
        """
        # Zero initial hidden/cell states: one slice per layer and direction.
        # They are created from `x` so they always share the input's device and
        # dtype instead of relying on a module-level `device` global.
        state_shape = (self.num_layers * 2, x.size(0), self.hidden_size)
        h0 = x.new_zeros(state_shape)
        c0 = x.new_zeros(state_shape)

        out, _ = self.lstm(x, (h0, c0))
        # Classify from the features of the last time step only.
        return self.fc(out[:, -1, :])




# Load the MNIST train and test splits as tensors (downloaded if missing).
train_dataset = datasets.MNIST(
    root="dataset/",
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = datasets.MNIST(
    root="dataset/",
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

# Instantiate the network and move it to the selected device.
model = BRNN(input_size, hidden_size, num_layers, num_classes)
model = model.to(device)

# Loss function and optimizer.
criteron = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=learnng_rate)

# Train the model.
for epoch in range(num_epochs):
    for batch_idx, (data, targets) in enumerate(train_loader):
        # Move the batch to the selected device (CUDA when available) and
        # drop dimension 1 when it has size 1 so the model gets a 3-D input.
        data = data.to(device=device)
        data = data.squeeze(1)
        targets = targets.to(device=device)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass and loss.
        scores = model(data)
        loss = criteron(scores, targets)

        # Backpropagate, then take one optimizer step.
        loss.backward()
        optimizer.step()

# Model evaluation: measure classification accuracy
def check_accuracy(loader, model):
    """Evaluate `model` on every batch of `loader` and print its accuracy.

    Each batch is moved to the device of the model's parameters (left where it
    is when the model has no parameters), dimension 1 is squeezed away when it
    has size 1, and the arg-max of the scores is compared to the labels.

    The model is switched to eval mode for the duration of the check and its
    previous train/eval mode is restored afterwards, even if an error occurs.

    Args:
        loader: Iterable of `(inputs, labels)` tensor batches exposing a
            `dataset` attribute. If the dataset has a `train` attribute it
            selects the "Train"/"Test" wording of the header line.
        model: The `nn.Module` to evaluate.

    Returns:
        A 0-d tensor holding the fraction of correct predictions
        (0.0 when the loader yields no samples).
    """
    # Not every dataset exposes `.train`; fall back to a neutral header.
    is_train_split = getattr(loader.dataset, "train", None)
    if is_train_split is None:
        print("Checking accuracy")
    elif is_train_split:
        print("Checking accuracy on Train data")
    else:
        print("Checking accuracy on Test data")

    # Follow the model's own device rather than a module-level global.
    first_param = next(model.parameters(), None)
    target_device = None if first_param is None else first_param.device

    num_correct = 0
    num_samples = 0
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for x, y in loader:
                if target_device is not None:
                    x = x.to(device=target_device)
                    y = y.to(device=target_device)
                # Remove the singleton channel dimension so that
                # (batch, 1, H, W) images become (batch, H, W) sequences.
                x = x.squeeze(1)

                scores = model(x)
                _, predictions = scores.max(1)
                num_correct += (predictions == y).sum()
                num_samples += predictions.size(0)
    finally:
        # Restore whichever mode the caller had the model in.
        model.train(was_training)

    if num_samples == 0:
        # Avoid dividing by zero on an empty loader.
        print(f"got 0/0 with accuracy:{0.0:.2f}")
        return torch.tensor(0.0, device=target_device)

    print(f"got {num_correct}/{num_samples} with accuracy:{float(num_correct)/float(num_samples)*100:.2f}")
    return num_correct / num_samples

# Report accuracy on the training loader first, then on the test loader.
# NOTE: the second call is the last expression of the cell, so in a notebook
# its return value is what gets displayed as the cell result.
check_accuracy(train_loader,model)
check_accuracy(test_loader,model)


100%|██████████| 9.91M/9.91M [00:00<00:00, 17.7MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 475kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 4.46MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 6.68MB/s]


Checking accuracy on Train data
got 58597/60000 with accuracy:97.66
Checking accuracy on Test data
got 9725/10000 with accuracy:97.25


tensor(0.9725, device='cuda:0')