In [1]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

# 1. Hyper-parameters and Dataset

In [2]:
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyper-parameters
sequence_length = 28
input_size = 28
hidden_size = 128
num_layers = 2
num_classes = 10
batch_size = 100
num_epochs = 2
learning_rate = 0.003

In [3]:
from google.colab import drive
drive.mount('/content/drive')
data_dir = '/content/drive/My Drive/PyTorch/Github_Series/02-intermediate/'

Mounted at /content/drive


In [4]:
# MNIST dataset
train_dataset = torchvision.datasets.MNIST(root=data_dir,
                                           train=True,
                                           transform=transforms.ToTensor(),
                                           download=True)

test_dataset = torchvision.datasets.MNIST(root=data_dir,
                                          train=False,
                                          transform=transforms.ToTensor())

# Data loader
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=batch_size,
                                          shuffle=True)

# 2. Modeling and Training

**Implementation** \\
The dimensions of the parameters of each layer can be refered to [the documentation of LSTM](https://pytorch.org/docs/stable/generated/torch.nn.LSTM.html).

In [7]:
# Bidirectional recurrent neural network (many-to-one)
class BiRNN(nn.Module):

  def __init__(self, input_size, hidden_size, num_layers, num_classes):
    super().__init__()
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.bi_lstm = nn.LSTM(input_size, hidden_size, num_layers,
                           batch_first=True, bidirectional=True)
    self.fc = nn.Linear(2*hidden_size, num_classes)   # 2 for bidirection

  def forward(self, x):
    # Set initial hidden and cell states 
    h0 = torch.zeros(2*self.num_layers, x.size(0), self.hidden_size).to(device)
    c0 = torch.zeros(2*self.num_layers, x.size(0), self.hidden_size).to(device)

    # Forward propagate LSTM
    out, _ = self.bi_lstm(x, (h0, c0))   
    
    # Decode the hidden state of the last time step
    out = self.fc(out[:,-1,:])  # shape of out: (batch_size, sequence_length, 2*hidden_size)
    return out

In [8]:
model = BiRNN(input_size, hidden_size, num_layers, num_classes).to(device)

# loss and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
total_step = len(train_loader)
for epoch in range(num_epochs):
  for batch_id, (images, labels) in enumerate(train_loader):
    input = images.reshape(-1, sequence_length, input_size).to(device)
    labels = labels.to(device)

    # Feedforward
    output = model(input)
    loss = loss_fn(output, labels)

    # Backward
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (batch_id + 1) % 100 == 0:
      print('Epoch: [{}/{}], Step: [{}/{}], Loss: {:.4f}'
            .format(epoch+1, num_epochs, batch_id+1, total_step, loss.item()))

Epoch: [1/2], Step: [100/600], Loss: 0.5016
Epoch: [1/2], Step: [200/600], Loss: 0.3830
Epoch: [1/2], Step: [300/600], Loss: 0.1203
Epoch: [1/2], Step: [400/600], Loss: 0.1547
Epoch: [1/2], Step: [500/600], Loss: 0.1318
Epoch: [1/2], Step: [600/600], Loss: 0.1186
Epoch: [2/2], Step: [100/600], Loss: 0.0786
Epoch: [2/2], Step: [200/600], Loss: 0.1329
Epoch: [2/2], Step: [300/600], Loss: 0.1073
Epoch: [2/2], Step: [400/600], Loss: 0.1544
Epoch: [2/2], Step: [500/600], Loss: 0.0749
Epoch: [2/2], Step: [600/600], Loss: 0.0425


# 3. Test the model

In [9]:
# Test the model
model.eval()
with torch.no_grad():
  total = 0
  correct = 0
  for images, labels in test_loader:
    input = images.reshape(-1, sequence_length, input_size).to(device)
    labels = labels.to(device)
    output = model(input)
    _, pred = torch.max(output, dim=1)
    total += labels.size(0)
    correct += (pred == labels).sum()

  print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total)) 

Test Accuracy of the model on the 10000 test images: 97.77999877929688 %
