In [1]:
import torch 
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import torchvision.transforms as transforms

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [11]:
# image shape: (h, w)

input_size = 28   # w
seq_len = 28      # h
num_layers = 2
hidden_size = 256
num_classes = 10
learning_rate = 0.0001
batch_size = 64
num_epochs = 5

In [5]:
train_dataset = datasets.MNIST(root='dataset/', train=True, transform=transforms.ToTensor(), download=True)
test_dataset = datasets.MNIST(root='dataset/', train=False, transform=transforms.ToTensor(), download=True)

train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to dataset/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=9912422.0), HTML(value='')))


Extracting dataset/MNIST/raw/train-images-idx3-ubyte.gz to dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to dataset/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=28881.0), HTML(value='')))


Extracting dataset/MNIST/raw/train-labels-idx1-ubyte.gz to dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to dataset/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=1648877.0), HTML(value='')))


Extracting dataset/MNIST/raw/t10k-images-idx3-ubyte.gz to dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to dataset/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=4542.0), HTML(value='')))


Extracting dataset/MNIST/raw/t10k-labels-idx1-ubyte.gz to dataset/MNIST/raw



  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [6]:
# Bidirectional RNN
class BRNN(nn.Module):
  def __init__(self, input_size, hidden_size, num_layers, num_classes):
    super(BRNN, self).__init__()
    self.hidden_size = hidden_size
    self.input_size = input_size
    self.num_layers = num_layers
    self.num_classes = num_classes

    self.rnn = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, bidirectional=True)
    self.fc = nn.Linear(hidden_size*2, num_classes)

  def forward(self, x):
    # shape of x: (batch_size, seq_len, input_size)
    h = torch.zeros(2*self.num_layers, x.shape[0], self.hidden_size).to(device)
    c = torch.zeros(2*self.num_layers, x.shape[0], self.hidden_size).to(device) 
    # shape of h,c => (2*num_layers, batch_size, hidden_size)

    out, _ = self.rnn(x, (h,c))
    # shape of out: (batch_size, seq_len, hidden_size * 2)

    out = out[:, -1, :]   # ((batch_size, hidden_size * 2))
    
    out = self.fc(out)

    return out # (batch_size, num_classes)

In [12]:
X = torch.rand((batch_size, seq_len, input_size), device=device)
model = BRNN(input_size, hidden_size, num_layers, num_classes).to(device)
output = model(X)
print(output.shape)

torch.Size([64, 10])


In [13]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [14]:
# train
for epoch in range(num_epochs):
  running_loss = 0.0
  for batch_idx, (data, targets) in enumerate(train_loader):
    # shape of data: (batch_size, channels, seq_len, input_size) => (batch_size, 1, height, width)

    data = data.to(device).squeeze(1)
    # shape of data after squeezing : (batch_size, seq_len, input_size) => (batch_size, height, width)
    targets = targets.to(device)    # shape : (batch_size)

    outputs = model(data)   # shape: (batch_size, num_classes)

    loss = criterion(outputs, targets)

    optimizer.zero_grad()
    loss.backward()

    optimizer.step()

    running_loss += loss.detach().cpu().item()

  print(f'Epoch: {epoch+1}/{num_epochs} Loss: {running_loss / len(train_loader):.6f}')

Epoch: 1/5 Loss: 0.764424
Epoch: 2/5 Loss: 0.223170
Epoch: 3/5 Loss: 0.156726
Epoch: 4/5 Loss: 0.124093
Epoch: 5/5 Loss: 0.100942


In [15]:
# check accuracy
def check_accuracy(loader, model):
  if loader.dataset.train:
    print ('Checking accuracy on training data')
  else:
    print ('Checking accuracy on test data')
  num_correct = 0
  num_samples = 0
  model.eval()

  with torch.no_grad():
    for x, y in loader:
      x = x.squeeze(1).to(device = device) # (batch_size, seq_len, input_size)
      y = y.to(device = device) # (batch_size)

      scores = model(x) # (batch_size, num_classes)
      _, pred = scores.max(1)
      num_correct += (pred == y).sum()
      num_samples += pred.size(0)

    print (f'Got {num_correct}/{num_samples} with accuracy {float(num_correct)/float(num_samples)*100 :.2f}')
  
  model.train()

In [16]:
check_accuracy(train_loader, model)
check_accuracy(test_loader,model)

Checking accuracy on training data
Got 58530/60000 with accuracy 97.55
Checking accuracy on test data
Got 9724/10000 with accuracy 97.24
