Importing Libraries 

In [1]:
import torch
from tqdm import tqdm
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
from torchvision.transforms import transforms

Hyperparameter Declaration

In [2]:
# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Fix the RNG seed so weight initialisation and data shuffling repeat across
# "Restart Kernel -> Run All" executions.
seed = 42
torch.manual_seed(seed)

# Declaring Hyper-parameters
input_size = 28        # features fed to the LSTM at every time step
sequence_len = 28      # number of time steps per sample
num_layers = 2         # stacked LSTM layers
hidden_size = 256      # hidden units per LSTM layer
num_classes = 10       # size of the classifier output
learning_rate = 0.001  # Adam step size
batch_size = 64        # samples per mini-batch
num_epochs = 10        # full passes over the training set

Defining the RNN (LSTM) Model

In [3]:
class RNN(nn.Module):
   """Stacked LSTM followed by a linear classifier over every time step.

   The LSTM output for all ``sequence_len`` steps is flattened and passed to a
   single fully connected layer, so inputs must have exactly ``sequence_len``
   time steps.

   Args:
      input_size: Number of features per time step.
      hidden_size: Number of hidden units in each LSTM layer.
      num_layers: Number of stacked LSTM layers.
      num_classes: Number of output scores per sample.
      sequence_len: Number of time steps in every input sequence.
   """

   def __init__(self, input_size, hidden_size, num_layers, num_classes, sequence_len):
      super().__init__()
      self.hidden_size = hidden_size
      self.num_layers = num_layers
      self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
      # The classifier consumes the hidden state of *every* time step, hence the
      # hidden_size * sequence_len input width.
      self.fc = nn.Linear(hidden_size*sequence_len, num_classes)

   def forward(self, data):
      """Return class scores of shape ``(batch, num_classes)``.

      Args:
         data: Tensor of shape ``(batch, sequence_len, input_size)``.
      """
      # Create the initial states on the same device and dtype as the input so
      # the module does not depend on a notebook-level ``device`` global and
      # keeps working after the model is moved or cast.
      state_shape = (self.num_layers, data.size(0), self.hidden_size)
      h0 = torch.zeros(state_shape, device=data.device, dtype=data.dtype)
      c0 = torch.zeros(state_shape, device=data.device, dtype=data.dtype)

      out, _ = self.lstm(data, (h0, c0))
      # Flatten (batch, seq, hidden) -> (batch, seq * hidden) for the classifier.
      out = out.reshape(out.shape[0], -1)
      out = self.fc(out)
      return out

Loading MNIST Dataset

In [4]:

# Download MNIST into dataset/ on first use; ToTensor converts every image to a tensor.
train_dataset = datasets.MNIST(root='dataset/', train=True, transform=transforms.ToTensor(), download=True)
test_dataset = datasets.MNIST(root='dataset/', train=False, transform=transforms.ToTensor(), download=True)

# Shuffle only the training data: it decorrelates mini-batches for SGD.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
# Accuracy does not depend on sample order, so keep the evaluation loader
# unshuffled for a deterministic iteration order.
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to dataset/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting dataset/MNIST/raw/train-images-idx3-ubyte.gz to dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to dataset/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting dataset/MNIST/raw/train-labels-idx1-ubyte.gz to dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to dataset/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting dataset/MNIST/raw/t10k-images-idx3-ubyte.gz to dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to dataset/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting dataset/MNIST/raw/t10k-labels-idx1-ubyte.gz to dataset/MNIST/raw



Training the Model

In [5]:

model = RNN(input_size, hidden_size, num_layers, num_classes, sequence_len).to(device)


criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Be explicit about the mode so re-running this cell after an evaluation cell
# still trains in training mode.
model.train()

# Keep a handle on the progress bar: a `desc` f-string passed to tqdm() is
# evaluated only once, so the label has to be refreshed inside the loop to show
# the current epoch.
progress_bar = tqdm(range(num_epochs), total=num_epochs)
for epoch in progress_bar:
   progress_bar.set_description(f'Training model for epoch {epoch + 1}/{num_epochs}')
   for batch_idx, (data, target) in enumerate(train_loader):
      # squeeze(1) drops the size-1 dimension at index 1 so the batch matches
      # the (batch, sequence, features) layout the LSTM expects.
      data = data.to(device).squeeze(1)
      targets = target.to(device)
      scores = model(data)
      loss = criterion(scores, targets)
      # Clear gradients left over from the previous step before back-propagating.
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
   # Reports the loss of the last mini-batch of the epoch, not an epoch average.
   print(f'epoch: {epoch + 1} step: {batch_idx + 1}/{len(train_loader)} loss: {loss.item()}')

Training model for epoch 1/10:  10%|█         | 1/10 [00:08<01:15,  8.43s/it]

epoch: 1 step: 938/938 loss: 0.1090955063700676


Training model for epoch 1/10:  20%|██        | 2/10 [00:16<01:06,  8.34s/it]

epoch: 2 step: 938/938 loss: 0.054485347121953964


Training model for epoch 1/10:  30%|███       | 3/10 [00:24<00:58,  8.29s/it]

epoch: 3 step: 938/938 loss: 0.07047758251428604


Training model for epoch 1/10:  40%|████      | 4/10 [00:33<00:49,  8.28s/it]

epoch: 4 step: 938/938 loss: 0.014325334690511227


Training model for epoch 1/10:  50%|█████     | 5/10 [00:41<00:41,  8.40s/it]

epoch: 5 step: 938/938 loss: 0.00046906518400646746


Training model for epoch 1/10:  60%|██████    | 6/10 [00:50<00:33,  8.34s/it]

epoch: 6 step: 938/938 loss: 0.07154867053031921


Training model for epoch 1/10:  70%|███████   | 7/10 [00:58<00:24,  8.29s/it]

epoch: 7 step: 938/938 loss: 0.12261276692152023


Training model for epoch 1/10:  80%|████████  | 8/10 [01:06<00:16,  8.29s/it]

epoch: 8 step: 938/938 loss: 0.03354686498641968


Training model for epoch 1/10:  90%|█████████ | 9/10 [01:14<00:08,  8.28s/it]

epoch: 9 step: 938/938 loss: 0.0003564717771951109


Training model for epoch 1/10: 100%|██████████| 10/10 [01:23<00:00,  8.30s/it]

epoch: 10 step: 938/938 loss: 0.0025204652920365334





Testing The Model

In [6]:
# Evaluating our RNN model
def check_accuracy(loader, modelrnn):
   """Print the classification accuracy of ``modelrnn`` on ``loader``.

   Args:
      loader: Iterable of ``(inputs, labels)`` batches exposing a ``dataset``
         attribute. If ``loader.dataset.train`` is truthy the run is reported
         as training data, otherwise (or when the attribute is missing) as
         test data.
      modelrnn: The ``nn.Module`` to evaluate. Its training/eval mode is
         restored to what it was on entry.

   Returns:
      None. The result is printed.
   """
   if getattr(loader.dataset, 'train', False):
      print('Checking accuracy on training data')
   else:
      print('Checking accuracy on test data')

   # Evaluate on whatever device the model lives on instead of relying on a
   # notebook-level global; parameter-free modules are evaluated on the CPU.
   first_param = next(modelrnn.parameters(), None)
   eval_device = first_param.device if first_param is not None else torch.device('cpu')

   num_correct = 0
   num_samples = 0
   was_training = modelrnn.training
   modelrnn.eval()
   try:
      with torch.no_grad():
         for x, y in loader:
            x = x.to(eval_device).squeeze(1)
            y = y.to(eval_device)
            score = modelrnn(x)
            # The index of the highest score is the predicted class.
            _, predictions = score.max(1)
            num_correct += (predictions == y).sum().item()
            num_samples += predictions.size(0)
   finally:
      # Restore the mode of the model that was passed in (not a global one),
      # even if evaluation raised.
      modelrnn.train(was_training)

   # Guard against an empty loader instead of dividing by zero.
   accuracy = float(num_correct) / float(num_samples) * 100 if num_samples else 0.0
   print(f'Got {num_correct}/{num_samples} with accuracy {accuracy:.2f}')
   

# Report accuracy on the training split first, then on the held-out test split.
for evaluation_loader in (train_loader, test_loader):
   check_accuracy(evaluation_loader, model)

Checking accuracy on training data
Got 59802/60000 with accuracy 99.67
Checking accuracy on test data
Got 9911/10000 with accuracy 99.11
