# **Implementation of built-in RNN in Pytorch for Digit classification on MNIST dataset**

In [32]:
import torch
import torch.nn as nn
import torchvision
import torchvision 
import torchvision.transforms as transforms

In [34]:
#device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


In [35]:
#hyperparameters
num_classes = 10
input_size = 28
sequence_length = 28
hidden_size = 128 #so you can try out different sizes here
num_layers = 2 #stacking two rnns together and 2 rnn takes input from 1 so this improves our model
num_epochs = 2
batch_size =100
learning_rate = 0.001

In [36]:
#MINST dataset
#traindataset
train_dataset = torchvision.datasets.MNIST(root ='./data',
                                           train = True,
                                           transform = transforms.ToTensor(),
                                           download = True)

In [37]:
#MINST dataset
#testdataset
test_dataset = torchvision.datasets.MNIST(root ='./data',
                                           train = False,
                                           transform = transforms.ToTensor())

In [38]:
#Data loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 
                                           batch_size=batch_size, 
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset = test_dataset,
                                          batch_size =batch_size,
                                          shuffle = True)

In [39]:
#fully connected RNN with one hidden layer
class RNN(nn.Module):
  def __init__(self,input_size,hidden_size, num_layers, num_classes):
    super(RNN, self).__init__()
    #start the number of layer and the hidden size
    self.num_layers =num_layers
    self.hidden_size = hidden_size
    # use builtin rnn model
    self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first = True) 
    # input needs to have shape (batch_size, seq , input_size)

    # or:
    #self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
    #self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
    #creating fully connected linear layer input size hidden size, outpu size, num_classes   
    self.fc = nn.Linear(hidden_size, num_classes)


  def forward(self, x):
    # Set initial hidden states h0 (and cell states for LSTM)
    #create a tensor with 0, num_layers, batch size, hiddensize and 
    h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
   # c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
 
    out, _ = self.rnn(x,h0)
    #ouput: batch_size, seq_length, hidden_size 
    #out (N, 28, 128)
    out = out[:,-1,:]
    #out(N, 128)
    out = self.fc(out)
    return(out)



In [40]:
model = RNN(input_size, hidden_size, num_layers, num_classes).to(device)

In [41]:
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) 

In [42]:
# Train the model
n_total_steps = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):  
        # origin shape: [N, 1, 28, 28]
        # resized: [N, 28, 28]
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs = model(images)

        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (i+1) % 100 == 0:
            print (f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')



Epoch [1/2], Step [100/600], Loss: 0.8073
Epoch [1/2], Step [200/600], Loss: 0.6427
Epoch [1/2], Step [300/600], Loss: 0.5445
Epoch [1/2], Step [400/600], Loss: 0.6915
Epoch [1/2], Step [500/600], Loss: 0.3976
Epoch [1/2], Step [600/600], Loss: 0.4456
Epoch [2/2], Step [100/600], Loss: 0.2698
Epoch [2/2], Step [200/600], Loss: 0.3522
Epoch [2/2], Step [300/600], Loss: 0.1961
Epoch [2/2], Step [400/600], Loss: 0.3390
Epoch [2/2], Step [500/600], Loss: 0.2441
Epoch [2/2], Step [600/600], Loss: 0.3292


In [43]:
# Test the model
# In test phase, we don't need to compute gradients (for memory efficiency)
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    for images, labels in test_loader:
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)
        outputs = model(images)
        # max returns (value ,index)
        _, predicted = torch.max(outputs.data, 1)
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()

    acc = 100.0 * n_correct / n_samples
    print(f'Accuracy of the network on the 10000 test images: {acc} %')

Accuracy of the network on the 10000 test images: 92.21 %


In [45]:
print(labels)

tensor([8, 9, 4, 3, 2, 6, 5, 5, 2, 5, 2, 8, 3, 0, 4, 0, 0, 0, 1, 3, 4, 4, 5, 2,
        3, 4, 6, 3, 9, 2, 4, 2, 6, 5, 8, 3, 2, 8, 9, 4, 4, 0, 7, 7, 1, 1, 9, 6,
        5, 6, 8, 8, 6, 7, 2, 6, 6, 5, 8, 6, 5, 4, 4, 7, 0, 4, 3, 1, 0, 2, 2, 7,
        7, 2, 0, 2, 1, 5, 0, 6, 4, 7, 2, 0, 7, 1, 0, 3, 7, 3, 6, 3, 2, 9, 3, 1,
        2, 4, 1, 3])


In [46]:
print(outputs)

tensor([[-5.7448e-01, -1.7802e+00,  5.3725e-02,  1.9971e-01, -2.1774e+00,
          1.8993e+00, -5.1161e+00,  2.2918e+00,  6.3161e+00,  1.9537e+00],
        [-1.9496e+00, -5.9289e+00, -4.1073e+00, -7.1174e-01,  2.0318e+00,
          4.3925e+00, -3.5486e+00,  9.2704e-01,  2.7550e-01,  4.8593e+00],
        [-3.3459e+00, -2.9552e+00, -2.9616e+00, -3.5457e+00,  3.9183e+00,
          1.1560e+00, -2.5586e+00,  4.6535e-01,  1.5720e+00,  5.8636e+00],
        [-3.6531e+00,  7.1243e-01,  5.1045e-01,  8.2719e+00, -1.6936e+00,
          1.7024e+00, -5.5046e+00,  5.0272e-01,  6.5211e-01,  3.5141e-01],
        [ 5.9301e-01,  9.4463e-01,  7.6263e+00,  2.4947e+00, -5.6129e+00,
          1.7633e-01, -2.7394e+00,  1.7234e+00,  1.2147e+00, -3.0250e+00],
        [ 1.0247e+00, -1.4111e+00,  1.2491e-01, -7.2216e+00,  3.0961e+00,
         -8.4544e-01,  5.6029e+00, -1.1623e+00, -3.1836e-01, -5.9234e-01],
        [-1.1724e+00, -5.6222e+00, -3.4732e+00,  1.7602e+00,  4.1406e-01,
          6.6424e+00, -4.0122e+0