In [13]:
import torch 
import torch.nn as nn 
import torchvision.datasets as dsets 
import torchvision.transforms as transforms 
from torch.autograd import Variable
import numpy as np 
import matplotlib.pyplot as plt  

In [3]:
train_dataset = dsets.MNIST(root = './data', train = True, transform = transforms.ToTensor(), download = False)
test_dataset = dsets.MNIST(root = './data', train = False, transform= transforms.ToTensor(), download = False)

In [9]:
n_iters = 3000 
batch_size = 100 
num_epoch = n_iters/(len(train_dataset)/batch_size)
num_epoch = int(num_epoch)
train_loader = torch.utils.data.DataLoader(dataset= train_dataset,batch_size= batch_size, shuffle= True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,batch_size= batch_size, shuffle = True)

In [7]:
class RNNmodel(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(RNNmodel,self).__init__()
        self.hidden_dim = hidden_dim 
        self.layer_dim = layer_dim 
        self.rnn = nn.RNN(input_dim, hidden_dim, layer_dim, batch_first = True, nonlinearity = 'relu')
        self.fc = nn.Linear(hidden_dim, output_dim)
    
    def forward(self, x):
        h0 = Variable(torch.zeros(self.layer_dim,x.size(0), self.hidden_dim))
        out, hn = self.rnn(x, h0)
        out = self.fc(out[:,-1,:])
        return out 

In [8]:
criterion = nn.CrossEntropyLoss()

In [12]:
learning_rate = 0.1 
input_dim = 28 
layer_dim = 1 
hidden_dim = 100 
output_dim = 10
model = RNNmodel(input_dim, hidden_dim, layer_dim, output_dim)
optimizer = torch.optim.SGD(model.parameters(), lr =learning_rate)

In [17]:
seq_dim = 28 
iters = 0
for epoch in range(num_epoch):
    for i,(images, labels) in enumerate(train_loader):
        images = Variable(images.view(-1,seq_dim, input_dim))
        labels = Variable(labels)
        optimizer.zero_grad() 
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        iters += 1 
        if iters%500 == 0:
            total = 0 
            correct = 0
            for images, labels in train_loader:
                images = Variable(images.view(-1,seq_dim,input_dim))
                labels = Variable(labels)
                outputs = model(images)
                _, predicted = torch.max(outputs.data, 1)
                correct += (predicted == labels).sum()
                total += labels.size(0)
            accuracy = 100* correct/ total 
            print("iterations : {}, accuracy : {}".format(iters,accuracy))

iterations : 500, accuracy : 9
iterations : 1000, accuracy : 9
iterations : 1500, accuracy : 9
iterations : 2000, accuracy : 9
iterations : 2500, accuracy : 9
iterations : 3000, accuracy : 9


In [16]:
# Number of steps to unroll
seq_dim = 28  

iter = 0
for epoch in range(num_epoch):
    for i, (images, labels) in enumerate(train_loader):
        # Load images as a tensor with gradient accumulation abilities
        images = images.view(-1, seq_dim, input_dim).requires_grad_()
        
        # Clear gradients w.r.t. parameters
        optimizer.zero_grad()
        
        # Forward pass to get output/logits
        # outputs.size() --> 100, 10
        outputs = model(images)
        
        # Calculate Loss: softmax --> cross entropy loss
        loss = criterion(outputs, labels)
        
        # Getting gradients w.r.t. parameters
        loss.backward()
        
        # Updating parameters
        optimizer.step()
        
        iter += 1
        
        if iter % 500 == 0:
            # Calculate Accuracy         
            correct = 0
            total = 0
            # Iterate through test dataset
            for images, labels in test_loader:
                # Load images to a tensor with gradient accumulation abilities
                images = images.view(-1, seq_dim, input_dim).requires_grad_()
                
                # Forward pass only to get logits/output
                outputs = model(images)
                
                # Get predictions from the maximum value
                _, predicted = torch.max(outputs.data, 1)
                
                # Total number of labels
                total += labels.size(0)
                
                # Total correct predictions
                correct += (predicted == labels).sum()
            
            accuracy = 100 * correct / total
            
            # Print Loss
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iter, loss.item(), accuracy))

Iteration: 500. Loss: 0.8984208106994629. Accuracy: 67
Iteration: 1000. Loss: 0.6864016056060791. Accuracy: 86
Iteration: 1500. Loss: nan. Accuracy: 9
Iteration: 2000. Loss: nan. Accuracy: 9
Iteration: 2500. Loss: nan. Accuracy: 9
Iteration: 3000. Loss: nan. Accuracy: 9
