In [1]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as datas
from torch.autograd import Variable
import os

os.environ["CUDA_VISIBLE_DEVICES"] = '0'

In [2]:
# Load data
train_dataset = datas.MNIST(root='./data',
                          train=True,
                          transform=transforms.ToTensor(),
                          download=True)

test_dataset = datas.MNIST(root='./data',
                       train=False,
                       transform=transforms.ToTensor())

In [3]:
# make dataset iterable
batch_size= 100
n_iters=3000
num_epochs= int(n_iters/ (len(train_dataset)/ batch_size))

train_loader= torch.utils.data.DataLoader(dataset=train_dataset,
                                          batch_size=batch_size, 
                                          shuffle=True)

test_loader= torch.utils.data.DataLoader(dataset=test_dataset,
                                        batch_size=batch_size,
                                        shuffle=False)



In [9]:
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, ouput_dim):
        super(LSTMModel, self).__init__()
        
        #hidden dimensions
        self.hidden_dim= hidden_dim
        
        # Number of hidden layers 
        self.layer_dim= layer_dim
        
        #building our RNN
        # batch first =true causes input/output tensors to be of shape
        #(batch_dim, seq_dim, input_dim)
        
        self.lstm= nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
        
        #readout layer
        self.fc=nn.Linear(hidden_dim,output_dim)
        
    def forward(self, x):
        #initialize hidden state with zeros
        # (layer_dim, batch_size, hidden_dim)
        
        h0 = Variable(torch.zeros(self.layer_dim, x.size(0), self.hidden_dim))
        
        c0 = Variable(torch.zeros(self.layer_dim, x.size(0), self.hidden_dim))
        
        #one time step
        out, (hn,cn)= self.lstm(x,( h0,c0))
        
        #index hidden state of last time step
        # out.size() --> 100 ,28,100
        out= self.fc(out[:,-1,:])  # we only want the last dimension 
        #out.size()--> 100,10  
        return out
        


In [10]:
# instantiate model class
input_dim=28
hidden_dim=100
layer_dim=1  # we can incrase the layer to have more hidden layer
output_dim=10 

In [11]:
model=LSTMModel(input_dim, hidden_dim, layer_dim,output_dim)

criterion=  nn.CrossEntropyLoss()

In [12]:
lr=0.00000001
optimizer = torch.optim.SGD(model.parameters(),lr=lr)

In [None]:
seq_dim=28

iter=0

for epoch in range (num_epochs):
    for i ,(images, labels) in enumerate(train_loader):
        images= Variable(images.view(-1,seq_dim,input_dim))
        labels= Variable(labels)
        
        optimizer.zero_grad() #clear gradient w.r.t parameters
        
        outputs= model(images)  # 100 to 10 forward pass to get output
        
        loss= criterion( outputs, labels)
        
        loss.backward()
#         clipped_lr = lr * clip_gradient(model, clip)
#         for p in model.parameters():
#             p.data.add_(-clipped_lr, p.grad.data)
        torch.nn.utils.clip_grad_norm(model.parameters(), 1.0)
        
        optimizer.step()
        
        iter +=1
        
        if iter %500 == 0:
        
        #calculate accuracy
            correct = 0
            total=0
            for images ,labels in test_loader:
                    images =Variable(images.view(-1,seq_dim,input_dim))
                    outputs= model(images)

                    _,predicted= torch.max(outputs.data,1)
                    total += labels.size(0)
                    correct +=(predicted == labels).sum()

            accuracy = 100* correct / total

            print('Iteration: {}. loss: {}. accuracy: {}'.format(iter,loss.item(),accuracy))







Iteration: 500. loss: 2.2964017391204834. accuracy: 4
Iteration: 1000. loss: 2.3134865760803223. accuracy: 4
Iteration: 1500. loss: 2.3141684532165527. accuracy: 4
Iteration: 2000. loss: 2.31162166595459. accuracy: 4
