In [1]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.autograd import Variable
import time

In [2]:
# Fetch (downloading into ./data when necessary) the MNIST training and test
# splits, converting every image to a tensor on access. The training split is
# built first, then the test split.
train_dataset, test_dataset = (
    datasets.MNIST(root='./data',
                   train=is_train,
                   transform=transforms.ToTensor(),
                   download=True)
    for is_train in (True, False)
)

In [3]:
# Mini-batch size and number of passes over the training set.
batch_size = 100
epochs = 5

# Training batches are reshuffled every epoch; test batches keep a fixed order.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size,
                                          shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size,
                                          shuffle=False)

# Total number of optimisation steps. len(train_loader) is the number of
# batches per epoch (including a trailing partial batch, if any), so this is
# an exact integer count rather than the float produced by true division.
iterations = epochs * len(train_loader)

In [39]:
class RNNModel(nn.Module):
    """Stacked ReLU RNN followed by a linear read-out of the last time step.

    Args:
        input_dim: number of features per time step.
        hidden_dim: size of the hidden state of each recurrent layer.
        layer_dim: number of stacked recurrent layers.
        output_dim: size of the output vector produced by the linear layer.
    """

    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(RNNModel, self).__init__()
        self.hidden_dim = hidden_dim  # No. of hidden dimensions
        self.layer_dim = layer_dim  # No. of hidden layers
        # batch_first=True: inputs/outputs are laid out as (batch, seq, feature).
        self.rnn = nn.RNN(input_dim, hidden_dim, layer_dim, batch_first=True,
                          nonlinearity='relu')
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Run the RNN over ``x`` and classify from the final time step.

        Args:
            x: tensor of shape (batch, seq_len, input_dim).

        Returns:
            Tensor of shape (batch, output_dim).
        """
        # Initial hidden state -> (layer dim, batch dim, hidden dim).
        # new_zeros allocates on the same device (and with the same dtype) as
        # the input, so the model works on CPU or any GPU instead of
        # unconditionally requiring CUDA.
        h0 = x.new_zeros(self.layer_dim, x.size(0), self.hidden_dim)
        out, _ = self.rnn(x, h0)
        # Only the hidden output at the last time step feeds the classifier.
        return self.fc(out[:, -1, :])

In [80]:
# Model hyper-parameters.
input_dim = 28    # features fed to the RNN at each time step
hidden_dim = 100  # hidden state size of each recurrent layer
layer_dim = 2     # number of stacked recurrent layers
output_dim = 10   # size of the model's output vector
model = RNNModel(input_dim, hidden_dim, layer_dim, output_dim)

# Use the GPU when one is present, otherwise stay on the CPU instead of
# raising from an unconditional model.cuda().
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Loss and optimiser; the optimiser is created after the model has been moved
# so it references the parameters on their final device.
criterion = nn.CrossEntropyLoss()
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)


In [81]:
# Number of parameter tensors registered on the model.
sum(1 for _ in model.parameters())

10

In [82]:
# Keep the model's parameter tensors in a list and print the shape of each,
# in registration order.
param = list(model.parameters())
for iparam in param:
    print(iparam.shape)

torch.Size([100, 28])
torch.Size([100, 100])
torch.Size([100])
torch.Size([100])
torch.Size([100, 100])
torch.Size([100, 100])
torch.Size([100])
torch.Size([100])
torch.Size([10, 100])
torch.Size([10])


In [83]:
# Display the first tensor collected in `param` to inspect its values and device.
param[0]

Parameter containing:
tensor([[ 0.0541, -0.0383, -0.0616,  ..., -0.0385,  0.0623,  0.0840],
        [ 0.0446, -0.0623,  0.0652,  ..., -0.0944, -0.0854,  0.0564],
        [-0.0670,  0.0710, -0.0507,  ...,  0.0920,  0.0675, -0.0261],
        ...,
        [ 0.0919,  0.0441, -0.0931,  ..., -0.0791,  0.0547,  0.0045],
        [ 0.0427, -0.0990, -0.0832,  ..., -0.0203,  0.0399,  0.0945],
        [-0.0271,  0.0473, -0.0990,  ..., -0.0747,  0.0239,  0.0899]],
       device='cuda:0', requires_grad=True)

In [84]:
# Each image is reshaped to (seq_dim, input_dim): seq_dim time steps of
# input_dim features.
seq_dim = 28
itern = 0
# Feed batches to whatever device the model's parameters already live on.
device = next(model.parameters()).device
start_time = time.time()
for epoch in range(epochs):
    for images, labels in train_loader:

        images = images.view(-1, seq_dim, input_dim).to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        outputs = model(images)

        loss = criterion(outputs, labels)
        loss_val = loss.item()  # plain Python float for logging
        loss.backward()

        optimizer.step()

        itern += 1
        # Every 500 training steps, measure accuracy on the full test set.
        if itern % 500 == 0:
            correct = 0
            total = 0
            model.eval()
            # No gradients are needed for evaluation; skipping graph
            # construction saves memory and time.
            with torch.no_grad():
                for test_images, test_labels in test_loader:
                    test_images = test_images.view(-1, seq_dim, input_dim).to(device)
                    test_preds = model(test_images)
                    # Predicted class = index of the largest output score.
                    _, predicted = torch.max(test_preds, 1)
                    predicted = predicted.cpu()
                    total += test_labels.size(0)
                    correct += (predicted == test_labels).sum().item()
            model.train()
            accuracy = 100 * correct / total
            print('Epoch {}, Iter {}, Loss {}, Accuracy {}'.format(epoch, itern, loss_val, accuracy))
print('Training time {}'.format(time.time() - start_time))

Epoch 0, Iter 500, Loss 2.2933075428009033, Accuracy 11.35
Epoch 1, Iter 1000, Loss 2.2921652793884277, Accuracy 11.35
Epoch 2, Iter 1500, Loss 2.293006420135498, Accuracy 13.38
Epoch 3, Iter 2000, Loss 2.2184805870056152, Accuracy 32.34
Epoch 4, Iter 2500, Loss 1.1155391931533813, Accuracy 66.43
Epoch 4, Iter 3000, Loss 0.6878959536552429, Accuracy 76.03
Training time 74.6597044467926
