## Recurrent Neural Networks

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.autograd import Variable

# Model A: 1 hidden layer, ReLU activation

## Step 1 - Loading the dataset

In [8]:
# MNIST training split, stored under ./data; download=True asks torchvision to
# fetch the files when they are missing.
train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
# MNIST test split read from the same root directory (no download flag here).
test_dataset = datasets.MNIST(
    root='./data',
    train=False,
    transform=transforms.ToTensor(),
)

In [9]:
# Shape of the raw training images. `.data` replaces the deprecated
# `.train_data` alias, which emits a warning on current torchvision.
print(train_dataset.data.size())

torch.Size([60000, 28, 28])


In [10]:
# Number of test labels. `.targets` replaces the deprecated `.test_labels`
# alias, which emits a warning on current torchvision.
print(test_dataset.targets.size())

torch.Size([10000])


In [11]:
# Distinct class labels present in the training split. `.targets` replaces the
# deprecated `.train_labels` alias.
print(np.unique(train_dataset.targets.numpy()))

[0 1 2 3 4 5 6 7 8 9]


In [12]:
# Shape of the raw test images. `.data` replaces the deprecated `.test_data`
# alias, which emits a warning on current torchvision.
print(test_dataset.data.size())

torch.Size([10000, 28, 28])


## Step 2 - Making the dataset iterable

In [13]:
# Training schedule: the epoch count is derived so that about n_iters
# mini-batch updates are performed in total.
batch_size = 100
n_iters = 3000
num_epochs = int(n_iters / (len(train_dataset) / batch_size))

# Only the training batches are shuffled; the test set is read in stored order.
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False)

In [16]:
# Labels of the dataset wrapped by the loader. `.targets` replaces the
# deprecated `.train_labels` alias.
train_loader.dataset.targets


 5
 0
 4
⋮ 
 5
 6
 8
[torch.LongTensor of size 60000]

## Step 3 - Create model class

In [None]:
class RNNModel(nn.Module):
    """Elman RNN with ReLU non-linearity followed by a linear read-out layer.

    Args:
        input_dim: number of features fed to the RNN at each time step.
        hidden_dim: size of the hidden state of every recurrent layer.
        layer_dim: number of stacked recurrent layers.
        output_dim: number of scores produced by the read-out layer.
    """

    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(RNNModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        # batch_first=True: inputs and outputs are laid out as (batch, seq, feature).
        self.rnn = nn.RNN(input_dim, hidden_dim, layer_dim,
                          batch_first=True, nonlinearity='relu')
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Compute class scores from the hidden state at the last time step.

        Args:
            x: tensor of shape (batch, seq_len, input_dim).

        Returns:
            Tensor of shape (batch, output_dim) holding unnormalised scores.
        """
        # Zero initial hidden state, created with the dtype and device of the
        # input so the model keeps working after .cuda()/.to(device)/.double().
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim,
                         dtype=x.dtype, device=x.device)
        out, _ = self.rnn(x, h0)
        # out is (batch, seq_len, hidden_dim); only the final step is classified.
        return self.fc(out[:, -1, :])

## Step 4 - Instantiate Model Class

In [None]:
# Hyperparameters for Model A.
input_dim = 28    # features per time step (one 28-pixel image row)
hidden_dim = 100  # units in each recurrent hidden layer
layer_dim = 1     # number of stacked recurrent layers
output_dim = 10   # classes (MNIST digits 0-9)

In [None]:
# Build Model A and display its layer summary as the cell output.
model = RNNModel(
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    layer_dim=layer_dim,
    output_dim=output_dim,
)
model

## Step 5 - Instantiate Loss Class

In [None]:
# Cross-entropy criterion: applied to raw class scores and integer class labels.
criterion = nn.CrossEntropyLoss()

## Step 6 - Instantiate Optimizer Class

In [None]:
# Plain stochastic gradient descent over all of Model A's parameters.
learning_rate = 0.1
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

In [None]:
# Number of parameter tensors registered on Model A.
len(tuple(model.parameters()))

In [None]:
# Parameters 
# 1 - Input to hidden - A1, B1
# 2 - Hidden to output - A2, B2
# 3 - Hidden to hidden - A3, B3

In [None]:
# Matrix A1: input-to-hidden weights of the recurrent layer (parameter 0)
model.rnn.weight_ih_l0.size()

In [None]:
# Matrix A3: hidden-to-hidden weights of the recurrent layer (parameter 1)
model.rnn.weight_hh_l0.size()

In [None]:
# Bias B1: input-to-hidden bias of the recurrent layer (parameter 2)
model.rnn.bias_ih_l0.size()

In [None]:
# Bias B3: hidden-to-hidden bias of the recurrent layer (parameter 3)
model.rnn.bias_hh_l0.size()

In [None]:
# Matrix A2: hidden-to-output weights of the linear read-out layer (parameter 4)
model.fc.weight.size()

In [None]:
# Bias B2: hidden-to-output bias of the linear read-out layer (parameter 5)
model.fc.bias.size()

## Step 7 - Train Model

In [None]:
## Number of steps to unroll
seq_dim = 28
iteration = 0  # named `iteration` so the builtin `iter` is not shadowed

for epoch in range(num_epochs):
    for images, labels in train_loader:
        # Each image is fed as a sequence of seq_dim steps with input_dim features.
        images = images.view(-1, seq_dim, input_dim)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        iteration += 1

        if iteration % 500 == 0:
            # Evaluate on the whole test set. Separate names keep the training
            # batch intact, and no_grad avoids building autograd graphs.
            correct = 0
            total = 0
            with torch.no_grad():
                for test_images, test_labels in test_loader:
                    test_images = test_images.view(-1, seq_dim, input_dim)
                    predicted = model(test_images).argmax(dim=1)
                    total += test_labels.size(0)
                    # .item() keeps `correct` a Python int so accuracy is a float.
                    correct += (predicted == test_labels).sum().item()
            accuracy = 100 * correct / total
            # loss.item() replaces loss.data[0], which fails on 0-dim tensors.
            print('Iterations: {}. Loss: {}. Accuracy: {}'.format(iteration, loss.item(), accuracy))

# Model B: 2 hidden layers, Tanh activation

In [None]:
# Hyperparameters for Model B: same sizes as Model A, but two stacked layers.
input_dim = 28    # features per time step
hidden_dim = 100  # units in each recurrent hidden layer
layer_dim = 2     # number of stacked recurrent layers
output_dim = 10   # number of output classes

In [None]:
class RNNModel2(nn.Module):
    """Elman RNN with tanh non-linearity followed by a linear read-out layer.

    Args:
        input_dim: number of features fed to the RNN at each time step.
        hidden_dim: size of the hidden state of every recurrent layer.
        layer_dim: number of stacked recurrent layers.
        output_dim: number of scores produced by the read-out layer.
    """

    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(RNNModel2, self).__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        # batch_first=True: inputs and outputs are laid out as (batch, seq, feature).
        self.rnn = nn.RNN(input_dim, hidden_dim, layer_dim,
                          batch_first=True, nonlinearity='tanh')
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Compute class scores from the hidden state at the last time step.

        Args:
            x: tensor of shape (batch, seq_len, input_dim).

        Returns:
            Tensor of shape (batch, output_dim) holding unnormalised scores.
        """
        # The initial hidden state follows the input's dtype and device, so the
        # same code runs on CPU and GPU without a separate .cuda() variant.
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim,
                         dtype=x.dtype, device=x.device)
        out, _ = self.rnn(x, h0)
        # out is (batch, seq_len, hidden_dim); only the final step is classified.
        return self.fc(out[:, -1, :])

In [None]:
# Build Model B and display its layer summary as the cell output.
model2 = RNNModel2(
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    layer_dim=layer_dim,
    output_dim=output_dim,
)
model2

In [None]:
## GPU
# model2.cuda()

In [None]:
# Model B gets its own loss object and SGD optimizer; the learning rate is the
# value already defined for Model A.
criterion2 = nn.CrossEntropyLoss()
optimizer2 = torch.optim.SGD(params=model2.parameters(), lr=learning_rate)

In [None]:
# Number of parameter tensors registered on Model B.
len(tuple(model2.parameters()))

In [None]:
# Print the shape of every parameter tensor of Model B, in registration order.
# Iterating the generator directly avoids rebuilding the list on each step.
for param in model2.parameters():
    print(param.size())

In [None]:
## Number of steps to unroll
seq_dim = 28
iteration = 0  # named `iteration` so the builtin `iter` is not shadowed

for epoch in range(num_epochs):
    for images, labels in train_loader:
        # Each image is fed as a sequence of seq_dim steps with input_dim features.
        # To train on a GPU, move the model and each batch to the same device,
        # e.g. images.cuda() and labels.cuda().
        images = images.view(-1, seq_dim, input_dim)

        optimizer2.zero_grad()
        outputs = model2(images)
        loss = criterion2(outputs, labels)
        loss.backward()
        optimizer2.step()
        iteration += 1

        if iteration % 500 == 0:
            # Evaluate on the whole test set. Separate names keep the training
            # batch intact, and no_grad avoids building autograd graphs.
            correct = 0
            total = 0
            with torch.no_grad():
                for test_images, test_labels in test_loader:
                    test_images = test_images.view(-1, seq_dim, input_dim)
                    predicted = model2(test_images).argmax(dim=1)
                    total += test_labels.size(0)
                    # .item() keeps `correct` a Python int so accuracy is a float.
                    correct += (predicted == test_labels).sum().item()
            accuracy = 100 * correct / total
            # loss.item() replaces loss.data[0], which fails on 0-dim tensors.
            print('Iterations: {}. Loss: {}. Accuracy: {}'.format(iteration, loss.item(), accuracy))