## Recurrent Neural Networks

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.autograd import Variable

# Model A = 1 hidden Layer, ReLU activation

## Step 1 - Loading the dataset

In [2]:
train_dataset = datasets.MNIST(root='./data',
                              train = True,
                              transform=transforms.ToTensor(),
                              download = True)
test_dataset = datasets.MNIST(root='./data',
                              train = False,
                              transform=transforms.ToTensor())

In [3]:
print(train_dataset.train_data.size())

torch.Size([60000, 28, 28])


In [4]:
print(test_dataset.test_labels.size())

torch.Size([10000])


In [5]:
print(np.unique(train_dataset.train_labels.numpy()))

[0 1 2 3 4 5 6 7 8 9]


In [6]:
print(test_dataset.test_data.size())

torch.Size([10000, 28, 28])


## Step 2 - Making the dataset iterable

In [7]:
batch_size = 100
n_iters = 3000
num_epochs = n_iters/(len(train_dataset) / batch_size)
num_epochs = int(num_epochs)

train_loader = torch.utils.data.DataLoader(dataset = train_dataset,
                                          batch_size = batch_size,
                                          shuffle = True)
test_loader = torch.utils.data.DataLoader(dataset = test_dataset,
                                         batch_size = batch_size,
                                         shuffle = False)

## Step 3 - Create model class

In [8]:
class RNNModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(RNNModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        self.rnn = nn.RNN(input_dim, hidden_dim, layer_dim, batch_first = True, nonlinearity='relu')
        self.fc = nn.Linear(hidden_dim, output_dim)
    def forward(self, x):
        h0 = Variable(torch.zeros(self.layer_dim, x.size(0), self.hidden_dim))
        out, hn = self.rnn(x, h0)
        out = self.fc(out[:,-1,:])
        return out

## Step 4 = Instantiate Model Class

In [9]:
# 28 time steps
# 1 hidden unit
# x layer dim
# 10 classes (MNIST digits)
input_dim = 28
hidden_dim = 100
layer_dim = 1
output_dim = 10

In [10]:
model = RNNModel(input_dim, hidden_dim, layer_dim, output_dim)
model

RNNModel (
  (rnn): RNN(28, 100, batch_first=True)
  (fc): Linear (100 -> 10)
)

## Step 5 - Instantiate Loss Class

In [11]:
criterion = nn.CrossEntropyLoss()

## Step 6 - Instantiate Optimizer Class

In [12]:
learning_rate = 0.1
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)

In [13]:
len(list(model.parameters()))

6

In [14]:
# Parameters 
# 1 - Input to hidden - A1, B1
# 2 - Hidden to output - A2, B2
# 3 - Hidden to hidden - A3, B3

In [15]:
# Matrix A1
list(model.parameters())[0].size()

torch.Size([100, 28])

In [16]:
# Hidden A3
list(model.parameters())[1].size()

torch.Size([100, 100])

In [17]:
# Bias B1
list(model.parameters())[2].size()

torch.Size([100])

In [18]:
# Bias B3
list(model.parameters())[3].size()

torch.Size([100])

In [19]:
# Hidden --> Output A2
list(model.parameters())[4].size()

torch.Size([10, 100])

In [20]:
# Hidden --> Output Bias B2
list(model.parameters())[5].size()

torch.Size([10])

# Step 7 - Train Model

In [21]:
## Number of steps to unroll
seq_dim = 28
iter = 0

for epoch in range(num_epochs):
    for i, (images,labels) in enumerate(train_loader):
        images = Variable(images.view(-1, seq_dim, input_dim))
        labels = Variable(labels)
        
        optimizer.zero_grad() 
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        iter +=1
        
        if iter % 500 ==0:
            correct = 0
            total = 0
            for images, labels in test_loader:
                images = Variable(images.view(-1, seq_dim, input_dim))
                outputs = model(images)
                _, predicted = torch.max(outputs.data,1)
                total += labels.size(0)
                correct += (predicted == labels).sum()
            accuracy = 100 * correct/total
            print('Iterations: {}. Loss: {}. Accuracy: {}'.format(iter,loss.data[0],accuracy))

Iterations: 500. Loss: 1.2017903327941895. Accuracy: 56.82
Iterations: 1000. Loss: 0.8404896259307861. Accuracy: 63.28
Iterations: 1500. Loss: nan. Accuracy: 9.8
Iterations: 2000. Loss: nan. Accuracy: 9.8
Iterations: 2500. Loss: nan. Accuracy: 9.8
Iterations: 3000. Loss: nan. Accuracy: 9.8


# Model B = 2 hidden Layer, Tanh activation

In [22]:
input_dim = 28
hidden_dim = 100
layer_dim = 2
output_dim = 10

In [36]:
class RNNModel2(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(RNNModel2, self).__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        self.rnn = nn.RNN(input_dim, hidden_dim, layer_dim, batch_first = True, nonlinearity='tanh')
        self.fc = nn.Linear(hidden_dim, output_dim)
    def forward(self, x):
        h0 = Variable(torch.zeros(self.layer_dim, x.size(0), self.hidden_dim))
        ## GPU
        #h0 = Variable(torch.zeros(self.layer_dim, x.size(0), self.hidden_dim)).cuda()

        out, hn = self.rnn(x, h0)
        out = self.fc(out[:,-1,:])
        return out

In [31]:
model2 = RNNModel2(input_dim, hidden_dim, layer_dim, output_dim)
model2

RNNModel2 (
  (rnn): RNN(28, 100, num_layers=2, batch_first=True)
  (fc): Linear (100 -> 10)
)

In [37]:
## GPU
# model2.cuda()

In [32]:
criterion2 = nn.CrossEntropyLoss()
optimizer2 = torch.optim.SGD(model2.parameters(), lr = learning_rate)

In [33]:
len(list(model2.parameters()))

10

In [34]:
for i in range(len(list(model2.parameters()))):
    print(list(model2.parameters())[i].size())

torch.Size([100, 28])
torch.Size([100, 100])
torch.Size([100])
torch.Size([100])
torch.Size([100, 100])
torch.Size([100, 100])
torch.Size([100])
torch.Size([100])
torch.Size([10, 100])
torch.Size([10])


In [38]:
## Number of steps to unroll
seq_dim = 28
iter = 0

for epoch in range(num_epochs):
    for i, (images,labels) in enumerate(train_loader):
        images = Variable(images.view(-1, seq_dim, input_dim))
        labels = Variable(labels)
        ## GPU
        #images = Variable(images.view(-1, seq_dim, input_dim)).cuda()
        #labels = Variable(labels).cuda()
        
        
        optimizer2.zero_grad() 
        outputs = model2(images)
        loss = criterion2(outputs, labels)
        loss.backward()
        optimizer2.step()
        iter +=1
        
        if iter % 500 ==0:
            correct = 0
            total = 0
            for images, labels in test_loader:
                images = Variable(images.view(-1, seq_dim, input_dim))
                # GPU
                # images = Variable(images.view(-1, seq_dim, input_dim)).cuda()
                outputs = model2(images)
                _, predicted = torch.max(outputs.data,1)
                total += labels.size(0)
                correct += (predicted == labels).sum()
            accuracy = 100 * correct/total
            print('Iterations: {}. Loss: {}. Accuracy: {}'.format(iter,loss.data[0],accuracy))

Iterations: 500. Loss: 0.15840470790863037. Accuracy: 95.41
Iterations: 1000. Loss: 0.12525184452533722. Accuracy: 95.66
Iterations: 1500. Loss: 0.1550762802362442. Accuracy: 96.12
Iterations: 2000. Loss: 0.15935739874839783. Accuracy: 96.15
Iterations: 2500. Loss: 0.010966230183839798. Accuracy: 97.3
Iterations: 3000. Loss: 0.05185171961784363. Accuracy: 97.06
