In [1]:
# Mount Google Drive inside the Colab VM so the project files are reachable under /content/gdrive.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
# Use the absolute path (Drive is mounted at /content/gdrive above) so this cell can be
# re-run from any working directory; the relative form only works when cwd is /content.
%cd /content/gdrive/MyDrive/rnns

/content/gdrive/MyDrive/rnns


In [None]:
# List the working directory to confirm the project files are visible.
!ls

cpts  HiPPO.ipynb  __pycache__	   results	rnn.py		train.py
data  __init__.py  ramen_hippo.py  rnncells.py	terminal.ipynb	utils.py


In [7]:
# Run rnn.py as a script; it prints one size-test line per RNN variant (Simple, GRU, LSTM).
!python rnn.py

Simple RNN size test: passed.
Gru RNN size test: passed.
LSTM RNN size test: passed.


In [None]:
# Launch the training script; its log repeats the size tests, then reports per-epoch
# train/test loss with top-1 and top-5 accuracy and notes when a checkpoint is stored.
!python train.py

Simple RNN size test: passed.
Gru RNN size test: passed.
LSTM RNN size test: passed.
Run 1/5: LSTM RNN initalised with 2 layers and 64 number of hidden neurons.
Epoch:1   Train[Loss:2.2835 Top1 Acc:0.3157  Top5 Acc:0.9027]
Epoch:1   Test[Loss:2.2832   Top1 Acc:0.3149   Top5 Acc:0.9045]
Epoch:2   Train[Loss:2.0713 Top1 Acc:0.5232  Top5 Acc:0.9262]
Epoch:2   Test[Loss:2.0713   Top1 Acc:0.5178   Top5 Acc:0.9211]
Epoch:3   Train[Loss:1.9415 Top1 Acc:0.6398  Top5 Acc:0.8663]
Epoch:3   Test[Loss:1.9385   Top1 Acc:0.6406   Top5 Acc:0.862]
Checkpoint and evaluation at epoch 3 stored


In [None]:
import torch
import torch.nn.functional as F
# Toy batch for checking the accuracy computation by hand:
# `a` holds 4 rows of 10 raw scores (one column per class); `y` holds the 4 target class indices.
a = torch.tensor([[ 0.2191, -1.9177,  0.0654,  1.9643,  0.1032,  0.3341, -0.0420,  0.2283,
         -0.5080,  1.5319],
        [-0.3936, -1.4508, -0.7349,  1.1032, -0.6381,  1.2768, -0.5390, -0.6996,
          0.5231,  0.9895],
        [ 0.8200, -0.2018, -0.6257,  1.4932, -0.8290,  0.4215,  0.1613, -0.3268,
         -0.4961,  0.1400],
        [-0.4569, -1.6752, -0.0989,  0.9211,  0.6620,  0.2545, -0.5454, -0.1586,
          0.0389,  1.3933]])
y = torch.tensor([3, 5, 6, 6])

In [None]:
# Normalise each row of scores into a probability distribution over the classes.
cp = a.softmax(dim=1)
cp

tensor([[0.0637, 0.0075, 0.0546, 0.3649, 0.0567, 0.0715, 0.0491, 0.0643, 0.0308,
         0.2368],
        [0.0483, 0.0168, 0.0343, 0.2157, 0.0378, 0.2566, 0.0417, 0.0356, 0.1208,
         0.1925],
        [0.1658, 0.0597, 0.0391, 0.3251, 0.0319, 0.1113, 0.0858, 0.0527, 0.0445,
         0.0840],
        [0.0453, 0.0134, 0.0649, 0.1798, 0.1388, 0.0923, 0.0415, 0.0611, 0.0744,
         0.2884]])

In [None]:
# Predicted class per row = index of the largest probability in that row.
p = torch.argmax(cp, dim=1)
# A bare assignment displays nothing; echo `p` so the cell actually renders the predictions.
p

tensor([3, 5, 3, 9])

In [None]:
 # Fraction of rows whose predicted class matches the target. Uses a tensor reduction and
 # the real element count instead of Python's built-in sum() and a hard-coded batch size.
 (y == p).sum() / y.numel()

tensor(0.5000)

In [None]:
def test_top1accuracy(out, target, batch_size):
    """
    Calculates top 1 accuracy.
    Input: Output of class probabilities from the neural network (tensor)
    and target class predictions (tensor) of shape number of classes by batch size 
    Output: Top 1 accuracy (float).
    """
    with torch.no_grad():
        pred_class = torch.argmax(out, dim = 1)
        top1_acc = sum(target==pred_class) / batch_size
        return top1_acc

In [None]:
# Run the helper on the toy batch; the result should match the manual computation (0.5).
t = test_top1accuracy(out=cp, target=y, batch_size=4)
t

tensor(0.5000)

In [6]:
import torch
from torch import nn
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
import numpy as np

class LstmCell(nn.Module):
    """
    A single LSTM cell: advances (hidden state, cell state) by one time step.

    The input and the previous hidden state are each projected to
    4 * hidden_size features, which are split into the input, forget, cell
    and output gate pre-activations (in that order). Every gate therefore has
    its own weights; projecting to only hidden_size features and reusing that
    result for all four gates would make the gates identical.

    NOTE: the two Linear layers hold 4 * hidden_size output features, so
    state dicts saved from a variant with hidden_size output features will
    not load into this class.
    """

    NUM_GATES = 4  # input, forget, cell, output

    def __init__(self, input_size, hidden_size):
        super(LstmCell, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size

        # One fused projection per source; the gates are separated in forward().
        self.input2hidden = nn.Linear(input_size, self.NUM_GATES * hidden_size)
        self.hidden2hidden = nn.Linear(hidden_size, self.NUM_GATES * hidden_size)
        self.init_weights_normal()

    def init_weights_normal(self):
        """Initialise every parameter (weights and biases) from N(0, 0.02)."""
        for weight in self.parameters():
            nn.init.normal_(weight, mean=0.0, std=0.02)

    def forward(self, input, hidden_state=None):
        '''
        Inputs: input (torch tensor) of shape [batchsize, input_size]
                hidden_state: optional tuple (hidden, cell), each a torch
                tensor of shape [batchsize, hiddensize]; both default to zeros.
        Output: tuple (hidden, cell) holding the updated hidden state and the
                updated cell state, each of shape [batchsize, hiddensize].
        '''
        if hidden_state is None:
            # Allocate next to the input (same device and dtype) rather than
            # on a module-level device that may differ from the input's.
            zeros = input.new_zeros(input.shape[0], self.hidden_size)
            hidden_state = (zeros, zeros)

        previous_hidden_state, previous_cell_state = hidden_state

        gates = self.input2hidden(input) + self.hidden2hidden(previous_hidden_state)
        input_gate, forget_gate, cell_gate, output_gate = gates.chunk(self.NUM_GATES, dim=-1)

        input_gate_activation = torch.sigmoid(input_gate)
        forget_gate_activation = torch.sigmoid(forget_gate)
        cell_gate_activation = torch.tanh(cell_gate)
        output_gate_activation = torch.sigmoid(output_gate)

        # Keep part of the old cell state and add the gated candidate values.
        updated_cell_state = (
            previous_cell_state * forget_gate_activation
            + input_gate_activation * cell_gate_activation
        )

        # The new hidden state is the output-gated, squashed cell state.
        out = output_gate_activation * torch.tanh(updated_cell_state)

        return (out, updated_cell_state)

class LSTM(nn.Module):
    """
    Stack of LstmCell layers followed by a linear read-out.

    The sequence is processed one time step at a time; at each step the
    hidden state of layer k is the input of layer k + 1. The hidden state of
    the top layer after the last time step is passed through `fc`.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTM, self).__init__()
        if num_layers < 1:
            raise ValueError("num_layers must be at least 1, got {}".format(num_layers))

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.output_size = output_size

        # The first cell consumes the raw input; every later cell consumes the
        # hidden state of the cell below it.
        self.rnn_cell_list = nn.ModuleList()
        self.rnn_cell_list.append(LstmCell(self.input_size, self.hidden_size))
        for _ in range(1, self.num_layers):
            self.rnn_cell_list.append(LstmCell(self.hidden_size, self.hidden_size))

        self.fc = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hx=None):
        """
        Inputs: input (torch tensor) of shape (batch_size, sequence length, input_size)
                hx: optional initial state. Either one tensor of shape
                    (num_layers, batch_size, hidden_size) used for both the
                    hidden and the cell state, or a pair (h0, c0) of such
                    tensors. Defaults to zeros.
        Output: torch tensor of shape (batch_size, output_size)
        Raises: ValueError if the sequence length is 0.
        """
        seq_len = input.size(1)
        if seq_len == 0:
            raise ValueError("LSTM.forward needs at least one time step, got a sequence of length 0")

        if hx is None:
            # Allocate next to the input (same device and dtype).
            h0 = input.new_zeros(self.num_layers, input.shape[0], self.hidden_size)
            c0 = h0
        elif isinstance(hx, (tuple, list)):
            h0, c0 = hx
        else:
            # A single tensor seeds both the hidden and the cell state.
            h0 = c0 = hx

        # Per-layer (hidden, cell) state, updated in place as time advances.
        hidden = [(h0[layer, :, :], c0[layer, :, :]) for layer in range(self.num_layers)]

        for t in range(seq_len):
            layer_input = input[:, t, :]
            for layer in range(self.num_layers):
                hidden[layer] = self.rnn_cell_list[layer](layer_input, hidden[layer])
                # This layer's fresh hidden state feeds the layer above.
                layer_input = hidden[layer][0]

        # Only the top layer's final hidden state is needed, so no per-step
        # outputs are stored. It already has shape (batch_size, hidden_size);
        # a dim-less squeeze() here would drop the batch axis when batch_size is 1.
        out = self.fc(hidden[-1][0])

        return out

def test():
    """
    Smoke-test the LSTM on a dummy batch and check the shape of its output.

    Builds an input of shape (batch size, sequence length, input size) =
    (64, 784, 784), runs one forward pass and asserts that the result has
    shape (64, 10).

    Output: tuple (x, xshape) with the input tensor that was fed to the model
            and the shape of the model output.
    Raises: AssertionError if the output shape is not (batch size, output size).
    """
    batch_size = 64
    seq_len = 28 * 28
    input_size = 28 * 28
    output_size = 10

    model = LSTM(input_size=input_size, hidden_size=128, num_layers=3, output_size=output_size)
    model = model.to(device)

    # One random feature per time step, padded with constant features up to input_size.
    x = torch.randn(batch_size, seq_len)
    x = x.unsqueeze(-1)
    vals = torch.ones(batch_size, seq_len, input_size - 1) * (28 * 28)
    x = torch.cat([x, vals], dim=-1).to(device)

    # Shape check only: skip autograd so the unrolled steps are not kept in memory.
    with torch.no_grad():
        out = model(x)
    xshape = out.shape

    # Without this check the caller's "passed" message would be printed for any shape.
    assert tuple(xshape) == (batch_size, output_size), \
        "unexpected output shape {}".format(tuple(xshape))
    return x, xshape

# Run the smoke test when the cell executes; the message below is only reached
# if test() returned without raising.
testx, xdims = test()
print("Size test: passed.")

Size test: passed.
