# Basic Recurrent Neural Networks

## PyTorch vs. NumPy 

In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader

import torchvision
import torchvision.transforms as transform

In [2]:
# Seed PyTorch's global random number generator for reproducibility.
# The return value is bound to `_` so the notebook does not echo it.
seed = 42
_ = torch.manual_seed(seed)

In [3]:
# hyperparameters

# input / output layout: each sample is a sequence of `sequence_length`
# steps with `input_size` features per step, classified into `num_classes`.
sequence_length = 28
input_size = 28
num_classes = 10

# model size
hidden_size = 64

# optimisation
batch_size = 64
num_epochs = 10
learning_rate = 0.01

# print loss / accuracy after every epoch
verbose = True

## Loading the MNIST Dataset


In [4]:
# MNIST train / test splits, stored under ./MNIST/ and downloaded if missing.
# ToTensor() converts every image to a tensor before it reaches the model.
mnist_train = torchvision.datasets.MNIST(
    root='./MNIST/',
    train=True,
    transform=transform.ToTensor(),
    download=True
)

mnist_test = torchvision.datasets.MNIST(
    root='./MNIST/',
    train=False,
    transform=transform.ToTensor(),
    download=True
)

# Training batches are reshuffled every epoch.
dataloader_train = DataLoader(
    dataset=mnist_train,
    batch_size=batch_size,
    shuffle=True)

# Evaluation only aggregates over the full split, so the order is irrelevant;
# keeping it fixed avoids consuming random numbers and makes batches repeatable.
dataloader_test = DataLoader(
    dataset=mnist_test,
    batch_size=batch_size,
    shuffle=False)

## Using PyTorch

#### Creating the Model

In [5]:
class RNN(nn.Module):
    """Sequence classifier: an ``nn.RNN`` layer followed by one linear layer.

    The linear layer receives the RNN outputs of *every* time step, flattened
    into a single vector per sample, rather than only the final hidden state.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the recurrent hidden state.
        num_classes: Number of output scores per sample.
        sequence_length: Number of time steps per sequence; together with
            ``hidden_size`` it fixes the input width of the linear layer.
    """

    def __init__(
            self,
            input_size: int,
            hidden_size: int,
            num_classes: int,
            sequence_length: int):
        super().__init__()

        self.hidden_size = hidden_size
        self.sequence_length = sequence_length

        # batch_first=True: inputs and outputs are laid out as
        # (batch, time step, feature).
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)

        # One weight per (time step, hidden unit) pair and class.
        flattened_width = hidden_size * sequence_length
        self.fc = nn.Linear(flattened_width, num_classes, bias=True)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Compute class scores for a batch of sequences.

        Args:
            x: Input of shape (batch, sequence_length, input_size).

        Returns:
            Tensor of shape (batch, num_classes); no softmax is applied.
        """
        step_outputs, _final_hidden = self.rnn(x)
        # Collapse (time step, hidden) into one axis per sample.
        flattened = step_outputs.flatten(start_dim=1)
        return self.fc(flattened)

#### Training

In [6]:
# Build the model, a plain SGD optimizer and the classification loss.
model = RNN(input_size, hidden_size, num_classes, sequence_length)
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)
loss_fn = torch.nn.CrossEntropyLoss()

N = len(dataloader_train.dataset)
model.train()  # make the training mode explicit
for epoch in range(num_epochs):
    # Running sums over the epoch, kept as plain Python numbers.
    total_loss = 0.0
    correct = 0
    for X, y in dataloader_train:
        optimizer.zero_grad()
        X = X.squeeze(1)  # drop the size-1 axis at dim 1 -> (batch, 28, 28)
        y_pred = model(X)
        loss = loss_fn(y_pred, y)
        # `loss` is already a scalar (mean over the batch); weighting it by
        # the batch size makes the epoch average correct even when the last
        # batch is smaller.
        total_loss += loss.item() * X.size(0)
        correct += (torch.argmax(y_pred, dim=1) == y).sum().item()
        loss.backward()
        optimizer.step()
    if verbose:
        total_loss = total_loss / N
        acc = correct / N
        print(f'epoch: {epoch}\tloss: {total_loss:.04f}\tacc: {acc:.04f}')

epoch: 0	loss: 0.7420	acc: 0.8176
epoch: 1	loss: 0.3507	acc: 0.8980
epoch: 2	loss: 0.2933	acc: 0.9145
epoch: 3	loss: 0.2541	acc: 0.9250
epoch: 4	loss: 0.2212	acc: 0.9342
epoch: 5	loss: 0.1938	acc: 0.9433
epoch: 6	loss: 0.1728	acc: 0.9491
epoch: 7	loss: 0.1571	acc: 0.9541
epoch: 8	loss: 0.1447	acc: 0.9573
epoch: 9	loss: 0.1347	acc: 0.9602


#### Evaluation

In [7]:
# Accuracy of the trained PyTorch model on the held-out test split.
correct = 0
N = len(dataloader_test.dataset)
model.eval()  # inference mode for layers that behave differently in training
with torch.no_grad():  # no gradients needed for evaluation
    for X, y in dataloader_test:
        X = X.squeeze(1)  # drop the size-1 axis at dim 1
        y_pred = model(X)
        # .item() keeps the running count a plain Python int.
        correct += (torch.argmax(y_pred, dim=1) == y).sum().item()
acc = correct / N
print(f'Accuracy on test set: {acc:.04f}')

Accuracy on test set: 0.9648


## Using an RNN written in raw NumPy

In [8]:
import numpy as np
from nn_np.rnn_np import RNN_1FC_ALL

In [9]:
# Seed NumPy's global random number generator for reproducibility.
seed = 42
np.random.seed(seed)

In [10]:
# hyperparameters for the NumPy implementation

# input / output layout
sequence_length = 28
input_size = 28
num_classes = 10

# model size
hidden_size = 64

# optimisation
batch_size = 64
num_epochs = 10
learning_rate = 0.001

# print loss / accuracy after every epoch
verbose = True

#### Creating the Model & Training

In [11]:
# Build the NumPy RNN; note the argument order differs from the PyTorch
# class above (sequence_length comes before num_classes here).
rnn = RNN_1FC_ALL(input_size, hidden_size, sequence_length, num_classes)

# Reuse the arrays already held by the torchvision datasets.
# NOTE(review): `.data` bypasses the ToTensor() transform, so these are
# presumably raw pixel values rather than the tensors the PyTorch model saw —
# confirm that RNN_1FC_ALL handles any scaling itself.
X_train = mnist_train.data.numpy()
Y_train = mnist_train.targets.numpy()
rnn.fit(
    X_train=X_train,
    y_train=Y_train,
    epochs=num_epochs,
    learning_rate=learning_rate,
    batch_size=batch_size,
    verbose=verbose)  # honour the flag from the hyperparameter cell

epoch: 0	loss: 0.5574	acc: 0.8623
epoch: 1	loss: 0.3615	acc: 0.9022
epoch: 2	loss: 0.3020	acc: 0.9170
epoch: 3	loss: 0.2695	acc: 0.9247
epoch: 4	loss: 0.2478	acc: 0.9301
epoch: 5	loss: 0.2313	acc: 0.9338
epoch: 6	loss: 0.2190	acc: 0.9376
epoch: 7	loss: 0.2087	acc: 0.9402
epoch: 8	loss: 0.1997	acc: 0.9425
epoch: 9	loss: 0.1923	acc: 0.9453


#### Evaluation

In [12]:
# Test-split arrays taken directly from the torchvision dataset.
X_test = mnist_test.data.numpy()
Y_test = mnist_test.targets.numpy()
N = X_test.shape[0]

# Cross-entropy loss: mean negative log of the score assigned to the true
# class. Indexing with (row, label) pairs picks one entry per sample.
# Assumes rnn.forward returns per-class probabilities of shape
# (N, num_classes) — TODO confirm against nn_np.rnn_np.
y_pred = rnn.forward(X_test)
loss = -np.mean(np.log(y_pred[np.arange(N), Y_test]))

# Accuracy: fraction of samples whose highest-scoring class is the label.
acc = (np.argmax(y_pred, axis=1) == Y_test).sum() / N

print(f'Accuracy on test set: {acc:.04f}')

Accuracy on test set: 0.9421
