# Basic Recurrent Neural Networks

## PyTorch vs. NumPy 

In [11]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader

import torchvision
import torchvision.transforms as transform
from torchvision.transforms import Compose

In [12]:
_ = torch.manual_seed(42)

In [13]:
# hyperparameters
batch_size = 64
sequence_length = 28
input_size = 28
hidden_size = 64
num_layers = 1
num_classes = 10
num_epochs = 10
learning_rate = 0.01
verbose = True

## Loading the MNIST Dataset


In [14]:
mnist_train  = torchvision.datasets.MNIST(
    root='./MNIST/',
    train=True,
    transform=transform.ToTensor(),
    download=True
)

mnist_test  = torchvision.datasets.MNIST(
    root='./MNIST/',
    train=False,
    transform=transform.ToTensor(),
    download=True
)

dataloader_train = DataLoader(
    dataset=mnist_train,
    batch_size=batch_size,
    shuffle=False)

dataloader_test = DataLoader(
    dataset=mnist_test,
    batch_size=batch_size,
    shuffle=True)

In [15]:
print(len(dataloader_train.dataset))

60000


## Creating the RNN in PyTorch

In [16]:
class RNN(nn.Module):
    def __init__(
            self,
            input_size: int,
            hidden_size: int,
            num_layers: int,
            num_classes: int,
            sequence_length: int):
        super().__init__()

        self.hidden_size = hidden_size
        self.sequence_length = sequence_length

        self.rnn = nn.RNN(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True)

        self.fc = nn.Linear(
            in_features=hidden_size * sequence_length,
            out_features=num_classes,
            bias=True)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x, _ = self.rnn(x)
        x = x.flatten(start_dim=1)
        x = self.fc(x)
        return x

In [17]:
model = RNN(input_size, hidden_size, num_layers, num_classes, sequence_length)
x = torch.rand(64, 1, 28, 28)
print(x.shape)

torch.Size([64, 1, 28, 28])


In [18]:
x = x.view(x.size(0), 28, 28)
print(x.shape)

y = model(x)
print(y.shape)

torch.Size([64, 28, 28])
torch.Size([64, 10])


In [None]:
model = RNN(input_size, hidden_size, num_layers, num_classes, sequence_length)
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)
loss_fn = torch.nn.CrossEntropyLoss()


N = len(dataloader_train.dataset)
for epoch in range(num_epochs):
    total_loss = 0
    correct = 0
    for X, y in dataloader_train:
        optimizer.zero_grad() 
        X = X.squeeze(1)
        y_pred = model(X)
        loss = loss_fn(y_pred, y)
        total_loss += loss.sum().item() * X.size(0)
        correct += (torch.argmax(y_pred, dim=1) == y).sum()
        loss.backward()
        optimizer.step()
    if verbose: 
        total_loss = total_loss / N
        acc = correct / N
        print(f'epoch: {epoch}\tloss: {total_loss:.04f}\tacc: {acc:.04f}')

epoch: 0	loss: 0.7613	acc: 0.8180
epoch: 1	loss: 0.3531	acc: 0.8974
epoch: 2	loss: 0.2999	acc: 0.9119
epoch: 3	loss: 0.2641	acc: 0.9219
epoch: 4	loss: 0.2348	acc: 0.9306
epoch: 5	loss: 0.2097	acc: 0.9386
epoch: 6	loss: 0.1882	acc: 0.9446
epoch: 7	loss: 0.1703	acc: 0.9498
epoch: 8	loss: 0.1555	acc: 0.9539
epoch: 9	loss: 0.1432	acc: 0.9578


In [23]:
correct = 0
N = len(dataloader_test.dataset)
with torch.no_grad():
    for X, y in dataloader_test:
        X = X.squeeze(1) 
        y_pred = model(X)
        correct += (torch.argmax(y_pred, dim=1) == y).sum()
acc = correct / N
print(f'Accuracy on test set: {acc.item():.04f}')

Accuracy on test set: 0.9622
