In [1]:
import torch
import torch.nn as nn
from torchvision import datasets, models, transforms
from matplotlib import pyplot as plt
from torch.utils.tensorboard import SummaryWriter
import torchvision
from tqdm import tqdm

In [2]:
device = 'gpu' if torch.cuda.is_available() else 'cpu'
device

'cpu'

In [3]:
transform = transforms.Compose([transforms.ToTensor()])

train_data = datasets.MNIST(root='../data', download=False, train=True, transform=transform)
test_data = datasets.MNIST(root='../data', download=False, train=False, transform=transform)

train_loader = torch.utils.data.DataLoader(train_data, batch_size=40)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=40)

  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [4]:
rnn = nn.RNN(10,20,2)
inp = torch.randn(5,2,10)
hidden = torch.randn(2,2,20)
output = rnn(inp, hidden)

In [5]:
print(len(train_data), len(test_data))

60000 10000


In [6]:
input_size = 28
hidden_size = 256
num_layers = 2
num_classes = 10
learning_rate = 0.005
batch_size = 64
sequence_length = 28
num_epochs = 3

In [7]:
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size * sequence_length, num_classes)
        
    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        out, _ = self.rnn(x, h0)
        out = out.reshape(out.shape[0], -1)
        
        out = self.fc(out)
        return out

In [8]:
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size * sequence_length, num_classes)
        
    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        out, _ = self.lstm(x, (h0, c0))
        out = out.reshape(out.shape[0], -1)
        
        out = self.fc(out)
        return out

In [9]:
class GRU(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size * sequence_length, num_classes)
        
    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        out, _ = self.gru(x, h0)
        out = out.reshape(out.shape[0], -1)
        out = self.fc(out)
        return out

In [10]:
model_rnn = RNN(input_size, hidden_size, num_layers, num_classes)
model_lstm = LSTM(input_size, hidden_size, num_layers, num_classes)
model_gru = GRU(input_size, hidden_size, num_layers, num_classes)

In [11]:
criterion = nn.CrossEntropyLoss()

In [12]:
def train_model(model, model_name):
    print(f"Training {model_name} model")
    optimizer = torch.optim.Adagrad(model.parameters(), lr=learning_rate)
    writer = SummaryWriter(f"runs/RNN/BatchSize_{batch_size}_LR_{learning_rate}_model_{model_name}")
    accuracies = []
    step = 0
    for epoch in range(num_epochs):
        train_loss = 0.
        losses = []
        for batch_idx, (data, targets) in enumerate(tqdm(train_loader)):
            data = data.cuda().squeeze(1) if device == 'gpu' else data.squeeze(1)
            targets = targets.cuda() if device == 'gpu' else targets

            output = model(data)
            loss = criterion(output, targets)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += loss.item() * data.size(0)

            _, predictions = output.max(1)
            num_correct = (predictions == targets).sum()
            running_train_acc = float(num_correct) / float(data.shape[0])
            accuracies.append(running_train_acc)
            losses.append(loss.item())
            train_loss += loss.item() * data.size(0)
            features = data.reshape(data.shape[0], -1)
            img_grid = torchvision.utils.make_grid(data.unsqueeze(1))
            writer.add_image("mnist_images", img_grid)
#             writer.add_histogram("hidden_0", model.rnn.weight_hh_l0)
#             writer.add_histogram("hidden_1", model.rnn.weight_hh_l1)
#             writer.add_histogram("input_hidden_0", model.rnn.weight_ih_l0)
#             writer.add_histogram("input_hidden_1", model.rnn.weight_ih_l1)
            writer.add_scalar("Training loss", loss, global_step=step)
            writer.add_scalar(
                "Training Accuracy", running_train_acc, global_step=step
            )
        train_loss = train_loss / len(train_loader.dataset)
        print("Epoch {} : Training loss : {:.6f}".format(epoch+1, train_loss))
        step += 1
    return train_loss
    

In [13]:
def check_accuracy(model):
    num_correct = 0
    num_samples = 0
    model.eval()
    
    with torch.no_grad():
        for data, targets in tqdm(test_loader):
            data = data.cuda().squeeze(1) if device == 'gpu' else data.squeeze(1)
            targets = targets.cuda() if device == 'gpu' else targets
            
            output = model(data)
            _, pred = output.max(1)
            num_correct += (pred == targets).sum()
            num_samples += pred.size(0)
    model.train()
    return num_correct / num_samples

In [14]:
train_loss_rnn = train_model(model_rnn, 'RNN')
train_loss_gru = train_model(model_gru, 'GRU')
train_loss_lstm = train_model(model_lstm, 'LSTM')

Training RNN model


100%|██████████████████████████████████████████████████████████████████████████████| 1500/1500 [00:53<00:00, 27.81it/s]


Epoch 1 : Training loss : 0.567754


100%|██████████████████████████████████████████████████████████████████████████████| 1500/1500 [00:56<00:00, 26.65it/s]


Epoch 2 : Training loss : 0.266789


100%|██████████████████████████████████████████████████████████████████████████████| 1500/1500 [00:56<00:00, 26.57it/s]


Epoch 3 : Training loss : 0.192824
Training GRU model


100%|██████████████████████████████████████████████████████████████████████████████| 1500/1500 [02:09<00:00, 11.61it/s]


Epoch 1 : Training loss : 0.367599


100%|██████████████████████████████████████████████████████████████████████████████| 1500/1500 [02:11<00:00, 11.39it/s]


Epoch 2 : Training loss : 0.143181


100%|██████████████████████████████████████████████████████████████████████████████| 1500/1500 [02:09<00:00, 11.54it/s]


Epoch 3 : Training loss : 0.102261
Training LSTM model


100%|██████████████████████████████████████████████████████████████████████████████| 1500/1500 [03:17<00:00,  7.58it/s]


Epoch 1 : Training loss : 0.408148


100%|██████████████████████████████████████████████████████████████████████████████| 1500/1500 [03:14<00:00,  7.70it/s]


Epoch 2 : Training loss : 0.155439


100%|██████████████████████████████████████████████████████████████████████████████| 1500/1500 [03:14<00:00,  7.71it/s]

Epoch 3 : Training loss : 0.113242





In [15]:
acc_rnn = check_accuracy(model_rnn)
acc_gru = check_accuracy(model_gru)
acc_lstm = check_accuracy(model_lstm)

100%|████████████████████████████████████████████████████████████████████████████████| 250/250 [00:02<00:00, 86.63it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 250/250 [00:07<00:00, 33.98it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 250/250 [00:09<00:00, 25.34it/s]


In [16]:
print(f"RNN Accuracy : {acc_rnn}\nGRU Accuracy : {acc_gru}\nLSTM Accuracy : {acc_lstm}")

RNN Accuracy : 0.972000002861023
GRU Accuracy : 0.9850000143051147
LSTM Accuracy : 0.9818999767303467
