# Lab 7_3 RNN, GRU, LSTM tutorials - Kaggle

In [None]:
# import libraries
import torch
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

In [None]:
# Hyperparameters shared by every model and by the training cell.

# Optimisation settings.
num_epochs = 5
learning_rate = 0.001
batch_size = 64

# Input layout: each sample is read as `sequence_length` steps
# carrying `input_size` features per step.
sequence_length = 28
input_size = 28

# Network dimensions.
hidden_size = 256
num_layers = 2
num_classes = 10

In [None]:
class SimpleRNN(nn.Module):
    """Stacked vanilla RNN classifier that scores the outputs of all time steps.

    The recurrent outputs of every time step are flattened and passed through
    a single linear layer, so inputs must contain exactly `sequence_length`
    steps.

    Args:
        input_size: Number of features per time step.
        num_layers: Number of stacked recurrent layers.
        hidden_size: Width of each layer's hidden state.
        sequence_length: Number of time steps per sample; fixes the linear
            layer's input width (`hidden_size * sequence_length`).
        num_classes: Number of class scores produced per sample.
    """

    def __init__(self, input_size=input_size, num_layers=num_layers, hidden_size=hidden_size, sequence_length=sequence_length, num_classes=num_classes):
        super().__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc1 = nn.Linear(hidden_size * sequence_length, num_classes)

    def forward(self, x):
        """Return class scores of shape (batch, num_classes).

        Args:
            x: Tensor of shape (batch, sequence_length, input_size).
        """
        # Create the initial hidden state on the input's own device and dtype
        # so the model does not rely on a module-level `device` variable.
        h0 = torch.zeros(
            self.num_layers,
            x.size(0),
            self.hidden_size,
            device=x.device,
            dtype=x.dtype,
        )
        out, _ = self.rnn(x, h0)
        # Flatten (batch, seq, hidden) -> (batch, seq * hidden) for the classifier.
        out = out.reshape(out.size(0), -1)
        return self.fc1(out)

class SimpleGRU(nn.Module):
    """Stacked GRU classifier that scores the outputs of all time steps.

    The recurrent outputs of every time step are flattened and passed through
    a single linear layer, so inputs must contain exactly `sequence_length`
    steps.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of each layer's hidden state.
        num_layers: Number of stacked recurrent layers.
        num_classes: Number of class scores produced per sample.
        sequence_length: Number of time steps per sample; fixes the linear
            layer's input width (`hidden_size * sequence_length`).
    """

    def __init__(self, input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, num_classes=num_classes, sequence_length=sequence_length):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        self.fc1 = nn.Linear(hidden_size * sequence_length, num_classes)

    def forward(self, x):
        """Return class scores of shape (batch, num_classes).

        Args:
            x: Tensor of shape (batch, sequence_length, input_size).
        """
        # Create the initial hidden state on the input's own device and dtype
        # so the model does not rely on a module-level `device` variable.
        h0 = torch.zeros(
            self.num_layers,
            x.size(0),
            self.hidden_size,
            device=x.device,
            dtype=x.dtype,
        )
        out, _ = self.gru(x, h0)
        # Flatten (batch, seq, hidden) -> (batch, seq * hidden) for the classifier.
        out = out.reshape(out.size(0), -1)
        return self.fc1(out)

class SimpleLSTM(nn.Module):
    """Stacked LSTM classifier that scores the outputs of all time steps.

    The recurrent outputs of every time step are flattened and passed through
    a single linear layer, so inputs must contain exactly `sequence_length`
    steps.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of each layer's hidden and cell state.
        num_layers: Number of stacked recurrent layers.
        sequence_length: Number of time steps per sample; fixes the linear
            layer's input width (`hidden_size * sequence_length`).
        num_classes: Number of class scores produced per sample.
    """

    def __init__(self, input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, sequence_length=sequence_length, num_classes=num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc1 = nn.Linear(hidden_size * sequence_length, num_classes)

    def forward(self, x):
        """Return class scores of shape (batch, num_classes).

        Args:
            x: Tensor of shape (batch, sequence_length, input_size).
        """
        # Create the initial hidden and cell states on the input's own device
        # and dtype so the model does not rely on a module-level `device`.
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
        c0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
        out, _ = self.lstm(x, (h0, c0))
        # Flatten (batch, seq, hidden) -> (batch, seq * hidden) for the classifier.
        out = out.reshape(out.size(0), -1)
        return self.fc1(out)

In [None]:
# Prefer the GPU when one is visible to PyTorch; otherwise stay on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# One instance of each architecture (default hyperparameters), moved to `device`.
models = [
    architecture().to(device=device)
    for architecture in (SimpleRNN, SimpleGRU, SimpleLSTM)
]

In [None]:
!pip install -q kaggle
# redirect: https://www.kaggle.com/code/fanbyprinciple/learning-pytorch-3-coding-an-rnn-gru-lstm
# dataset link: https://www.kaggle.com/datasets/oddrationale/mnist-in-csv/data
!kaggle datasets download -d oddrationale/mnist-in-csv -p /content/mnist_data
!unzip /content/mnist_data/mnist-in-csv.zip -d /content/mnist_data

Dataset URL: https://www.kaggle.com/datasets/oddrationale/mnist-in-csv
License(s): CC0-1.0
mnist-in-csv.zip: Skipping, found more recently modified local copy (use --force to force download)
Archive:  /content/mnist_data/mnist-in-csv.zip
replace /content/mnist_data/mnist_test.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: n
replace /content/mnist_data/mnist_train.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: n


In [None]:
import pandas as pd
import numpy as np

class MnistDataset(Dataset):
    """In-memory dataset of 28x28 images read from a CSV file.

    The CSV must have a header row; the first column holds the label and the
    remaining 784 columns hold the pixel values of one image per row.

    Args:
        datapath: Path of the CSV file to load.
    """

    def __init__(self, datapath):
        # Zero-argument super() actually runs the parent initialiser; the
        # one-argument form `super(MnistDataset)` only builds an unbound proxy.
        super().__init__()
        df = pd.read_csv(datapath, dtype=np.float32)

        # Pixels: every column after the label, viewed as (N, 28, 28) so each
        # image row is one time step for the recurrent models.
        pixels = torch.from_numpy(df.iloc[:, 1:].values)
        self.x = pixels.reshape(pixels.size(0), 28, 28).float()

        # Labels: first column, converted to integer class indices.
        self.y = torch.from_numpy(df.iloc[:, 0].values).long()

        self.n_samples = df.shape[0]

    def __getitem__(self, index):
        """Return the (image, label) pair stored at `index`."""
        return self.x[index], self.y[index]

    def __len__(self):
        """Return the number of rows read from the CSV."""
        return self.n_samples

In [None]:
# Load both splits from the CSV files into memory.
train_dataset = MnistDataset(datapath="./mnist_data/mnist_train.csv")
test_dataset = MnistDataset(datapath="./mnist_data/mnist_test.csv")

# Inspect the first training sample to confirm the image and label shapes.
x, y = train_dataset[0]
x.shape, y.shape

(torch.Size([28, 28]), torch.Size([]))

In [None]:
# Loss applied to the models' raw class scores during training.
loss_criterion = nn.CrossEntropyLoss()

# Mini-batch iterators; both splits are reshuffled on every pass.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

In [None]:
def check_accuracy(dlr, model):
    """Print how many samples in `dlr` the model classifies correctly.

    The model is put into eval mode for the measurement and afterwards
    returned to whichever mode (train or eval) it was in on entry.

    Args:
        dlr: Iterable yielding `(inputs, labels)` tensor pairs, e.g. a
            DataLoader.
        model: Module mapping a batch of inputs to per-class scores of shape
            (batch, num_classes).
    """
    # Evaluate on the device the model's weights live on instead of relying
    # on a module-level `device` variable; fall back to the CPU for a model
    # without parameters.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    total_correct = 0
    total_samples = 0
    was_training = model.training
    model.eval()

    try:
        with torch.no_grad():
            for x, y in dlr:
                x = x.to(device=target_device)
                y = y.to(device=target_device)

                score = model(x)
                _, predictions = score.max(1)

                # .item() keeps the counters as Python ints so the final
                # ratio is computed in double precision.
                total_correct += (y == predictions).sum().item()
                total_samples += predictions.size(0)
    finally:
        # Restore the caller's mode even if evaluation raised.
        model.train(was_training)

    # An empty loader reports 0.0 instead of dividing by zero.
    accuracy = total_correct / total_samples * 100 if total_samples else 0.0
    print(f"total_correct(True_Positives): {total_correct} out of total_samples(N): {total_samples} =>  ACCURACY : {accuracy}")

In [None]:
# Random batch shaped like the real data, used only to sanity-check each
# model's output shape before training.
rand_inp = torch.randn(batch_size, sequence_length, input_size).to(device=device)
for model in models:
    with torch.no_grad():
        y = model(rand_inp)
        print(model, "Output shape:",y.shape)

    optimizer = optim.Adam(model.parameters(), lr = learning_rate)
    current_loss = 0
    # Make the training mode explicit rather than relying on the state left
    # behind by earlier cells.
    model.train()

    for epoch in range(num_epochs):
        loss_sum = 0.0
        samples_seen = 0

        for data, target in train_dataloader:
            data = data.to(device=device)
            target = target.to(device=device)

            score = model(data)
            loss = loss_criterion(score, target)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # The criterion returns the batch mean, so weight it by the batch
            # size to get an exact per-sample average for the epoch. .item()
            # yields a plain float, so no graph-attached tensor is kept alive.
            loss_sum += loss.item() * target.size(0)
            samples_seen += target.size(0)

        # Report the epoch-average loss; the last mini-batch's loss alone is
        # too noisy to show the training trend.
        current_loss = loss_sum / samples_seen if samples_seen else float("nan")
        print(f"epoch:{epoch+1}, loss: {current_loss}")
    print("Train_Data:", end=' '); check_accuracy(train_dataloader, model)
    print("Test_Data:", end=' '); check_accuracy(test_dataloader, model)
    print("---------------------------------------------------------------------------\n")

SimpleRNN(
  (rnn): RNN(28, 256, num_layers=2, batch_first=True)
  (fc1): Linear(in_features=7168, out_features=10, bias=True)
) Output shape: torch.Size([64, 10])
epoch:1, loss: 0.12200575321912766
epoch:2, loss: 0.7134461402893066
epoch:3, loss: 0.001105261966586113
epoch:4, loss: 0.0045845480635762215
epoch:5, loss: 1.0893514156341553
Train_Data: total_correct(True_Positives): 57811 out of total_samples(N): 60000 =>  ACCURACY : 96.35166525840759
Test_Data: total_correct(True_Positives): 9568 out of total_samples(N): 10000 =>  ACCURACY : 95.67999839782715
---------------------------------------------------------------------------

SimpleGRU(
  (gru): GRU(28, 256, num_layers=2, batch_first=True)
  (fc1): Linear(in_features=7168, out_features=10, bias=True)
) Output shape: torch.Size([64, 10])
epoch:1, loss: 0.028496546670794487
epoch:2, loss: 0.00020632590167224407
epoch:3, loss: 0.0005049546016380191
epoch:4, loss: 0.018827950581908226
epoch:5, loss: 0.010415656492114067
Train_Data: 

---