<a href="https://colab.research.google.com/github/martinpius/PYTORCH/blob/main/Simple_RNN_with_GRU_Architecture_in_Pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [43]:
from google.colab import drive
drive.mount("/content/drive", force_remount = True)
try:
  COLAB = True
  import torch 
  print(f"You are using Google CoLaB with torch version:\t{torch.__version__}")
except Exception as e:
  print(f"{type(e)}: {e}\n>>>please load your drive...")
  COLAB = False
#Assigning GPU device whenever available:
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')
def time_fmt(t: float = 123.987)->float:
  h = int(t / (60 * 60))
  m = int(t % (60 * 60) / 60)
  s = int(t % 60)
  return f"{h}: {m:>02}: {s:>05.2f}"
print(f">>>time testing\tplease wait...\n>>>time elapse:\t{time_fmt()}")

Mounted at /content/drive
You are using Google CoLaB with torch version:	1.8.1+cu101
>>>time testing	please wait...
>>>time elapse:	0: 02: 03.00


In [44]:
#In this notebook we are going to train a simple RNN model with the GRU- architecture: We use MNIST images as a 
#sequence data by set time steps to 28, and features size for each sample as 28. Since the MNIST images has the
#dimension of (1,28,28), we are going to squeeze the channel dimension to obtain the input_shape of (batch, 28, 28)

In [45]:
#We start by loading all necessary modules from torch and support libraries:
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as datasets
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision.transforms import transforms
from tqdm import tqdm
import time, datetime, sys,os


In [46]:
#Hyperparameters for our model:
batch_size = 64
input_size = 28
sequence_length = 28
learning_rate = 1e-3
num_layers = 2
num_classes = 10
hidden_size = 256
epochs = 10

In [47]:
#We now defining our model class as follow:
class RNN_GRU(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN_GRU, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size * sequence_length, num_classes)

    def forward(self, x):
        # Set initial hidden and cell states
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)

        # Forward propagate GRU
        out, _ = self.gru(x, h0)
        out = out.reshape(out.shape[0], -1)

        # Decode the hidden state of the last time step
        out = self.fc(out)
        return out



In [48]:
#Instantiating the model class:
model = RNN_GRU(input_size,hidden_size,num_layers,num_classes).to(device)

In [49]:
#Downloading the data:
train_data = datasets.MNIST(root = "train_dataset/", train = True, transform = transforms.ToTensor(), download = True)
test_data = datasets.MNIST(root = 'test_dataset/', train = False, transform = transforms.ToTensor(), download = True)
train_loader = DataLoader(dataset = train_data, batch_size = batch_size, shuffle = True)
test_loader = DataLoader(dataset = test_data, batch_size = batch_size, shuffle = True)
x_batch_train, y_batch_train = next(iter(train_loader))
print(f"x_train_batch_shape: {x_batch_train.shape}\ty_train_batch_shape: {y_batch_train.shape}")

x_train_batch_shape: torch.Size([64, 1, 28, 28])	y_train_batch_shape: torch.Size([64])


In [50]:
#Obtain the loss object and optimizer

In [51]:
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.RMSprop(model.parameters(), lr = learning_rate)

In [52]:
#Training loop:
tic = time.time()
for epoch in range(epochs):
  print(f"training starts for epoch: {epoch + 1}\n>>>>please wait while training...")
  for idx, (data, target) in enumerate(tqdm(train_loader)):
    #feed the data into GPU when available:
    data = data.to(device = device).squeeze(1)#squashing the channel dimension
    target = target.to(device = device)
    #performing foward pass
    preds = model(data)
    train_loss = loss_fn(preds, target)
    #performing backward pass
    optimizer.zero_grad()
    train_loss.backward()
    #performing gradient descent with RMSprop
    optimizer.step()
#Metric evaluation for the training and testing data
def _model_checking(loader, model):
  if loader.dataset.train:
    print(f"Checking accuracy over the train dataset\n>>>please wait...")
  else:
    print(f"Cheking accuracy over the validation dataset\n>>>please wait...")
  num_correct = 0
  num_examples = 0
  model.eval()
  #no need to compute gradients again:
  with torch.no_grad():
    for x, y in loader:
      x = x.to(device = device).squeeze(1)
      y = y.to(device = device)
      preds = model(x)
      _,predictions = preds.max(1)
      num_correct+=(predictions == y).sum()
      num_examples+=predictions.size(0)
  model.train()
  return num_correct/num_examples
print(f"The accuracy for the training data is: {float(_model_checking(train_loader, model))* 100:.2f}")
print(f"The accuracy for the validation data is: {float(_model_checking(test_loader, model))* 100:.2f}")
toc = time.time()
print(f"\n>>>Total time for training and evaluation is:\t{time_fmt(toc - tic)}")

  1%|          | 6/938 [00:00<00:17, 52.83it/s]

training starts for epoch: 1
>>>>please wait while training...


100%|██████████| 938/938 [00:11<00:00, 81.10it/s]
  1%|          | 9/938 [00:00<00:10, 85.67it/s]

training starts for epoch: 2
>>>>please wait while training...


100%|██████████| 938/938 [00:11<00:00, 82.18it/s]
  1%|          | 9/938 [00:00<00:10, 86.16it/s]

training starts for epoch: 3
>>>>please wait while training...


100%|██████████| 938/938 [00:11<00:00, 82.13it/s]
  1%|          | 9/938 [00:00<00:11, 81.03it/s]

training starts for epoch: 4
>>>>please wait while training...


100%|██████████| 938/938 [00:11<00:00, 83.32it/s]
  1%|          | 9/938 [00:00<00:11, 82.17it/s]

training starts for epoch: 5
>>>>please wait while training...


100%|██████████| 938/938 [00:11<00:00, 82.35it/s]
  1%|          | 9/938 [00:00<00:11, 82.26it/s]

training starts for epoch: 6
>>>>please wait while training...


100%|██████████| 938/938 [00:11<00:00, 82.67it/s]
  1%|          | 9/938 [00:00<00:10, 85.03it/s]

training starts for epoch: 7
>>>>please wait while training...


100%|██████████| 938/938 [00:11<00:00, 83.03it/s]
  1%|          | 8/938 [00:00<00:12, 77.40it/s]

training starts for epoch: 8
>>>>please wait while training...


100%|██████████| 938/938 [00:11<00:00, 82.54it/s]
  1%|          | 9/938 [00:00<00:11, 79.04it/s]

training starts for epoch: 9
>>>>please wait while training...


100%|██████████| 938/938 [00:11<00:00, 82.25it/s]
  1%|          | 8/938 [00:00<00:12, 76.88it/s]

training starts for epoch: 10
>>>>please wait while training...


100%|██████████| 938/938 [00:11<00:00, 82.00it/s]


Checking accuracy over the train dataset
>>>please wait...
The accuracy for the training data is: 99.49
Cheking accuracy over the validation dataset
>>>please wait...
The accuracy for the validation data is: 98.76

>>>Total time for training and evaluation is:	0: 02: 03.00
