<a href="https://colab.research.google.com/github/martinpius/PYTORCH/blob/main/Simple_RNN_with_bi_directional_LSTM_architecture_in_Pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#torch is required on every platform, so it is imported outside of the guarded CoLaB block:
import torch
#Everything that only exists on CoLaB (the google.colab package and the drive mount) lives
#inside the try, so a missing package or a failed mount is reported and recorded in COLAB
#instead of aborting the whole cell:
try:
  from google.colab import drive
  drive.mount("/content/drive", force_remount = True)
  COLAB = True
  print(f"You are on CoLaB with Pytorch version: {torch.__version__}")
except Exception as e:
  print(f"{type(e)}: {e}\n>>>please load your drive...")
  COLAB = False
#Assigning the GPU device when available:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
def time_fmt(t:float = 123.890)->str:
  """Format a duration in seconds as the string 'h: mm: ss.ss'.

  Args:
    t: elapsed time in seconds (expected to be non-negative).

  Returns:
    A string with the whole hours, the zero-padded minutes and the seconds
    shown with two decimals, e.g. 123.89 -> '0: 02: 03.89'.
  """
  #Work in whole hundredths of a second: the fractional part is kept (the format below
  #shows two decimals) and the seconds field can never be rounded up to 60.00.
  centis = int(round(t * 100))
  h, rest = divmod(centis, 60 * 60 * 100)
  m, cs = divmod(rest, 60 * 100)
  return f"{h}: {m:>02}: {cs / 100:>05.2f}"
print(f">>>time testing\tplease wait...\n>>>time elapse:\t{time_fmt()}")

Mounted at /content/drive
You are on CoLaB with Pytorch version: 1.8.1+cu101
>>>time testing	please wait...
>>>time elapse:	0: 02: 03.00


In [None]:
#In this notebook we are going to train a simple RNN with a bi-directional LSTM architecture
#on the MNIST data set. We are going to treat the rows (height) and the columns (width) of the MNIST images
#as the sequence length and the features, respectively. Since an MNIST image has the shape (1,28,28), we will squeeze the channel dimension
#to construct an input of shape (batch_size, 28,28)

In [None]:
#We start by importing necessary packages and modules from torch:
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
from torchvision.transforms import transforms
from tqdm import tqdm
import time, datetime, sys, os

In [None]:
#Hyperparameters, grouped by what they control.
#Optimisation settings:
learning_rate = 1e-3
batch_size = 64
epochs = 10
#Each image is fed to the LSTM as 28 time steps carrying 28 features each:
sequence_length = input_size = 28
#Network dimensions:
hidden_size, num_layers = 256, 2
num_classes = 10


In [None]:
#We define our model using the following class with inheritance from nn.Module:
class RNN_BLSTM(nn.Module):
  """Sequence classifier: a stacked bi-directional LSTM followed by a linear layer.

  Args:
    input_size: number of features per time step.
    hidden_size: hidden units per direction of the LSTM.
    num_layers: number of stacked LSTM layers.
    num_classes: number of output scores produced by the linear layer.
    dropout: dropout probability between stacked LSTM layers (default 0.25,
      the value that used to be hard-coded). Ignored when num_layers == 1.
  """
  def __init__(self, input_size, hidden_size, num_layers, num_classes, dropout = 0.25):
    super(RNN_BLSTM, self).__init__()
    self.num_layers = num_layers
    self.hidden_size = hidden_size
    #nn.LSTM applies dropout only between stacked layers and warns when it is requested
    #for a single layer, so it is switched off in that case.
    self.blstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first = True, bidirectional = True,
                         dropout = dropout if num_layers > 1 else 0.0)
    #the classifier sees the forward and the backward summary side by side
    self.fc = nn.Linear(hidden_size*2, num_classes)
  def forward(self, x):
    """Return the class scores, shape (batch, num_classes), for x of shape (batch, seq_len, input_size)."""
    #initialize hidden and cell-state to zeros, directly on the device/dtype of the input
    #so the module does not depend on a global `device` variable
    h0 = torch.zeros(self.num_layers*2, x.size(0), self.hidden_size, device = x.device, dtype = x.dtype)
    c0 = torch.zeros_like(h0)
    _, (h_n, _) = self.blstm(x, (h0,c0))
    #h_n[-2] / h_n[-1] are the final states of the last layer's forward / backward direction.
    #The last time step of the output sequence would only hold the backward direction after
    #it has read a single step, so the final states are used instead.
    out = torch.cat((h_n[-2], h_n[-1]), dim = 1)
    out = self.fc(out)
    return out



In [None]:
#Build the network from the hyperparameters, then move its weights to the selected device:
model = RNN_BLSTM(input_size = input_size, hidden_size = hidden_size,
                  num_layers = num_layers, num_classes = num_classes)
model = model.to(device)

In [None]:
#The RMSprop optimizer updates every model parameter; cross-entropy scores the class outputs:
optimizer = optim.RMSprop(params = model.parameters(), lr = learning_rate)
loss_obj = nn.CrossEntropyLoss()

In [None]:
#Download MNIST (when it is not on disk yet) and wrap both splits in shuffled mini-batch loaders:
to_tensor = transforms.ToTensor()
train_data = datasets.MNIST('train_dataset/', train = True, transform = to_tensor, download = True)
test_data = datasets.MNIST('test_dataset/', train = False, transform = to_tensor, download = True)
train_loader = DataLoader(train_data, batch_size = batch_size, shuffle = True)
test_loader = DataLoader(test_data, batch_size = batch_size, shuffle = True)

In [None]:
#Training: every epoch makes one full pass over train_loader.
tic = time.time()
for epoch in range(epochs):
  print(f"train starts for epoch {epoch + 1}\n>>>>please wait\t... ..training on progress.....")
  for idx, (data, target) in enumerate(tqdm(train_loader)):
    #drop the channel axis and move the batch to the selected device
    data = data.squeeze(1).to(device)
    target = target.to(device)
    #clear the gradients left over from the previous step
    optimizer.zero_grad()
    #forward pass and loss
    preds = model(data)
    train_loss = loss_obj(preds, target)
    #backward pass
    train_loss.backward()
    #parameter update with the optimizer created earlier (RMSprop, not adam)
    optimizer.step()
#Checking the evaluation and training metrics:
def _model_checking(loader, model):
  """Return the fraction of correctly classified examples served by `loader`.

  Args:
    loader: iterable of (inputs, labels) batches with a `.dataset` attribute. When the
      dataset has a truthy `train` attribute the "training" banner is printed,
      otherwise (including datasets without that attribute) the "validation" one.
    model: module mapping a batch of inputs to per-class scores of shape (batch, classes).

  Returns:
    A 0-dim tensor holding correct / total (0.0 when the loader yields no examples).

  The model is evaluated in eval mode without gradients and is put back into
  whatever mode (train or eval) it was in when the function was called.
  """
  if getattr(loader.dataset, "train", False):
    print(f"Checking accuracy for the training data\n>>>>please wait....")
  else:
    print(f"Checking the accuracy for the validation data\n>>>>please wait....")
  #run on the device the model lives on rather than on a global variable
  first_param = next(model.parameters(), None)
  run_device = first_param.device if first_param is not None else torch.device('cpu')
  was_training = model.training
  num_correct = torch.zeros((), dtype = torch.long, device = run_device)
  num_examples = 0
  model.eval()
  try:
    #No need to compute the gradient again:
    with torch.no_grad():
      for x, y in loader:
        x = x.to(device = run_device).squeeze(1)
        y = y.to(device = run_device)
        preds = model(x)
        _,predictions = preds.max(1)
        num_correct+= (predictions == y).sum()
        num_examples+= predictions.size(0)
  finally:
    #restore the caller's mode even if a batch raised
    model.train(was_training)
  if num_examples == 0:
    #an empty loader would otherwise divide by zero
    return num_correct.float()
  return num_correct/num_examples
#Both accuracy checks run while their f-strings are evaluated, so the clock is stopped only
#after them: the total printed below is announced as training AND evaluation time.
print(f"The accuracy for the training data: {float(_model_checking(train_loader, model))*100:.2f}")
print(f"The accuracy for the validation data: {float(_model_checking(test_loader, model))*100:.2f}")
toc = time.time()
print(f"\n>>>Total time for the training and evaluation is: {time_fmt(toc - tic)}")

  0%|          | 0/938 [00:00<?, ?it/s]

train starts for epoch 1
>>>>please wait	... ..training on progress.....


100%|██████████| 938/938 [05:51<00:00,  2.67it/s]
  0%|          | 0/938 [00:00<?, ?it/s]

train starts for epoch 2
>>>>please wait	... ..training on progress.....


100%|██████████| 938/938 [05:44<00:00,  2.72it/s]
  0%|          | 0/938 [00:00<?, ?it/s]

train starts for epoch 3
>>>>please wait	... ..training on progress.....


100%|██████████| 938/938 [05:46<00:00,  2.71it/s]
  0%|          | 0/938 [00:00<?, ?it/s]

train starts for epoch 4
>>>>please wait	... ..training on progress.....


100%|██████████| 938/938 [05:40<00:00,  2.76it/s]
  0%|          | 0/938 [00:00<?, ?it/s]

train starts for epoch 5
>>>>please wait	... ..training on progress.....


100%|██████████| 938/938 [05:41<00:00,  2.75it/s]
  0%|          | 0/938 [00:00<?, ?it/s]

train starts for epoch 6
>>>>please wait	... ..training on progress.....


100%|██████████| 938/938 [05:39<00:00,  2.76it/s]
  0%|          | 0/938 [00:00<?, ?it/s]

train starts for epoch 7
>>>>please wait	... ..training on progress.....


100%|██████████| 938/938 [05:39<00:00,  2.76it/s]
  0%|          | 0/938 [00:00<?, ?it/s]

train starts for epoch 8
>>>>please wait	... ..training on progress.....


100%|██████████| 938/938 [05:40<00:00,  2.76it/s]
  0%|          | 0/938 [00:00<?, ?it/s]

train starts for epoch 9
>>>>please wait	... ..training on progress.....


100%|██████████| 938/938 [05:40<00:00,  2.75it/s]
  0%|          | 0/938 [00:00<?, ?it/s]

train starts for epoch 10
>>>>please wait	... ..training on progress.....


100%|██████████| 938/938 [05:39<00:00,  2.76it/s]


Checking accuracy for the training data
>>>>please wait....
The accuracy for the training data: 99.12
Checking the accuracy for the validation data
>>>>please wait....
The accuracy for the validation data: 98.47

>>>Total time for the training and evaluation is: 0: 57: 04.00
