<a href="https://colab.research.google.com/github/martinpius/RNN-ALIENS/blob/main/RNN_with_Gated_Recurrent_Units_(GRU)_implementation_from_scratch_in_Pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# torch is needed by every later cell, so it is imported unguarded: a missing
# install should fail loudly here instead of surfacing later as a NameError.
import torch

# Only the Colab-specific steps are optional. The google.colab import lives
# inside the try so that the notebook still starts outside Colab, where the
# import fails and COLAB is set to False.
try:
  from google.colab import drive
  drive.mount("/content/drive", force_remount = True)
  COLAB = True
  print(f">>>> You are on Google CoLaB with torch version {torch.__version__}")
except Exception as e:
  print(f">>>> {type(e)}: {e}\n>>>> please correct {type(e)} and reload your device")
  COLAB = False
def time_fmt(t: float = 123.189)->str:
  """Format a duration given in seconds as ``hrs: H min: MM secs: SS.ss``.

  Args:
    t: elapsed time in seconds.

  Returns:
    The formatted string; the seconds field keeps two decimal places.
  """
  # Round once up front so a value such as 59.999 rolls over into the minutes
  # field instead of being rendered as "60.00" seconds.
  t = round(t, 2)
  h = int(t / (60 * 60))
  m = int(t % (60 * 60) / 60)
  # Keep the fractional part: truncating to int here made the ".2f" field
  # always print ".00".
  s = t % 60
  return f"hrs: {h} min: {m:>02} secs: {s:>05.2f}"
# Prefer the GPU whenever torch can see one; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Smoke-test the time formatter using its default argument.
print(f">>>> time formating\t....................\n>>>> time elapsed\t{time_fmt()}")

Mounted at /content/drive
>>>> You are on Google CoLaB with torch version 1.8.1+cu101
>>>> time formating	....................
>>>> time elapsed	hrs: 0 min: 02 secs: 03.00


In [None]:
#In this notebook we are going to implement an RNN with the GRU architecture from scratch.
#For demonstration we will apply it to the MNIST dataset by Yann LeCun. Although this is not
#sequential data, we can treat the 28 pixel rows (height) of each image as the sequence steps and
#the 28 pixels in each row (width) as the input dimension. The channel dimension will be squeezed out.

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from tqdm import tqdm
import time, datetime,sys
from torch.utils.tensorboard import SummaryWriter


In [None]:
#Hyperparameters to be used in this model:
# Input geometry: number of time steps and features per step.
seq_len, input_dim = 28, 28
# Network size.
num_layers, hidden_dim = 2, 512
num_classes = 10
# Optimisation schedule.
batch_size = 128
learning_rate = 1e-3
EPOCHS = 10


In [None]:
class RNN_GRU(nn.Module):
  """GRU-based sequence classifier.

  The input sequence is run through a stacked ``nn.GRU``; the hidden states of
  all time steps are flattened into one vector per sample and passed through
  three linear layers that produce one score per class.

  Args:
    num_layers: number of stacked GRU layers.
    hidden_dim: size of the GRU hidden state.
    input_dim: number of features per time step.
    num_classes: number of output scores.
    seq_len: number of time steps in every input. It fixes the input width of
      the first linear layer, so inputs with a different length will not fit.
      Defaults to 28.
  """
  def __init__(self, num_layers, hidden_dim, input_dim, num_classes, seq_len = 28):
    super().__init__()
    self.num_layers = num_layers
    self.hidden_dim = hidden_dim
    # Stored on the instance so the layer sizes no longer depend on a
    # module-level global of the same name.
    self.seq_len = seq_len
    self.gru = nn.GRU(input_size = input_dim,
                      hidden_size = hidden_dim,
                      num_layers = num_layers,
                      batch_first = True,
                      dropout = 0.5)
    # The head consumes the hidden state of every time step, hence the
    # hidden_dim * seq_len input width.
    self.fc1 = nn.Linear(hidden_dim * seq_len, 512)
    self.fc2 = nn.Linear(512, 256)
    self.outputs = nn.Linear(256, num_classes)
  
  def forward(self, input_tensor):
    """Return class scores of shape (batch, num_classes).

    Args:
      input_tensor: tensor of shape (batch, seq_len, input_dim).
    """
    # Build the initial hidden state on the same device and dtype as the
    # input, so the module works wherever it (and its input) has been moved.
    h0 = torch.zeros(self.num_layers, input_tensor.size(0), self.hidden_dim,
                     device = input_tensor.device, dtype = input_tensor.dtype)
    out, _ = self.gru(input_tensor, h0)
    # Flatten (batch, seq_len, hidden_dim) -> (batch, seq_len * hidden_dim).
    out = out.reshape(out.shape[0], -1)
    # NOTE(review): no non-linearity is applied between the three linear
    # layers, so together they are equivalent to a single linear map. Left
    # unchanged to keep outputs of already-trained weights identical.
    x = self.fc1(out)
    x = self.fc2(x)
    x = self.outputs(x)
    return x

In [None]:
#Instantiate the model and check that it returns outputs of the expected shape

In [None]:
# Dummy batch: 64 samples, each with 28 time steps of 28 features.
rand_inputs = torch.rand(64, 28, 28).to(device)
# Collect the constructor arguments from the hyperparameter cell.
model_config = dict(num_layers = num_layers,
                    hidden_dim = hidden_dim,
                    input_dim = input_dim,
                    num_classes = num_classes)
model = RNN_GRU(**model_config).to(device)
# One forward pass is enough to confirm the output shape.
output_shape = model(rand_inputs).shape
print(f">>>> The output shape is: {output_shape}")

>>>> The output shape is: torch.Size([64, 10])


In [None]:
#Getting and preprocessing the data from torchvision library:
# The train and test files are fetched together into whichever root is given,
# so both splits share one root; separate roots download and extract every
# archive twice.
MNIST_ROOT = "mnist_gru_train/"
to_tensor = transforms.ToTensor()
train_dfm = datasets.MNIST(root = MNIST_ROOT, train = True, transform = to_tensor, download = True)
valid_dfm = datasets.MNIST(root = MNIST_ROOT, train = False, transform = to_tensor, download = True)
# Shuffle only the training split; keep validation order fixed.
train_loader = DataLoader(dataset = train_dfm, shuffle = True, batch_size = batch_size)
valid_loader = DataLoader(dataset = valid_dfm, shuffle = False, batch_size = batch_size)
# Peek at one batch to confirm the tensor shapes delivered by the loader.
x_train_batch, y_train_batch = next(iter(train_loader))
print(f"x_train_shape: {x_train_batch.shape}\ty_train_shape: {y_train_batch.shape}")


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to mnist_gru_train/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=9912422.0), HTML(value='')))


Extracting mnist_gru_train/MNIST/raw/train-images-idx3-ubyte.gz to mnist_gru_train/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to mnist_gru_train/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=28881.0), HTML(value='')))


Extracting mnist_gru_train/MNIST/raw/train-labels-idx1-ubyte.gz to mnist_gru_train/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to mnist_gru_train/MNIST/raw/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 503: Service Unavailable

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to mnist_gru_train/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=1648877.0), HTML(value='')))


Extracting mnist_gru_train/MNIST/raw/t10k-images-idx3-ubyte.gz to mnist_gru_train/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to mnist_gru_train/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=4542.0), HTML(value='')))


Extracting mnist_gru_train/MNIST/raw/t10k-labels-idx1-ubyte.gz to mnist_gru_train/MNIST/raw

Processing...
Done!
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to mnist_gru_valid/MNIST/raw/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 503: Service Unavailable

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to mnist_gru_valid/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=9912422.0), HTML(value='')))


Extracting mnist_gru_valid/MNIST/raw/train-images-idx3-ubyte.gz to mnist_gru_valid/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to mnist_gru_valid/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=28881.0), HTML(value='')))


Extracting mnist_gru_valid/MNIST/raw/train-labels-idx1-ubyte.gz to mnist_gru_valid/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to mnist_gru_valid/MNIST/raw/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 503: Service Unavailable

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to mnist_gru_valid/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=1648877.0), HTML(value='')))


Extracting mnist_gru_valid/MNIST/raw/t10k-images-idx3-ubyte.gz to mnist_gru_valid/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to mnist_gru_valid/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=4542.0), HTML(value='')))


Extracting mnist_gru_valid/MNIST/raw/t10k-labels-idx1-ubyte.gz to mnist_gru_valid/MNIST/raw

Processing...
Done!
x_train_shape: torch.Size([128, 1, 28, 28])	y_train_shape: torch.Size([128])


In [None]:
#Get the loss and optimizer objects
# Cross-entropy computed on the raw scores returned by the model.
loss_fn = nn.CrossEntropyLoss()
# Adam over every model parameter, using the configured learning rate.
optimizer = optim.Adam(model.parameters(), lr = learning_rate)

In [None]:
#The training loop
tic = time.time()  # start of the wall-clock measurement
for epoch in range(EPOCHS):
  print(f"\n>>>> training begins for epoch {epoch + 1}................\n>>>> please wait while the model is training......................")
  progress = tqdm(train_loader)
  for idx, batch in enumerate(progress):
    data, target = batch
    # Drop the channel axis: (batch, 1, 28, 28) -> (batch, 28, 28).
    data = data.squeeze(dim = 1).to(device)
    target = target.to(device)
    # Clear gradients left over from the previous step.
    optimizer.zero_grad()
    # Forward pass and loss.
    preds = model(data)
    train_loss = loss_fn(preds, target)
    # Back-propagate, then take one Adam step.
    train_loss.backward()
    optimizer.step()

def __checking__(loader, model):
  """Measure and print the classification accuracy of `model` over `loader`.

  Args:
    loader: iterable of (inputs, labels) batches whose `dataset.train` flag
      selects the banner that is printed. Inputs carry a singleton dimension
      at index 1, which is squeezed away before the forward pass.
    model: module mapping an input batch to per-class scores of shape
      (batch, num_classes).

  Returns:
    The accuracy in percent as a float (0.0 if the loader yields no examples).
  """
  if loader.dataset.train:
    print(f"\n>>>> checking the accuracy for the training set......................")
  else: 
    print(f"\n>>>> checking the accuracy over the validation data...................")
  # Evaluate on the device the model already lives on instead of relying on a
  # module-level `device` global.
  first_param = next(model.parameters(), None)
  eval_device = first_param.device if first_param is not None else torch.device("cpu")
  num_correct = 0
  num_examples = 0
  # Remember the current mode so it can be restored rather than forced to train.
  was_training = model.training
  model.eval()
  try:
    with torch.no_grad():
      for x, y in loader:
        x = x.squeeze(1).to(device = eval_device)
        y = y.to(device = eval_device)
        # Index of the highest score is the predicted class.
        predictions = model(x).argmax(dim = 1)
        num_correct += int((predictions == y).sum())
        num_examples += predictions.size(0)
  finally:
    model.train(was_training)
  # Report once for the whole loader; the per-batch running value previously
  # printed was labelled with unrelated epoch/batch counters.
  acc = 100.0 * num_correct / num_examples if num_examples else 0.0
  print(f"\n>>>> accuracy: {acc:.4f}% ({num_correct}/{num_examples} correct)")
  return acc
toc = time.time()
# Report the wall-clock time measured between `tic` and `toc`; it was
# captured but never shown.
print(f"\n>>>> time elapsed for training: {time_fmt(toc - tic)}")
# Accuracy on the data the model was fitted on, then on the held-out split.
__checking__(train_loader, model)
__checking__(valid_loader, model)




  0%|          | 0/469 [00:00<?, ?it/s]


>>>> training begins for epoch 1................
>>>> please wait while the model is training......................


100%|██████████| 469/469 [09:48<00:00,  1.26s/it]
  0%|          | 0/469 [00:00<?, ?it/s]


>>>> training begins for epoch 2................
>>>> please wait while the model is training......................


100%|██████████| 469/469 [09:49<00:00,  1.26s/it]
  0%|          | 0/469 [00:00<?, ?it/s]


>>>> training begins for epoch 3................
>>>> please wait while the model is training......................


100%|██████████| 469/469 [09:38<00:00,  1.23s/it]
  0%|          | 0/469 [00:00<?, ?it/s]


>>>> training begins for epoch 4................
>>>> please wait while the model is training......................


  7%|▋         | 34/469 [00:41<09:00,  1.24s/it]