<a href="https://colab.research.google.com/github/martinpius/PYTORCH/blob/main/RnnBase.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from timeit import default_timer as timer
t1 = timer()
try:
  from google.colab import drive
  drive.mount("/content/drive/", force_remount = True)
  import torch
  from torch import nn, optim
  from torch.utils.data import DataLoader
  from torch.nn import CrossEntropyLoss
  from torchvision import datasets, transforms
  from tqdm.auto import tqdm
  from PIL import Image
  import matplotlib.pyplot as plt
  import numpy as np
  print(f">>>> You are on CoLaB with torch version: {torch.__version__}")
except Exception as e:
  print(f">>>> {type(e)}: {e}\n>>>> Please correct {type(e)} and reload")

def mytimer(t: float = timer())->float:
  h = int(t / (60 * 60))
  m = int(t % (60 * 60) / 60)
  s = int(t % 60)
  return f"hrs: {h}: mins: {m:>02}: secs: {s:>05.2f}"

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
BATCH_SIZE = 256 if device == torch.device("cuda") else 32
print(f">>>> Available device: {device}")
!nvidia-smi
print(f"\n>>>> Time elapsed: {mytimer(timer() - t1)}")

Mounted at /content/drive/
>>>> You are on CoLaB with torch version: 1.13.1+cu116
>>>> Available device: cuda
Wed Mar 29 19:44:59 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   53C    P8    10W /  70W |      3MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                          

In [2]:
class RNNbase(nn.Module):

  def __init__(self, input_size: int = 28,hidden_size: int = 128, num_layers: int = 2,
               seq_len: int = 28, num_classes: int = 10, bidirectional: bool = False,
               batch_first: bool = True, model_type: str = "GRU")->None:

    super(RNNbase, self).__init__()

    self.input_size = input_size
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.seq_len = seq_len
    self.num_classes = num_classes
    self.batch_first = batch_first
    self.bidirectional = bidirectional
    self.model_type = model_type
    
    if self.bidirectional:

      self.fc_layer = nn.Linear(in_features = self.seq_len * self.hidden_size * 2,
                                out_features = self.num_classes)
    else:

      self.fc_layer = nn.Linear(in_features = self.seq_len * self.hidden_size,
                                out_features = self.num_classes)
    if self.model_type == "RNN":

      self.rnn = nn.RNN(input_size = self.input_size,
                                     hidden_size = self.hidden_size,
                                     num_layers = self.num_layers,
                                     batch_first = self.batch_first,
                                     bidirectional = self.bidirectional)
    elif self.model_type == "GRU":

      self.gru = nn.GRU(input_size = self.input_size,
                          hidden_size = self.hidden_size,
                          num_layers = self.num_layers,
                          batch_first = self.batch_first,
                          bidirectional = self.bidirectional)
      
    elif self.model_type == "LSTM":

        self.lstm = nn.LSTM(input_size = self.input_size,
                           hidden_size = self.hidden_size,
                           num_layers = self.num_layers,
                           batch_first = self.batch_first,
                           bidirectional = self.bidirectional)
    else: print(f">>>> ArgumentTypeError: The model type is not defined:")

  def forward(self, x: torch.Tensor)->torch.Tensor:

    x = x.squeeze(dim = 1)

    assert x.shape == (BATCH_SIZE, self.seq_len, self.input_size)

    if self.bidirectional:

      h0 = torch.zeros(size = (2 * self.num_layers, BATCH_SIZE, self.hidden_size)).to(device)
      c0 = torch.zeros(size = (2 * self.num_layers, BATCH_SIZE, self.hidden_size)).to(device)

      if self.model_type == "RNN":

        out, hidden = self.rnn(x, h0)

      elif self.model_type == "GRU":

        out, hidden = self.gru(x, h0)

      elif self.model_type == "LSTM":

        out, (cell_state, hidden) = self.lstm(x, (h0, c0))
        assert out.shape == (BATCH_SIZE, self.seq_len, self.hidden_size * 2)
        assert hidden.shape == (2 * self.num_layers, BATCH_SIZE, self.hidden_size)
        assert cell_state.shape == (2 * self.num_layers, BATCH_SIZE, self.hidden_size)

      else: 
        print(f">>>> ArgumentTypeError: The model type is not defined")

    else:

      h0 = torch.zeros(size = (self.num_layers, BATCH_SIZE, self.hidden_size), device = device)
      c0 = torch.zeros(size = (self.num_layers, BATCH_SIZE, self.hidden_size), device = device)

      if self.model_type == "RNN":

        out, hidden = self.rnn(x, h0)

      elif self.model_type == "GRU":

        out, hidden = self.gru(x, h0)

      elif self.model_type == "LSTM":

        out, (cell_state, hidden) = self.lstm(x, (h0, c0))

        assert out.shape == (BATCH_SIZE, self.seq_len, self.hidden_size)
        assert hidden.shape == (self.num_layers, BATCH_SIZE, self.hidden_size)
        assert cell_state.shape == (self.num_layers, BATCH_SIZE, self.hidden_size)

      else: 
        print(f">>>> ArgumentTypeError: The model type is not defined")

    out = out.reshape(out.shape[0], -1)

    scores = self.fc_layer(out)

    return scores

test_tensor = torch.randn(size = (BATCH_SIZE, 1, 28, 28), device = device)

model1 = RNNbase(model_type = "RNN",
                 bidirectional = False).to(device = device)
model2 = RNNbase(model_type = "RNN",
                 bidirectional = True).to(device = device)
model3 = RNNbase(model_type = "GRU",
                 bidirectional = False).to(device = device)
model4 = RNNbase(model_type = "GRU", 
                 bidirectional = True).to(device = device)
model5 = RNNbase(model_type = "LSTM",
                 bidirectional = False).to(device = device)
model6 = RNNbase(model_type = "LSTM", 
                 bidirectional = True).to(device = device)

models = {"RNN_Uni": model1,
          "RNN_bi": model2,
          "GRU_Uni": model3,
          "GRU_bi": model4,
          "LSTM_Uni": model5,
          "LSTM_bi": model6}

assert models["RNN_Uni"](test_tensor).shape == (BATCH_SIZE, 10)
assert models["RNN_bi"](test_tensor).shape == (BATCH_SIZE, 10)
assert models["GRU_Uni"](test_tensor).shape == (BATCH_SIZE, 10)
assert models["GRU_bi"](test_tensor).shape == (BATCH_SIZE, 10)
assert models["LSTM_Uni"](test_tensor).shape == (BATCH_SIZE, 10)
assert models["LSTM_bi"](test_tensor).shape == (BATCH_SIZE, 10)







In [3]:
def params_counter(model: nn.Module)->int:
  return sum([p.numel() for p in model.parameters() if p.requires_grad])

print(f">>>> Total number of trainable parameters in RNN_uni is: {params_counter(models['RNN_Uni']):,} ")
print(f">>>> Total number of trainable parameters in RNN_bi is: {params_counter(models['RNN_bi']):,} ")
print(f">>>> Total number of trainable parameters in GRU_uni is: {params_counter(models['GRU_Uni']):,} ")
print(f">>>> Total number of trainable parameters in GRU_bi is: {params_counter(models['GRU_Uni']):,} ")
print(f">>>> Total number of trainable parameters in LSTM_uni is: {params_counter(models['LSTM_Uni']):,} ")
print(f">>>> Total number of trainable parameters in LSTM_bi is: {params_counter(models['LSTM_bi']):,} ")

>>>> Total number of trainable parameters in RNN_uni is: 89,098 
>>>> Total number of trainable parameters in RNN_bi is: 210,954 
>>>> Total number of trainable parameters in GRU_uni is: 195,594 
>>>> Total number of trainable parameters in GRU_bi is: 195,594 
>>>> Total number of trainable parameters in LSTM_uni is: 248,842 
>>>> Total number of trainable parameters in LSTM_bi is: 628,746 


In [4]:
def acc_score(logits: torch.Tensor, labels: torch.Tensor):
  if len(labels.unique()) > 2:
    preds = torch.softmax(logits, dim = 1).argmax(dim = 1)
  else:
    preds = torch.round(torch.sigmoid(logits))
  acc = torch.eq(labels, preds).sum().item()
  acc /= len(labels)
  acc *= 100
  return f"{acc:.2f} %"

In [5]:
LR = 1e-4
criterion = CrossEntropyLoss()
optimizer1 = optim.Adam(params = models["RNN_Uni"].parameters(), lr = LR)
optimizer2 = optim.Adam(params = models["RNN_bi"].parameters(), lr = LR) 
optimizer3 = optim.Adam(params = models["GRU_Uni"].parameters(), lr = LR)
optimizer4 = optim.Adam(params = models["GRU_bi"].parameters(), lr = LR)
optimizer5 = optim.Adam(params = models["LSTM_Uni"].parameters(), lr = LR)
optimizer6 = optim.Adam(params = models["LSTM_bi"].parameters(), lr = LR)


In [12]:
def trainer(model: nn.Module,
            train_loader: DataLoader,
            test_loader: DataLoader,
            criterion: nn.Module,
            optimizer: optim,
            EPOCHS: int = 20)->None:

  total_tr_loss = 0
  total_val_loss = 0
  num_samples = 0
  model.train()

  for epoch in tqdm(range(EPOCHS + 1)):
    for idx, (data, labels) in enumerate(train_loader):
      data, labels = data.to(device), labels.to(device)
      logits = model(data)
      tr_loss = criterion(logits, labels)
      optimizer.zero_grad()
      tr_loss.backward()
      optimizer.step()
      total_tr_loss += tr_loss
      num_samples += idx
    print(f">>>> Epoch: {epoch + 1 if epoch == 0 else epoch}:\t\
      |\t Train Loss: {total_tr_loss / num_samples:.4f}\
      \t |\t Train Accuracy: {acc_score(logits, labels)}")

      #model.eval()
    with torch.inference_mode():
      for idx, (data, labels) in enumerate(test_loader):
        data, labels = data.to(device), labels.to(device)
        logits = model(data)
        val_loss = criterion(logits, labels)
        total_val_loss += val_loss
        num_samples += idx
    print(f">>>> Epoch: {epoch + 1 if epoch == 0 else epoch}:\
      \t |\t Valid Loss: {total_val_loss / num_samples:.4f}\
      \t|\t Valid Accuracy: {acc_score(logits, labels)}\n")


In [13]:
train_data = datasets.FashionMNIST(root = "train_data/", train = True, download = True,
                                   transform = transforms.ToTensor())
test_data = datasets.FashionMNIST(root = "test_data/", train = False, download = True,
                                  transform = transforms.ToTensor())
train_loader = DataLoader(dataset = train_data, batch_size = BATCH_SIZE,
                          shuffle = True, drop_last = True)
test_loader = DataLoader(dataset = test_data, batch_size = BATCH_SIZE, shuffle = False,
                         drop_last = True)



In [14]:
EPOCHS = 20
tic = timer()
print(f">>>> Starting experimentantion on RNN_Uni model for {EPOCHS} epochs\
\n>>>> Please wait...........................................................................................")
trainer(models["RNN_Uni"], train_loader, test_loader, criterion, optimizer1, EPOCHS)
print(f">>>> End of training for the RNN_uni: Total Time elapsed: {mytimer(timer() - tic)}")

tic = timer()
print(f"\n>>>> Starting experimentantion on RNN_bi model for {EPOCHS} epochs\
\n>>>> Please wait...........................................................................................")
trainer(models["RNN_bi"], train_loader, test_loader, criterion, optimizer2, EPOCHS)
print(f">>>> End of training for the RNN_bi: Total Time elapsed: {mytimer(timer() - tic)}")

tic = timer()
print(f"\n>>>> Starting experimentantion on GRU_Uni model for {EPOCHS} epochs\
\n>>>> Please wait...........................................................................................")
trainer(models["GRU_Uni"], train_loader, test_loader, criterion, optimizer3, EPOCHS)
print(f">>>> End of training for the GRU_uni: Total Time elapsed: {mytimer(timer() - tic)}")

tic = timer()
print(f">>>> Starting experimentantion on GRU_bi model for {EPOCHS} epochs\
\n>>>> Please wait...........................................................................................")
trainer(models["GRU_bi"], train_loader, test_loader, criterion, optimizer4, EPOCHS)
print(f">>>> End of training for the GRU_bi: Total Time elapsed: {mytimer(timer() - tic)}")

tic = timer()
print(f">>>> Starting experimentantion on LSTM_Uni model for {EPOCHS} epochs\
\n>>>> Please wait...........................................................................................")
trainer(models["LSTM_Uni"], train_loader, test_loader, criterion, optimizer5, EPOCHS)
print(f">>>> End of training for the LSTM_uni: Total Time elapsed: {mytimer(timer() - tic)}")

tic = timer()
print(f">>>> Starting experimentantion on LSTM_bi model for {EPOCHS} epochs\
\n>>>> Please wait...........................................................................................")
trainer(models["LSTM_bi"], train_loader, test_loader, criterion, optimizer6, EPOCHS)
print(f">>>> End of training for the LSTM_bi: Total Time elapsed: {mytimer(timer() - tic)}")


>>>> Starting experimentantion on RNN_Uni model for 20 epochs
>>>> Please wait...........................................................................................


  0%|          | 0/21 [00:00<?, ?it/s]

>>>> Epoch: 1:	      |	 Train Loss: 0.0069      	 |	 Train Accuracy: 73.83 %
>>>> Epoch: 1:      	 |	 Valid Loss: 0.0009      	|	 Valid Accuracy: 76.95 %

>>>> Epoch: 1:	      |	 Train Loss: 0.0058      	 |	 Train Accuracy: 78.52 %
>>>> Epoch: 1:      	 |	 Valid Loss: 0.0008      	|	 Valid Accuracy: 80.47 %

>>>> Epoch: 2:	      |	 Train Loss: 0.0053      	 |	 Train Accuracy: 79.30 %
>>>> Epoch: 2:      	 |	 Valid Loss: 0.0008      	|	 Valid Accuracy: 83.20 %

>>>> Epoch: 3:	      |	 Train Loss: 0.0050      	 |	 Train Accuracy: 81.64 %
>>>> Epoch: 3:      	 |	 Valid Loss: 0.0008      	|	 Valid Accuracy: 83.98 %

>>>> Epoch: 4:	      |	 Train Loss: 0.0048      	 |	 Train Accuracy: 85.16 %
>>>> Epoch: 4:      	 |	 Valid Loss: 0.0008      	|	 Valid Accuracy: 85.55 %

>>>> Epoch: 5:	      |	 Train Loss: 0.0046      	 |	 Train Accuracy: 79.69 %
>>>> Epoch: 5:      	 |	 Valid Loss: 0.0007      	|	 Valid Accuracy: 86.72 %

>>>> Epoch: 6:	      |	 Train Loss: 0.0044      	 |	 Train Accuracy: 8

  0%|          | 0/21 [00:00<?, ?it/s]

>>>> Epoch: 1:	      |	 Train Loss: 0.0070      	 |	 Train Accuracy: 78.12 %
>>>> Epoch: 1:      	 |	 Valid Loss: 0.0008      	|	 Valid Accuracy: 83.20 %

>>>> Epoch: 1:	      |	 Train Loss: 0.0056      	 |	 Train Accuracy: 82.81 %
>>>> Epoch: 1:      	 |	 Valid Loss: 0.0007      	|	 Valid Accuracy: 84.77 %

>>>> Epoch: 2:	      |	 Train Loss: 0.0050      	 |	 Train Accuracy: 81.64 %
>>>> Epoch: 2:      	 |	 Valid Loss: 0.0007      	|	 Valid Accuracy: 83.98 %

>>>> Epoch: 3:	      |	 Train Loss: 0.0046      	 |	 Train Accuracy: 86.72 %
>>>> Epoch: 3:      	 |	 Valid Loss: 0.0007      	|	 Valid Accuracy: 87.11 %

>>>> Epoch: 4:	      |	 Train Loss: 0.0044      	 |	 Train Accuracy: 83.20 %
>>>> Epoch: 4:      	 |	 Valid Loss: 0.0007      	|	 Valid Accuracy: 86.33 %

>>>> Epoch: 5:	      |	 Train Loss: 0.0042      	 |	 Train Accuracy: 87.11 %
>>>> Epoch: 5:      	 |	 Valid Loss: 0.0006      	|	 Valid Accuracy: 88.67 %

>>>> Epoch: 6:	      |	 Train Loss: 0.0040      	 |	 Train Accuracy: 8

  0%|          | 0/21 [00:00<?, ?it/s]

>>>> Epoch: 1:	      |	 Train Loss: 0.0098      	 |	 Train Accuracy: 77.34 %
>>>> Epoch: 1:      	 |	 Valid Loss: 0.0010      	|	 Valid Accuracy: 76.56 %

>>>> Epoch: 1:	      |	 Train Loss: 0.0075      	 |	 Train Accuracy: 78.52 %
>>>> Epoch: 1:      	 |	 Valid Loss: 0.0009      	|	 Valid Accuracy: 80.47 %

>>>> Epoch: 2:	      |	 Train Loss: 0.0065      	 |	 Train Accuracy: 83.20 %
>>>> Epoch: 2:      	 |	 Valid Loss: 0.0009      	|	 Valid Accuracy: 82.42 %

>>>> Epoch: 3:	      |	 Train Loss: 0.0060      	 |	 Train Accuracy: 84.77 %
>>>> Epoch: 3:      	 |	 Valid Loss: 0.0008      	|	 Valid Accuracy: 82.03 %

>>>> Epoch: 4:	      |	 Train Loss: 0.0056      	 |	 Train Accuracy: 84.77 %
>>>> Epoch: 4:      	 |	 Valid Loss: 0.0008      	|	 Valid Accuracy: 84.77 %

>>>> Epoch: 5:	      |	 Train Loss: 0.0053      	 |	 Train Accuracy: 84.77 %
>>>> Epoch: 5:      	 |	 Valid Loss: 0.0008      	|	 Valid Accuracy: 83.59 %

>>>> Epoch: 6:	      |	 Train Loss: 0.0051      	 |	 Train Accuracy: 8

  0%|          | 0/21 [00:00<?, ?it/s]

>>>> Epoch: 1:	      |	 Train Loss: 0.0081      	 |	 Train Accuracy: 75.78 %
>>>> Epoch: 1:      	 |	 Valid Loss: 0.0009      	|	 Valid Accuracy: 80.86 %

>>>> Epoch: 1:	      |	 Train Loss: 0.0063      	 |	 Train Accuracy: 82.81 %
>>>> Epoch: 1:      	 |	 Valid Loss: 0.0008      	|	 Valid Accuracy: 81.25 %

>>>> Epoch: 2:	      |	 Train Loss: 0.0056      	 |	 Train Accuracy: 80.86 %
>>>> Epoch: 2:      	 |	 Valid Loss: 0.0008      	|	 Valid Accuracy: 83.20 %

>>>> Epoch: 3:	      |	 Train Loss: 0.0051      	 |	 Train Accuracy: 82.42 %
>>>> Epoch: 3:      	 |	 Valid Loss: 0.0007      	|	 Valid Accuracy: 83.98 %

>>>> Epoch: 4:	      |	 Train Loss: 0.0048      	 |	 Train Accuracy: 85.94 %
>>>> Epoch: 4:      	 |	 Valid Loss: 0.0007      	|	 Valid Accuracy: 84.77 %

>>>> Epoch: 5:	      |	 Train Loss: 0.0046      	 |	 Train Accuracy: 85.55 %
>>>> Epoch: 5:      	 |	 Valid Loss: 0.0007      	|	 Valid Accuracy: 85.94 %

>>>> Epoch: 6:	      |	 Train Loss: 0.0044      	 |	 Train Accuracy: 8

  0%|          | 0/21 [00:00<?, ?it/s]

>>>> Epoch: 1:	      |	 Train Loss: 0.0107      	 |	 Train Accuracy: 81.64 %
>>>> Epoch: 1:      	 |	 Valid Loss: 0.0010      	|	 Valid Accuracy: 77.73 %

>>>> Epoch: 1:	      |	 Train Loss: 0.0079      	 |	 Train Accuracy: 76.95 %
>>>> Epoch: 1:      	 |	 Valid Loss: 0.0009      	|	 Valid Accuracy: 80.08 %

>>>> Epoch: 2:	      |	 Train Loss: 0.0068      	 |	 Train Accuracy: 82.81 %
>>>> Epoch: 2:      	 |	 Valid Loss: 0.0009      	|	 Valid Accuracy: 81.25 %

>>>> Epoch: 3:	      |	 Train Loss: 0.0062      	 |	 Train Accuracy: 80.47 %
>>>> Epoch: 3:      	 |	 Valid Loss: 0.0008      	|	 Valid Accuracy: 82.03 %

>>>> Epoch: 4:	      |	 Train Loss: 0.0057      	 |	 Train Accuracy: 86.72 %
>>>> Epoch: 4:      	 |	 Valid Loss: 0.0008      	|	 Valid Accuracy: 83.59 %

>>>> Epoch: 5:	      |	 Train Loss: 0.0054      	 |	 Train Accuracy: 83.20 %
>>>> Epoch: 5:      	 |	 Valid Loss: 0.0008      	|	 Valid Accuracy: 83.98 %

>>>> Epoch: 6:	      |	 Train Loss: 0.0052      	 |	 Train Accuracy: 8

  0%|          | 0/21 [00:00<?, ?it/s]

>>>> Epoch: 1:	      |	 Train Loss: 0.0093      	 |	 Train Accuracy: 75.78 %
>>>> Epoch: 1:      	 |	 Valid Loss: 0.0009      	|	 Valid Accuracy: 79.30 %

>>>> Epoch: 1:	      |	 Train Loss: 0.0070      	 |	 Train Accuracy: 82.81 %
>>>> Epoch: 1:      	 |	 Valid Loss: 0.0008      	|	 Valid Accuracy: 80.47 %

>>>> Epoch: 2:	      |	 Train Loss: 0.0060      	 |	 Train Accuracy: 79.30 %
>>>> Epoch: 2:      	 |	 Valid Loss: 0.0008      	|	 Valid Accuracy: 83.59 %

>>>> Epoch: 3:	      |	 Train Loss: 0.0055      	 |	 Train Accuracy: 81.25 %
>>>> Epoch: 3:      	 |	 Valid Loss: 0.0007      	|	 Valid Accuracy: 85.16 %

>>>> Epoch: 4:	      |	 Train Loss: 0.0051      	 |	 Train Accuracy: 82.42 %
>>>> Epoch: 4:      	 |	 Valid Loss: 0.0007      	|	 Valid Accuracy: 85.16 %

>>>> Epoch: 5:	      |	 Train Loss: 0.0048      	 |	 Train Accuracy: 87.50 %
>>>> Epoch: 5:      	 |	 Valid Loss: 0.0007      	|	 Valid Accuracy: 87.89 %

>>>> Epoch: 6:	      |	 Train Loss: 0.0046      	 |	 Train Accuracy: 8