<a href="https://colab.research.google.com/github/kishorex18/PytorchDL/blob/main/RNNbasics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader
import torchvision.transforms as transforms

In [33]:
# --- Data / optimisation settings ---
batch_size = 64          # samples per DataLoader batch
learning_rate = 0.001    # step size handed to the Adam optimizer
epochs = 3               # full passes over the training loader

# --- Input layout: each sample is read as a sequence of feature vectors ---
embedding_dim = 28       # features per time step (the models' input_size)
sequence_length = 28     # time steps per sample

# --- Model shape ---
hidden_size = 256        # width of each recurrent layer's hidden state
num_layers = 2           # stacked recurrent layers
num_classes = 10         # size of the classifier output

In [5]:
# Training split of MNIST, downloaded into dataset/ on first use and converted
# to tensors; served in shuffled mini-batches.
train_data = MNIST(
    root="dataset/",
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
train_batches = DataLoader(
    dataset=train_data,
    batch_size=batch_size,
    shuffle=True,
)

# Held-out test split, prepared the same way.
test_data = MNIST(
    root="dataset/",
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)
test_batches = DataLoader(
    dataset=test_data,
    batch_size=batch_size,
    shuffle=True,
)

In [8]:
# Run on the GPU when CUDA is usable, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [38]:
class RNN(nn.Module):
    """Multi-layer vanilla RNN classifier over fixed-length sequences.

    The input is passed through ``nn.RNN`` (``batch_first=True``); the hidden
    outputs of *every* time step are flattened into one vector per sample and
    a single linear layer maps that vector to class scores.

    Args:
        input_size: Number of features per time step.
        num_layers: Number of stacked recurrent layers.
        hidden_size: Width of each recurrent layer's hidden state.
        seq_len: Number of time steps in every input sequence. ``None``
            (default) falls back to the module-level ``sequence_length``.
        n_classes: Number of output classes. ``None`` (default) falls back to
            the module-level ``num_classes``.
    """

    def __init__(self, input_size, num_layers, hidden_size,
                 seq_len=None, n_classes=None):
        super().__init__()
        # Keep the original three-argument call working by reading the
        # notebook-level hyperparameters only when no explicit value is given.
        if seq_len is None:
            seq_len = sequence_length
        if n_classes is None:
            n_classes = num_classes

        self.input_size = input_size
        self.num_layers = num_layers
        self.hidden_size = hidden_size

        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        # The head consumes all time steps at once, so its input width is tied
        # to the sequence length.
        self.fc = nn.Linear(hidden_size * seq_len, n_classes)

    def forward(self, X):
        """Return class scores of shape ``(batch, n_classes)``.

        Args:
            X: Tensor of shape ``(batch, seq_len, input_size)``.
        """
        # No explicit h0: nn.RNN starts from an all-zero hidden state sized
        # for this module's own layer count and allocated alongside the input,
        # so the result no longer depends on the global `num_layers`/`device`.
        output, _ = self.rnn(X)
        # (batch, seq_len, hidden) -> (batch, seq_len * hidden)
        output = output.reshape(output.shape[0], -1)
        return self.fc(output)

In [48]:
class GRU(nn.Module):
    """Multi-layer GRU classifier over fixed-length sequences.

    The input is passed through ``nn.GRU`` (``batch_first=True``); the hidden
    outputs of *every* time step are flattened into one vector per sample and
    a single linear layer maps that vector to class scores.

    Args:
        input_size: Number of features per time step.
        num_layers: Number of stacked recurrent layers.
        hidden_size: Width of each recurrent layer's hidden state.
        seq_len: Number of time steps in every input sequence. ``None``
            (default) falls back to the module-level ``sequence_length``.
        n_classes: Number of output classes. ``None`` (default) falls back to
            the module-level ``num_classes``.
    """

    def __init__(self, input_size, num_layers, hidden_size,
                 seq_len=None, n_classes=None):
        super().__init__()
        # Keep the original three-argument call working by reading the
        # notebook-level hyperparameters only when no explicit value is given.
        if seq_len is None:
            seq_len = sequence_length
        if n_classes is None:
            n_classes = num_classes

        self.input_size = input_size
        self.num_layers = num_layers
        self.hidden_size = hidden_size

        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        # The head consumes all time steps at once, so its input width is tied
        # to the sequence length.
        self.fc = nn.Linear(hidden_size * seq_len, n_classes)

    def forward(self, X):
        """Return class scores of shape ``(batch, n_classes)``.

        Args:
            X: Tensor of shape ``(batch, seq_len, input_size)``.
        """
        # No explicit h0: nn.GRU starts from an all-zero hidden state sized
        # for this module's own layer count and allocated alongside the input,
        # so the result no longer depends on the global `num_layers`/`device`.
        output, _ = self.gru(X)
        # (batch, seq_len, hidden) -> (batch, seq_len * hidden)
        output = output.reshape(output.shape[0], -1)
        return self.fc(output)

In [49]:
class LSTM(nn.Module):
    """Multi-layer LSTM classifier over fixed-length sequences.

    The input is passed through ``nn.LSTM`` (``batch_first=True``); the hidden
    outputs of *every* time step are flattened into one vector per sample and
    a single linear layer maps that vector to class scores.

    Args:
        input_size: Number of features per time step.
        num_layers: Number of stacked recurrent layers.
        hidden_size: Width of each recurrent layer's hidden state.
        seq_len: Number of time steps in every input sequence. ``None``
            (default) falls back to the module-level ``sequence_length``.
        n_classes: Number of output classes. ``None`` (default) falls back to
            the module-level ``num_classes``.
    """

    def __init__(self, input_size, num_layers, hidden_size,
                 seq_len=None, n_classes=None):
        super().__init__()
        # Keep the original three-argument call working by reading the
        # notebook-level hyperparameters only when no explicit value is given.
        if seq_len is None:
            seq_len = sequence_length
        if n_classes is None:
            n_classes = num_classes

        self.input_size = input_size
        self.num_layers = num_layers
        self.hidden_size = hidden_size

        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        # The head consumes all time steps at once, so its input width is tied
        # to the sequence length.
        self.fc = nn.Linear(hidden_size * seq_len, n_classes)

    def forward(self, X):
        """Return class scores of shape ``(batch, n_classes)``.

        Args:
            X: Tensor of shape ``(batch, seq_len, input_size)``.
        """
        # No explicit (h0, c0): nn.LSTM starts from all-zero hidden and cell
        # states sized for this module's own layer count and allocated
        # alongside the input, so the result no longer depends on the global
        # `num_layers`/`device`.
        output, _ = self.lstm(X)
        # (batch, seq_len, hidden) -> (batch, seq_len * hidden)
        output = output.reshape(output.shape[0], -1)
        return self.fc(output)

In [53]:
def train(model, train_batches, num_epochs=None, lr=None, target_device=None):
    """Fit ``model`` with Adam and cross-entropy loss, then return it.

    Args:
        model: ``nn.Module`` mapping a batch of inputs to class scores. It is
            moved to ``target_device`` in place before training starts.
        train_batches: Re-iterable of ``(data, targets)`` pairs (for example a
            ``DataLoader``); it is iterated once per epoch. Dimension 1 of
            ``data`` is squeezed away when it has size 1 — presumably the
            image channel axis; confirm against the dataset in use.
        num_epochs: Passes over ``train_batches``. ``None`` (default) falls
            back to the module-level ``epochs``.
        lr: Adam learning rate. ``None`` (default) falls back to the
            module-level ``learning_rate``.
        target_device: Device to train on. ``None`` (default) falls back to
            the module-level ``device``.

    Returns:
        The same ``model`` object, updated in place.
    """
    if num_epochs is None:
        num_epochs = epochs
    if lr is None:
        lr = learning_rate
    if target_device is None:
        target_device = device

    # Every batch is moved to `target_device`; the parameters have to live
    # there too or the forward pass fails with a device mismatch as soon as a
    # GPU is selected. Module.to() moves the parameters in place.
    model.to(target_device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    for _ in range(num_epochs):
        for data, targets in train_batches:
            data = data.to(target_device).squeeze(1)
            targets = targets.to(target_device)

            loss = criterion(model(data), targets)

            # Clear gradients left over from the previous step before
            # accumulating this batch's.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
    return model

In [51]:
def check_accuracy(batches, model, target_device=None):
    """Return the fraction of samples in ``batches`` that ``model`` classifies correctly.

    The model is switched to eval mode for the duration of the pass and is put
    back into whichever mode (train or eval) it was in on entry, even if the
    pass raises.

    Args:
        batches: Iterable of ``(data, targets)`` pairs. Dimension 1 of
            ``data`` is squeezed away when it has size 1.
        model: ``nn.Module`` mapping a batch of inputs to class scores of
            shape ``(batch, classes)``.
        target_device: Device the batches are moved to. ``None`` (default)
            falls back to the module-level ``device``.

    Returns:
        ``num_correct / num_samples`` as a 0-dim tensor.

    Raises:
        ZeroDivisionError: If ``batches`` yields nothing.
    """
    if target_device is None:
        target_device = device

    num_samples = 0
    num_correct = 0

    # Remember the caller's mode instead of unconditionally forcing train mode
    # afterwards.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for data, targets in batches:
                data = data.to(target_device).squeeze(1)
                targets = targets.to(target_device)
                output = model(data)
                # Index of the highest score along the class dimension.
                _, predicted = output.max(1)
                num_correct += (targets == predicted).sum()
                num_samples += predicted.size(0)
    finally:
        model.train(was_training)
    return num_correct / num_samples

In [54]:
# Build the RNN classifier and place its parameters on `device`; the batches
# are moved there too, so a model left on the CPU fails once CUDA is selected.
model_rnn = RNN(embedding_dim, num_layers, hidden_size).to(device)

In [55]:
# Fit the RNN on the training loader; train() hands back the model it was given.
trained_rnn = train(model_rnn, train_batches)

In [56]:
# Report the RNN's accuracy on the test loader.
print(check_accuracy(test_batches, model_rnn))

tensor(0.9633)


In [57]:
# Build the GRU classifier and place its parameters on `device`; the batches
# are moved there too, so a model left on the CPU fails once CUDA is selected.
model_gru = GRU(embedding_dim, num_layers, hidden_size).to(device)

In [66]:
# Fit the GRU on the training loader; train() hands back the model it was given.
trained_gru = train(model_gru, train_batches)

In [67]:
# Report the GRU's accuracy on the test loader.
print(check_accuracy(test_batches, model_gru))

tensor(0.9872)


In [62]:
# Build the LSTM classifier and place its parameters on `device`; the batches
# are moved there too, so a model left on the CPU fails once CUDA is selected.
model_lstm = LSTM(embedding_dim, num_layers, hidden_size).to(device)

In [63]:
# Fit the LSTM on the training loader; train() hands back the model it was given.
trained_lstm = train(model_lstm, train_batches)

In [64]:
# Report the LSTM's accuracy on the test loader.
print(check_accuracy(test_batches, model_lstm))

tensor(0.9871)
