<a href="https://colab.research.google.com/github/novoforce/Exploring-Pytorch/blob/master/new/1001_Pytorch_RNN_GRU_LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [1]:
import torch
import torch.nn as nn #All neural network layers definitions are present
import torch.nn.functional as F #functional api for NN layers, used to adding activations
import torch.optim as optim #Optimizers are defined here
from torch.utils.data import DataLoader #Data management for NN
import torchvision.datasets as datasets #Datasets for the NN
import torchvision.transforms as transforms #Data transform for augmentation

# Create a Recurrent Neural Network

In [21]:
class RNN(nn.Module):  #subclassing/inheriting from nn.Module
  """Multi-layer vanilla RNN classifier that uses every time step's output.

  The recurrent outputs of all time steps are flattened and fed to a single
  linear layer, so the input sequence length is fixed at construction time.

  Args:
    input_size: number of features per time step (28 for the row-by-row
      setup used in this notebook, not the 784 pixels of a whole image).
    hidden_size: width of the recurrent hidden state.
    num_layers: number of stacked recurrent layers.
    num_classes: number of output scores per sample.
    sequence_length: number of time steps in every input sequence. Defaults
      to 28, the value set in this notebook's hyperparameters, so existing
      four-argument calls keep working without reading a module global.
  """
  def __init__(self,input_size,hidden_size,num_layers,num_classes,sequence_length=28):
    super(RNN,self).__init__() #calls the init function of the parent class(nn.Module)
    self.hidden_size= hidden_size
    self.num_layers= num_layers
    self.sequence_length= sequence_length
    self.rnn= nn.RNN(input_size,hidden_size,num_layers,batch_first=True)
    # The classifier sees the hidden outputs of all time steps concatenated
    self.fc= nn.Linear(hidden_size*sequence_length,num_classes)

  def forward(self,x):
    """Return class scores of shape (batch x num_classes).

    x is expected as (batch x sequence_length x input_size) because the
    recurrent layer is built with batch_first=True.
    """
    # Create the initial hidden state directly on the input's device/dtype so the
    # module does not depend on a global `device` variable.
    h0= torch.zeros(self.num_layers,x.size(0),self.hidden_size,device=x.device,dtype=x.dtype)
    # Forward Propagation
    out,_ = self.rnn(x,h0) #-->shape: (batch x sequence_length x hidden_size)
    out= out.reshape(out.shape[0],-1) #-->shape: (batch x sequence_length*hidden_size)
    out= self.fc(out) #-->shape: (batch x num_classes)
    return out

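# Testing the class with some randomly generated values
# (run the 'Set Device' and 'Hyperparameters' cells first)
# model= RNN(28,256,2,10).to(device)
# x= torch.randn((64,28,28)).to(device) #-->shape: (batch x sequence_length x input_size)
# print(model(x).shape) #-->expected: torch.Size([64, 10])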


# Create a Gated Recurrent Unit(GRU)

In [38]:
class GRU(nn.Module):  #subclassing/inheriting from nn.Module
  """Multi-layer GRU classifier that uses every time step's output.

  The recurrent outputs of all time steps are flattened and fed to a single
  linear layer, so the input sequence length is fixed at construction time.

  Args:
    input_size: number of features per time step (28 for the row-by-row
      setup used in this notebook, not the 784 pixels of a whole image).
    hidden_size: width of the recurrent hidden state.
    num_layers: number of stacked recurrent layers.
    num_classes: number of output scores per sample.
    sequence_length: number of time steps in every input sequence. Defaults
      to 28, the value set in this notebook's hyperparameters, so existing
      four-argument calls keep working without reading a module global.
  """
  def __init__(self,input_size,hidden_size,num_layers,num_classes,sequence_length=28):
    super(GRU,self).__init__() #calls the init function of the parent class(nn.Module)
    self.hidden_size= hidden_size
    self.num_layers= num_layers
    self.sequence_length= sequence_length
    self.gru= nn.GRU(input_size,hidden_size,num_layers,batch_first=True)
    # The classifier sees the hidden outputs of all time steps concatenated
    self.fc= nn.Linear(hidden_size*sequence_length,num_classes)

  def forward(self,x):
    """Return class scores of shape (batch x num_classes).

    x is expected as (batch x sequence_length x input_size) because the
    recurrent layer is built with batch_first=True.
    """
    # Create the initial hidden state directly on the input's device/dtype so the
    # module does not depend on a global `device` variable.
    h0= torch.zeros(self.num_layers,x.size(0),self.hidden_size,device=x.device,dtype=x.dtype)
    # Forward Propagation
    out,_ = self.gru(x,h0) #-->shape: (batch x sequence_length x hidden_size)
    out= out.reshape(out.shape[0],-1) #-->shape: (batch x sequence_length*hidden_size)
    out= self.fc(out) #-->shape: (batch x num_classes)
    return out

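# Testing the class with some randomly generated values
# (run the 'Set Device' and 'Hyperparameters' cells first)
# model= GRU(28,256,2,10).to(device)
# x= torch.randn((64,28,28)).to(device) #-->shape: (batch x sequence_length x input_size)
# print(model(x).shape) #-->expected: torch.Size([64, 10])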


# Create a Long Short Term Memory Unit(LSTM)

In [48]:
class LSTM(nn.Module):  #subclassing/inheriting from nn.Module
  """Multi-layer LSTM classifier that uses every time step's output.

  The recurrent outputs of all time steps are flattened and fed to a single
  linear layer, so the input sequence length is fixed at construction time.

  Args:
    input_size: number of features per time step (28 for the row-by-row
      setup used in this notebook, not the 784 pixels of a whole image).
    hidden_size: width of the hidden and cell states.
    num_layers: number of stacked recurrent layers.
    num_classes: number of output scores per sample.
    sequence_length: number of time steps in every input sequence. Defaults
      to 28, the value set in this notebook's hyperparameters, so existing
      four-argument calls keep working without reading a module global.
  """
  def __init__(self,input_size,hidden_size,num_layers,num_classes,sequence_length=28):
    super(LSTM,self).__init__() #calls the init function of the parent class(nn.Module)
    self.hidden_size= hidden_size
    self.num_layers= num_layers
    self.sequence_length= sequence_length
    self.lstm= nn.LSTM(input_size,hidden_size,num_layers,batch_first=True)
    # The classifier sees the hidden outputs of all time steps concatenated
    self.fc= nn.Linear(hidden_size*sequence_length,num_classes)

  def forward(self,x):
    """Return class scores of shape (batch x num_classes).

    x is expected as (batch x sequence_length x input_size) because the
    recurrent layer is built with batch_first=True.
    """
    # Create the initial states directly on the input's device/dtype so the
    # module does not depend on a global `device` variable.
    state_shape= (self.num_layers,x.size(0),self.hidden_size)
    h0= torch.zeros(state_shape,device=x.device,dtype=x.dtype) #initialize hidden state
    c0= torch.zeros(state_shape,device=x.device,dtype=x.dtype) #initialize cell state (specific for lstm)
    # Forward Propagation
    out,_ = self.lstm(x,(h0,c0)) #-->shape: (batch x sequence_length x hidden_size)
    out= out.reshape(out.shape[0],-1) #-->shape: (batch x sequence_length*hidden_size)
    out= self.fc(out) #-->shape: (batch x num_classes)
    return out

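# Testing the class with some randomly generated values
# (run the 'Set Device' and 'Hyperparameters' cells first)
# model= LSTM(28,256,2,10).to(device)
# x= torch.randn((64,28,28)).to(device) #-->shape: (batch x sequence_length x input_size)
# print(model(x).shape) #-->expected: torch.Size([64, 10])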


# Set Device

In [39]:
# Start from the CPU and switch to CUDA only when PyTorch reports a usable GPU.
device= torch.device('cpu')
if torch.cuda.is_available():
    device= torch.device('cuda')

# Hyperparameters

In [44]:
# --- Data geometry ---
# The models receive `input_size` features per time step and flatten
# `sequence_length` time steps before the final linear layer
# (presumably one 28-pixel MNIST row per step, 28 rows per image).
sequence_length= 28
input_size= 28
num_classes= 10

# --- Network size ---
hidden_size= 256
num_layers= 2

# --- Optimization ---
batch_size= 64
learning_rate= 1e-3
num_epochs= 2

# Load Data

In [41]:
# One shared transform: both splits are converted to tensors the same way.
to_tensor= transforms.ToTensor()

# Training split: reshuffled by the loader so batches differ between epochs.
train_dataset= datasets.MNIST(root='datasets/',train=True,transform=to_tensor,download=True)
train_loader= DataLoader(dataset=train_dataset,batch_size=batch_size,shuffle=True)

# Test split: evaluation only counts correct predictions, so a random order adds
# nothing; keep it unshuffled so batches are reproducible when inspected.
test_dataset= datasets.MNIST(root='datasets/',train=False,transform=to_tensor,download=True)
test_loader= DataLoader(dataset=test_dataset,batch_size=batch_size,shuffle=False)

# Initialize network

In [34]:
# Vanilla RNN variant. NOTE: `model` is re-bound by the GRU and LSTM cells below,
# so on a top-to-bottom run only the last of the three is actually trained.
model= RNN(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_classes=num_classes,
)
model= model.to(device)

In [42]:
# GRU variant. NOTE: this replaces the RNN bound to `model` above and is itself
# replaced by the LSTM cell below on a top-to-bottom run.
model= GRU(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_classes=num_classes,
)
model= model.to(device)

In [49]:
# LSTM variant. Being the last assignment to `model`, this is the network the
# optimizer, training loop and accuracy check below operate on.
model= LSTM(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_classes=num_classes,
)
model= model.to(device)

# Loss & Optimizers

In [50]:
# Adam is bound to the parameters of whichever `model` exists when this cell runs,
# so re-run this cell after re-creating the model.
optimizer= optim.Adam(params=model.parameters(),lr=learning_rate)
# Loss comparing the raw class scores with the integer class targets
criterion= nn.CrossEntropyLoss()

# Train Network

In [51]:
for epoch in range(num_epochs):
    for images,labels in train_loader:
        # Move the batch to the compute device; squeeze(1) drops dimension 1 when
        # it has size 1 (presumably the single image channel), leaving
        # (batch x rows x columns) for the recurrent model.
        images= images.to(device).squeeze(1)
        labels= labels.to(device)

        # Clear gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass and loss
        loss= criterion(model(images),labels)

        # Backward pass, then one optimizer update
        loss.backward()
        optimizer.step()

# Check accuracy on training & test to see how good our model is

In [52]:
def check_accuracy(loader,model):
    """Print the classification accuracy of `model` over every batch in `loader`.

    Args:
        loader: iterable yielding (inputs, targets) batches. If `loader.dataset`
            has a boolean `train` attribute it is used to label the report;
            otherwise a neutral header is printed.
        model: module mapping a batch of inputs to (batch x num_classes) scores.

    Returns:
        None. The result is reported with print().

    The model is switched to eval mode for the measurement and afterwards put
    back into whatever mode it was in before the call, even if a batch raises.
    """
    # Label the report from the dataset's `train` flag when one exists
    is_train_split= getattr(getattr(loader,'dataset',None),'train',None)

    # Run on the device the model lives on instead of relying on a global;
    # a model without parameters leaves the inputs where they already are.
    first_param= next(model.parameters(),None)
    model_device= first_param.device if first_param is not None else None

    num_correct= 0
    num_samples= 0
    was_training= model.training
    model.eval()

    try:
        with torch.no_grad():
            if is_train_split is None:
                print("Checking accuracy on provided data")
            elif is_train_split:
                print("Checking accuracy on Training data")
            else:
                print("Checking accuracy on Testing data")

            for x,y in loader:
                if model_device is not None:
                    x= x.to(model_device)
                x= x.squeeze(1)  #drop dimension 1 when it has size 1
                y= y.to(x.device)
                scores= model(x)  #batch x num_classes
                _,predictions= scores.max(1)  #index of the highest score per sample
                # .item() keeps the running count a plain Python int
                num_correct += (predictions == y).sum().item()
                num_samples += predictions.size(0)
    finally:
        # Restore the caller's mode rather than forcing training mode
        model.train(was_training)

    if num_samples == 0:
        # Avoid dividing by zero when the loader yields no batches
        print("Got 0/0 samples, accuracy is undefined for an empty loader")
        return

    print(f'Got {num_correct}/{num_samples} with accuracy {float(num_correct)/float(num_samples)*100:.2f} %')

# Report accuracy on both splits, training data first.
for eval_loader in (train_loader,test_loader):
    check_accuracy(eval_loader,model)


Checking accuracy on Training data
Got 59116/60000 with accuracy 98.53 %
Checking accuracy on Testing data
Got 9827/10000 with accuracy 98.27 %
