<a href="https://colab.research.google.com/github/mehrshad-sdtn/DeepLearning/blob/master/PyTorch/2_Pytorch_RNN_LSTM_GRU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# prompt: import all the necessary packages for common pytorch programs

import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


In [None]:
# Width of the recurrent hidden state; passed as `hidden_size` when the model
# is instantiated further down in the notebook.
hidden_size = 256


class RNN(nn.Module):
    """GRU-based sequence classifier.

    The input sequence is run through a stacked ``nn.GRU``; the hidden states
    of *all* time steps are flattened into one vector per sample and mapped to
    class scores by a single linear layer.

    Args:
        input_size: Number of features per time step.
        hidden_size: Number of features in the GRU hidden state.
        num_layers: Number of stacked GRU layers.
        num_classes: Number of output scores per sample.
        seq_len: Number of time steps in every input sequence. It fixes the
            input width of the linear layer. When omitted, the notebook-level
            ``sequence_length`` hyperparameter is used, so that global must be
            defined before the model is instantiated.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, seq_len=None):
        super().__init__()
        if seq_len is None:
            # Backward compatibility: earlier callers relied on the global.
            seq_len = sequence_length
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        # The classifier sees every time step, hence hidden_size * seq_len inputs.
        self.fc = nn.Linear(hidden_size * seq_len, num_classes)

    def forward(self, x):
        """Return class scores of shape (batch, num_classes).

        Args:
            x: Tensor of shape (batch, seq_len, input_size).
        """
        # Allocate the initial state directly on the input's device and in the
        # input's dtype, so the module does not depend on a global `device`.
        h0 = torch.zeros(
            self.num_layers, x.size(0), self.hidden_size,
            device=x.device, dtype=x.dtype,
        )
        out, _ = self.rnn(x, h0)
        # (batch, seq_len, hidden_size) -> (batch, seq_len * hidden_size)
        out = out.reshape(out.shape[0], -1)
        return self.fc(out)



class LSTM(nn.Module):
    """LSTM-based sequence classifier.

    The input sequence is run through a stacked ``nn.LSTM``; the hidden states
    of *all* time steps are flattened into one vector per sample and mapped to
    class scores by a single linear layer.

    Args:
        input_size: Number of features per time step.
        hidden_size: Number of features in the LSTM hidden and cell states.
        num_layers: Number of stacked LSTM layers.
        num_classes: Number of output scores per sample.
        seq_len: Number of time steps in every input sequence. It fixes the
            input width of the linear layer. When omitted, the notebook-level
            ``sequence_length`` hyperparameter is used, so that global must be
            defined before the model is instantiated.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, seq_len=None):
        super().__init__()
        if seq_len is None:
            # Backward compatibility: earlier callers relied on the global.
            seq_len = sequence_length
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        # The classifier sees every time step, hence hidden_size * seq_len inputs.
        self.fc = nn.Linear(hidden_size * seq_len, num_classes)

    def forward(self, x):
        """Return class scores of shape (batch, num_classes).

        Args:
            x: Tensor of shape (batch, seq_len, input_size).
        """
        # Zero initial hidden and cell states, created on the input's device
        # and in its dtype so the module does not depend on a global `device`.
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
        c0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)

        out, _ = self.rnn(x, (h0, c0))
        # (batch, seq_len, hidden_size) -> (batch, seq_len * hidden_size)
        out = out.reshape(out.shape[0], -1)
        return self.fc(out)





In [None]:
# Run on the GPU when CUDA is usable; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cuda


In [None]:
# --- Hyperparameters ---
# Input geometry: each sample is a sequence of `sequence_length` steps with
# `input_size` features per step.
sequence_length = 28
input_size = 28

# Model architecture.
num_classes = 10
num_layers = 2

# Optimisation settings.
num_epochs = 5
batch_size = 64
learning_rate = 1e-4

In [None]:
# MNIST train/test splits, converted to tensors; `download=True` fetches the
# files into ./data when needed.
train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
test_dataset = datasets.MNIST(
    root='./data',
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)

# Only the training batches are shuffled; the test order stays fixed.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Build the LSTM classifier from the hyperparameters, then move it to `device`.
model = LSTM(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_classes=num_classes,
)
model = model.to(device)

In [None]:
# Training objective (cross-entropy over the class scores) and the Adam
# optimizer that updates every model parameter.
optimizer = optim.Adam(
    model.parameters(),
    lr=learning_rate,
)
criterion = nn.CrossEntropyLoss()

In [None]:
def save_checkpoint(state, filename="my_checkpoint.pth.tar"):
    """Write ``state`` to disk with ``torch.save``.

    Args:
        state: Object to serialize (the training loop passes a dict holding
            the model and optimizer state dicts).
        filename: Destination path of the checkpoint file.
    """
    print("=> Saving checkpoint")
    torch.save(obj=state, f=filename)


def load_checkpoint(checkpoint, model, optimizer=None):
    """Restore model and optimizer state from a checkpoint dict.

    Args:
        checkpoint: Mapping with the model state dict under ``'state_dict'``
            and the optimizer state dict under ``'optimizer'`` (the layout the
            training loop saves).
        model: Module whose parameters are overwritten.
        optimizer: Optimizer whose state is overwritten. When omitted, the
            notebook-level ``optimizer`` variable is used.

    Raises:
        NameError: If ``optimizer`` is omitted and no global ``optimizer``
            has been defined yet.
    """
    print("=> Loading checkpoint")
    model.load_state_dict(checkpoint['state_dict'])
    if optimizer is None:
        # Backward compatibility: earlier callers relied on the global.
        try:
            optimizer = globals()['optimizer']
        except KeyError:
            raise NameError("name 'optimizer' is not defined") from None
    # The optimizer state is stored under its own key; loading the model's
    # 'state_dict' here (as before) fails or corrupts the optimizer.
    optimizer.load_state_dict(checkpoint['optimizer'])



In [None]:
# check accuracy on training and test
def check_accuracy(loader, model):
    """Print and return the classification accuracy of ``model`` on ``loader``.

    Args:
        loader: Iterable of ``(inputs, labels)`` batches whose ``dataset``
            attribute has a boolean ``train`` flag (used only to pick the
            header line that is printed).
        model: Module mapping a batch of inputs to per-class scores.

    Returns:
        Accuracy in percent as a float; ``0.0`` when the loader yields no
        samples.
    """
    if loader.dataset.train:
        print('Checking accuracy on training data:')
    else:
        print('Checking accuracy on test data')

    # Evaluate on whatever device the model lives on instead of relying on a
    # global `device`; a parameter-free model leaves the batches where they are.
    first_param = next(model.parameters(), None)
    target_device = None if first_param is None else first_param.device

    was_training = model.training
    num_correct = 0
    num_samples = 0
    model.eval()
    try:
        with torch.no_grad():
            for x, y in loader:
                if target_device is not None:
                    x = x.to(device=target_device)
                    y = y.to(device=target_device)
                # Drop dim 1 when it has size 1 (e.g. a single image channel).
                x = x.squeeze(1)

                scores = model(x)
                _, predictions = scores.max(1)
                num_correct += (predictions == y).sum()
                num_samples += predictions.size(0)
    finally:
        # Restore the caller's mode rather than forcing training mode.
        model.train(was_training)

    if num_samples:
        accuracy = float(num_correct) / float(num_samples) * 100
    else:
        accuracy = 0.0
    print(f" {accuracy:.2f}%")
    return accuracy



# train
for epoch in range(num_epochs):
    # Snapshot model and optimizer state at the start of every third epoch
    # (epoch 0, 3, ...), i.e. before that epoch's updates are applied.
    if epoch % 3 == 0:
        checkpoint = {'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict()}
        save_checkpoint(checkpoint)

    losses = []
    for data, targets in train_loader:
        # Move the batch to the training device and drop dim 1 when it has
        # size 1 (the channel axis of the tensor-converted images).
        data = data.to(device).squeeze(1)
        targets = targets.to(device)

        # forward
        scores = model(data)
        loss = criterion(scores, targets)
        # Record a plain float: storing the tensor itself would keep device
        # memory and autograd history referenced for the whole epoch.
        losses.append(loss.item())

        # backward
        optimizer.zero_grad()
        loss.backward()

        # gradient descent
        optimizer.step()

    # Report the mean over all mini-batches rather than only the last batch,
    # which is noisy; an empty loader yields NaN instead of a NameError.
    mean_loss = sum(losses) / len(losses) if losses else float('nan')
    print(f"Epoch {epoch+1}/{num_epochs}: loss {mean_loss}")
    check_accuracy(train_loader, model)
    check_accuracy(test_loader, model)







Epoch 1/5: loss 0.09761921316385269
Checking accuracy on training data:
 93.16%
Checking accuracy on test data
 93.50%
Epoch 2/5: loss 0.0966176837682724
Checking accuracy on training data:
 95.85%
Checking accuracy on test data
 95.80%
Epoch 3/5: loss 0.2856576442718506
Checking accuracy on training data:
 96.95%
Checking accuracy on test data
 96.79%
Epoch 4/5: loss 0.10122382640838623
Checking accuracy on training data:
 97.77%
Checking accuracy on test data
 97.35%
Epoch 5/5: loss 0.14088431000709534
Checking accuracy on training data:
 98.10%
Checking accuracy on test data
 97.70%
