In [None]:
# Imports
import torch
import torchvision
import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
import torch.optim as optim  # For all Optimization algorithms, SGD, Adam, etc.
import torch.nn.functional as F  # All functions that don't have any parameters
from torch.utils.data import (
    DataLoader,
)  # Gives easier dataset managment and creates mini batches
import torchvision.datasets as datasets  # Has standard datasets we can import in a nice way
import torchvision.transforms as transforms  # Transformations we can perform on our dataset

In [32]:
# --- Sequence / model dimensions ---
input_size = 28    # features per time step (passed to nn.RNN as input_size)
seq_len = 28       # time steps per sample
hidden_size = 256  # width of each recurrent layer
num_layers = 5     # number of stacked recurrent layers
num_classes = 10   # number of output classes

# --- Optimisation settings ---
learning_rate = 0.001
batch_size = 64
num_epochs = 5
class RNN(nn.Module):
    """Multi-layer ReLU RNN classifier for fixed-length sequences.

    The recurrent outputs of *every* time step are flattened and passed
    through a single linear layer, so each input must contain exactly
    ``seq_len`` time steps.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of each recurrent layer.
        num_layers: Number of stacked recurrent layers.
        num_classes: Number of output scores per sample.
        seq_len: Number of time steps per sample; fixes the input width of
            the linear layer (``seq_len * hidden_size``). Defaults to 28.
            Pass it explicitly for any other sequence length.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, seq_len=28):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.seq_len = seq_len
        self.rnn1 = nn.RNN(
            input_size,
            hidden_size,
            num_layers,
            nonlinearity='relu',
            batch_first=True,
        )
        self.fc = nn.Linear(seq_len * hidden_size, num_classes)

    def forward(self, x):
        """Map ``x`` of shape (batch, seq_len, input_size) to (batch, num_classes) scores."""
        # Zero initial hidden state (a plain RNN has no cell state). It is
        # created directly on the input's device/dtype so the module does not
        # rely on a notebook-level `device` variable.
        h0 = torch.zeros(
            self.num_layers,
            x.size(0),
            self.hidden_size,
            device=x.device,
            dtype=x.dtype,
        )

        # Forward propagate through the recurrent stack
        out, _ = self.rnn1(x, h0)
        # Flatten the outputs of all time steps into one vector per sample
        out = out.reshape(out.shape[0], -1)
        # Classify from the concatenated per-step outputs
        return self.fc(out)
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Not referenced by anything else in this cell; kept so the name stays defined.
in_channels = 1

# num_classes, learning_rate, batch_size and num_epochs are defined once at
# the top of this cell. They are intentionally not re-declared here, so that
# editing them in one place cannot be silently overridden by a second copy.

# Load data
train_dataset = datasets.MNIST(
    root="/data", train=True, transform=transforms.ToTensor(), download=True
)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_dataset = datasets.MNIST(
    root="/data", train=False, transform=transforms.ToTensor(), download=True
)
# Evaluation does not depend on sample order, so the test split is not shuffled.
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Model, loss and optimizer
model = RNN(input_size, hidden_size, num_layers, num_classes).to(device=device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(num_epochs):
    print(f'epoch number ........... {epoch}')
    for batch_idx, (data, targets) in enumerate(train_loader):
        # Move the batch to the target device; squeeze(1) drops dimension 1
        # when it has size 1 so the model receives a 3-D tensor.
        data = data.to(device=device).squeeze(1)
        targets = targets.to(device=device)

        # Forward pass
        scores = model(data)

        # Loss calculation
        loss = criterion(scores, targets)

        # Clear gradients left over from the previous step, then backpropagate
        optimizer.zero_grad()
        loss.backward()

        # Parameter update
        optimizer.step()
def check_accuracy(loader, model):
    """Print how many samples from ``loader`` the model classifies correctly.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` batches. Dimension 1 of
            ``inputs`` is squeezed (when it has size 1) before the forward pass.
        model: ``nn.Module`` producing one row of class scores per sample.

    The model is put into eval mode for the duration of the check and is then
    returned to whichever mode it was in beforehand, even if the loop raises.
    An empty loader is reported with an accuracy of ``nan`` instead of raising
    ``ZeroDivisionError``. Nothing is returned.
    """
    # Evaluate on the device the model's parameters live on, rather than on a
    # notebook-level `device` variable. Parameter-free modules run on the CPU.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    num_correct = 0
    num_samples = 0
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for x, y in loader:
                x = x.to(device=run_device).squeeze(1)
                y = y.to(device=run_device)
                scores = model(x)
                # Index of the highest score per row is the predicted class
                _, predictions = scores.max(1)
                num_correct += (predictions == y).sum()
                num_samples += predictions.size(0)
    finally:
        # Restore the caller's mode instead of forcing training mode
        model.train(was_training)

    # num_correct is a 0-dim tensor after the first batch; normalise to int
    num_correct = int(num_correct)
    accuracy = num_correct / num_samples * 100 if num_samples else float('nan')
    print(f'Got{num_correct}/{num_samples} with accuracy {accuracy:.2f}')
# Report accuracy on the training split first, then on the test split.
for split_loader in (train_loader, test_loader):
    check_accuracy(split_loader, model)
        
        

epoch number ........... 0
epoch number ........... 1
epoch number ........... 2
epoch number ........... 3
epoch number ........... 4
Got59480/60000 with accuracy 99.13
Got9873/10000 with accuracy 98.73
