In [1]:
import torch
import torch.nn as nn
import torchvision
import torch.nn.functional as F

import numpy as np
import matplotlib.pyplot as plt

from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
from torch.utils.data.dataloader import DataLoader
from torch.utils.data import random_split
%matplotlib inline

ModuleNotFoundError: No module named 'torch'

In [6]:
train_dataset = MNIST(root='datasets/mnist/',transform=ToTensor(),download=True)
test_dataset  = MNIST(root='datasets/mnist/',train=False,transform=ToTensor())

In [21]:
len(test_dataset)

10000

In [86]:
batch_size = 100
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True, num_workers=4)
test_loader  = DataLoader(dataset=train_dataset, batch_size=batch_size*2, shuffle=False,pin_memory=True, num_workers=4)

In [87]:
sequence_length = 28
input_size = 28
hidden_size = 128
num_layers = 2 #2-layer stacked LSTM
num_classes = 10

In [88]:
class RNN(nn.Module):
    def __init__(self,input_size,hidden_size,num_layers,num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True) #batch_first for shape (batch,seq_no,features)
        self.fc = nn.Linear(hidden_size,num_classes)
    
    def forward(self,X_batch):
        hidden_0 = torch.zeros(self.num_layers, X_batch.size(0), self.hidden_size).to(device)
        cellstate_0 = torch.zeros(self.num_layers, X_batch.size(0),self.hidden_size).to(device)
        
        output,_ = self.lstm(X_batch, (hidden_0,cellstate_0))
        #output shape is (batch_size,seq_no,hidden_size)
        # we need hidden_state of the last element in the sequence for each of the batch inputs
        output = self.fc(output[:,-1,:])
        return output
    
    def train_step(self,X_batch):
        images,labels = X_batch
        images = images.reshape(-1,sequence_length,input_size)
        output = model(images)
        loss = F.cross_entropy(output,labels)
        return loss
    
    def test_step(self,X_batch):
        images,labels = X_batch
        images = images.reshape(-1,sequence_length,input_size)
        output = model(images)
        loss = F.cross_entropy(output,labels)
        _,preds = torch.max(output,dim=1)
        acc = torch.tensor(torch.sum(preds==labels).item()/len(preds))
        return {'loss':loss,'acc':acc}
    

In [89]:
def get_default_device():
    if torch.cuda.is_available():
        return torch.device('cuda')
    else:
        return torch.device('cpu')
    
def to_device(data,device):
    if isinstance(data,(list,tuple)):
        return [to_device(x,device) for x in data]
    else:
        return data.to(device,non_blocking=True)
    
class DeviceDataLoader():
    #Wrap dataloader to move data to device
    def __init__(self,dl,device):
        self.dl = dl
        self.device = device
        
    def __iter__(self):
        for batch in self.dl:
            yield to_device(batch,self.device)
    
    def __len__(self):
        return len(self.dl)

In [90]:
device = get_default_device()

In [91]:
device = torch.device('cpu')

In [92]:
model = RNN(input_size, hidden_size, num_layers, num_classes)
to_device(model,device)

RNN(
  (lstm): LSTM(28, 128, num_layers=2, batch_first=True)
  (fc): Linear(in_features=128, out_features=10, bias=True)
)

In [93]:
train_loader = DeviceDataLoader(train_loader,device)
test_loader = DeviceDataLoader(test_loader,device)

In [94]:
@torch.no_grad()
def evaluate(model,test_loader):
    outputs = [model.test_step(batch) for batch in test_loader]
    batch_losses = [x['loss'] for x in outputs]
    batch_accs = [x['acc'] for x in outputs]
    loss = torch.stack(batch_losses).mean()
    acc = torch.stack(batch_accs).mean()
    print(f"loss is {loss} and acc is {acc}")

In [95]:
evaluate(model,test_loader)

loss is 2.303302049636841 and acc is 0.10441667586565018


In [96]:
def train(model,train_loader,test_loader,num_epochs=2,lr=0.01,optim_func=torch.optim.Adam):
    optim = optim_func(model.parameters(),lr=lr)
    for epoch in range(num_epochs):
        for batch in train_loader:
            loss = model.train_step(batch)
            loss.backward()
            optim.step()
            optim.zero_grad()
        evaluate(model,test_loader)

In [97]:
train(model,train_loader,test_loader)

loss is 0.12334827333688736 and acc is 0.9633000493049622
loss is 0.08870931714773178 and acc is 0.974266767501831
