In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import torch
import torch.nn as nn
from torchvision.transforms import transforms
from torch.utils.data import DataLoader
import torchvision


# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
device = torch.device('cuda') if torch.cuda.is_available() else 'cpu'

In [3]:
inp_size = 28 
num_classes = 10
hid_size = 128
num_ep = 30
batch_size = 100
lr = 0.001
num_layers = 2

In [4]:
img_transform = transforms.Compose([
    transforms.RandomRotation(10),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

In [5]:
#MNIST Dataset
train_dataset = torchvision.datasets.MNIST(root='./data', train=True, transform = img_transform, download=True)
test_dataset = torchvision.datasets.MNIST(root='./data', train=False, transform = img_transform, download=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



In [6]:
#Dataloaders
train_loader = DataLoader(dataset = train_dataset, batch_size = batch_size, shuffle=True)
test_loader = DataLoader(dataset = test_dataset, batch_size = batch_size, shuffle=False)

In [7]:
class SequentialModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(SequentialModel, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)
        
    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        out, _ = self.rnn(x, h0)  
        out = out[:, -1, :]
        out = self.fc(out)
        
        return out

In [8]:
model = SequentialModel(inp_size, hid_size, num_layers, num_classes).to(device)

In [9]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = lr)

In [10]:
def train_model(model, loader, optimizer, device):
    epoch_loss = 0.0

    model.train()
    for x, y in loader:
        x = x.reshape(-1, inp_size, inp_size).to(device)
        y = y.to(device)

        optimizer.zero_grad()
        y_pred = model(x)
        loss = criterion(y_pred, y)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

    epoch_loss = epoch_loss/len(loader)
    return epoch_loss

In [11]:
def evaluate(model, loader, device):
    epoch_loss = 0.0

    model.eval()
    with torch.no_grad():
        for x, y in loader:
            x = x.reshape(-1, inp_size, inp_size).to(device)
            y = y.to(device)

            y_pred = model(x)
            loss = criterion(y_pred, y)
            epoch_loss += loss.item()

        epoch_loss = epoch_loss/len(loader)
    return epoch_loss

In [12]:
train = []
valid = []

best_valid_loss = float("inf")

for epoch in range(num_ep):
        train_loss = train_model(model, train_loader, optimizer, device)
        valid_loss = evaluate(model, test_loader, device)

        train.append(train_loss)
        valid.append(valid_loss)        
        
        if valid_loss < best_valid_loss:
            data_str = f"Valid loss improved from {best_valid_loss:2.4f} to {valid_loss:2.4f}"
            print(data_str)

            best_valid_loss = valid_loss

        data_str = f'Epoch: {epoch+1:02}\n'
        data_str += f'\tTrain Loss: {train_loss:.3f}\n'
        data_str += f'\t Val. Loss: {valid_loss:.3f}\n'
        print(data_str)

Valid loss improved from inf to 0.4930
Epoch: 01
	Train Loss: 0.832
	 Val. Loss: 0.493

Valid loss improved from 0.4930 to 0.3688
Epoch: 02
	Train Loss: 0.434
	 Val. Loss: 0.369

Valid loss improved from 0.3688 to 0.2999
Epoch: 03
	Train Loss: 0.346
	 Val. Loss: 0.300

Valid loss improved from 0.2999 to 0.2952
Epoch: 04
	Train Loss: 0.300
	 Val. Loss: 0.295

Valid loss improved from 0.2952 to 0.2342
Epoch: 05
	Train Loss: 0.268
	 Val. Loss: 0.234

Valid loss improved from 0.2342 to 0.2218
Epoch: 06
	Train Loss: 0.255
	 Val. Loss: 0.222

Epoch: 07
	Train Loss: 0.237
	 Val. Loss: 0.239

Epoch: 08
	Train Loss: 0.222
	 Val. Loss: 0.247

Epoch: 09
	Train Loss: 0.220
	 Val. Loss: 0.232

Valid loss improved from 0.2218 to 0.2177
Epoch: 10
	Train Loss: 0.215
	 Val. Loss: 0.218

Valid loss improved from 0.2177 to 0.2057
Epoch: 11
	Train Loss: 0.211
	 Val. Loss: 0.206

Valid loss improved from 0.2057 to 0.1847
Epoch: 12
	Train Loss: 0.199
	 Val. Loss: 0.185

Epoch: 13
	Train Loss: 0.196
	 Val. L

In [13]:
#Testing the model for accuracy
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    for images, labels in test_loader:
        images = images.reshape(-1, inp_size, inp_size).to(device)
        labels = labels.to(device)
        outputs = model(images)
        # max returns (value ,index)
        _, predicted = torch.max(outputs.data, 1)
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()

    acc = 100.0 * n_correct / n_samples
    print(f'Accuracy of the network on the 10000 test images: {acc} %')

Accuracy of the network on the 10000 test images: 94.62 %
