In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Parameter
from torch.autograd import Variable
from torch import Tensor
from torch.utils.data import DataLoader

import torchvision.transforms as transforms
import torchvision.datasets as dataset

import math

- dataset preprocessing

In [2]:
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
cuda = True if torch.cuda.is_available() else False
Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor

torch.manual_seed(29)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(29)

In [3]:
transform = transforms.Compose([
                                transforms.ToTensor(),
                                transforms.Normalize((0.5,), (1.0,))
])

In [4]:
download_root = './data'

train_dataset = dataset.MNIST(download_root, transform=transform, train=True, download=True)
val_dataset = dataset.MNIST(download_root, transform=transform, train=False, download=True)
test_dataset = dataset.MNIST(download_root, transform=transform, train=False, download=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 105386498.20it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 107580545.14it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 29615207.78it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 21599238.97it/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



In [5]:
batch_size = 64
dataloaders = {}

dataloaders['train'] = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
dataloaders['val'] = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)
dataloaders['test'] = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [22]:
batch_size = 100
n_iters = 6000
# num_epochs = n_iters / (len(dataloaders['train']) / batch_size)
# num_epochs = int(num_epochs)
num_epochs = 10

- modeling


In [1]:
from model import LSTM_cell, LSTM

- training

In [9]:
input_dim = 28
hidden_dim = 128
layer_dim = 1
output_dim = 10

model = LSTM(input_dim, hidden_dim, layer_dim, output_dim)

if torch.cuda.is_available():
    model.cuda()
criterion = nn.CrossEntropyLoss()
lr = 0.1
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

In [23]:
seq_dim = 28
loss_list = []
iter = 0

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(dataloaders['train']):
        if torch.cuda.is_available():
            images = Variable(images.view(-1, seq_dim, input_dim).cuda())
            labels = Variable(labels.cuda())
        else:
            images = Variable(images.view(-1, seq_dim, input_dim))
            labels = Variable(labels)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)

        if torch.cuda.is_available():
            loss.cuda()

        loss.backward()
        optimizer.step()
        loss_list.append(loss.item())
        iter += 1

        if iter % 500 == 0:
            correct = 0
            total = 0
            for images, labels in dataloaders['val']:
                if torch.cuda.is_available():
                    images = Variable(images.view(-1, seq_dim, input_dim).cuda())
                else:
                    images = Variable(images.view(-1, seq_dim, input_dim))

                outputs = model(images)
                _, preds = torch.max(outputs.data, 1)

                total += labels.size(0)
                if torch.cuda.is_available():
                    correct += (preds.cpu() == labels.cpu()).sum()
                else:
                    correct += (preds == labels).sum()

            accuracy = 100 * correct / total

            print(f'Iteration: {iter}. Loss: {loss.item()}. Accuracy: {accuracy}')

Iteration: 500. Loss: 0.0016002373304218054. Accuracy: 98.69000244140625
Iteration: 1000. Loss: 0.0011912022018805146. Accuracy: 98.61000061035156
Iteration: 1500. Loss: 0.0009245975525118411. Accuracy: 98.51000213623047
Iteration: 2000. Loss: 0.00022849839297123253. Accuracy: 98.7300033569336
Iteration: 2500. Loss: 0.0006032448727637529. Accuracy: 98.68000030517578
Iteration: 3000. Loss: 8.095285738818347e-05. Accuracy: 98.70999908447266
Iteration: 3500. Loss: 0.001074706669896841. Accuracy: 98.77999877929688
Iteration: 4000. Loss: 0.00010538275819271803. Accuracy: 98.70999908447266
Iteration: 4500. Loss: 0.0002693265851121396. Accuracy: 98.63999938964844
Iteration: 5000. Loss: 0.000477773864986375. Accuracy: 98.73999786376953
Iteration: 5500. Loss: 0.0002575294056441635. Accuracy: 98.62999725341797
Iteration: 6000. Loss: 4.0563070797361434e-05. Accuracy: 98.69999694824219
Iteration: 6500. Loss: 6.637313344981521e-05. Accuracy: 98.68000030517578
Iteration: 7000. Loss: 5.88938601140398

- evaluate

In [60]:
def evaluate(model, dataloaders):
    corrects, total, total_loss = 0, 0, 0

    model.eval()
    for images, labels in dataloaders:
        # if torch.cuda.is_available():
        #     images = Variable(images.view(-1, seq_dim, input_dim).cuda())
        # else:
        #     images = Variable(images.view(-1, seq_dim, input_dim)).to(DEVICE)
        images = Variable(images.view(-1, seq_dim, input_dim)).to(DEVICE)

        logit = model(images).to(DEVICE)
        loss = F.cross_entropy(logit, labels.to(DEVICE), reduction='sum')
        _, preds = torch.max(logit.data, 1)
        total += labels.size(0)
        total_loss += loss.item()
        corrects += (preds.cpu() == labels).sum()

    avg_loss = total_loss / len(dataloaders.dataset)
    avg_accuracy = corrects / total

    return avg_loss, avg_accuracy

In [61]:
test_loss, test_acc = evaluate(model, dataloaders['test'])
print(f'Test Loss: {test_loss:.6f} | Test Accuracy: {test_acc:.6f}')

Test Loss: 0.058868 | Test Accuracy: 0.987100
