In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Parameter
from torch.autograd import Variable
from torch import Tensor
from torch.utils.data import DataLoader

import torchvision.transforms as transforms
import torchvision.datasets as dataset

import math

- dataset preprocessing

In [2]:
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
cuda = True if torch.cuda.is_available() else False
Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor

torch.manual_seed(29)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(29)

In [3]:
transform = transforms.Compose([
                                transforms.ToTensor(),
                                transforms.Normalize((0.5,), (1.0,))
])

In [4]:
download_root = './data'

train_dataset = dataset.MNIST(download_root, transform=transform, train=True, download=True)
val_dataset = dataset.MNIST(download_root, transform=transform, train=False, download=True)
test_dataset = dataset.MNIST(download_root, transform=transform, train=False, download=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 105386498.20it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 107580545.14it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 29615207.78it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 21599238.97it/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



In [5]:
batch_size = 64
dataloaders = {}

dataloaders['train'] = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
dataloaders['val'] = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)
dataloaders['test'] = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [6]:
batch_size = 100
n_iters = 6000
num_epochs = n_iters / (len(dataloaders['train']) / batch_size)
num_epochs = int(num_epochs)
num_epochs

- modeling


In [7]:
class LSTM_cell(nn.Module):
    def __init__(self, input_size, hidden_size, bias=True):
        super(LSTM_cell, self).__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.bias = bias
        self.x2h = nn.Linear(input_size, 4 * hidden_size, bias=bias) # 망각/입력/셀/출력 게이트 4개로 쪼개져서 들어간다.(chunk(4, 1)) chunk(몇개의 텐서로 나눌지, 어떤 차원으로 나눌지)
        self.h2h = nn.Linear(hidden_size, 4 * hidden_size, bias=bias)
        self.reset_parameters()

    def reset_parameters(self):
        std = 1.0 / math.sqrt(self.hidden_size)
        for w in self.parameters():
            w.data.uniform_(-std, std) # -std, std 사이의 임의의 실수 생성

    def forward(self, x, hidden):
        hx, cx = hidden # hidden : 이전 cell -> hx : 은닉 상태 / cx : cell 상태
        x = x.view(-1, x.size(1)) # 입력

        gates = self.x2h(x) + self.h2h(hx) # 입력 + 이전 기억
        gates = gates.squeeze()
        ingate, forgetgate, cellgate, outgate = gates.chunk(4, 1)

        ingate = F.sigmoid(ingate) # 시그모이드 적용
        forgetgate = F.sigmoid(forgetgate)# 시그모이드 적용
        cellgate = F.tanh(cellgate)# tanh 적용
        outgate = F.sigmoid(outgate)# 시그모이드 적용

        cy = torch.mul(cx, forgetgate) + torch.mul(ingate, cellgate)
        hy = torch.mul(outgate, F.tanh(cy))

        return (hy, cy)

In [8]:
class LSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim, bias=True):
        super(LSTM, self).__init__()

        self.hidden_dim = hidden_dim # 은닉층의 뉴런/유닛 개수
        self.layer_dim = layer_dim
        self.lstm = LSTM_cell(input_dim, hidden_dim, layer_dim)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        if torch.cuda.is_available():
            h0 = Variable(torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).cuda())
        else:
            h0 = Variable(torch.zeros(self.layer_dim, x.size(0), self.hidden_dim))

        if torch.cuda.is_available():
            c0 = Variable(torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).cuda())
        else:
            c0 = Variable(torch.zeros(self.layer_dim, x.size(0), self.hidden_dim))

        outs = []
        cn = c0[0, :, :]
        hn = h0[:, :, :]

        for seq in range(x.size(1)): # 셀 계층
            hn, cn = self.lstm(x[:, seq, :], (hn, cn))
            outs.append(hn)

        out = outs[-1].squeeze()
        out = self.fc(out)

        return out

- training

In [9]:
input_dim = 28
hidden_dim = 128
layer_dim = 1
output_dim = 10

model = LSTM(input_dim, hidden_dim, layer_dim, output_dim)

if torch.cuda.is_available():
    model.cuda()
criterion = nn.CrossEntropyLoss()
lr = 0.1
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

In [None]:
seq_dim = 28
loss_list = []
iter = 0

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(dataloaders['train']):
        if torch.cuda.is_available():
            images = Variable(images.view(-1, seq_dim, input_dim).cuda())
            labels = Variable(labels.cuda())
        else:
            images = Variable(images.view(-1, seq_dim, input_dim))
            labels = Variable(labels)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)

        if torch.cuda.is_available():
            loss.cuda()

        loss.backward()
        optimizer.step()
        loss_list.append(loss.item())
        iter += 1

        if iter % 500 == 0:
            correct = 0
            total = 0
            for images, labels in dataloaders['val']:
                if torch.cuda.is_available():
                    images = Variable(images.view(-1, seq_dim, input_dim).cuda())
                else:
                    images = Variable(images.view(-1, seq_dim, input_dim))

                outputs = model(images)
                _, preds = torch.max(outputs.data, 1)

                total += labels.size(0)
                if torch.cuda.is_available():
                    correct += (preds.cpu() == labels.cpu()).sum()
                else:
                    correct += (preds == labels).sum()

            accuracy = 100 * correct / total

            print(f'Iteration: {iter}. Loss: {loss.item()}. Accuracy: {accuracy}')

Iteration: 500. Loss: 2.262354612350464. Accuracy: 17.559999465942383
Iteration: 1000. Loss: 0.8935214281082153. Accuracy: 70.79000091552734
Iteration: 1500. Loss: 0.3928869664669037. Accuracy: 86.9000015258789
Iteration: 2000. Loss: 0.2623698115348816. Accuracy: 92.58000183105469
Iteration: 2500. Loss: 0.17468492686748505. Accuracy: 94.44999694824219
Iteration: 3000. Loss: 0.3281691074371338. Accuracy: 95.36000061035156
Iteration: 3500. Loss: 0.03647652268409729. Accuracy: 96.06999969482422
Iteration: 4000. Loss: 0.07383478432893753. Accuracy: 96.4000015258789
Iteration: 4500. Loss: 0.059126175940036774. Accuracy: 96.9000015258789
Iteration: 5000. Loss: 0.12666122615337372. Accuracy: 97.38999938964844
Iteration: 5500. Loss: 0.08045239746570587. Accuracy: 96.44000244140625
Iteration: 6000. Loss: 0.07876600325107574. Accuracy: 96.88999938964844
Iteration: 6500. Loss: 0.04993199184536934. Accuracy: 97.80999755859375
Iteration: 7000. Loss: 0.058640968054533005. Accuracy: 97.80000305175781

- evaluate