numpy를 이용한 간단한 LSTM 셀 계산 구현

In [2]:
import numpy as np

In [7]:
class LSTMCell:
    """Single LSTM cell implemented with NumPy (forward pass only).

    Every gate has its own weight matrix of shape
    ``(hidden_size, input_size + hidden_size)`` and a bias of shape
    ``(hidden_size, 1)``. The weights are applied to the previous hidden
    state stacked on top of the current input (``[h_prev; x]``).
    """

    def __init__(self, input_size, hidden_size):
        """Create randomly initialised gate weights and zero biases.

        Args:
            input_size: Number of rows of the input column vector ``x``.
            hidden_size: Number of units, i.e. rows of ``h`` and ``c``.
        """
        self.input_size = input_size
        # ``output_size`` is the original attribute name and is kept for
        # existing callers; ``hidden_size`` is an alias with the same value.
        self.output_size = hidden_size
        self.hidden_size = hidden_size

        # Weight matrices and biases. The creation order (f, i, c, o) is kept
        # so a seeded np.random yields the same weights as before.
        self.W_f = np.random.randn(hidden_size, input_size + hidden_size)
        self.b_f = np.zeros((hidden_size, 1))
        self.W_i = np.random.randn(hidden_size, input_size + hidden_size)
        self.b_i = np.zeros((hidden_size, 1))
        self.W_c = np.random.randn(hidden_size, input_size + hidden_size)
        self.b_c = np.zeros((hidden_size, 1))
        self.W_o = np.random.randn(hidden_size, input_size + hidden_size)
        self.b_o = np.zeros((hidden_size, 1))

    def sigmoid(self, x):
        """Return the logistic function ``1 / (1 + exp(-x))`` element-wise.

        Only ``exp(-|x|)`` is evaluated, which lies in ``(0, 1]``, so large
        magnitude inputs cannot overflow the way ``exp(-x)`` does for very
        negative ``x``.
        """
        x = np.asarray(x)
        z = np.exp(-np.abs(x))
        # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- same function.
        return np.where(x >= 0, 1 / (1 + z), z / (1 + z))

    def tanh(self, x):
        """Return the element-wise hyperbolic tangent of ``x``."""
        return np.tanh(x)

    def forward(self, x, h_prev, c_prev):
        """Run one LSTM time step.

        Args:
            x: Input of shape ``(input_size, 1)``.
            h_prev: Previous hidden state of shape ``(hidden_size, 1)``.
            c_prev: Previous cell state of shape ``(hidden_size, 1)``.

        Returns:
            Tuple ``(h, c)`` with the new hidden state and new cell state.
        """
        # Stack along axis 0 so each gate sees [h_prev; x] as one column.
        concat = np.concatenate((h_prev, x), axis=0)

        # Forget gate: how much of the previous cell state to keep
        f = self.sigmoid(np.dot(self.W_f, concat) + self.b_f)
        # Input gate: how much of the candidate to write
        i = self.sigmoid(np.dot(self.W_i, concat) + self.b_i)
        # Candidate value
        c_bar = self.tanh(np.dot(self.W_c, concat) + self.b_c)
        # Update cell state
        c = f * c_prev + i * c_bar
        # Output gate
        o = self.sigmoid(np.dot(self.W_o, concat) + self.b_o)
        # Update hidden state
        h = o * self.tanh(c)

        return h, c
    
# Smoke test: push one random sample through a freshly built LSTM cell
input_size, hidden_size = 3, 4  # hidden_size = number of units
cell = LSTMCell(input_size=input_size, hidden_size=hidden_size)

# Random column vectors for the input and the previous hidden / cell states
# (drawn in this order so a seeded run reproduces the same values)
x = np.random.randn(input_size, 1)
h_prev = np.random.randn(hidden_size, 1)
c_prev = np.random.randn(hidden_size, 1)

# One forward step through the LSTM cell
h_next, c_next = cell.forward(x=x, h_prev=h_prev, c_prev=c_prev)

print("Next hidden state:\n", h_next)
print("Next cell state:\n", c_next)

Next hidden state:
 [[-0.17037006]
 [ 0.33734858]
 [ 0.16595098]
 [-0.03092968]]
Next cell state:
 [[-1.07893995]
 [ 1.42117444]
 [ 0.39138007]
 [-1.20887825]]


torch를 이용한 LSTM 구현

In [8]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
from torch import Tensor
import torch.nn.functional as F

import pdb
import math

In [9]:
'''
STEP 1: LOADING DATASET
'''
# MNIST training split; downloaded into ./data when it is not there yet.
train_dataset = dsets.MNIST(
    root='./data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

# MNIST test split, read from the same root directory.
test_dataset = dsets.MNIST(
    root='./data',
    train=False,
    transform=transforms.ToTensor(),
)

batch_size = 100
n_iters = 6000
# epochs = total iterations / iterations per epoch, truncated to an int
num_epochs = int(n_iters / (len(train_dataset) / batch_size))

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100.0%


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100.0%


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100.0%


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100.0%

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw






In [10]:
# Mini-batch iterators over both splits: the training batches are reshuffled,
# the test batches keep the dataset order.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
)

test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [None]:
class LSTMCell(nn.Module):
    def __init__(self, input_size, hidden_size, bias=True):
        super(LSTMCell, self).__init__() # super().__init__()과 동일함, 자식클래스를 명시한다는것의 차이만 있음
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.bias = bias
        self.x2h = nn.Linear(input_size, 4 * hidden_size, bias=bias)
        self.h2h = nn.Linear(hidden_size, 4 * hidden_size, bias=bias)
        self.c2c = Tensor(hidden_size*3)
        self.reset_parameters()
    
    def reset_parameters(self):
        std = 1.0 / math.sqrt(self.hidden_size)
        for w in self.parameters():
            w.data.uniform_(-std, std)
            
    def forward(self, x, hidden):
        hx, cx = hidden