In [1]:
import tensorflow as tf

from tensorflow.keras.layers import LSTM
# https://www.tensorflow.org/api_docs/python/tf/keras/layers/LSTM

import random

In [2]:
# Input: a random dataset standing in for a batch of numerically encoded sentences.
sentence_length = random.randint(1, 10)                   # number of words per sentence, drawn at random
# 8 sentences, `sentence_length` words each, every word represented by a 10-dim vector.
# Resulting 3-D tensor layout: [batch, timesteps, feature]
x = tf.random.normal(shape=(8, sentence_length, 10))

print("===== Input =====\n")
print("shape of x : {}, \n value : {}\n".format(x.shape, x.numpy()))

# Build the LSTM layer.
# return_state=True makes the layer return its final states in addition to the output.
lstm = LSTM(units = 5, return_state=True)
# lstm = LSTM(units=5, return_sequences=False)   # return_sequences=True would return the output of every timestep

# Keras returns (output, final memory state, final carry state).
# In Keras terminology the *memory* state is the hidden state h and the
# *carry* state is the cell state c, so the labels below are hidden -> memory
# and cell -> carry.
output, final_memory_state, final_carry_state = lstm(x)

print("===== Output shape =====\n")
print("shape of y : {}\n".format(output.shape))
print("shape of hidden state(memory state) : {}\n".format(final_memory_state.shape))
print("shape of cell state(carry state) : {}\n".format(final_carry_state.shape))

# Uncomment to print the values as well as the shapes.
# print("shape of y : {}, \nvalue : {}\n".format(output.shape, output.numpy()))
# print("shape of hidden state(memory state) : {}, \nvalue : {}\n".format(final_memory_state.shape, final_memory_state.numpy()))
# print("shape of cell state(carry state) : {}, \nvalue : {}\n".format(final_carry_state.shape, final_carry_state.numpy()))


===== Input =====

shape of x : (8, 10, 10), 
 value : [[[ 1.41291881e+00 -5.53783588e-02  1.74131525e+00 -1.55661607e+00
   -1.12961449e-01 -5.64256430e-01 -2.02132612e-01 -1.43126845e-01
   -4.96621251e-01  3.73631090e-01]
  [ 1.04281962e+00 -7.90548146e-01 -4.18447219e-02 -8.06673542e-02
   -2.88652718e-01 -2.45016873e-01 -6.89060986e-01 -3.53859693e-01
   -8.24862599e-01 -8.13889086e-01]
  [ 3.58523309e-01  1.18642449e+00  1.47693723e-01 -5.72258949e-01
    3.13473165e-01 -1.84228972e-01  1.15084827e+00  1.06838179e+00
    9.24653947e-01 -7.26995990e-02]
  [ 1.41011551e-01 -1.75188214e-01 -1.75050378e-01 -2.70061851e-01
    1.45849422e-01 -6.77860498e-01 -1.03806150e+00  1.76326811e+00
   -1.83256149e-01 -5.22017896e-01]
  [ 1.73234809e-02  1.43137932e-01  3.83414835e-01 -8.13303947e-01
    1.30801189e+00  1.52427256e+00 -1.69740841e-01 -6.26107872e-01
    1.39287949e+00  1.55619061e+00]
  [ 9.97620001e-02  7.86830664e-01 -1.00016809e+00 -4.09344286e-01
   -1.43016982e+00 -5.411740

## NumPy로 직접 구현한 LSTM

In [3]:
import numpy as np                # https://numpy.org/doc/stable/reference/random/generated/numpy.random.randn.html
import random

import tensorflow as tf
from tensorflow.math import exp

In [4]:
# One LSTM-cell step written out by hand (input, forget and output gates plus
# the cell/hidden state updates), with randomly initialised weights.
#
# NOTE(review): there is no loop over timesteps here. Every column of x.T
# (i.e. every word) is combined with the same all-zero h_0 / C_0, so the gates
# are evaluated for each word independently rather than feeding the hidden
# state of one word into the next.
# NOTE(review): the biases are drawn with shape (hidden_size, sentence_length),
# i.e. a separate bias per word position. A standard LSTM shares one bias
# vector per gate across timesteps; kept as-is to preserve the printed shapes.


def _sigmoid(z):
    """Element-wise logistic sigmoid: 1 / (1 + e^-z)."""
    return 1 / (1 + exp(-z))


def _tanh(z):
    """Element-wise hyperbolic tangent: (e^z - e^-z) / (e^z + e^-z)."""
    return ( exp(z) - exp(-z) ) / ( exp(z) + exp(-z) )


# Input setting: one sentence of `sentence_length` words, `features` values per word.
sentence_length = 6
features = 10
x = np.random.randn(sentence_length, features)
print("x shape : {}\n".format(x.shape))

# Initial hidden state and cell state (all zeros).
hidden_size = 5      # LSTM cell units
h_0 = tf.constant(0., shape=(hidden_size, sentence_length), dtype=tf.float64)
C_0 = tf.constant(0., shape=(hidden_size, sentence_length), dtype=tf.float64)

# Input gate parameters (drawn from the standard normal distribution).
W_xi = np.random.randn(hidden_size, features)
W_hi = np.random.randn(hidden_size, hidden_size)
b_i = np.random.randn(hidden_size, sentence_length)

print("W_xi shape : {}".format(W_xi.shape))
print("W_hi shape : {}".format(W_hi.shape))
print("h_0 shape : {}".format(h_0.shape))
print("b_i shape : {}\n".format(b_i.shape))

# Candidate (g) parameters (drawn from the standard normal distribution).
W_xg = np.random.randn(hidden_size, features)
W_hg = np.random.randn(hidden_size, hidden_size)
b_g = np.random.randn(hidden_size, sentence_length)

# Input gate i_t and candidate values g_t.
input_i_t = np.dot(W_xi, x.T) + np.dot(W_hi, h_0) + b_i
input_g_t = np.dot(W_xg, x.T) + np.dot(W_hg, h_0) + b_g
i_t = _sigmoid(input_i_t)
g_t = _tanh(input_g_t)

# Forget gate parameters (drawn from the standard normal distribution).
W_xf = np.random.randn(hidden_size, features)
W_hf = np.random.randn(hidden_size, hidden_size)
b_f = np.random.randn(hidden_size, sentence_length)

# Forget gate f_t.
input_f_t = np.dot(W_xf, x.T) + np.dot(W_hf, h_0) + b_f
f_t = _sigmoid(input_f_t)

# Cell state update: keep part of the old state, add the gated candidate.
C_t = f_t * C_0 + i_t * g_t

# Output gate parameters (drawn from the standard normal distribution).
W_xo = np.random.randn(hidden_size, features)
W_ho = np.random.randn(hidden_size, hidden_size)
b_o = np.random.randn(hidden_size, sentence_length)

# Output gate o_t.
input_o_t = np.dot(W_xo, x.T) + np.dot(W_ho, h_0) + b_o
o_t = _sigmoid(input_o_t)

# Hidden state update: output gate applied to the squashed cell state.
tan_C_t = _tanh(C_t)
h_t = o_t * tan_C_t

print("i_t shape : {}".format(i_t.shape))
print("g_t shape : {}".format(g_t.shape))
print("f_t shape : {}".format(f_t.shape))
print("C_t shape : {}".format(C_t.shape))
print("h_t shape : {}\n".format(h_t.shape))


x shape : (6, 10)

W_xi shape : (5, 10)
W_hi shape : (5, 5)
h_o shape : (5, 6)
b_i shape : (5, 6)

i_t shape : (5, 6)
g_t shape : (5, 6)
f_t shape : (5, 6)
C_t shape : (5, 6)
C_t shape : (5, 6)

