In [1]:
import numpy as np
import tensorflow as tf

In [5]:
# Toy sequence-regression data: every sample is a window of four consecutive
# integers and its target is the integer that follows the window.
X = (np.arange(6, dtype=np.float32)[:, np.newaxis]
     + np.arange(4, dtype=np.float32))  # (6, 4); row r is [r, r+1, r+2, r+3]

# Append a trailing feature axis of size 1 -> (samples, timesteps, features).
x_data = tf.reshape(X, (-1, 4, 1))

y_data = np.arange(4, 10, dtype=np.float32)  # targets 4, 5, ..., 9

print(x_data.shape, y_data.shape)

(6, 4, 1) (6,)


In [7]:
def sigmoid(x):
    """Element-wise logistic function ``1 / (1 + exp(-x))``.

    The value is computed in a numerically stable form: the exponential is
    only ever taken of a non-positive number, so large-magnitude negative
    inputs do not trigger NumPy's overflow ``RuntimeWarning``.

    Args:
        x: Real scalar or array-like.

    Returns:
        The logistic of ``x`` with the same shape as ``x``; a NumPy scalar
        when ``x`` is a scalar or 0-d array.
    """
    x = np.asarray(x)
    z = np.exp(-np.abs(x))  # exp of a non-positive value: lies in [0, 1], never overflows
    # x >= 0: 1 / (1 + e^-x)      x < 0: e^x / (1 + e^x)   (algebraically identical)
    out = np.where(x >= 0, 1 / (1 + z), z / (1 + z))
    # Indexing with an empty tuple unwraps 0-d results to a scalar and leaves
    # n-d arrays untouched, matching what plain ufunc arithmetic would return.
    return out[()]


class LSTM:
    """Forward pass of a single LSTM time step, implemented with NumPy.

    Parameters are stored in ``self.params`` as ``[Wx, Wh, b]``:
        Wx: input-to-hidden weights, shape (D, 4*h)
        Wh: hidden-to-hidden weights, shape (h, 4*h)
        b:  bias, broadcast against the (n, 4*h) pre-activations

    The 4*h columns of the pre-activation are split into four equal slices,
    in this order: forget gate, candidate (tanh), input gate, output gate.
    """

    def __init__(self, Wx, Wh, b):
        self.params = [Wx, Wh, b]
        # One zero-filled buffer per parameter, same shapes as the parameters.
        # (The attribute name `grades` is kept as-is for existing callers.)
        self.grades = [np.zeros_like(Wx), np.zeros_like(Wh), np.zeros_like(b)]
        # Filled by forward() with the tensors of the most recent step.
        self.cache = None

    def forward(self, x, h_prev, c_prev):
        """Advance the cell by one time step.

        Args:
            x: input at this step, shape (n, D).
            h_prev: previous hidden state, shape (n, h).
            c_prev: previous cell state, shape (n, h).

        Returns:
            Tuple ``(h_next, c_next)``, each of shape (n, h).
        """
        Wx, Wh, b = self.params
        # Wx : (D, 4*h), Wh: (h, 4*h)

        n, h = h_prev.shape

        # (n, D) @ (D, 4h) + (n, h) @ (h, 4h) + b -> (n, 4h).
        # h_prev must be the LEFT operand; with the operands swapped the
        # inner dimensions (4h vs n) do not line up.
        A = np.dot(x, Wx) + np.dot(h_prev, Wh) + b

        # Four equally sized gate slices, in column order f, g, i, o.
        f = sigmoid(A[:, :h])          # forget gate
        g = np.tanh(A[:, h:2*h])       # candidate cell values
        i = sigmoid(A[:, 2*h:3*h])     # input gate
        o = sigmoid(A[:, 3*h:])        # output gate

        c_next = f * c_prev + (g * i)
        h_next = o * np.tanh(c_next)

        self.cache = (x, h_prev, c_prev, c_next, h_next, f, g, i, o)

        return h_next, c_next

In [10]:
# Two stacked LSTM layers followed by a single linear unit (scalar regression).
model = tf.keras.Sequential([
    # Declare the (timesteps=4, features=1) input with an explicit Input
    # object instead of the layer-level `input_shape=` argument, which newer
    # Keras releases deprecate.
    tf.keras.Input(shape=(4, 1)),
    # return_sequences=True emits the hidden state at every time step so the
    # second LSTM receives a full sequence.
    tf.keras.layers.LSTM(units=300, return_sequences=True),
    tf.keras.layers.LSTM(300),
    tf.keras.layers.Dense(1)
])
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_2 (LSTM)               (None, 4, 300)            362400    
                                                                 
 lstm_3 (LSTM)               (None, 300)               721200    
                                                                 
 dense (Dense)               (None, 1)                 301       
                                                                 
Total params: 1083901 (4.13 MB)
Trainable params: 1083901 (4.13 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [11]:
# Mean-squared-error objective, optimised with Adam.
model.compile(optimizer="adam", loss="mse")
# Train for 100 epochs without progress output; the returned History object
# is the cell's displayed value.
model.fit(x=x_data, y=y_data, epochs=100, verbose=0)

<keras.src.callbacks.History at 0x28620957b20>

In [12]:
# Predictions for the same windows the model was fitted on.
model.predict(x=x_data)



array([[3.3767967],
       [5.08792  ],
       [6.367443 ],
       [7.3045864],
       [7.9988074],
       [8.522839 ]], dtype=float32)

#### Keras ver.

In [13]:
# Hyperparameters for the Embedding-based models.
vocab_size = 10_000   # maximum number of words kept when tokenizing (vocabulary size)
embedding_dim = 16    # output size of the Embedding layer
max_length = 100      # length of one sentence (sequence length in the data set)

- Without LSTM

In [14]:
# Baseline that does not use an LSTM.
model = tf.keras.Sequential([
    # Explicit Input in place of Embedding's `input_length=` argument, which
    # newer Keras releases deprecate; it also keeps the model built so that
    # summary() can report output shapes and parameter counts.
    tf.keras.Input(shape=(max_length,)),
    # Embedding table: 10000 * 16 = 160000 weights
    tf.keras.layers.Embedding(vocab_size, embedding_dim),
    tf.keras.layers.GlobalAvgPool1D(),  # average over the sequence axis -> one vector per sample
    tf.keras.layers.Dense(64, activation="relu"),
    tf.keras.layers.Dense(1, activation="sigmoid")
])

model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 100, 16)           160000    
                                                                 
 global_average_pooling1d (  (None, 16)                0         
 GlobalAveragePooling1D)                                         
                                                                 
 dense_1 (Dense)             (None, 64)                1088      
                                                                 
 dense_2 (Dense)             (None, 1)                 65        
                                                                 
Total params: 161153 (629.50 KB)
Trainable params: 161153 (629.50 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


- LSTM ver.

In [15]:
# Same head as the baseline, but an LSTM replaces the average pooling.
model = tf.keras.Sequential([
    # Explicit Input in place of Embedding's `input_length=` argument, which
    # newer Keras releases deprecate; it also keeps the model built so that
    # summary() can report output shapes and parameter counts.
    tf.keras.Input(shape=(max_length,)),
    tf.keras.layers.Embedding(vocab_size, embedding_dim),
    # return_sequences=False: only the final hidden state is passed on.
    tf.keras.layers.LSTM(64, return_sequences=False),
    tf.keras.layers.Dense(64, activation="relu"),
    tf.keras.layers.Dense(1, activation="sigmoid"),
])

model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 100, 16)           160000    
                                                                 
 lstm_4 (LSTM)               (None, 64)                20736     
                                                                 
 dense_3 (Dense)             (None, 64)                4160      
                                                                 
 dense_4 (Dense)             (None, 1)                 65        
                                                                 
Total params: 184961 (722.50 KB)
Trainable params: 184961 (722.50 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


- flatten ver.

In [17]:
# LSTM model with an extra Flatten layer, built to compare its summary with
# the variant that has no Flatten.
model = tf.keras.Sequential([
    # Explicit Input in place of Embedding's `input_length=` argument, which
    # newer Keras releases deprecate; it also keeps the model built so that
    # summary() can report output shapes and parameter counts.
    tf.keras.Input(shape=(max_length,)),
    tf.keras.layers.Embedding(vocab_size, embedding_dim),
    # return_sequences=False: only the final hidden state is passed on.
    tf.keras.layers.LSTM(64, return_sequences=False),
    # Kept on purpose: the LSTM output is already 2-D, so this layer changes
    # neither the shape nor the parameter count.
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation="relu"),
    tf.keras.layers.Dense(1, activation="sigmoid"),
])

model.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (None, 100, 16)           160000    
                                                                 
 lstm_5 (LSTM)               (None, 64)                20736     
                                                                 
 flatten (Flatten)           (None, 64)                0         
                                                                 
 dense_5 (Dense)             (None, 64)                4160      
                                                                 
 dense_6 (Dense)             (None, 1)                 65        
                                                                 
Total params: 184961 (722.50 KB)
Trainable params: 184961 (722.50 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


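LSTM(return_sequences=False)의 출력은 이미 (None, 64) 형태의 2차원 텐서이므로, Flatten을 추가해도 출력 형태와 전체 파라미터 수(184,961)가 변하지 않는다. 따라서 굳이 Flatten을 할 필요가 없음을 알 수 있다.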