# 020. LSTM/GRU input/output shape

- return_sequences = False, True 일 때의 output 비교

- return_state = False, True 일 때의 internal state output 비교

- Bidirectional LSTM/GRU 의 output 비교

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Bidirectional, GRU
import numpy as np
import warnings
warnings.filterwarnings('ignore')

# Problem dimensions shared by every example in this notebook.
B, T, D = 2, 5, 1   # batch size, time steps, features per time step
U = 3               # units in each recurrent (LSTM/GRU) layer

# Random dummy batch of shape (B, T, D) drawn from a standard normal.
X = np.random.standard_normal(size=(B, T, D))
print(X.shape)

(2, 5, 1)


# LSTM

## return_sequences

- False (default) - last time step 의 output 만 반환
- True - 모든 timestep 의 output 을 모두 반환

In [None]:
def lstm(return_sequences=False):
    """Feed the global batch ``X`` through a newly built LSTM and return the result.

    A single-layer model (``U`` units, input shape ``(T, D)``) is constructed
    on every call and used once for prediction; it is never trained.

    Args:
        return_sequences: Forwarded to the ``LSTM`` layer. ``False`` keeps only
            the last time step's output; ``True`` keeps the output of every
            time step.

    Returns:
        The value of ``Model.predict(X)``: an array of shape ``(B, U)`` when
        ``return_sequences`` is ``False`` and ``(B, T, U)`` when it is ``True``.
    """
    inputs = Input(shape=(T, D))
    recurrent = LSTM(U, return_sequences=return_sequences)
    network = Model(inputs=inputs, outputs=recurrent(inputs))
    return network.predict(X)

# Default mode: only the output of the final time step comes back.
print("---- return_sequences=False ----> last timestep 의 output 만 반환")
lstm_out = lstm(return_sequences=False)
print(lstm_out.shape, lstm_out, sep="\n")

# Sequence mode: one output per time step comes back.
print("\n---- return_sequences=True ----> 모든 timestep 별 output 출력")
lstm_out = lstm(return_sequences=True)
print(lstm_out.shape, lstm_out, sep="\n")

---- return_sequences=False ----> last timestep 의 output 만 반환
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 334ms/step
(2, 3)
[[ 0.09391557  0.01567914 -0.05843095]
 [-0.07300735 -0.02361687  0.01637033]]

---- return_sequences=True ----> 모든 timestep 별 output 출력
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 198ms/step
(2, 5, 3)
[[[-4.39144485e-03  1.18200649e-02  6.55261129e-02]
  [-5.14554232e-03  1.96261052e-02  9.63263661e-02]
  [-4.75443900e-03  2.52968855e-02  1.11715764e-01]
  [-2.54300260e-03  2.58843284e-02  1.02124199e-01]
  [-3.53531516e-03  3.18513066e-02  1.21536471e-01]]

 [[ 2.32938954e-04 -7.73716776e-04 -4.20006830e-03]
  [ 5.43427328e-03 -2.41574235e-02 -1.15892574e-01]
  [ 7.93969724e-03 -4.84143458e-02 -2.04264283e-01]
  [ 6.22138474e-03 -5.56628816e-02 -1.94290519e-01]
  [ 1.99340750e-04 -4.43463139e-02 -1.03066772e-01]]]


## return_state

- False (default) - output 만 반환

- True - output, last step 의 hidden state, cell state (LSTM 의 경우) 반환

In [None]:
def lstm(return_state=False):
    """Print what a newly built LSTM returns for the global batch ``X``.

    A single-layer model (``U`` units, input shape ``(T, D)``) is constructed
    on every call. Each returned array is printed as a label plus its shape,
    followed by its values.

    Args:
        return_state: Forwarded to the ``LSTM`` layer. ``False`` yields only
            the output ``o``; ``True`` yields the output together with the
            last step's hidden state ``h`` and cell state ``c``.

    Returns:
        None. Results are only printed.
    """
    inputs = Input(shape=(T, D))
    recurrent = LSTM(U, return_state=return_state)
    network = Model(inputs=inputs, outputs=recurrent(inputs))
    prediction = network.predict(X)

    if not return_state:
        # A single array comes back when no states are requested.
        labelled = [("o :", prediction)]
    else:
        # With states requested, predict yields three arrays.
        output, hidden, cell = prediction
        labelled = [("o :", output), ("h :", hidden), ("c :", cell)]

    for tag, array in labelled:
        print(tag, array.shape)
        print(array)

# Compare what the LSTM hands back without and with its final states.
print("---- return_state=False ----> output only")
lstm(return_state=False)
print("\n---- return_state=True ----> output, hidden state, cell state all")
lstm(return_state=True)

---- return_state=False ----> output only
o : (2, 3)
[[ 0.08662598  0.03549024 -0.16302542]
 [-0.04423545 -0.08543726  0.14413275]]

---- return_state=True ----> output, hidden state, cell state all
o : (2, 3)
[[-0.08016161 -0.03315075  0.04808342]
 [ 0.14247695  0.17282172 -0.18928523]]
h : (2, 3)
[[-0.08016161 -0.03315075  0.04808342]
 [ 0.14247695  0.17282172 -0.18928523]]
c : (2, 3)
[[-0.1397419  -0.09048831  0.12727761]
 [ 0.3231544   0.32186234 -0.33606148]]


# Bidirectional LSTM

- 순방향, 역방향이 concatenate 된 output 출력  

- hidden state, cell state 는 순방향, 역방향 별도 출력

In [None]:
# Echo the time-step, feature and unit counts as the cell's displayed value.
T, D, U

(5, 1, 3)

In [None]:
def bi_lstm(return_sequences=False, return_state=False):
    """Print the shapes a Bidirectional LSTM produces for the global batch ``X``.

    A model wrapping one ``LSTM`` (``U`` units, input shape ``(T, D)``) in
    ``Bidirectional`` is constructed on every call.

    Args:
        return_sequences: Forwarded to the wrapped ``LSTM``.
        return_state: Forwarded to the wrapped ``LSTM``. When true, predict
            yields five arrays: the output ``o`` followed by two
            (hidden, cell) pairs ``h1, c1`` and ``h2, c2``, one pair per
            direction (presumably forward first; confirm against the Keras
            ``Bidirectional`` documentation).

    Returns:
        None. Only shapes are printed.
    """
    inputs = Input(shape=(T, D))
    core = LSTM(U, return_sequences=return_sequences, return_state=return_state)
    network = Model(inputs=inputs, outputs=Bidirectional(core)(inputs))
    prediction = network.predict(X)

    if not return_state:
        print("o :", prediction.shape)
        return

    o, h1, c1, h2, c2 = prediction
    labelled = (("o :", o), ("h1 :", h1), ("c1 :", c1), ("h2 :", h2), ("c2 :", c2))
    for tag, array in labelled:
        print(tag, array.shape)

# Shapes of the concatenated forward/backward output in each mode.
print("*** 순방향, 역방향이 concatenate ***")
print("---- return_sequences=False ----> last timestep 의 output 만 반환")
bi_lstm(return_sequences=False, return_state=False)
print()
print("---- return_sequences=True ----> 모든 timestep 별 output 출력")
bi_lstm(return_sequences=True)
print()
# States are requested with return_sequences left at False, so the label and
# the call both spell that out.
print("---- return_sequences=False, return_state=True")
bi_lstm(return_sequences=False, return_state=True)

*** 순방향, 역방향이 concatenate ***
---- return_sequences=False ----> last timestep 의 output 만 반환
o : (2, 6)

---- return_sequences=True ----> 모든 timestep 별 output 출력
o : (2, 5, 6)

---- return_sequences=False, return_state=True
o : (2, 6)
h1 : (2, 3)
c1 : (2, 3)
h2 : (2, 3)
c2 : (2, 3)


# GRU

- cell state 가 없는 것만 LSTM 과 차이

In [None]:
def gru(return_sequences=False, return_state=False):
    """Print the shapes a newly built GRU produces for the global batch ``X``.

    A single-layer model (``U`` units, input shape ``(T, D)``) is constructed
    on every call.

    Args:
        return_sequences: Forwarded to the ``GRU`` layer.
        return_state: Forwarded to the ``GRU`` layer. When true, predict
            yields two arrays, the output ``o`` and the hidden state ``h``.

    Returns:
        None. Only shapes are printed.
    """
    inputs = Input(shape=(T, D))
    recurrent = GRU(U, return_sequences=return_sequences, return_state=return_state)
    network = Model(inputs=inputs, outputs=recurrent(inputs))
    prediction = network.predict(X)

    if not return_state:
        print("o :", prediction.shape)
        return

    o, h = prediction
    print("o :", o.shape)
    print("h :", h.shape)

# Last-step output only.
print("---- Many-to-One output ----")
gru(return_sequences=False, return_state=False)
print()
# One output per time step.
print("---- Many-to-Many output ----")
gru(return_sequences=True, return_state=False)
print()
# Last-step output plus the final hidden state.
print("---- Sequence-to-Vector output ----")
gru(return_sequences=False, return_state=True)

---- Many-to-One output ----
o : (2, 3)

---- Many-to-Many output ----
o : (2, 5, 3)

---- Sequence-to-Vector output ----
o : (2, 3)
h : (2, 3)


# Bidirectional GRU

- cell state 가 없는 것 외에 LSTM 과 동일

In [None]:
def bi_gru(return_sequences=False, return_state=False):
    """Print the shapes a Bidirectional GRU produces for the global batch ``X``.

    A model wrapping one ``GRU`` (``U`` units, input shape ``(T, D)``) in
    ``Bidirectional`` is constructed on every call.

    Args:
        return_sequences: Forwarded to the wrapped ``GRU``.
        return_state: Forwarded to the wrapped ``GRU``. When true, predict
            yields three arrays: the output ``o`` and one hidden state per
            direction, ``h1`` and ``h2``.

    Returns:
        None. Only shapes are printed.
    """
    inputs = Input(shape=(T, D))
    core = GRU(U, return_sequences=return_sequences, return_state=return_state)
    network = Model(inputs=inputs, outputs=Bidirectional(core)(inputs))
    prediction = network.predict(X)

    if not return_state:
        print("o :", prediction.shape)
        return

    o, h1, h2 = prediction
    for tag, array in (("o :", o), ("h1 :", h1), ("h2 :", h2)):
        print(tag, array.shape)

# Last-step output only.
print("---- 순방향, 역방향이 concatenate 된 many-to-one output")
bi_gru(return_sequences=False, return_state=False)
print()
# One output per time step.
print("---- 순방향, 역방향이 concatenate 된 many-to-many output")
bi_gru(return_sequences=True, return_state=False)
print()
# Last-step output plus one hidden state per direction.
print("---- 순방향, 역방향이 concatenate 된 sequence-to-vector output")
bi_gru(return_sequences=False, return_state=True)

---- 순방향, 역방향이 concatenate 된 many-to-one output
o : (2, 6)

---- 순방향, 역방향이 concatenate 된 many-to-many output
o : (2, 5, 6)

---- 순방향, 역방향이 concatenate 된 sequence-to-vector output
o : (2, 6)
h1 : (2, 3)
h2 : (2, 3)
