In [25]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Bidirectional, GRU
import numpy as np

In [2]:
B = 2 # batch size
T = 5
D = 1 # embedding
U = 3

X = np.random.randn(B, T, D)
X.shape

(2, 5, 1)

return_sequences<br>
* False: last time step out만 반환<br>
* True: 모든 time step의 output반환

In [10]:
def lstm(return_sequneces=False):
  inp = Input(shape=(T,D))
  # print(inp)
  out = LSTM(U, return_sequences=return_sequneces)(inp)
  # print(out)
  model = Model(inputs=inp, outputs=out)
  # print(model)
  return model.predict(X)
#########################################################
print("[return_sequneces=False] ==> last timestep의 output만 반환")
lstm_output = lstm(return_sequneces=False)
print(lstm_output.shape)
print(lstm_output)

print("[return_sequneces=True] ==> all timestep의 output만 반환")
lstm_output = lstm(return_sequneces=True)
print(lstm_output.shape)
print(lstm_output)

[return_sequneces=False] ==> last timestep의 output만 반환
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 288ms/step
(2, 3)
[[ 1.0327996e-02 -2.1472158e-02 -1.6162923e-01]
 [-3.7353162e-05 -4.2262163e-02 -3.0215389e-01]]
[return_sequneces=True] ==> all timestep의 output만 반환
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 313ms/step
(2, 5, 3)
[[[ 0.41591716 -0.39460105 -0.11405101]
  [ 0.13138554 -0.11964055 -0.14083366]
  [ 0.16015019 -0.13968913 -0.11098981]
  [ 0.03052378 -0.04019041 -0.03197095]
  [-0.06477252  0.02204618  0.11806206]]

 [[-0.02001988  0.0126246   0.01762821]
  [-0.04191697  0.02496825  0.03815358]
  [-0.09978253  0.05848864  0.20651023]
  [-0.1627945   0.09141424  0.21016419]
  [-0.08961148  0.04999166  0.4665122 ]]]


return_state<br>
* False: output만 반환
* True: output, last step의 hidden layer(state), cell state 반환

In [15]:
def lstm(return_state=False):
  inp = Input(shape=(T, D))
  out = LSTM(U, return_state=return_state)(inp)

  model = Model(inputs=inp, outputs=out)
  # model.summary()
  if return_state:
    o, h, c = model.predict(X)
    print('o: ', o.shape) # output
    print(o)
    print('h: ', h.shape) # hidden state(단기기억)
    print(h)
    print('c: ', c.shape) # cell state(장기 기억)
    print('c')
  else:
    o = model.predict(X)
    print('o: ', o.shape) # output
    print(o)

####################################################
print("[return_state=False] ==> output only")
lstm(return_state=False)

print("[return_state=True] ==> output, hidden state, cell")
lstm(return_state=True)


[return_state=False] ==> output only




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 187ms/step
o:  (2, 3)
[[-0.13451643  0.04369255  0.15232547]
 [-0.25684762  0.09206742  0.15166146]]
[return_state=True] ==> output, hidden state, cell
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 188ms/step
o:  (2, 3)
[[-0.01304071 -0.04876684 -0.04178309]
 [ 0.0147948   0.02206161 -0.1295119 ]]
h:  (2, 3)
[[-0.01304071 -0.04876684 -0.04178309]
 [ 0.0147948   0.02206161 -0.1295119 ]]
c:  (2, 3)
c


Bidirectional LSTM<br>
* 순방향, 역방향이 결합된 output 출력: 양방향을 본다.
* hidden state, cell state는 순방향, 역방향이 별도 출력

In [16]:
T, D, U

(5, 1, 3)

In [24]:
def bi_lstm(return_sequences=False, return_state=False):
  inp = Input(shape=(T, D))
  out = Bidirectional(LSTM(U, return_sequences=return_sequences, return_state=return_state))(inp)

  model = Model(inputs=inp, outputs=out)
  # model.summary()
  if return_state:# 비교는 state로 해야 함!!
    o, h1, c1, h2, c2 = model.predict(X)
    print('o: ', o.shape)
    print('h1: ', h1.shape)
    print('c1: ', c1.shape)
    print('h2: ', h2.shape)
    print('c2: ', c2.shape)
  else:
    o = model.predict(X)
    print('o: ', o.shape)
###############################################
print("** 순방향, 역방향이 concat **")
print('[return_sequences=False] ==> last timestep의 output만 반환')
bi_lstm(return_sequences=False, return_state=False)
print()
print('[return_sequences=True] ==> all timestep의 output 반환')
bi_lstm(return_sequences=True)
print()
print('[return_sequences=False, return_state=True]')
bi_lstm(return_state=True)
print()


** 순방향, 역방향이 concat **
[return_sequences=False] ==> last timestep의 output만 반환
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 375ms/step
o:  (2, 6)

[return_sequences=True] ==> all timestep의 output 반환
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 337ms/step
o:  (2, 5, 6)

[return_sequences=False, return_state=True]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 376ms/step
o:  (2, 6)
h1:  (2, 3)
c1:  (2, 3)
h2:  (2, 3)
c2:  (2, 3)



GRU: cell state가 없음.

In [33]:
def gru(return_sequences=False, return_state=False):
  inp = Input(shape=(T, D))
  out = GRU(U, return_sequences=return_sequences, return_state=return_state)(inp)
  model = Model(inputs=inp, outputs=out)
  # model.summary()

  if return_state:
    o, h = model.predict(X)
    print('o: ', o.shape)
    print('h: ', h.shape)
  else:
    o = model.predict(X)
    print('o: ', o.shape)
##################################################
print('[Many to One output]')
gru(return_sequences=False, return_state=False)
print()
print('[Many to Many output]')
gru(return_sequences=True)
print()
print('[Sequences to Vector output]')
gru(return_sequences=False, return_state=True)
print()

[Many to One output]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 264ms/step
o:  (2, 3)

[Many to Many output]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 230ms/step
o:  (2, 5, 3)

[Sequences to Vector output]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 242ms/step
o:  (2, 3)
h:  (2, 3)



In [34]:
def bi_gru(return_sequences=False, return_state=False):
  inp = Input(shape=(T, D))
  out = Bidirectional(GRU(U, return_sequences=return_sequences, return_state=return_state))(inp)
  model = Model(inputs=inp, outputs=out)
  # model.summary()

  if return_state:
    o, h1, h2 = model.predict(X)
    print('o: ', o.shape)
    print('h1: ', h1.shape)
    print('h2: ', h2.shape)
  else:
    o = model.predict(X)
    print('o: ', o.shape)
##################################################
print('순방향, 역방향이 concat된 [Many to One output]')
bi_gru(return_sequences=False, return_state=False)
print()
print('순방향, 역방향이 concat된 [Many to Many output]')
bi_gru(return_sequences=True)
print()
print('순방향, 역방향이 concat된 [Sequences to Vector output]')
bi_gru(return_state=True)
print()

순방향, 역방향이 concat된 [Many to One output]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 466ms/step
o:  (2, 6)

순방향, 역방향이 concat된 [Many to Many output]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 420ms/step
o:  (2, 5, 6)

순방향, 역방향이 concat된 [Sequences to Vector output]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 433ms/step
o:  (2, 6)
h1:  (2, 3)
h2:  (2, 3)

