All exercises are based on [Deep Learning for NLP](https://wikidocs.net/106473).

## 1. 임의의 입력 생성

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import SimpleRNN, LSTM, Bidirectional

In [4]:
# One sample sentence: 4 timesteps (the sentence length), each fed to the
# network as a 5-dimensional word vector.
train_X = [
    [0.1, 4.2, 1.5, 1.1, 2.8],
    [1.0, 3.1, 2.5, 0.7, 1.1],
    [0.3, 2.1, 1.5, 2.1, 0.1],
    [2.2, 1.4, 0.5, 0.9, 1.1],
]
sample_shape = np.shape(train_X)
print(sample_shape)

(4, 5)


In [5]:
# The RNN layers take a 3D tensor as input, so wrap the 2D sample in an
# outer list to add a leading batch axis of size 1 before converting it to
# a float32 array.
train_X = np.array([train_X], dtype=np.float32)
batched_shape = train_X.shape
print(batched_shape)

(1, 4, 5)


## 2. SimpleRNN 이해하기

In [6]:
# Spell out the defaults: SimpleRNN(3) is the same as passing
# return_sequences=False and return_state=False, so the call returns a
# single tensor.
rnn = SimpleRNN(units=3, return_sequences=False, return_state=False)
hidden_state = rnn(train_X)

report = 'hidden state: {}, shape: {}'
print(report.format(hidden_state, hidden_state.shape))

hidden state: [[ 0.6230601   0.5862822  -0.99906266]], shape: (1, 3)


In [7]:
# return_sequences=True makes the layer return the hidden state of every
# timestep instead of only the final one.
rnn = SimpleRNN(units=3, return_sequences=True)
hidden_states = rnn(train_X)

report = 'hidden states : {}, shape : {}'
print(report.format(hidden_states, hidden_states.shape))

hidden states : [[[-0.9995419  -0.98667276  0.7086636 ]
  [-0.9675666  -0.9869088  -0.99089724]
  [-0.9145511   0.78240275 -0.08381365]
  [ 0.6811112  -0.9358884  -0.760372  ]]], shape : (1, 4, 3)


In [8]:
# return_state=True adds the final hidden state as a second return value,
# next to the per-timestep sequence requested by return_sequences=True.
rnn = SimpleRNN(units=3, return_sequences=True, return_state=True)
rnn_outputs = rnn(train_X)
hidden_states, last_state = rnn_outputs

for report, tensor in (
    ('hidden states : {}, shape : {}', hidden_states),
    ('last state : {}, shape: {}', last_state),
):
    print(report.format(tensor, tensor.shape))

hidden states : [[[ 0.91753644  0.99996495 -0.28961176]
  [ 0.12394407  0.9948329  -0.46767345]
  [ 0.9571372   0.994189   -0.31389728]
  [-0.9348962   0.9168451   0.9055878 ]]], shape : (1, 4, 3)
last state : [[-0.9348962  0.9168451  0.9055878]], shape: (1, 3)


In [9]:
# With return_sequences=False the first return value is the hidden state of
# the final timestep. return_state=True returns that final state as well,
# so the two values printed here are the same.
rnn = SimpleRNN(units=3, return_sequences=False, return_state=True)
rnn_outputs = rnn(train_X)
hidden_state, last_state = rnn_outputs

for report, tensor in (
    ('hidden state : {}, shape : {}', hidden_state),
    ('last_state : {}, shape : {}', last_state),
):
    print(report.format(tensor, tensor.shape))

hidden state : [[-0.23981732  0.9985038  -0.8954732 ]], shape : (1, 3)
last_state : [[-0.23981732  0.9985038  -0.8954732 ]], shape : (1, 3)


SimpleRNN이 사용되는 경우는 거의 없고 LSTM이나 GRU가 주로 사용됨. 임의의 입력에 대해 LSTM을 사용할 경우를 확인하자!

In [10]:
# An LSTM called with return_state=True yields three values: the layer
# output, the last hidden state and the last cell state. Because
# return_sequences=False, the output is itself the last hidden state.
lstm = LSTM(units=3, return_sequences=False, return_state=True)
lstm_outputs = lstm(train_X)
hidden_state, last_state, last_cell_state = lstm_outputs

for report, tensor in (
    ('hidden state: {}, shape: {}', hidden_state),
    ('last state: {}, shape: {}', last_state),
    ('last cell state: {}, shape: {}', last_cell_state),
):
    print(report.format(tensor, tensor.shape))

hidden state: [[ 0.15818313 -0.4277363  -0.35995167]], shape: (1, 3)
last state: [[ 0.15818313 -0.4277363  -0.35995167]], shape: (1, 3)
last cell state: [[ 1.3171525  -0.8654924  -0.46476567]], shape: (1, 3)


In [11]:
# Same LSTM configuration but with return_sequences=True: the first return
# value now holds the hidden state of every timestep, followed by the last
# hidden state and the last cell state.
lstm = LSTM(units=3, return_sequences=True, return_state=True)
lstm_outputs = lstm(train_X)
hidden_states, last_state, last_cell_state = lstm_outputs

for report, tensor in (
    ('hidden states: {}, shape: {}', hidden_states),
    ('last state: {}, shape: {}', last_state),
    ('last cell state: {}, shape: {}', last_cell_state),
):
    print(report.format(tensor, tensor.shape))

hidden states: [[[ 0.26998666 -0.23358071 -0.5874365 ]
  [ 0.3618829  -0.32563525 -0.6285319 ]
  [ 0.38525638 -0.27853763 -0.39876541]
  [ 0.44024017 -0.35047087 -0.40042493]]], shape: (1, 4, 3)
last state: [[ 0.44024017 -0.35047087 -0.40042493]], shape: (1, 3)
last cell state: [[ 1.0452285 -1.2888783 -1.8749702]], shape: (1, 3)


## 3. Bidirectional(LSTM) 이해하기

In [12]:
# Constant initializers for the kernel, bias and recurrent weights, used
# instead of random initial values so that the forward and backward
# outputs of the bidirectional LSTM can be compared across runs.
constant_init = tf.keras.initializers.Constant
k_init = constant_init(value=0.1)
b_init = constant_init(value=0)
r_init = constant_init(value=0.1)

In [14]:
# Wrap an LSTM in Bidirectional. With return_state=True the wrapper returns
# the merged output followed by the forward hidden/cell states and the
# backward hidden/cell states.
base_lstm = LSTM(
    units=3,
    return_sequences=False,
    return_state=True,
    kernel_initializer=k_init,
    bias_initializer=b_init,
    recurrent_initializer=r_init,
)
bilstm = Bidirectional(base_lstm)
bilstm_outputs = bilstm(train_X)
hidden_states, forward_h, forward_c, backward_h, backward_c = bilstm_outputs

for report, tensor in (
    ('hidden_states: {}, shape: {}', hidden_states),
    ('forward state: {}, shape: {}', forward_h),
    ('backward state: {}, shape: {}', backward_h),
):
    print(report.format(tensor, tensor.shape))

hidden_states: [[0.6303138 0.6303138 0.6303138 0.7038734 0.7038734 0.7038734]], shape: (1, 6)
forward state: [[0.6303138 0.6303138 0.6303138]], shape: (1, 3)
backward state: [[0.7038734 0.7038734 0.7038734]], shape: (1, 3)


In [15]:
# Same bidirectional LSTM, now with return_sequences=True so the first
# return value carries the merged hidden states of every timestep.
base_lstm = LSTM(
    units=3,
    return_sequences=True,
    return_state=True,
    kernel_initializer=k_init,
    bias_initializer=b_init,
    recurrent_initializer=r_init,
)
bilstm = Bidirectional(base_lstm)

bilstm_outputs = bilstm(train_X)
hidden_states, forward_h, forward_c, backward_h, backward_c = bilstm_outputs

In [16]:
# Show the per-timestep output next to the final forward and backward
# hidden states. In the recorded run the forward state matches the first
# three values of the last timestep row, and the backward state matches the
# last three values of the first timestep row.
for report, tensor in (
    ('hidden states: {}, shape: {}', hidden_states),
    ('forward state: {}, shape: {}', forward_h),
    ('backward state: {}, shape: {}', backward_h),
):
    print(report.format(tensor, tensor.shape))

hidden states: [[[0.35906473 0.35906473 0.35906473 0.7038734  0.7038734  0.7038734 ]
  [0.55111325 0.55111325 0.55111325 0.58863586 0.58863586 0.58863586]
  [0.59115744 0.59115744 0.59115744 0.3951699  0.3951699  0.3951699 ]
  [0.6303138  0.6303138  0.6303138  0.21942243 0.21942243 0.21942243]]], shape: (1, 4, 6)
forward state: [[0.6303138 0.6303138 0.6303138]], shape: (1, 3)
backward state: [[0.7038734 0.7038734 0.7038734]], shape: (1, 3)
