In [9]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Dropout, Attention
from tensorflow.keras.models import Model

## Hyper Parameters

In [73]:
BATCH_SIZE = 64      # batch size
EMBEDDING_DIM = 100  # embedding dimension
TIME_STEPS = 30      # 30 words per sentence
VOCAB_SIZE = 12638   # size of the vocabulary
UNITS = 128          # hidden units

In [105]:
# Encoder: token ids -> embeddings -> LSTM.
# The Input tensor gets its own name so it stays reachable after `x` is
# reused for the intermediate outputs below.
# `shape` must be a tuple: (TIME_STEPS) is just the int 30, (TIME_STEPS,) is a 1-tuple.
encoder_tokens = Input(shape=(TIME_STEPS,))
print(f'Data 입력: {encoder_tokens.get_shape()}')
print('---'*10)

x = Embedding(VOCAB_SIZE, EMBEDDING_DIM, input_length=TIME_STEPS, name='Embedding')(encoder_tokens)
print(f'Embedding 출력: {x.get_shape()}')
print('---'*10)

# return_sequences=True -> per-time-step outputs; return_state=True -> also
# return the final hidden and cell states.
x, hidden_state, cell_state = LSTM(UNITS, return_state=True, return_sequences=True, name='LSTM')(x)

print(f'output: {x.get_shape()}')
print(f'hidden_state: {hidden_state.get_shape()}')
print(f'cell_state: {cell_state.get_shape()}')
print('---'*10)

# Keep the per-step encoder outputs under a stable name; `x` is reused by later cells.
encoder_output = x
print(f'encoder output: {encoder_output.get_shape()}')

Data 입력: (None, 30)
------------------------------
Embedding 출력: (None, 30, 100)
------------------------------
output: (None, 30, 128)
hidden_state: (None, 128)
cell_state: (None, 128)
------------------------------
encoder output: (None, 30, 128)


**context_vector**

In [81]:
# Final LSTM states of the encoder, packaged as the context vector.
# This must run right after the encoder cell: the decoder cell further down
# reuses the names `hidden_state` / `cell_state` for its own states.
context_vector = [hidden_state, cell_state]
# [(None, 128), (None, 128)]

In [109]:
# Decoder: token ids -> embeddings -> LSTM seeded with the encoder's final states.
# NOTE: despite its name, `encoder_inputs` holds the encoder's [hidden, cell]
# states (the context vector); later cells index it as encoder_inputs[0].
encoder_inputs = context_vector

# `shape` must be a tuple: (TIME_STEPS) is just the int 30, (TIME_STEPS,) is a 1-tuple.
decoder_inputs = Input(shape=(TIME_STEPS,))
print(f'decoder_inputs: {decoder_inputs.get_shape()}')
print('---'*10)

x = Embedding(VOCAB_SIZE, EMBEDDING_DIM, input_length=TIME_STEPS)(decoder_inputs)
print(f'Embedding 출력: {x.get_shape()}')
print('---'*10)

# Start the decoder from the encoder states. Without `initial_state` the
# context vector never reaches the decoder LSTM, which then starts from zeros.
decoder_output, hidden_state, cell_state = LSTM(UNITS, return_state=True, return_sequences=True)(
    x, initial_state=encoder_inputs
)
print(f'decoder_output: {decoder_output.get_shape()}')
print(f'hidden_state: {hidden_state.get_shape()}')
print(f'cell_state: {cell_state.get_shape()}')
print('---'*10)

# The label says decoder_output, so report the decoder tensor (not encoder_output).
print(f'decoder_output: {decoder_output.get_shape()}')

decoder_inputs: (None, 30)
------------------------------
Embedding 출력: (None, 30, 100)
------------------------------
decoder_output: (None, 30, 128)
hidden_state: (None, 128)
cell_state: (None, 128)
------------------------------
decoder_output: (None, 30, 128)


**key_value**

encoder로부터 나온 hidden_state를 decoder_output(마지막 time step 제외) 앞에 붙여 time 축으로 concat합니다.

In [111]:
# Shape of the first element of `encoder_inputs` (the context vector's first state).
encoder_inputs[0].shape

TensorShape([None, 128])

In [114]:
# Give the first context state a length-1 time axis and prepend it to the
# decoder outputs with their last time step dropped, so the time axis keeps
# its original length.
key_value = tf.concat(
    [encoder_inputs[0][:, None, :], decoder_output[:, :-1]],
    axis=1,
)

print(f'key_value: {key_value.shape}')

key_value: (None, 30, 128)


**Attention**

In [118]:
# Compute attention between the sequence built by concatenating the previous
# hidden states (`key_value`) and the outputs produced by the encoder.
#   key_value:      (None, 30, 128)
#   encoder_output: (None, 30, 128)
print(
    f'key_value: {key_value.shape}',
    f'encoder_output: {encoder_output.shape}',
    sep='\n',
)
# Keras `Attention` receives its inputs as [query, value]; `key_value` is
# passed in the query slot here.
attention_matrix = Attention()([key_value, encoder_output])
print('---'*10, f'attention_matrix: {attention_matrix.shape}', sep='\n')

key_value: (None, 30, 128)
encoder_output: (None, 30, 128)
------------------------------
attention_matrix: (None, 30, 128)


In [125]:
# Concatenate the attention output computed above with the decoder output
# along the last (feature) axis.
x = tf.concat([decoder_output, attention_matrix], axis=-1)

# (None, 30, 128)
print(f'attention: {attention_matrix.get_shape()}')
# (None, 30, 128)
print(f'decoder_output: {decoder_output.get_shape()}')
print('---'*10)
# (None, 30, 256)
print(f'final output (x): {x.get_shape()}')

attention: (None, 30, 128)
decoder_output: (None, 30, 128)
------------------------------
final output (x): (None, 30, 256)


In [103]:
# Project the last axis onto the vocabulary with a softmax; Dense acts on the
# feature axis, so the time axis is kept.
x = Dense(units=VOCAB_SIZE, activation='softmax')(x)
x.shape

TensorShape([None, 30, 12638])