# RNN을 이용한 인코더-디코더
- 번역기, 텍스트 요약에 활용

## Sequence-to-Sequence
- 챗봇, 기계번역, 내용요약, stt(Speech to Text)

![](https://wikidocs.net/images/page/24996/%EB%8B%A8%EC%96%B4%ED%86%A0%ED%81%B0%EB%93%A4%EC%9D%B4.PNG)
1. 인코더 RNN 마지막 시점의 은닉상태가 **컨텍스트 벡터**
2. 컨텍스트 벡터는 디코더 RNN 첫번째 시점의 은닉상태에 사용
3. 디코더는 RNNLM(다음 시점의 단어를 예측하는 모델)
4. 훈련과정에서는 기존 정답을 RNN의 인풋으로 사용
5. 테스트과정에서는 전시점의 RNN의 예측을 다음 시점의 인풋으로 사용

### 교사 강요(Teacher forcing)
- 이전 디코더셀의 출력을 현재 디코더셀의 입력으로 활용 (예측시)
- 이전 시점의 실제값을 현재 디코더셀의 입력으로 활용 (훈련시, 교사강요)
- 이전 예측이 틀려서 다음 셀의 예측의 영향을 주는 것을 막기 위한 전략

## Character-Level Neural Machine Translation (실습)

### 병렬 코퍼스 데이터에 대한 전처리

In [None]:
import os
import shutil
import zipfile
from pathlib import Path

import pandas as pd
import tensorflow as tf
import urllib3
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from pathlib import Path

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

In [None]:
# data_dir = Path('/content/drive/MyDrive/Colab Notebooks')
data_dir = Path('C:/Users/011/sinjy1203/data')
lines = pd.read_csv(data_dir / 'fra.txt', names=['src', 'tar', 'lic'], sep='\t')
del lines['lic']
lines.shape

(197463, 2)

In [None]:
lines.head()

Unnamed: 0,src,tar
0,Go.,Va !
1,Go.,Marche.
2,Go.,En route !
3,Go.,Bouge !
4,Hi.,Salut !


In [None]:
lines = lines.loc[:, 'src':'tar']
lines = lines[0:60000]
lines.head(10)

Unnamed: 0,src,tar
0,Go.,Va !
1,Go.,Marche.
2,Go.,En route !
3,Go.,Bouge !
4,Hi.,Salut !
5,Hi.,Salut.
6,Run!,Cours !
7,Run!,Courez !
8,Run!,Prenez vos jambes à vos cous !
9,Run!,File !


In [None]:
lines.sample(10)

Unnamed: 0,src,tar
5758,She went out.,Elle sortit.
10203,He's an author.,Il est auteur.
6027,This is ours.,C'est à nous.
29452,I accept the offer.,J'accepte la proposition.
18018,Have you decided?,T'es-tu décidée ?
36940,I'm perfectly happy.,Je suis parfaitement heureux.
28168,You're very funny.,Tu es très drôle.
23891,I got my hair cut.,Je me suis fait couper les cheveux.
34430,Are you that stupid?,Êtes-vous aussi bêtes ?
53940,Where should I put it?,Où devrais-je le mettre ?


In [None]:
# 시작과 종료를 의미하는 <sos>와 <eos> 추가
# 여기서는 \t와 \n을 각각 시작 종료 심볼로 간주
lines.tar = lines.tar.apply(lambda x: '\t ' + x + ' \n')
lines.sample(10)

Unnamed: 0,src,tar
54972,Can you say that again?,\t Pouvez-vous répéter cela ? \n
58616,Most of these are mine.,\t Il y en a la plupart à moi. \n
5901,That's great!,\t Super ! \n
21069,Tom is effective.,\t Tom est efficace. \n
11396,Let me do this.,\t Laissez-moi faire ça. \n
17397,Are you a lawyer?,\t Êtes-vous avocat ? \n
25797,"Sorry, I was busy.","\t Désolé, j'étais occupé. \n"
689,I'm tidy.,\t Je suis ordonné. \n
32188,They must be happy.,"\t Ils sont heureux, sans aucun doute. \n"
3560,Is she gone?,\t S'en est-elle allée ? \n


In [None]:
# 문자 집합 생성
src_vocab = set()
for line in lines.src:
    for char in line:
        src_vocab.add(char)

tar_vocab = set()
for line in lines.tar:
    for char in line:
        tar_vocab.add(char)

In [None]:
src_vocab_size = len(src_vocab) + 1 # padding때문에 1개 추가
tar_vocab_size = len(tar_vocab) + 1 
src_vocab_size, tar_vocab_size

(79, 105)

In [None]:
src_to_index = dict([(word, i+1) for i, word in enumerate(src_vocab)])
tar_to_index = dict([(word, i+1) for i, word in enumerate(tar_vocab)])
print(src_to_index)
print(tar_to_index)

{'T': 1, ':': 2, 'U': 3, 'D': 4, 'r': 5, 'h': 6, 'g': 7, '9': 8, 'a': 9, 'V': 10, 'l': 11, 'j': 12, '1': 13, 'P': 14, 'E': 15, 'z': 16, 'Z': 17, 'A': 18, 'L': 19, 'F': 20, 'f': 21, 'q': 22, '-': 23, 'n': 24, 'I': 25, '€': 26, 'b': 27, 'd': 28, '"': 29, ' ': 30, 'p': 31, 'v': 32, '’': 33, 'w': 34, '3': 35, '%': 36, "'": 37, 'k': 38, 'G': 39, '?': 40, 'O': 41, 'R': 42, '.': 43, 't': 44, '8': 45, 'N': 46, '6': 47, 'C': 48, '/': 49, '2': 50, 'J': 51, 'S': 52, 'W': 53, 'K': 54, 'M': 55, 'c': 56, 'x': 57, '$': 58, 'u': 59, 'o': 60, 'Y': 61, '&': 62, '7': 63, 'Q': 64, 'i': 65, 's': 66, 'é': 67, '5': 68, 'y': 69, 'm': 70, ',': 71, 'H': 72, '!': 73, 'B': 74, 'e': 75, '4': 76, 'X': 77, '0': 78}
{'T': 1, 'ë': 2, ':': 3, 'U': 4, 'D': 5, '»': 6, 'r': 7, 'h': 8, '\xa0': 9, 'g': 10, '9': 11, 'a': 12, 'V': 13, 'l': 14, 'j': 15, '1': 16, 'P': 17, 'E': 18, 'z': 19, 'î': 20, 'Ô': 21, 'Z': 22, 'A': 23, 'ô': 24, ')': 25, 'L': 26, 'F': 27, 'f': 28, 'à': 29, '\u2009': 30, 'è': 31, 'q': 32, '-': 33, 'Ê': 34, 

In [None]:
# integer encoding (encoder)
encoder_input = []

for line in lines.src:
    encoded_line = []
    for char in line:
        encoded_line.append(src_to_index[char])
    encoder_input.append(encoded_line)
encoder_input[:5]

[[39, 60, 43], [39, 60, 43], [39, 60, 43], [39, 60, 43], [72, 65, 43]]

In [None]:
# decoder encoding (decoder)
decoder_input = []
for line in lines.tar:
    decoded_line = []
    for char in line:
        decoded_line.append(tar_to_index[char])
    decoder_input.append(decoded_line)
decoder_input[:5]

[[47, 40, 13, 12, 40, 99, 40, 52],
 [47, 40, 74, 12, 7, 75, 8, 101, 61, 40, 52],
 [47, 40, 18, 35, 40, 7, 80, 79, 62, 101, 40, 99, 40, 52],
 [47, 40, 100, 80, 79, 10, 101, 40, 99, 40, 52],
 [47, 40, 71, 12, 14, 79, 62, 40, 99, 40, 52]]

In [None]:
# target data
decoder_target = []
for line in lines.tar:
    timestep = 0
    decoded_line = []
    for char in line:
        if timestep > 0:
            decoded_line.append(tar_to_index[char])
        timestep += 1
    decoder_target.append(decoded_line)
decoder_target[:5]

[[40, 13, 12, 40, 99, 40, 52],
 [40, 74, 12, 7, 75, 8, 101, 61, 40, 52],
 [40, 18, 35, 40, 7, 80, 79, 62, 101, 40, 99, 40, 52],
 [40, 100, 80, 79, 10, 101, 40, 99, 40, 52],
 [40, 71, 12, 14, 79, 62, 40, 99, 40, 52]]

In [None]:
# padding
max_src_len = max([len(line) for line in lines.src])
max_tar_len = max([len(line) for line in lines.tar])
max_src_len, max_tar_len

(23, 76)

In [None]:
encoder_input = pad_sequences(encoder_input, maxlen=max_src_len, padding='post')
decoder_input = pad_sequences(decoder_input, maxlen=max_tar_len, padding='post')
decoder_target = pad_sequences(decoder_target, maxlen=max_tar_len, padding='post')

In [None]:
decoder_input[0]

array([47, 40, 13, 12, 40, 99, 40, 52,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0])

In [None]:
decoder_target[0]

array([40, 13, 12, 40, 99, 40, 52,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0])

In [None]:
encoder_input = to_categorical(encoder_input)
decoder_input = to_categorical(decoder_input)
decoder_target = to_categorical(decoder_target)

In [None]:
encoder_input.shape

(60000, 23, 79)

In [None]:
decoder_input.shape

(60000, 76, 105)

In [None]:
decoder_target.shape

(60000, 76, 105)

### training seq2seq model

In [None]:
from tensorflow.keras.layers import Input, LSTM, Embedding, Dense
from tensorflow.keras.models import Model
import numpy as np

In [None]:
encoder_inputs = Input(shape=(None, src_vocab_size))
encoder_lstm = LSTM(units=256, return_state=True)

encoder_outputs, state_h, state_c = encoder_lstm(encoder_inputs)

encoder_states = [state_h, state_c]

In [None]:
decoder_inputs = Input(shape=(None, tar_vocab_size))
decoder_lstm = LSTM(units=256, return_sequences=True, return_state=True)

decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states)

decoder_softmax_layer = Dense(tar_vocab_size, activation='softmax')
decoder_outputs = decoder_softmax_layer(decoder_outputs)

model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')

In [None]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, None, 79)]   0           []                               
                                                                                                  
 input_2 (InputLayer)           [(None, None, 105)]  0           []                               
                                                                                                  
 lstm (LSTM)                    [(None, 256),        344064      ['input_1[0][0]']                
                                 (None, 256),                                                     
                                 (None, 256)]                                                     
                                                                                              

In [None]:
model.fit(x=[encoder_input, decoder_input], y=decoder_target, batch_size=64,
         epochs=40, validation_split=0.2)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<keras.callbacks.History at 0x7f55682b0cd0>

In [None]:
model_dir = data_dir / 'weights'
model.save(model_dir)



### seq2seq 동작
- 매 시점마다 이전 시점의 예측과 상태를 입력으로 사용
- <eos> 또는 최대길이 도달할 경우 반복 멈춤

In [None]:
encoder_model = Model(inputs=encoder_inputs, outputs=encoder_states)

In [None]:
decoder_state_input_h = Input(shape=(256,))
decoder_state_input_c = Input(shape=(256,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

decoder_outputs, state_h, state_c = decoder_lstm(decoder_inputs, initial_state=decoder_states_inputs)

decoder_states = [state_h, state_c]
decoder_outputs = decoder_softmax_layer(decoder_outputs)
decoder_model = Model(inputs=[decoder_inputs] + decoder_states_inputs, outputs=[decoder_outputs] + decoder_states)

In [None]:
index_to_src = dict((i, char) for char, i in src_to_index.items())
index_to_tar = dict((i, char) for char, i in tar_to_index.items())

In [None]:
def decode_sequence(input_seq):
    states_value = encoder_model.predict(input_seq, verbose=0)
    
    target_seq = np.zeros((1, 1, tar_vocab_size))
    target_seq[0, 0, tar_to_index['\t']] = 1

    stop_condition = False
    decoded_sentence = ""

    while not stop_condition:
        output_tokens, h, c = decoder_model.predict([target_seq] + states_value)
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_char = index_to_tar[sampled_token_index]

        decoded_sentence += sampled_char

        if sampled_char == '\n' or len(decoded_sentence) > max_tar_len:
            stop_condition = True
        
        target_seq = np.zeros((1, 1, tar_vocab_size))
        target_seq[0, 0, sampled_token_index] = 1.

        states_value = [h, c]
    
    return decoded_sentence

In [None]:
from IPython.utils.py3compat import decode
for seq_index in [3, 50, 100, 300, 1001]:
    input_seq = encoder_input[seq_index:seq_index+1]
    decoded_sentence = decode_sequence(input_seq)

    print(35*'-')
    print('입력문장: ', lines.src[seq_index])
    print('정답문장: ', lines.tar[seq_index][2: len(lines.tar[seq_index])-1])
    print('번역문장: ', decoded_sentence[1:len(decoded_sentence)-1])

-----------------------------------
입력문장:  Go.
정답문장:  Bouge ! 
번역문장:  Va ! 
-----------------------------------
입력문장:  Hello!
정답문장:  Bonjour ! 
번역문장:  Salut ! 
-----------------------------------
입력문장:  Got it?
정답문장:  T'as capté ? 
번역문장:  Pours-im ! 
-----------------------------------
입력문장:  Hang on.
정답문장:  Tiens bon ! 
번역문장:  Attendez. 
-----------------------------------
입력문장:  Here's $5.
정답문장:  Voilà cinq dollars. 
번역문장:  Voilà conferme. 


## Word-Level Neural Machine Translation

### 데이터로드 및 전처리

In [3]:
import os
import re
import shutil
import zipfile

import numpy as np
import pandas as pd
import tensorflow as tf
import unicodedata
import urllib3
from tensorflow.keras.layers import Embedding, GRU, Dense
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
from pathlib import Path
data_dir = Path('/content/drive/MyDrive/Colab Notebooks')
# data_dir = Path('C:/Users/011/sinjy1203/data')
lines = pd.read_csv(data_dir / 'fra.txt', names=['src', 'tar', 'lic'], sep='\t')
del lines['lic']
lines.shape

(197463, 2)

In [6]:
num_samples = 33000

In [7]:
## preprocess func
# 프랑스 악센트 제거
def to_ascii(s):
    return ''.join(c for c in unicodedata.normalize('NFD', s) if unicodedata.category(c) != 'Mn')

# 악센트 제거, 구두점등을 처리
def preprocess_sentence(sent):
    sent = to_ascii(sent.lower())
    sent = re.sub(r"([?.!,¿])", r" \1", sent)
    sent = re.sub(r"[^a-zA-Z!.?]+", r" ", sent)
    sent = re.sub(r"\s+", " ", sent)
    return sent

In [8]:
## 시작토큰 <sos>, 종료 토큰 <eos> 추가
def load_preprocessed_data():
    encoder_input, decoder_input, decoder_target = [], [], []
    
    with open(data_dir / 'fra.txt', 'r', encoding='UTF8') as lines:
        for i, line in enumerate(lines):
            src_line, tar_line, _ = line.strip().split('\t')
            src_line = [w for w in preprocess_sentence(src_line).split()]
            
            tar_line = preprocess_sentence(tar_line)
            tar_line_in = [w for w in ("<sos> " + tar_line).split()]
            tar_line_out = [w for w in (tar_line + ' <eos>').split()]
            
            encoder_input.append(src_line)
            decoder_input.append(tar_line_in)
            decoder_target.append(tar_line_out)
            
            if i == num_samples - 1:
                break
    
    return encoder_input, decoder_input, decoder_target

In [9]:
sents_en_in, sents_fra_in, sents_fra_out = load_preprocessed_data()

In [10]:
sents_en_in[:5]

[['go', '.'], ['go', '.'], ['go', '.'], ['go', '.'], ['hi', '.']]

In [11]:
sents_fra_in[:5]

[['<sos>', 'va', '!'],
 ['<sos>', 'marche', '.'],
 ['<sos>', 'en', 'route', '!'],
 ['<sos>', 'bouge', '!'],
 ['<sos>', 'salut', '!']]

In [12]:
sents_fra_out[:5]

[['va', '!', '<eos>'],
 ['marche', '.', '<eos>'],
 ['en', 'route', '!', '<eos>'],
 ['bouge', '!', '<eos>'],
 ['salut', '!', '<eos>']]

In [13]:
## tokenizer(정수 인코딩), padding
tokenizer_en = Tokenizer(filters='', lower=False)
tokenizer_en.fit_on_texts(sents_en_in)
encoder_input = tokenizer_en.texts_to_sequences(sents_en_in)
encoder_input = pad_sequences(encoder_input, padding='post')

tokenizer_fra = Tokenizer(filters="", lower=False)
tokenizer_fra.fit_on_texts(sents_fra_in)
tokenizer_fra.fit_on_texts(sents_fra_out)

decoder_input = tokenizer_fra.texts_to_sequences(sents_fra_in)
decoder_input = pad_sequences(decoder_input, padding='post')

decoder_target = tokenizer_fra.texts_to_sequences(sents_fra_out)
decoder_target = pad_sequences(decoder_target, padding='post')

In [14]:
encoder_input.shape

(33000, 8)

In [15]:
decoder_input.shape

(33000, 16)

In [16]:
decoder_target.shape

(33000, 16)

In [17]:
src_vocab_size = len(tokenizer_en.word_index) + 1
tar_vocab_size = len(tokenizer_fra.word_index) + 1
print(src_vocab_size, tar_vocab_size)

4672 8137


In [18]:
src_to_index = tokenizer_en.word_index
index_to_src = tokenizer_en.index_word
tar_to_index = tokenizer_fra.word_index
index_to_tar = tokenizer_fra.index_word

### shuffle & train test split

In [19]:
## indices shuffle
indices = np.arange(encoder_input.shape[0])
np.random.shuffle(indices)
indices

array([14322, 20587, 18455, ..., 10961,  5745,  1511])

In [20]:
encoder_input = encoder_input[indices]
decoder_input = decoder_input[indices]
decoder_target = decoder_target[indices]

In [21]:
encoder_input[30997]

array([ 15,  17,   9, 412,   1,   0,   0,   0], dtype=int32)

In [22]:
decoder_input[30997]

array([  2,  18,  42,  30, 459,   1,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0], dtype=int32)

In [23]:
decoder_input[30997][1:]

array([ 18,  42,  30, 459,   1,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0], dtype=int32)

In [24]:
decoder_target[30997][:-1]

array([ 18,  42,  30, 459,   1,   3,   0,   0,   0,   0,   0,   0,   0,
         0,   0], dtype=int32)

In [25]:
## 0.2% test split
n_of_val = int(33000*0.1)

encoder_input_train = encoder_input[:-n_of_val]
decoder_input_train = decoder_input[:-n_of_val]
decoder_target_train = decoder_target[:-n_of_val]

encoder_input_test = encoder_input[-n_of_val:]
decoder_input_test = decoder_input[-n_of_val:]
decoder_target_test = decoder_target[-n_of_val:]

In [26]:
encoder_input_train.shape

(29700, 8)

### modeling

In [27]:
from tensorflow.keras.layers import Input, LSTM, Embedding, Dense, Masking
from tensorflow.keras.models import Model

**masking**: mask_value(padding_value)를 연산에 제외하는 역할

In [87]:
embedding_dim = 64
hidden_units = 64

## 인코더
encoder_inputs = Input(shape=(None,))
# enc_masking = Masking(mask_value=0.0)(encoder_inputs)
enc_emb = Embedding(src_vocab_size, embedding_dim, mask_zero=True)(encoder_inputs)
# enc_masking = Masking(mask_value=0.0)(enc_emb)

encoder_lstm = LSTM(hidden_units, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(enc_emb)

encoder_states = [state_h, state_c]

In [88]:
## 디코더
decoder_inputs = Input(shape=(None,))
# dec_masking = Masking(mask_value=0.0)(decoder_inputs)
dec_emb_layer = Embedding(tar_vocab_size, hidden_units, mask_zero=True)
dec_emb = dec_emb_layer(decoder_inputs)
# dec_masking = Masking(mask_value=0.0)(dec_emb)

decoder_lstm = LSTM(hidden_units, return_sequences=True, return_state=True)

decoder_outputs, _, _ = decoder_lstm(dec_emb, 
                                   initial_state=encoder_states)

decoder_dense = Dense(tar_vocab_size, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', 
              metrics=['acc'])

In [89]:
model.fit(x=[encoder_input_train, decoder_input_train], y=decoder_target_train, 
         validation_data=([encoder_input_test, decoder_input_test], 
                         decoder_target_test), 
         batch_size=128, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7f5c534f5eb0>

### check
mask_zero=False일때 keras_mask 확인인

In [91]:
model_ = Model(decoder_inputs, dec_emb)

In [92]:
model_(decoder_input[30997:30998])._keras_mask

<tf.Tensor: shape=(1, 16), dtype=bool, numpy=
array([[ True,  True,  True,  True,  True,  True, False, False, False,
        False, False, False, False, False, False, False]])>

In [48]:
np.all(np.isnan(model([encoder_input[30997:30998], decoder_input[30997:30998]]).numpy()))

True

In [73]:
decoder_input[30997:30998]

array([[  2,  18,  42,  30, 459,   1,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0]], dtype=int32)

In [37]:
decoder_input[30997:30998].shape

(1, 16)

### Testing

In [None]:
# 인코더
encoder_model = Model(encoder_inputs, encoder_states)

# 디코더
decoder_state_input_h = Input(shape=(hidden_units,))
decoder_state_input_c = Input(shape=(hidden_units,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

dec_emb2 = dec_emb_layer(decoder_inputs)

decoder_outputs2, state_h2, state_c2 = decoder_lstm(dec_emb2, 
                                                    initial_state=decoder_states_inputs)
decoder_states2 = [state_h2, state_c2]
decoder_outputs2 = decoder_dense(decoder_outputs2)

decoder_model = Model([decoder_inputs] + decoder_states_inputs, [decoder_outputs2] + decoder_states2)