* 덧셈 데이터 구성
* seq2seq Encoder / Decoder 정의
* 학습
* 평가 (예측)

## 덧셈 데이터 구성

In [None]:
# 최대 세자리수끼리 합을 구하는 데이터

In [None]:
import numpy as np

In [None]:
# 99 + 1 => '9','9','+','1'

In [None]:
def get_sum_dict():
  """Build the vocabulary lookup tables for the addition task.

  The vocabulary is the ten digit characters '0'..'9' followed by '_' and
  '+', numbered in that order starting from 0.

  Returns:
    A tuple ``(id_to_item, item_to_id)``: the first dict maps an integer id
    to its character, the second is the inverse mapping.
  """
  vocab = [str(digit) for digit in range(10)] + ['_', '+']

  id_to_item = dict(enumerate(vocab))
  item_to_id = {token: index for index, token in id_to_item.items()}

  return id_to_item, item_to_id

In [None]:
def convert_item_to_id(items, item_to_id):
  """Translate a sequence of items into a list of their ids.

  Args:
    items: iterable of vocabulary items (characters).
    item_to_id: mapping from item to integer id.

  Returns:
    A new list with one id per input item, in the same order.

  Raises:
    KeyError: if an item is missing from ``item_to_id``.
  """
  return [item_to_id[token] for token in items]

def convert_id_to_item(ids, id_to_item):
  """Translate a sequence of ids back into a list of items.

  Args:
    ids: iterable of integer ids.
    id_to_item: mapping from integer id to vocabulary item.

  Returns:
    A new list with one item per input id, in the same order.

  Raises:
    KeyError: if an id is missing from ``id_to_item``.
  """
  return [id_to_item[token_id] for token_id in ids]

In [None]:
def sum_data_gen():
  """Generate one random addition problem as character lists.

  Two integers are drawn from ``np.random.randint(0, 1000)``. The question
  is the text "<num1>+<num2>" (operands are not zero-padded); the answer is
  '_' followed by the sum zero-filled to four digits.

  Returns:
    A tuple ``(question_chars, answer_chars)`` of lists of single
    characters.
  """
  left = np.random.randint(0, 1000)
  right = np.random.randint(0, 1000)
  question = '+'.join((str(left), str(right)))
  # The leading '_' is the marker the decoder is fed as its first input.
  answer = '_' + str(left + right).zfill(4)
  return list(question), list(answer)

In [None]:
def get_dataset(data_num=100):
  """Generate ``data_num`` random addition problems encoded as id lists.

  Args:
    data_num: number of (question, answer) pairs to generate.

  Returns:
    A tuple ``(xs, ys)`` of lists; ``xs[i]`` holds the ids of the i-th
    question's characters and ``ys[i]`` the ids of its answer's characters.
  """
  _, item_to_id = get_sum_dict()
  # Draw every pair first, in order, so the random stream is consumed
  # exactly once per sample.
  pairs = [sum_data_gen() for _ in range(data_num)]
  xs = [convert_item_to_id(question, item_to_id) for question, _ in pairs]
  ys = [convert_item_to_id(answer, item_to_id) for _, answer in pairs]
  return xs, ys

In [None]:
# Vocabulary lookup tables used by the rest of the notebook.
id_to_item, item_to_id = get_sum_dict()
# 100,000 training pairs and 10,000 test pairs, each drawn independently at
# random (so the two sets are not guaranteed to be disjoint).
train_x, train_y = get_dataset(data_num=100000)
test_x, test_y = get_dataset(data_num=10000)

In [None]:
train_x[0],train_y[0]

([2, 3, 0, 11, 4, 9, 4], [10, 0, 7, 2, 4])

In [None]:
convert_id_to_item(train_x[0], id_to_item)

['2', '3', '0', '+', '4', '9', '4']

In [None]:
import tensorflow as tf

In [None]:
from tensorflow.keras.layers import Input, LSTM, Embedding, Dense
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

In [None]:
# Vocabulary sizes for the source (question) and target (answer) sides.
# Both sides use the same vocabulary, so the two sizes are equal.
src_vocab_size = tar_vocab_size = len(id_to_item)

In [None]:
# Turn the ragged id lists into fixed-width arrays: width 7 for questions
# (the longest is "ddd+ddd") and width 5 for answers ('_' plus four digits).
# NOTE(review): no `value=` is given, so shorter questions are left-padded
# with pad_sequences' default fill - presumably 0, which is also the id of
# the digit '0' in get_sum_dict(); confirm that padding with a real digit id
# is intended.
train_x = pad_sequences(train_x, maxlen=7, padding='pre')
test_x = pad_sequences(test_x, maxlen=7, padding='pre')
train_y = pad_sequences(train_y, maxlen=5, padding='pre')
test_y = pad_sequences(test_y, maxlen=5, padding='pre')

In [None]:
convert_id_to_item(train_x[0], id_to_item), convert_id_to_item(train_y[0], id_to_item)

(['2', '3', '0', '+', '4', '9', '4'], ['_', '0', '7', '2', '4'])

In [None]:
# Build the training targets: drop the first column of the decoder input so
# that the target at each step is the decoder input of the following step.
train_y_t = train_y[:,1:]
test_y_t = test_y[:,1:]

# ['_', '0', '3', '1', '5'] => ['0', '3', '1', '5', '_']
# Right-pad with the '_' id back to length 5, so '_' is the target of the
# final step.
train_y_t = pad_sequences(train_y_t, value=item_to_id['_'], maxlen=5, padding='post')
test_y_t = pad_sequences(test_y_t, value=item_to_id['_'], maxlen=5, padding='post')

In [None]:
convert_id_to_item(train_y_t[0], id_to_item)

['0', '7', '2', '4', '_']

In [None]:
# One-hot encode every id array so it can be fed to the network as input
# (and used as a categorical target). Each array gains a trailing axis of
# size vocab_size.
train_x = to_categorical(train_x, num_classes=src_vocab_size)
test_x = to_categorical(test_x, num_classes=src_vocab_size)
train_y = to_categorical(train_y, num_classes=tar_vocab_size)
test_y = to_categorical(test_y, num_classes=tar_vocab_size)
train_y_t = to_categorical(train_y_t, num_classes=tar_vocab_size)
test_y_t = to_categorical(test_y_t, num_classes=tar_vocab_size) 

In [None]:
train_x.shape, test_x.shape, train_y.shape, test_y.shape, train_y_t.shape, test_y_t.shape

((100000, 7, 12),
 (10000, 7, 12),
 (100000, 5, 12),
 (10000, 5, 12),
 (100000, 5, 12),
 (10000, 5, 12))

In [None]:
# onehot_to_sentence(train_y_t[0], id_to_item)

In [None]:
def onehot_to_sentence(data, id_to_item):
  return convert_id_to_item(np.argmax(data, axis=1), id_to_item)

In [None]:
# Number of units passed to both the encoder and the decoder LSTM, and the
# feature width of the state/output placeholders of the inference decoder.
hidden_node_size = 128

In [None]:
# Encoder: consumes the one-hot question sequence (any length).
# encoder_input = Embedding(vocab_size, 5,  input_length=7)
encoder_inputs = Input(shape=(None, src_vocab_size))
## seq2seq (no Attention): only the final states are needed
# encoder_lstm = LSTM(hidden_node_size, return_state=True) 

## seq2seq (with Attention): return_sequences=True also exposes the output
## of every timestep, which the attention layer attends over
encoder_lstm = LSTM(hidden_node_size, return_state=True, return_sequences=True) 
encoder_outputs, state_h, state_c = encoder_lstm(encoder_inputs)

# Final hidden and cell state; used as the decoder LSTM's initial state.
encoder_states = [state_h, state_c]

In [None]:
# Decoder (training graph): consumes the one-hot answer sequence that starts
# with '_' and is trained to emit the sequence shifted by one step.
decoder_inputs = Input(shape=(None, tar_vocab_size))
decoder_lstm = LSTM(hidden_node_size, return_state=True, return_sequences=True) 
## seq2seq (with Attention) - add the Attention layer
decoder_attention = tf.keras.layers.Attention()

# The LSTM returns (output, h, c); the decoder's own h and c are not needed
# during training, so they are discarded with _.
decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states)

## seq2seq (with Attention) - apply attention with the decoder outputs as the
## first list element (query) and the encoder outputs as the second (value)
decoder_attention_output = decoder_attention([decoder_outputs, encoder_outputs])
decoder_softmax_layer = Dense(tar_vocab_size, activation='softmax')

## seq2seq (no Attention) - softmax directly on the LSTM outputs
#decoder_outputs = decoder_softmax_layer(decoder_outputs)
## seq2seq (with Attention) - softmax on LSTM outputs plus attention output
decoder_outputs = decoder_softmax_layer(decoder_outputs + decoder_attention_output)

# Model(input, output)
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

In [None]:
model.summary()

Model: "functional_15"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_19 (InputLayer)           [(None, None, 12)]   0                                            
__________________________________________________________________________________________________
input_20 (InputLayer)           [(None, None, 12)]   0                                            
__________________________________________________________________________________________________
lstm_6 (LSTM)                   [(None, None, 128),  72192       input_19[0][0]                   
__________________________________________________________________________________________________
lstm_7 (LSTM)                   [(None, None, 128),  72192       input_20[0][0]                   
                                                                 lstm_6[0][1]         

In [None]:
# train_x.shape, test_x.shape, 
# train_y.shape, test_y.shape, 
# train_y_t.shape, test_y_t.shape

In [None]:
# Categorical cross-entropy matches the one-hot targets and the softmax
# output layer; accuracy is tracked as an additional metric.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy']) 

In [None]:
# Train with teacher forcing: the encoder receives the question (train_x),
# the decoder receives the answer starting with '_' (train_y), and the target
# is the same answer shifted left by one step (train_y_t).
# validation_split=0.2 holds out 20% of the training arrays for validation.
history = model.fit(x=[train_x, train_y], 
              y=train_y_t, 
              batch_size=512, 
              epochs=200, 
              validation_split=0.2)             

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

## 학습한 모델 동작 시키기

In [None]:
# Inference-time encoder model, reusing the trained encoder layers.
## seq2seq without Attention: only the final states are returned
# encoder_model = Model(inputs=encoder_inputs, outputs=encoder_states)
## seq2seq with Attention: also return the per-timestep encoder outputs,
## which the decoder's attention layer needs at every decoding step
encoder_model = Model(inputs=encoder_inputs, outputs=[encoder_outputs, encoder_states])

In [None]:
# Inference-time decoder model. It reuses the trained decoder_lstm,
# decoder_attention and decoder_softmax_layer, but takes everything that comes
# from the encoder (or from the previous decoding step) as explicit inputs so
# it can be run one step at a time.

## seq2seq with Attention: placeholder for the encoder's per-timestep outputs
decoder_encoder_outputs = Input(shape=(None, hidden_node_size))
## Both variants: placeholders for the LSTM hidden/cell state carried over
## from the encoder (first step) or from the previous decoding step
decoder_state_input_h = Input(shape=(hidden_node_size,))
decoder_state_input_c = Input(shape=(hidden_node_size,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

# At inference time the updated states are also returned, so they can be fed
# back in when predicting the next token.
decoder_outputs, state_h, state_c = decoder_lstm(decoder_inputs, initial_state=decoder_states_inputs)
decoder_states = [state_h, state_c]

## seq2seq with Attention
# The list order must match the training graph: decoder outputs first
# (query), encoder outputs second (value). Passing them the other way round
# makes the encoder sequence the query, which is not what the layer was
# trained on and yields one output row per encoder timestep instead of one
# per decoder timestep.
decoder_attention_output = decoder_attention([decoder_outputs, decoder_encoder_outputs])

## seq2seq without Attention
# decoder_outputs = decoder_softmax_layer(decoder_outputs)

## seq2seq with Attention
decoder_outputs = decoder_softmax_layer(decoder_outputs + decoder_attention_output)

# The nested input structure ((token, h, c), encoder_outputs) is what callers
# of decoder_model.predict are expected to pass.
decoder_model = Model(inputs=([decoder_inputs] + decoder_states_inputs, decoder_encoder_outputs),
                      outputs=[decoder_outputs] + decoder_states)

In [None]:
decoder_model.summary()

Model: "functional_19"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_20 (InputLayer)           [(None, None, 12)]   0                                            
__________________________________________________________________________________________________
input_22 (InputLayer)           [(None, 128)]        0                                            
__________________________________________________________________________________________________
input_23 (InputLayer)           [(None, 128)]        0                                            
__________________________________________________________________________________________________
lstm_7 (LSTM)                   [(None, None, 128),  72192       input_20[0][0]                   
                                                                 input_22[0][0]       

In [None]:
##### 모델 확인용 (무시하면 되는 코드)#### 
# ## seq2seq without Attention
# # encoder_model = Model(inputs=encoder_inputs, outputs=encoder_states)
# ## seq2seq with Attention
# encoder_model = Model(inputs=encoder_inputs, outputs=[encoder_outputs, encoder_states])

## without Attention
# decoder_model = Model(inputs=[decoder_inputs] + decoder_states_inputs, 
#                       outputs=[decoder_outputs] + decoder_states)

## with Attention
# decoder_model = Model(inputs=([decoder_inputs] + decoder_states_inputs, decoder_encoder_outputs), 
#                       outputs=[decoder_outputs] + decoder_states)

In [None]:
def decode_sentence(input_seq, max_len=5):
  """Greedily decode the answer for one one-hot encoded question.

  The encoder is run once; the decoder is then run one token at a time,
  starting from the '_' marker and feeding each predicted token (and the
  updated LSTM states) back in as the next input.

  Args:
    input_seq: one-hot encoded question passed to ``encoder_model.predict``.
      Only batch index 0 of each prediction is read, so callers pass a batch
      of one (e.g. ``test_x[i:i+1]``).
    max_len: maximum number of characters to emit. The default of 5 is the
      target length used in this file (four digits followed by '_').

  Returns:
    The decoded characters joined into a string. The string ends with '_'
    when decoding stopped because the model emitted it; otherwise it is
    exactly ``max_len`` characters long.
  """
  ## seq2seq without Attention
  # states_value = encoder_model.predict(input_seq)
  ## seq2seq with Attention
  encoder_result = encoder_model.predict(input_seq)
  outputs_value, states_value = encoder_result[0], encoder_result[1]

  # One-hot vector for '_', the marker every decoder input starts with.
  target_seq = np.zeros((1, 1, tar_vocab_size))
  target_seq[0, 0, item_to_id['_']] = 1.

  decoded_chars = []

  # Stop when '_' is produced or max_len characters have been emitted.
  while True:
    ## seq2seq without Attention
    # output_tokens, h, c = decoder_model.predict([target_seq] + states_value)
    ## seq2seq with Attention
    # list(...) keeps the concatenation valid whether the states arrive as a
    # list or a tuple.
    output_tokens, h, c = decoder_model.predict(
        ([target_seq] + list(states_value), outputs_value))

    # Convert the prediction for the last timestep into a character.
    sampled_token_index = np.argmax(output_tokens[0, -1, :])
    sampled_char = id_to_item[sampled_token_index]
    decoded_chars.append(sampled_char)

    # '>=' (not '>') so that at most max_len characters are ever emitted.
    if sampled_char == '_' or len(decoded_chars) >= max_len:
      break

    # Feed the new states and the predicted token into the next step.
    states_value = [h, c]
    target_seq = np.zeros((1, 1, tar_vocab_size))
    target_seq[0, 0, sampled_token_index] = 1.

  return ''.join(decoded_chars)

In [None]:
onehot_to_sentence(test_x[0], id_to_item)

['2', '5', '1', '+', '2', '2', '3']

In [None]:
# Pick five distinct row indices of the test set at random.
test_idxs = np.random.choice(len(test_x), size=5, replace=False)

In [None]:
# Decode each sampled test question and print it next to the ground truth.
for idx in test_idxs:
  # Slice (rather than index) so the batch axis is kept: a batch of one.
  input_seq = test_x[idx:idx+1]
  decoded_sentence = decode_sentence(input_seq)
  print( 35 * '-')
  # Labels below read: "input sentence", "answer sentence",
  # "sentence predicted by the model".
  print('입력 문장', onehot_to_sentence(test_x[idx], id_to_item ))
  print('정답 문장', onehot_to_sentence(test_y[idx], id_to_item ))
  # [:-1] drops the final character, which is the '_' terminator when
  # decoding stopped on it.
  print('모델이 예측한 문장', decoded_sentence[:-1] ) 

-----------------------------------
입력 문장 ['2', '4', '1', '+', '6', '2', '0']
정답 문장 ['_', '0', '8', '6', '1']
모델이 예측한 문장 0879
-----------------------------------
입력 문장 ['9', '0', '9', '+', '5', '9', '4']
정답 문장 ['_', '1', '5', '0', '3']
모델이 예측한 문장 4577
-----------------------------------
입력 문장 ['7', '6', '8', '+', '5', '3', '0']
정답 문장 ['_', '1', '2', '9', '8']
모델이 예측한 문장 1477
-----------------------------------
입력 문장 ['5', '2', '4', '+', '5', '4', '5']
정답 문장 ['_', '1', '0', '6', '9']
모델이 예측한 문장 1978
-----------------------------------
입력 문장 ['8', '6', '7', '+', '8', '0', '1']
정답 문장 ['_', '1', '6', '6', '8']
모델이 예측한 문장 5677


In [None]:
# Decode the first test question on its own; the 0:1 slice keeps the batch
# axis. The returned string is displayed as the cell's result.
input_seq = test_x[0:1]
decode_sentence(input_seq)

'0577_'