# seq2seq model

In [5]:
import numpy as np
import tensorflow as tf
from itertools import chain

### 데이터

In [6]:
# Toy parallel corpus for the seq2seq demo.
#
# `text` is the source side: one entry per document, and each document is a
# list of sentences already tokenised and wrapped in <sos> ... <eos>.
text = [
    # document 0: sliding windows over letters
    [
        ['<sos>', 'a', 'b', 'c', 'd', 'e', '<eos>'],
        ['<sos>', 'b', 'c', 'd', 'e', 'f', '<eos>'],
        ['<sos>', 'c', 'd', 'e', 'f', 'g', '<eos>'],
    ],
    # document 1: sliding windows over digits
    [
        ['<sos>', '1', '2', '3', '4', '5', '<eos>'],
        ['<sos>', '2', '3', '4', '5', '6', '<eos>'],
        ['<sos>', '3', '4', '5', '6', '7', '<eos>'],
    ],
]

# `target` is the output side, with the same nesting as `text`
# (one single-sentence document per source document).
target = [
    [['<sos>', 'c', 'd', 'e', '<eos>']],
    [['<sos>', '3', '4', '5', '<eos>']],
]

# Vocabulary: the position of a token in this list is its integer id.
dic = [
    '<sos>', '<eos>',
    'a', 'b', 'c', 'd', 'e', 'f', 'g',
    '1', '2', '3', '4', '5', '6', '7',
]

### 배치 구성

In [7]:
# Token -> id lookup table, built once. Ids are positions in `dic`; the first
# occurrence wins, which matches what `dic.index(token)` would return.
token_to_id = {}
for token_id, token in enumerate(dic):
    token_to_id.setdefault(token, token_id)


def _flatten_document(sentences):
    """Concatenate a document's sentences (lists of tokens) into one flat token list."""
    return list(chain.from_iterable(sentences))


def _encode(tokens):
    """Map a sequence of tokens to their integer ids; raises KeyError on an unknown token."""
    return [token_to_id[token] for token in tokens]


# Encoder side: every source document becomes one flat id sequence, and its
# length (no padding is added here) is recorded alongside it.
fedic_encoder_x = []
fedic_encoder_seq_len_except_pad = []
for document in text:
    source_tokens = _flatten_document(document)
    fedic_encoder_x.append(_encode(source_tokens))
    fedic_encoder_seq_len_except_pad.append(len(source_tokens))

# Decoder side (teacher forcing): the decoder input is the target without its
# last token, and the label is the target without its first token, so the
# label at step t is the token that follows decoder input t.
fedic_decoder_x = []
fedic_decoder_seq_len_except_pad = []
real_label = []
for document in target:
    target_tokens = _flatten_document(document)
    decoder_input_tokens = target_tokens[:-1]
    label_tokens = target_tokens[1:]
    fedic_decoder_x.append(_encode(decoder_input_tokens))
    fedic_decoder_seq_len_except_pad.append(len(decoder_input_tokens))
    real_label.append(_encode(label_tokens))

In [8]:
fedic_encoder_x

[[0, 2, 3, 4, 5, 6, 1, 0, 3, 4, 5, 6, 7, 1, 0, 4, 5, 6, 7, 8, 1],
 [0, 9, 10, 11, 12, 13, 1, 0, 10, 11, 12, 13, 14, 1, 0, 11, 12, 13, 14, 15, 1]]

In [9]:
fedic_encoder_seq_len_except_pad

[21, 21]

In [8]:
fedic_decoder_x

[[0, 4, 5, 6], [0, 11, 12, 13]]

In [10]:
fedic_decoder_seq_len_except_pad

[4, 4]

In [11]:
real_label

[[4, 5, 6, 1], [11, 12, 13, 1]]

### 파라미터 설정

In [12]:
# Dynamic dimensions: None is used as an unknown size in the placeholder
# shapes below (batch size and encoder/decoder sequence lengths).
number_of_document = number_of_encoder_word = number_of_decoder_word = None

# Model sizes.
voca_size = len(dic)        # one id per vocabulary entry
word_embedding_size = 2     # width of each embedding vector
lstm_hidden_size = 10       # LSTM units in both encoder and decoder cells

# Optimisation settings.
learning_rate = 1e-4
max_gradient_norm = 0.1     # global-norm threshold used for gradient clipping

### label

In [13]:
# Integer ids of the expected decoder outputs, fed as [document, decoder step].
y = tf.placeholder(
    shape=[number_of_document, number_of_decoder_word],
    dtype=tf.int32,
)

### encoder

In [14]:
# Encoder inputs: token ids per document, plus the number of real tokens in
# each row, which dynamic_rnn receives as `sequence_length`.
encoder_x = tf.placeholder(
    shape=[number_of_document, number_of_encoder_word],
    dtype=tf.int32,
)
source_sequence_lengths = tf.placeholder(
    shape=[number_of_document],
    dtype=tf.int32,
)

# Trainable embedding table with one row per vocabulary id. The same variable
# is looked up again for the decoder inputs.
embedding_matrix = tf.get_variable(
    name="embeding_matrix",
    shape=[voca_size, word_embedding_size],
    dtype=tf.float32,
    initializer=tf.truncated_normal_initializer(),
)
encoder_emb_inp = tf.nn.embedding_lookup(
    params=embedding_matrix,
    ids=encoder_x,
    name="encoder_emb_inp",
)

# Single-layer LSTM encoder. Its final state is handed to the decoder as the
# decoder's initial state.
encoder_cell = tf.nn.rnn_cell.LSTMCell(
    num_units=lstm_hidden_size,
    initializer=tf.contrib.layers.variance_scaling_initializer(),
)
encoder_outputs, encoder_final_hidden_state = tf.nn.dynamic_rnn(
    cell=encoder_cell,
    inputs=encoder_emb_inp,
    sequence_length=source_sequence_lengths,
    dtype=tf.float32,
    scope="encoder_LSTM",
)

### decoder

In [15]:
# Decoder inputs (teacher forcing): target token ids and the number of real
# tokens in each row.
decoder_x = tf.placeholder(
    shape=[number_of_document, number_of_decoder_word],
    dtype=tf.int32,
)
decoder_lengths = tf.placeholder(
    shape=[number_of_document],
    dtype=tf.int32,
)
decoder_emb_inp = tf.nn.embedding_lookup(
    params=embedding_matrix,
    ids=decoder_x,
    name="decoder_embedded_x",
)

# TrainingHelper feeds the embedded ground-truth tokens as the decoder inputs.
helper = tf.contrib.seq2seq.TrainingHelper(
    inputs=decoder_emb_inp,
    sequence_length=decoder_lengths,
)
decoder_cell = tf.nn.rnn_cell.LSTMCell(
    num_units=lstm_hidden_size,
    initializer=tf.contrib.layers.variance_scaling_initializer(),
)

# The decoder starts from the encoder's final state; the Dense output layer
# projects each cell output to one score per vocabulary entry.
decoder = tf.contrib.seq2seq.BasicDecoder(
    cell=decoder_cell,
    helper=helper,
    initial_state=encoder_final_hidden_state,
    output_layer=tf.layers.Dense(voca_size),
)
decoder_outputs, decoder_final_hidden_state, decoder_final_sequence_length = (
    tf.contrib.seq2seq.dynamic_decode(decoder=decoder)
)

# The per-step vocabulary logits are the `rnn_output` field of the decoder output.
logits = decoder_outputs.rnn_output

### loss

In [16]:
# Per-token cross entropy between the decoder logits and the integer labels.
# The sparse variant consumes the ids in `y` directly, so no one-hot tensor
# has to be built.
crossent = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)

# Mask out time steps beyond each target's true length so that padded
# positions do not contribute to the loss (or its gradients) when targets of
# different lengths are batched together. With equal-length targets the mask
# is all ones and the loss equals the plain sum.
target_weights = tf.sequence_mask(
    decoder_lengths,
    maxlen=tf.shape(logits)[1],
    dtype=logits.dtype,
)
train_loss = tf.reduce_sum(crossent * target_weights)

### calculate and clip gradients

In [17]:
# Gradients of the loss with respect to every trainable variable in the graph.
params = tf.trainable_variables()
gradients = tf.gradients(ys=train_loss, xs=params)

# Rescale all gradients together so their global norm is at most
# `max_gradient_norm` (guards against exploding gradients). The second return
# value, the global norm before clipping, is not needed here.
clipped_gradients, _ = tf.clip_by_global_norm(
    t_list=gradients,
    clip_norm=max_gradient_norm,
)

### optimization

In [18]:
# Adam update applied to the clipped gradients; running `update_step` performs
# one training step.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
update_step = optimizer.apply_gradients(
    grads_and_vars=zip(clipped_gradients, params),
)

### training

In [20]:
# Build the prediction op once, before the loop. Creating it inside the loop
# would add new ops to the graph on every logging step. The softmax is
# omitted because it is monotonic and does not change the argmax.
predicted_ids = tf.argmax(logits, axis=2)

# The whole toy data set is fed as a single batch on every step.
train_feed = {
    encoder_x: fedic_encoder_x,
    source_sequence_lengths: fedic_encoder_seq_len_except_pad,
    decoder_x: fedic_decoder_x,
    decoder_lengths: fedic_decoder_seq_len_except_pad,
    y: real_label,
}

# The session is left open on purpose so that following cells can reuse it.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

for step in range(5000):
    sess.run(update_step, feed_dict=train_feed)

    # Every 500 steps, decode the training batch and report the current loss.
    if step % 500 == 0:
        pred, pred_loss = sess.run([predicted_ids, train_loss], feed_dict=train_feed)
        # Turn each row of predicted ids back into a string of tokens.
        print_list = ["".join(dic[token_id] for token_id in row) for row in pred]
        print("sentence : ", print_list, "loss : ", pred_loss)

sentence :  ['bddd', 'eeee'] loss :  22.323618
sentence :  ['ddd<eos>', '33<eos><eos>'] loss :  19.67084
sentence :  ['ddd<eos>', '3<eos><eos><eos>'] loss :  15.852428
sentence :  ['cd<eos><eos>', '33<eos><eos>'] loss :  12.062802
sentence :  ['cd<eos><eos>', '33<eos><eos>'] loss :  9.387261
sentence :  ['cde<eos>', '34<eos><eos>'] loss :  7.5083013
sentence :  ['cde<eos>', '345<eos>'] loss :  6.0009317
sentence :  ['cde<eos>', '345<eos>'] loss :  4.6996107
sentence :  ['cde<eos>', '345<eos>'] loss :  3.5801916
sentence :  ['cde<eos>', '345<eos>'] loss :  2.732616
