## RNN 구현연습
---
참고 : [김보섭님 자료(클릭)](https://nbviewer.jupyter.org/github/aisolab/CS20/blob/master/Lec11_Recurrent%20Neural%20Networks/To%20quickly%20implementing%20RNN.ipynb)에서 단순 단어만 조금 바꾸어 작성

### Load Libraries

In [1]:
import tensorflow as tf
import numpy as np
from pprint import pprint

print(tf.__version__)

  from ._conv import register_converters as _register_converters


1.13.0-dev20190102


### Intro

In [2]:
# Toy corpus: each sentence is a list of word tokens, and sentences differ in length.
sentences = [['I', 'feel', 'hungry'],
             ['tensorflow', 'is', 'very', 'difficult'],
             ['tensorflow', 'is', 'a', 'framework', 'for', 'deep', 'learning'],
             ['tensorflow', 'is', 'very', 'fast', 'changing']]

# Number of tokens in each sentence. `len` is passed directly: each element
# handed to it is a whole sentence, not a single word.
print(list(map(len, sentences)))

[3, 4, 7, 5]


### Intro : Padding

---
`<pad>`라는 의미없는 토큰을 함께 추가해주어서 word_dic을 구성한다

In [3]:
## word dic
# Vocabulary = distinct tokens of the corpus in sorted order, with the
# '<pad>' token reserved at index 0.
word_list = ['<pad>'] + sorted({token for sentence in sentences for token in sentence})
word_dic = dict(zip(word_list, range(len(word_list))))
pprint(word_dic)

{'<pad>': 0,
 'I': 1,
 'a': 2,
 'changing': 3,
 'deep': 4,
 'difficult': 5,
 'fast': 6,
 'feel': 7,
 'for': 8,
 'framework': 9,
 'hungry': 10,
 'is': 11,
 'learning': 12,
 'tensorflow': 13,
 'very': 14}


In [4]:
# Sentences shorter than max_len are right-padded with <pad> up to max_len.
# Having to equalise sequence lengths is what distinguishes this input
# pipeline from other deep-learning setups.
def pad_seq(sequences, max_len, dic):
    """Convert token sequences to index lists right-padded with '<pad>'.

    Args:
        sequences: iterable of token lists.
        max_len: target length of every returned index list.
        dic: mapping from token to integer index; its '<pad>' entry is the
            padding value.

    Returns:
        A pair ``(lengths, indices)``: the original length of each sequence
        and the sequence as indices padded out to ``max_len``. Tokens missing
        from ``dic`` become ``None``; a sequence longer than ``max_len`` is
        returned as-is (neither padded nor truncated).
    """
    pad_idx = dic.get('<pad>')
    lengths, padded = [], []
    for tokens in sequences:
        n_tokens = len(tokens)
        row = [dic.get(token) for token in tokens]
        # A negative repeat count yields an empty list, so over-long rows get no padding.
        row.extend([pad_idx] * (max_len - n_tokens))
        lengths.append(n_tokens)
        padded.append(row)
    return lengths, padded



In [5]:
# Pad every sentence to one common length so the batch is rectangular.
max_length = 8
sen_len, sen_indices = pad_seq(sentences, max_length, word_dic)
pprint(sen_len)
pprint(sen_indices)

[3, 4, 7, 5]
[[1, 7, 10, 0, 0, 0, 0, 0],
 [13, 11, 14, 5, 0, 0, 0, 0],
 [13, 11, 2, 9, 8, 4, 12, 0],
 [13, 11, 14, 6, 3, 0, 0, 0]]


### 형식 맞추기

In [6]:
# Placeholder for the true (unpadded) length of each sentence in the batch.
seq_len = tf.placeholder(dtype = tf.int32, shape=[None])
# Placeholder for the padded word-index sequences: max_length indices per sentence.
seq_indices = tf.placeholder(dtype = tf.int32, shape=[None, max_length])

In [7]:
# Identity matrix: row i is the one-hot vector for word index i.
one_hot = np.eye(len(word_dic)).astype(np.float32)
# Frozen (non-trainable) lookup table. It is bound to `one_hot` so that the
# lookup below reads the TensorFlow variable rather than the raw NumPy array.
one_hot = tf.get_variable(name='one_hot', initializer = one_hot,
                          trainable = False)
# Replace every word index by its one-hot row.
seq_batch = tf.nn.embedding_lookup(params = one_hot, ids = seq_indices)

Instructions for updating:
Colocations handled automatically by placer.


In [8]:
# Evaluate the one-hot lookup on the padded sentences and compare the
# shape of the index matrix with the shape of the looked-up batch.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    tmp = sess.run(seq_batch, feed_dict = {seq_indices : sen_indices})
print(np.shape(sen_indices))
print(np.shape(tmp)) # this shape must be kept when passing the batch to tf.nn.dynamic_rnn, tf.contrib.seq2seq.TrainingHelper, etc.

(4, 8)
(4, 8, 15)


In [9]:
pprint(tmp[0])

array([[0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],
      dtype=float32)


### many to one

---
#### many to one : example data

In [13]:
# Start the many-to-one experiments from an empty default graph.
tf.reset_default_graph()

# Same toy corpus as in the intro.
sentences = [['I', 'feel', 'hungry'],
     ['tensorflow', 'is', 'very', 'difficult'],
     ['tensorflow', 'is', 'a', 'framework', 'for', 'deep', 'learning'],
     ['tensorflow', 'is', 'very', 'fast', 'changing']]
# One two-element one-hot label per sentence (two classes).
y = [[0.,1.], [0.,1.], [1.,0.], [1.,0.]]
max_length = 8

# Sentence lengths and padded word-index rows.
sen_len, sen_indices = pad_seq(sequences = sentences, max_len = max_length, dic = word_dic)

pprint(sen_len)
pprint(sen_indices)

[3, 4, 7, 5]
[[1, 7, 10, 0, 0, 0, 0, 0],
 [13, 11, 14, 5, 0, 0, 0, 0],
 [13, 11, 2, 9, 8, 4, 12, 0],
 [13, 11, 14, 6, 3, 0, 0, 0]]


### many to one : simple

In [14]:
# Hyper-parameters: padded length, GRU hidden size, number of output classes.
max_length = 8
h_dim = 2
n_of_classes = 2

# Inputs: true sentence lengths, padded word indices, one-hot class labels.
seq_len = tf.placeholder(dtype = tf.int32, shape = [None])
seq_indices = tf.placeholder(dtype = tf.int32, shape = [None, max_length])
label = tf.placeholder(dtype = tf.float32, shape = [None, 2])

# Fixed one-hot lookup table (identity matrix, not trainable) and the
# resulting one-hot encoded batch.
one_hot = np.eye(len(word_dic)).astype(np.float32)
one_hot = tf.get_variable(name='one_hot', initializer = one_hot,
                                   trainable = False)
seq_batch = tf.nn.embedding_lookup(params = one_hot, ids = seq_indices)

In [15]:
# Single GRU layer unrolled over the batch. `sequence_length` tells
# dynamic_rnn where each sentence really ends.
gru_cell = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh)
# `_` receives the per-step outputs, `state` the final hidden state.
_, state = tf.nn.dynamic_rnn(cell = gru_cell, inputs = seq_batch, sequence_length = seq_len,
                             dtype = tf.float32)
pprint(_)
pprint(state)


For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
This class is equivalent as tf.keras.layers.GRUCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
<tf.Tensor 'rnn/transpose_1:0' shape=(?, 8, 2) dtype=float32>
<tf.Tensor 'rnn/while/Exit_3:0' shape=(?, 2) dtype=float32>


In [19]:
# Dense layer mapping the final GRU state to one logit per class.
score = tf.layers.dense(inputs = state, units = n_of_classes)
# Softmax cross-entropy between the logits and the one-hot labels.
ce_loss = tf.losses.softmax_cross_entropy(onehot_labels=label,
                                          logits = score)

In [20]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())
pprint(sess.run([_, state], feed_dict = {seq_len : [sen_len[0]], seq_indices : [sen_indices[0]]}))

[array([[[ 0.0662247 , -0.01695941],
        [-0.03734433,  0.02484244],
        [-0.06660604, -0.01929412],
        [ 0.        ,  0.        ],
        [ 0.        ,  0.        ],
        [ 0.        ,  0.        ],
        [ 0.        ,  0.        ],
        [ 0.        ,  0.        ]]], dtype=float32),
 array([[-0.06660604, -0.01929412]], dtype=float32)]


In [21]:
pprint(sess.run(ce_loss, feed_dict = {seq_len : sen_len, seq_indices : sen_indices,
                                      label : y}))

0.679254


In [22]:
sess.close()


### many to one : stacked

In [24]:
# Start from an empty graph. The previous section already created a variable
# named 'one_hot' (plus its RNN variables) in the default graph, and
# tf.get_variable refuses to create a second variable with the same name.
tf.reset_default_graph()

max_length = 8
n_of_classes = 2

# Inputs: true sentence lengths, padded word indices, one-hot class labels,
# and the dropout keep probability.
seq_len = tf.placeholder(dtype = tf.int32, shape = [None])
seq_indices = tf.placeholder(dtype = tf.int32, shape = [None, max_length])
label = tf.placeholder(dtype = tf.float32, shape = [None, 2])
keep_prob = tf.placeholder(dtype = tf.float32)

# Fixed one-hot lookup table (identity matrix, not trainable) and the
# resulting one-hot encoded batch.
one_hot = np.eye(len(word_dic)).astype(np.float32)
one_hot = tf.get_variable(name='one_hot', initializer = one_hot,
                                   trainable = False)
seq_batch = tf.nn.embedding_lookup(params = one_hot, ids = seq_indices)

In [25]:
# One GRU layer per entry of h_dims, each with dropout applied to its output.
h_dims = [2,2]
gru_cells = []
for h_dim in h_dims:
    gru_cell = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh)
    gru_cell = tf.contrib.rnn.DropoutWrapper(cell = gru_cell,
                                             output_keep_prob = keep_prob)
    gru_cells.append(gru_cell)
# Combine the layers into a single multi-layer cell. The loop never breaks,
# so this always runs and needs no `for ... else`.
gru_cells = tf.contrib.rnn.MultiRNNCell(cells = gru_cells)

Instructions for updating:
This class is equivalent as tf.keras.layers.StackedRNNCells, and will be replaced by that in Tensorflow 2.0.


In [26]:
_, state = tf.nn.dynamic_rnn(cell = gru_cells, inputs = seq_batch, sequence_length = seq_len,
                             dtype = tf.float32)
pprint(_)
pprint(state)

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
<tf.Tensor 'rnn/transpose_1:0' shape=(?, 8, 2) dtype=float32>
(<tf.Tensor 'rnn/while/Exit_3:0' shape=(?, 2) dtype=float32>,
 <tf.Tensor 'rnn/while/Exit_4:0' shape=(?, 2) dtype=float32>)


In [27]:
# `state` holds one final state per stacked layer; state[-1] is the last layer's.
score = tf.layers.dense(inputs = state[-1], units = n_of_classes)
# Softmax cross-entropy between the logits and the one-hot labels.
ce_loss = tf.losses.softmax_cross_entropy(onehot_labels = label, logits = score)

In [30]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())
pprint(sess.run([_, state], feed_dict = {seq_len : [sen_len[0]], seq_indices : [sen_indices[0]],
                                         keep_prob : 1.}))

[array([[[ 0.00917697, -0.00501139],
        [ 0.04817685,  0.00979669],
        [ 0.02524938, -0.00332088],
        [ 0.        ,  0.        ],
        [ 0.        ,  0.        ],
        [ 0.        ,  0.        ],
        [ 0.        ,  0.        ],
        [ 0.        ,  0.        ]]], dtype=float32),
 (array([[ 0.04972173, -0.10506723]], dtype=float32),
  array([[ 0.02524938, -0.00332088]], dtype=float32))]


In [31]:
pprint(sess.run(ce_loss, feed_dict = {seq_len : sen_len, seq_indices : sen_indices,
                                      label : y, keep_prob : 1.}))

0.6831637


In [32]:
sess.close()

### many to one : bi-directional

In [33]:
tf.reset_default_graph()

In [34]:
max_length = 8
h_dim = 2
n_of_classes = 2

seq_len = tf.placeholder(dtype = tf.int32, shape = [None])
seq_indices = tf.placeholder(dtype = tf.int32, shape = [None, max_length])
label = tf.placeholder(dtype = tf.float32, shape = [None, 2])

one_hot = np.eye(len(word_dic)).astype(np.float32)
one_hot = tf.get_variable(name='one_hot', initializer = one_hot,
                                   trainable = False)
seq_batch = tf.nn.embedding_lookup(params = one_hot, ids = seq_indices)

In [None]:
# NOTE(review): this cell was never executed (no execution count) and builds
# the same bi-directional RNN as the cell right after it. Running both in the
# same graph presumably fails with a variable-scope name clash - confirm, and
# run only one of the two.
gru_fw_cell = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh)
gru_bw_cell = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh)

# `_` receives the per-step outputs, `output_states` the final states.
_, output_states = tf.nn.bidirectional_dynamic_rnn(cell_fw = gru_fw_cell, cell_bw = gru_bw_cell,
                                           inputs = seq_batch, sequence_length = seq_len, dtype = tf.float32)
pprint(_)
pprint(output_states)

In [35]:
# One GRU cell for the forward pass and one for the backward pass.
gru_fw_cell = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh)
gru_bw_cell = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh)

# `_` receives the (forward, backward) per-step outputs and `output_states`
# the (forward, backward) final states.
_, output_states = tf.nn.bidirectional_dynamic_rnn(
    cell_fw = gru_fw_cell,
    cell_bw = gru_bw_cell,
    inputs = seq_batch,
    sequence_length = seq_len,
    dtype = tf.float32,
)
pprint(_)
pprint(output_states)

Instructions for updating:
Please use `keras.layers.Bidirectional(keras.layers.RNN(cell))`, which is equivalent to this API
(<tf.Tensor 'bidirectional_rnn/fw/fw/transpose_1:0' shape=(?, 8, 2) dtype=float32>,
 <tf.Tensor 'ReverseSequence:0' shape=(?, 8, 2) dtype=float32>)
(<tf.Tensor 'bidirectional_rnn/fw/fw/while/Exit_3:0' shape=(?, 2) dtype=float32>,
 <tf.Tensor 'bidirectional_rnn/bw/bw/while/Exit_3:0' shape=(?, 2) dtype=float32>)


In [36]:
## Concatenate the final states of fw_cell and bw_cell along the feature axis.

concat_state = tf.concat(values = [output_states[0],
                                  output_states[-1]],
                        axis = 1)

In [37]:
score = tf.layers.dense(inputs = concat_state, units = n_of_classes)
ce_loss = tf.losses.softmax_cross_entropy(onehot_labels = label,
                                         logits = score)

In [38]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())
pprint(sess.run([_, concat_state], feed_dict= {seq_len:[sen_len[0]],
                                              seq_indices : [sen_indices[0]]}))

[(array([[[ 0.00608759, -0.12793802],
        [ 0.05511357, -0.21184164],
        [-0.03150936, -0.1544176 ],
        [ 0.        ,  0.        ],
        [ 0.        ,  0.        ],
        [ 0.        ,  0.        ],
        [ 0.        ,  0.        ],
        [ 0.        ,  0.        ]]], dtype=float32),
  array([[[ 0.00733875,  0.01251039],
        [-0.05339082,  0.13665897],
        [ 0.00767314,  0.05520057],
        [ 0.        ,  0.        ],
        [ 0.        ,  0.        ],
        [ 0.        ,  0.        ],
        [ 0.        ,  0.        ],
        [ 0.        ,  0.        ]]], dtype=float32)),
 array([[-0.03150936, -0.1544176 ,  0.00733875,  0.01251039]],
      dtype=float32)]


In [39]:
# Cross-entropy loss of the untrained model on the full batch.
pprint(sess.run(ce_loss, feed_dict = {seq_len : sen_len,
                                      seq_indices : sen_indices,
                                      label : y}))
# Release the session, as the earlier many-to-one sections do. The next
# section calls tf.reset_default_graph(), which must not run while a
# session is still active.
sess.close()

0.7031779


### many to one : stacked bi-directional

In [50]:
tf.reset_default_graph()

In [51]:
max_length = 8
n_of_classes = 2

seq_len = tf.placeholder(dtype = tf.int32, shape = [None])
seq_indices = tf.placeholder(dtype = tf.int32, shape = [None, max_length])

label = tf.placeholder(dtype = tf.float32, shape = [None, 2])


keep_prob = tf.placeholder(dtype = tf.float32)

one_hot = np.eye(len(word_dic)).astype(np.float32)
one_hot = tf.get_variable(name='one_hot', initializer=one_hot,
                         trainable = False)
seq_batch = tf.nn.embedding_lookup(params=one_hot, ids = seq_indices)

In [52]:
# Hidden size of each stacked layer.
h_dims = [2,2]

# forward: one dropout-wrapped GRU per layer
gru_fw_cells = []
for h_dim in h_dims:
    fw_gru = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh)
    gru_fw_cells.append(
        tf.contrib.rnn.DropoutWrapper(cell = fw_gru, output_keep_prob = keep_prob))

# backward: same construction, separate cell objects
gru_bw_cells = []
for h_dim in h_dims:
    bw_gru = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh)
    gru_bw_cells.append(
        tf.contrib.rnn.DropoutWrapper(cell = bw_gru, output_keep_prob = keep_prob))

In [53]:
outputs, output_state_fw, output_state_bw = \
tf.contrib.rnn.stack_bidirectional_dynamic_rnn(cells_fw = gru_fw_cells, cells_bw = gru_bw_cells,
                                               inputs = seq_batch, sequence_length = seq_len,
                                               dtype = tf.float32)
pprint(outputs)
pprint(output_state_fw)
pprint(output_state_bw)

<tf.Tensor 'stack_bidirectional_rnn/cell_1/concat:0' shape=(?, 8, 4) dtype=float32>
(<tf.Tensor 'stack_bidirectional_rnn/cell_0/bidirectional_rnn/fw/fw/while/Exit_3:0' shape=(?, 2) dtype=float32>,
 <tf.Tensor 'stack_bidirectional_rnn/cell_1/bidirectional_rnn/fw/fw/while/Exit_3:0' shape=(?, 2) dtype=float32>)
(<tf.Tensor 'stack_bidirectional_rnn/cell_0/bidirectional_rnn/bw/bw/while/Exit_3:0' shape=(?, 2) dtype=float32>,
 <tf.Tensor 'stack_bidirectional_rnn/cell_1/bidirectional_rnn/bw/bw/while/Exit_3:0' shape=(?, 2) dtype=float32>)


In [54]:
# Join the last layer's forward and backward final states along the feature axis.
concat_state = tf.concat(values=[output_state_fw[-1],output_state_bw[-1]], axis = 1)

In [55]:
score = tf.layers.dense(inputs = concat_state, units = n_of_classes)
ce_loss = tf.losses.softmax_cross_entropy(onehot_labels = label, logits = score)


In [56]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())
pprint(sess.run([outputs, concat_state], feed_dict = {seq_len : [sen_len[0]], seq_indices : [sen_indices[0]],
                                                      keep_prob : 1.}))

[array([[[-0.01676048, -0.0757765 ,  0.03989091,  0.12585738],
        [-0.02815877, -0.06940226,  0.01626515,  0.06314133],
        [-0.0399979 , -0.00468424, -0.00685303,  0.00458733],
        [ 0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ]]],
      dtype=float32),
 array([[-0.0399979 , -0.00468424,  0.03989091,  0.12585738]],
      dtype=float32)]


In [57]:
# Cross-entropy loss of the untrained model on the full batch (dropout disabled).
pprint(sess.run(ce_loss, feed_dict = {seq_len : sen_len, seq_indices : sen_indices,
                                      label : y, keep_prob : 1.}))
# Release the session before the next section resets the default graph;
# resetting the graph while a session is active is not supported.
sess.close()

0.6838166


### many to many

---

#### many to many : example data

In [58]:
# Start the many-to-many experiments from an empty default graph.
tf.reset_default_graph()

# Input sentences (word tokens).
sentences = [['I', 'feel', 'hungry'],
     ['tensorflow', 'is', 'very', 'difficult'],
     ['tensorflow', 'is', 'a', 'framework', 'for', 'deep', 'learning'],
     ['tensorflow', 'is', 'very', 'fast', 'changing']]
# Target sequences: one part-of-speech tag per word, aligned with `sentences`.
pos = [['pronoun', 'verb', 'adjective'],
     ['noun', 'verb', 'adverb', 'adjective'],
     ['noun', 'verb', 'determiner', 'noun', 'preposition', 'adjective', 'noun'],
     ['noun', 'verb', 'adverb', 'adjective', 'verb']]
max_length = 8

In [59]:
def pad_seq(sequences, max_len, dic):
    """Map token sequences to index lists right-padded with the '<pad>' index.

    Args:
        sequences: iterable of token lists.
        max_len: length every index list is padded up to.
        dic: token-to-index mapping; ``dic['<pad>']`` is the index of the
            meaningless padding token.

    Returns:
        ``(seq_len, seq_indices)``: the unpadded length of each sequence and
        the padded index lists. Tokens absent from ``dic`` map to ``None``;
        sequences longer than ``max_len`` are returned unchanged in length.
    """
    def to_padded(tokens):
        # Look the tokens up, then fill the remainder with the pad index.
        indices = [dic.get(token) for token in tokens]
        return indices + [dic.get('<pad>')] * (max_len - len(indices))

    seq_len, seq_indices = [], []
    for tokens in sequences:
        seq_len.append(len(tokens))
        seq_indices.append(to_padded(tokens))
    return seq_len, seq_indices

In [60]:
# Word vocabulary: sorted distinct tokens, with '<pad>' reserved at index 0.
word_list = ['<pad>'] + sorted({token for sentence in sentences for token in sentence})
word_dic = dict(zip(word_list, range(len(word_list))))

# pos dic: built the same way from the part-of-speech tag sequences.
pos_list = ['<pad>'] + sorted({tag for tags in pos for tag in tags})
pos_dic = dict(zip(pos_list, range(len(pos_list))))

print(word_dic)
print(pos_dic)

{'for': 8, 'fast': 6, 'a': 2, 'is': 11, 'learning': 12, 'framework': 9, 'deep': 4, 'changing': 3, '<pad>': 0, 'tensorflow': 13, 'feel': 7, 'difficult': 5, 'I': 1, 'hungry': 10, 'very': 14}
{'preposition': 5, 'verb': 7, 'adverb': 2, 'pronoun': 6, 'adjective': 1, 'determiner': 3, 'noun': 4, '<pad>': 0}


In [61]:
sen_len, sen_indices = pad_seq(sequences = sentences, max_len = max_length, dic = word_dic)
_, pos_indices = pad_seq(sequences = pos, max_len = max_length, dic = pos_dic)

pprint(sen_len)
pprint(sen_indices)
pprint(pos_indices)

[3, 4, 7, 5]
[[1, 7, 10, 0, 0, 0, 0, 0],
 [13, 11, 14, 5, 0, 0, 0, 0],
 [13, 11, 2, 9, 8, 4, 12, 0],
 [13, 11, 14, 6, 3, 0, 0, 0]]
[[6, 7, 1, 0, 0, 0, 0, 0],
 [4, 7, 2, 1, 0, 0, 0, 0],
 [4, 7, 3, 4, 5, 1, 4, 0],
 [4, 7, 2, 1, 7, 0, 0, 0]]


### many to many: simple

In [62]:
max_length = 8
h_dim = 2
n_of_classes = len(pos_dic)

seq_len = tf.placeholder(dtype = tf.int32, shape = [None])
seq_indices = tf.placeholder(dtype = tf.int32, shape = [None, max_length])
label = tf.placeholder(dtype = tf.int32, shape = [None, max_length])

one_hot = np.eye(len(word_dic)).astype(np.float32)
one_hot = tf.get_variable(name='one_hot', initializer = one_hot,
                                   trainable = False)
seq_batch = tf.nn.embedding_lookup(params = one_hot, ids = seq_indices)

In [63]:
# GRU cell whose per-step output is projected to n_of_classes scores,
# so the RNN emits one score vector per time step.
gru_cell = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh)
score_cell = tf.contrib.rnn.OutputProjectionWrapper(cell = gru_cell, output_size = n_of_classes)
# `outputs` holds the per-step scores; `_` is the final GRU state.
outputs, _ = tf.nn.dynamic_rnn(cell = score_cell, inputs = seq_batch, sequence_length = seq_len,
                             dtype = tf.float32)

pprint(outputs)
pprint(_)

<tf.Tensor 'rnn/transpose_1:0' shape=(?, 8, 8) dtype=float32>
<tf.Tensor 'rnn/while/Exit_3:0' shape=(?, 2) dtype=float32>


In [64]:
# Per-token loss weights: 1.0 for real tokens, 0.0 for padding. The mask is
# derived from the `seq_len` placeholder, not the Python list `sen_len`, so
# it always matches the batch that is actually fed at run time.
masking = tf.sequence_mask(lengths = seq_len,
                           maxlen = max_length, dtype = tf.float32)
# Sequence cross-entropy between per-step scores and tag indices, weighted by the mask.
seq2seq_loss = tf.contrib.seq2seq.sequence_loss(logits = outputs,
                                                targets = label,
                                                weights = masking)


In [65]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())
pprint(sess.run([outputs, _], feed_dict = {seq_len : [sen_len[0]], seq_indices : [sen_indices[0]]}))

[array([[[ 0.0401592 ,  0.11893751, -0.07371241,  0.06439449,
         -0.00943696,  0.0411165 ,  0.08811046, -0.00774744],
        [ 0.08153912,  0.07185946, -0.00569717,  0.15431473,
          0.05982151,  0.16183802,  0.13667941, -0.10636218],
        [ 0.04954111,  0.19189334, -0.12926933,  0.07316232,
         -0.03267328,  0.02985734,  0.11993706,  0.01457638],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ]]],
      dtype=f

In [66]:
# Padding mask and masked sequence loss of the untrained model on the full batch.
pprint(sess.run([masking, seq2seq_loss], feed_dict = {seq_len : sen_len,
                                                      seq_indices : sen_indices,
                                                      label : pos_indices}))
# Release the session before the next section resets the default graph;
# resetting the graph while a session is active is not supported.
sess.close()

[array([[1., 1., 1., 0., 0., 0., 0., 0.],
       [1., 1., 1., 1., 0., 0., 0., 0.],
       [1., 1., 1., 1., 1., 1., 1., 0.],
       [1., 1., 1., 1., 1., 0., 0., 0.]], dtype=float32),
 2.0618663]


### many to many : stacked

In [67]:
tf.reset_default_graph()

max_length = 8
n_of_classes = len(pos_dic)

seq_len = tf.placeholder(dtype = tf.int32, shape = [None])
seq_indices = tf.placeholder(dtype = tf.int32, shape = [None, max_length])
label = tf.placeholder(dtype = tf.int32, shape = [None, max_length])
keep_prob = tf.placeholder(dtype = tf.float32)

one_hot = np.eye(len(word_dic)).astype(np.float32)
one_hot = tf.get_variable(name='one_hot', initializer = one_hot,
                                   trainable = False)
seq_batch = tf.nn.embedding_lookup(params = one_hot, ids = seq_indices)

In [68]:
# One GRU layer per entry of h_dims, each with dropout applied to its output.
h_dims = [2,2]
gru_cells = []
for h_dim in h_dims:
    gru_cell = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh)
    gru_cell = tf.contrib.rnn.DropoutWrapper(cell = gru_cell, output_keep_prob = keep_prob)
    gru_cells.append(gru_cell)
# Combine the layers into a single multi-layer cell. The loop never breaks,
# so this always runs and needs no `for ... else`.
gru_cells = tf.contrib.rnn.MultiRNNCell(cells = gru_cells)

In [69]:
score_cell = tf.contrib.rnn.OutputProjectionWrapper(cell = gru_cells, output_size = n_of_classes)
outputs, _ = tf.nn.dynamic_rnn(cell = score_cell, inputs = seq_batch, sequence_length = seq_len,
                             dtype = tf.float32)

pprint(outputs)
pprint(_)

<tf.Tensor 'rnn/transpose_1:0' shape=(?, 8, 8) dtype=float32>
(<tf.Tensor 'rnn/while/Exit_3:0' shape=(?, 2) dtype=float32>,
 <tf.Tensor 'rnn/while/Exit_4:0' shape=(?, 2) dtype=float32>)


In [70]:
# Per-token loss weights: 1.0 for real tokens, 0.0 for padding. Built from the
# `seq_len` placeholder (not the Python list `sen_len`) so the mask follows
# whatever batch is fed at run time.
masking = tf.sequence_mask(lengths = seq_len, maxlen = max_length, dtype = tf.float32)
# Sequence cross-entropy between per-step scores and tag indices, weighted by the mask.
seq2seq_loss = tf.contrib.seq2seq.sequence_loss(logits = outputs, targets = label, weights = masking)


In [71]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())
pprint(sess.run([outputs, _], feed_dict = {seq_len : [sen_len[0]], seq_indices : [sen_indices[0]],
                                           keep_prob : 1.}))

[array([[[-0.00188777, -0.00037185,  0.00462506, -0.00598995,
          0.00910721, -0.00180404,  0.00169455,  0.0033047 ],
        [ 0.0004771 ,  0.00058002,  0.00017493, -0.0012932 ,
          0.00163719,  0.00043325,  0.00141179,  0.00168271],
        [-0.00852313, -0.00381461,  0.01497678, -0.01470956,
          0.02381036, -0.0080454 , -0.00043475,  0.00385634],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ]]],
      dtype=f

In [72]:
# Padding mask and masked sequence loss on the full batch (dropout disabled).
pprint(sess.run([masking, seq2seq_loss], feed_dict = {seq_len : sen_len, seq_indices : sen_indices,
                                                      label : pos_indices, keep_prob : 1.}))
# Release the session before the next section resets the default graph;
# resetting the graph while a session is active is not supported.
sess.close()

[array([[1., 1., 1., 0., 0., 0., 0., 0.],
       [1., 1., 1., 1., 0., 0., 0., 0.],
       [1., 1., 1., 1., 1., 1., 1., 0.],
       [1., 1., 1., 1., 1., 0., 0., 0.]], dtype=float32),
 2.0779552]


### many to many: bi-directional

In [74]:
tf.reset_default_graph()

max_length = 8
h_dim = 2
n_of_classes = len(pos_dic)

seq_len = tf.placeholder(dtype = tf.int32, shape = [None])
seq_indices = tf.placeholder(dtype = tf.int32, shape = [None, max_length])
label = tf.placeholder(dtype = tf.int32, shape = [None, max_length])

one_hot = np.eye(len(word_dic)).astype(np.float32)
one_hot = tf.get_variable(name='one_hot', initializer = one_hot,
                                   trainable = False)
seq_batch = tf.nn.embedding_lookup(params = one_hot, ids = seq_indices)

In [75]:
# One GRU cell per direction, sized by the section-level `h_dim` rather than
# a repeated literal, so the hidden size is changed in a single place.
gru_fw_cell = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh)
gru_bw_cell = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh)

# `outputs` is the (forward, backward) pair of per-step outputs and `_` the
# (forward, backward) pair of final states.
outputs, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw = gru_fw_cell, cell_bw = gru_bw_cell,
                                             inputs = seq_batch, sequence_length = seq_len,
                                             dtype = tf.float32)
pprint(outputs)
pprint(_)

(<tf.Tensor 'bidirectional_rnn/fw/fw/transpose_1:0' shape=(?, 8, 2) dtype=float32>,
 <tf.Tensor 'ReverseSequence:0' shape=(?, 8, 2) dtype=float32>)
(<tf.Tensor 'bidirectional_rnn/fw/fw/while/Exit_3:0' shape=(?, 2) dtype=float32>,
 <tf.Tensor 'bidirectional_rnn/bw/bw/while/Exit_3:0' shape=(?, 2) dtype=float32>)


In [76]:
# Join forward and backward per-step outputs along the feature axis.
concat_outputs = tf.concat([outputs[0], outputs[1]], axis = 2)
# Projection matrix from the concatenated features to one score per class.
weights = tf.get_variable(name = 'weights', shape = (concat_outputs.get_shape()[-1], n_of_classes),
                          initializer = tf.contrib.layers.xavier_initializer())
# Apply the same projection to each sentence of the batch (map_fn iterates over axis 0).
score = tf.map_fn(lambda elm : tf.matmul(elm, weights), concat_outputs)
pprint(score)

<tf.Tensor 'map/TensorArrayStack/TensorArrayGatherV3:0' shape=(?, 8, 8) dtype=float32>


In [77]:
# Per-token loss weights: 1.0 for real tokens, 0.0 for padding. Built from the
# `seq_len` placeholder (not the Python list `sen_len`) so the mask follows
# whatever batch is fed at run time.
masking = tf.sequence_mask(lengths = seq_len, maxlen = max_length, dtype = tf.float32)
# Sequence cross-entropy between per-step scores and tag indices, weighted by the mask.
seq2seq_loss = tf.contrib.seq2seq.sequence_loss(logits = score, targets = label, weights = masking)

In [78]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())
pprint(sess.run([score, _], feed_dict = {seq_len : [sen_len[0]], seq_indices : [sen_indices[0]]}))

[array([[[ 0.02485238,  0.08592927, -0.02481479, -0.14082852,
          0.05746712, -0.11255324, -0.02309201, -0.0053028 ],
        [ 0.09087023,  0.13373238, -0.0172023 , -0.02397757,
         -0.02929905, -0.02332759, -0.05857082, -0.00142572],
        [ 0.0338428 ,  0.04580762, -0.0417332 ,  0.06195411,
         -0.05554413,  0.06113041, -0.05987086,  0.05780768],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ]]],
      dtype=f

In [79]:
# Padding mask and masked sequence loss of the untrained model on the full batch.
pprint(sess.run([masking, seq2seq_loss], feed_dict = {seq_len : sen_len, seq_indices : sen_indices,
                                                      label : pos_indices}))
# Release the session before the next section resets the default graph;
# resetting the graph while a session is active is not supported.
sess.close()

[array([[1., 1., 1., 0., 0., 0., 0., 0.],
       [1., 1., 1., 1., 0., 0., 0., 0.],
       [1., 1., 1., 1., 1., 1., 1., 0.],
       [1., 1., 1., 1., 1., 0., 0., 0.]], dtype=float32),
 2.063561]


### many to many : stacked bi-directional

In [80]:
tf.reset_default_graph()

max_length = 8
n_of_classes = len(pos_dic)

seq_len = tf.placeholder(dtype = tf.int32, shape = [None])
seq_indices = tf.placeholder(dtype = tf.int32, shape = [None, max_length])
label = tf.placeholder(dtype = tf.int32, shape = [None, max_length])
keep_prob = tf.placeholder(dtype = tf.float32)

one_hot = np.eye(len(word_dic)).astype(np.float32)
one_hot = tf.get_variable(name='one_hot', initializer = one_hot,
                                   trainable = False)
seq_batch = tf.nn.embedding_lookup(params = one_hot, ids = seq_indices)

In [81]:
h_dims = [2,2]
gru_fw_cells, gru_bw_cells = [], []

# forward
for h_dim in h_dims:
    gru_cell = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh)
    gru_cell = tf.contrib.rnn.DropoutWrapper(cell = gru_cell, output_keep_prob = keep_prob)
    gru_fw_cells.append(gru_cell)
    
# backward
for h_dim in h_dims:
    gru_cell = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh)
    gru_cell = tf.contrib.rnn.DropoutWrapper(cell = gru_cell, output_keep_prob = keep_prob)
    gru_bw_cells.append(gru_cell)


In [82]:
outputs, output_state_fw, output_state_bw = \
tf.contrib.rnn.stack_bidirectional_dynamic_rnn(cells_fw = gru_fw_cells, cells_bw = gru_bw_cells,
                                               inputs = seq_batch, sequence_length = seq_len,
                                               dtype = tf.float32)
pprint(outputs)
pprint(output_state_fw)
pprint(output_state_bw)

<tf.Tensor 'stack_bidirectional_rnn/cell_1/concat:0' shape=(?, 8, 4) dtype=float32>
(<tf.Tensor 'stack_bidirectional_rnn/cell_0/bidirectional_rnn/fw/fw/while/Exit_3:0' shape=(?, 2) dtype=float32>,
 <tf.Tensor 'stack_bidirectional_rnn/cell_1/bidirectional_rnn/fw/fw/while/Exit_3:0' shape=(?, 2) dtype=float32>)
(<tf.Tensor 'stack_bidirectional_rnn/cell_0/bidirectional_rnn/bw/bw/while/Exit_3:0' shape=(?, 2) dtype=float32>,
 <tf.Tensor 'stack_bidirectional_rnn/cell_1/bidirectional_rnn/bw/bw/while/Exit_3:0' shape=(?, 2) dtype=float32>)


In [83]:
# Projection matrix from the RNN output features to one score per class.
weights = tf.get_variable(name = 'weights', shape = (outputs.get_shape()[-1], n_of_classes),
                          initializer = tf.contrib.layers.xavier_initializer())
# Apply the same projection to each sentence of the batch (map_fn iterates over axis 0).
score = tf.map_fn(lambda elm : tf.matmul(elm, weights), outputs)
pprint(score)

<tf.Tensor 'map/TensorArrayStack/TensorArrayGatherV3:0' shape=(?, 8, 8) dtype=float32>


In [84]:
# Per-token loss weights: 1.0 for real tokens, 0.0 for padding. Built from the
# `seq_len` placeholder (not the Python list `sen_len`) so the mask follows
# whatever batch is fed at run time.
masking = tf.sequence_mask(lengths = seq_len, maxlen = max_length, dtype = tf.float32)
# Sequence cross-entropy between per-step scores and tag indices, weighted by the mask.
seq2seq_loss = tf.contrib.seq2seq.sequence_loss(logits = score, targets = label, weights = masking)


In [85]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())
pprint(sess.run(score, feed_dict = {seq_len : [sen_len[0]], seq_indices : [sen_indices[0]],
                                         keep_prob : 1.}))

array([[[ 0.01670285,  0.00166809, -0.02317582, -0.0145303 ,
          0.01358973,  0.00069475,  0.00973788, -0.00854048],
        [ 0.00082604, -0.00942596, -0.01932247, -0.02213586,
          0.02624755,  0.02337823,  0.00904453, -0.02901776],
        [-0.00578844, -0.01268585, -0.01192631, -0.02387662,
          0.00150439,  0.0192422 ,  0.01630887,  0.00575117],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ]]],
      dtype=fl

In [86]:
# Padding mask and masked sequence loss on the full batch (dropout disabled).
pprint(sess.run([masking, seq2seq_loss], feed_dict = {seq_len : sen_len, seq_indices : sen_indices,
                                                      label : pos_indices, keep_prob : 1.}))
# Release the session before the next section resets the default graph;
# resetting the graph while a session is active is not supported.
sess.close()

[array([[1., 1., 1., 0., 0., 0., 0., 0.],
       [1., 1., 1., 1., 0., 0., 0., 0.],
       [1., 1., 1., 1., 1., 1., 1., 0.],
       [1., 1., 1., 1., 1., 0., 0., 0.]], dtype=float32),
 2.0801873]


### many to many : stacked bi-directional

In [87]:
# Start from an empty default graph for the stacked bi-directional example.
tf.reset_default_graph()

max_length = 8
n_of_classes = len(pos_dic)

# Graph inputs: per-sentence lengths, padded token indices, padded labels and
# the dropout keep probability.
seq_len = tf.placeholder(dtype = tf.int32, shape = [None])
seq_indices = tf.placeholder(dtype = tf.int32, shape = [None, max_length])
label = tf.placeholder(dtype = tf.int32, shape = [None, max_length])
keep_prob = tf.placeholder(dtype = tf.float32)

# Identity matrix stored as a non-trainable variable: looking up a token index
# returns that token's one-hot row.
one_hot = np.eye(len(word_dic)).astype(np.float32)
one_hot = tf.get_variable(name='one_hot', initializer = one_hot,
                                   trainable = False)
seq_batch = tf.nn.embedding_lookup(params = one_hot, ids = seq_indices)

In [88]:
h_dims = [2,2]


def _make_dropout_gru(num_units):
    """Return a tanh GRU cell whose output is wrapped with dropout (`keep_prob`)."""
    cell = tf.contrib.rnn.GRUCell(num_units = num_units, activation = tf.nn.tanh)
    return tf.contrib.rnn.DropoutWrapper(cell = cell, output_keep_prob = keep_prob)


# One independent cell per layer; all forward cells are created first, then
# all backward cells.
gru_fw_cells = [_make_dropout_gru(h_dim) for h_dim in h_dims]
gru_bw_cells = [_make_dropout_gru(h_dim) for h_dim in h_dims]

In [89]:
# Run the stacked bi-directional RNN over the one-hot batch. `sequence_length`
# is the per-sentence length placeholder. The call returns the top-layer
# outputs plus one final state per layer for each direction.
outputs, output_state_fw, output_state_bw = \
tf.contrib.rnn.stack_bidirectional_dynamic_rnn(cells_fw = gru_fw_cells, cells_bw = gru_bw_cells,
                                               inputs = seq_batch, sequence_length = seq_len,
                                               dtype = tf.float32)
# Inspect the symbolic tensors (static shapes only; nothing is executed here).
pprint(outputs)
pprint(output_state_fw)
pprint(output_state_bw)

<tf.Tensor 'stack_bidirectional_rnn/cell_1/concat:0' shape=(?, 8, 4) dtype=float32>
(<tf.Tensor 'stack_bidirectional_rnn/cell_0/bidirectional_rnn/fw/fw/while/Exit_3:0' shape=(?, 2) dtype=float32>,
 <tf.Tensor 'stack_bidirectional_rnn/cell_1/bidirectional_rnn/fw/fw/while/Exit_3:0' shape=(?, 2) dtype=float32>)
(<tf.Tensor 'stack_bidirectional_rnn/cell_0/bidirectional_rnn/bw/bw/while/Exit_3:0' shape=(?, 2) dtype=float32>,
 <tf.Tensor 'stack_bidirectional_rnn/cell_1/bidirectional_rnn/bw/bw/while/Exit_3:0' shape=(?, 2) dtype=float32>)


In [90]:
# Projection matrix from the RNN output size (last dimension of `outputs`) to
# the number of classes, Xavier-initialized.
weights = tf.get_variable(name = 'weights', shape = (outputs.get_shape()[-1], n_of_classes),
                          initializer = tf.contrib.layers.xavier_initializer())
# Apply the same projection to each element along the first axis of `outputs`.
score = tf.map_fn(lambda elm : tf.matmul(elm, weights), outputs)
pprint(score)

<tf.Tensor 'map/TensorArrayStack/TensorArrayGatherV3:0' shape=(?, 8, 8) dtype=float32>


In [91]:

# Build the loss mask from the `seq_len` placeholder rather than the Python
# list `sen_len`, so the mask always has the same batch size as the fed batch.
masking = tf.sequence_mask(lengths = seq_len, maxlen = max_length, dtype = tf.float32)
# Sequence cross-entropy weighted by the mask, so padded time steps get weight 0.
seq2seq_loss = tf.contrib.seq2seq.sequence_loss(logits = score, targets = label, weights = masking)

In [92]:
# Open a session and initialize all global variables.
sess = tf.Session()
sess.run(tf.global_variables_initializer())
# Evaluate the scores for the first sentence only; each fed value is wrapped in
# a list to form a batch of one, and keep_prob = 1. keeps every cell output.
pprint(sess.run(score, feed_dict = {seq_len : [sen_len[0]], seq_indices : [sen_indices[0]],
                                         keep_prob : 1.}))


array([[[ 0.07602175,  0.03328064, -0.04958954, -0.00882747,
          0.0404028 ,  0.03835554,  0.00329314,  0.10084573],
        [ 0.04525514,  0.04387965, -0.0269363 , -0.0139736 ,
          0.00608762,  0.04037949, -0.03293841,  0.09743132],
        [ 0.01821594,  0.05290323, -0.02067207, -0.01135725,
         -0.00358083,  0.03827195, -0.05377305,  0.08085747],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ]]],
      dtype=float32)

In [93]:
# Evaluate the mask and the masked sequence loss on the full batch of sentences.
pprint(sess.run([masking, seq2seq_loss], feed_dict = {seq_len : sen_len, seq_indices : sen_indices,
                                                      label : pos_indices, keep_prob : 1.}))

[array([[1., 1., 1., 0., 0., 0., 0., 0.],
       [1., 1., 1., 1., 0., 0., 0., 0.],
       [1., 1., 1., 1., 1., 1., 1., 0.],
       [1., 1., 1., 1., 1., 0., 0., 0.]], dtype=float32),
 2.0720344]


### Sequence to Sequence

In [94]:
# Start from an empty default graph for the sequence-to-sequence example.
tf.reset_default_graph()

# Parallel toy corpus: sources[i] is a tokenized English sentence and
# targets[i] is its tokenized Korean translation.
sources = [['I', 'feel', 'hungry'],
     ['tensorflow', 'is', 'very', 'difficult'],
     ['tensorflow', 'is', 'a', 'framework', 'for', 'deep', 'learning'],
     ['tensorflow', 'is', 'very', 'fast', 'changing']]
targets = [['나는', '배가', '고프다'],
           ['텐서플로우는', '매우', '어렵다'],
           ['텐서플로우는', '딥러닝을', '위한', '프레임워크이다'],
           ['텐서플로우는', '매우', '빠르게', '변화한다']]

In [95]:
# Vocabulary for the source sentences: every distinct token, sorted, with
# '<pad>' placed first so that it receives index 0.
source_vocab = set()
for elm in sources:
    source_vocab.update(elm)
source_words = ['<pad>'] + sorted(source_vocab)

# Map each token to its position in `source_words`.
source_dic = dict(zip(source_words, range(len(source_words))))
print(source_dic)
print(len(source_dic))

{'for': 8, 'fast': 6, 'a': 2, 'is': 11, 'learning': 12, 'framework': 9, 'deep': 4, 'changing': 3, '<pad>': 0, 'tensorflow': 13, 'feel': 7, 'difficult': 5, 'I': 1, 'hungry': 10, 'very': 14}
15


In [96]:
# Vocabulary for the translations: every distinct token, sorted, preceded by
# '<pad>' and by the '<start>' / '<end>' tokens that mark the beginning and the
# end of a translated sentence.
target_vocab = set()
for elm in targets:
    target_vocab.update(elm)
target_words = ['<pad>', '<start>', '<end>'] + sorted(target_vocab)

# Map each token to its position in `target_words`.
target_dic = dict(zip(target_words, range(len(target_words))))
print(target_dic)
print(len(target_dic))


{'빠르게': 9, '배가': 7, '<start>': 1, '어렵다': 10, '고프다': 3, '<pad>': 0, '매우': 6, '나는': 4, '프레임워크이다': 13, '위한': 11, '딥러닝을': 5, '<end>': 2, '텐서플로우는': 12, '변화한다': 8}
14


In [97]:
def pad_seq_enc(sequences, max_len, dic):
    """Convert token sequences into fixed-width index rows for the encoder.

    Args:
        sequences: iterable of token lists.
        max_len: width of every returned row.
        dic: mapping from token to integer index. Lookups use ``dict.get``,
            so a token (or '<pad>') missing from ``dic`` maps to ``None``.

    Returns:
        A ``(seq_len, seq_indices)`` tuple. ``seq_len[i]`` is the number of
        real tokens kept for sequence ``i`` (never more than ``max_len``) and
        ``seq_indices[i]`` is its index row, right-padded with the '<pad>'
        index to exactly ``max_len`` entries.
    """
    pad_idx = dic.get('<pad>')
    width = max(max_len, 0)
    seq_len, seq_indices = [], []
    for seq in sequences:
        # Truncate over-long sequences so every row has the same width and the
        # reported length never exceeds max_len; otherwise the batch is ragged.
        row = [dic.get(word) for word in seq[:width]]
        seq_len.append(len(row))
        row += (width - len(row)) * [pad_idx]
        seq_indices.append(row)
    return seq_len, seq_indices

In [98]:
def pad_seq_dec(sequences, max_len, dic):
    """Build padded decoder inputs and decoder targets from token sequences.

    The decoder input is '<start>' followed by the tokens; the decoder target
    is the tokens followed by '<end>'. Both are right-padded with the '<pad>'
    index to ``max_len`` entries.

    Args:
        sequences: iterable of token lists.
        max_len: width of every returned row; expected to be at least 1 so
            that the '<start>' / '<end>' marker fits.
        dic: mapping from token to integer index. Lookups use ``dict.get``,
            so a token missing from ``dic`` maps to ``None``.

    Returns:
        A ``(seq_input_len, seq_input_indices, seq_target_indices)`` tuple,
        where ``seq_input_len[i]`` is the unpadded length of decoder input
        ``i`` (the '<start>' token included).
    """
    pad_idx = dic.get('<pad>')
    start_idx = dic.get('<start>')
    end_idx = dic.get('<end>')
    # Leave one slot for '<start>' / '<end>': an over-long sentence is cut
    # instead of producing rows wider than max_len (a ragged batch).
    body_len = max(max_len - 1, 0)

    seq_input_len = []
    seq_input_indices = []
    seq_target_indices = []
    for seq in sequences:
        body = [dic.get(token) for token in seq[:body_len]]

        # for decoder input
        seq_input_idx = [start_idx] + body
        seq_input_len.append(len(seq_input_idx))
        seq_input_idx += (max_len - len(seq_input_idx)) * [pad_idx]
        seq_input_indices.append(seq_input_idx)

        # for decoder output
        seq_target_idx = body + [end_idx]
        seq_target_idx += (max_len - len(seq_target_idx)) * [pad_idx]
        seq_target_indices.append(seq_target_idx)

    return seq_input_len, seq_input_indices, seq_target_indices

In [99]:
# for encoder
# Pad every source sentence to source_max_len indices and keep the true lengths.
source_max_len = 10
X_length, X_indices = pad_seq_enc(sequences = sources, max_len = source_max_len, dic = source_dic)
print(X_length, np.shape(X_indices))

[3, 4, 7, 5] (4, 10)


In [100]:
# for decoder
# Build padded decoder inputs ('<start>' + tokens) and targets (tokens + '<end>');
# y_length counts the '<start>' token as part of each input.
target_max_len = 12
y_length, y_input_indices, y_target_indices = pad_seq_dec(sequences = targets, max_len = target_max_len,
                                                             dic = target_dic)
pprint(y_length)
pprint(y_input_indices)
pprint(y_target_indices)

[4, 4, 5, 5]
[[1, 4, 7, 3, 0, 0, 0, 0, 0, 0, 0, 0],
 [1, 12, 6, 10, 0, 0, 0, 0, 0, 0, 0, 0],
 [1, 12, 5, 11, 13, 0, 0, 0, 0, 0, 0, 0],
 [1, 12, 6, 9, 8, 0, 0, 0, 0, 0, 0, 0]]
[[4, 7, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0],
 [12, 6, 10, 2, 0, 0, 0, 0, 0, 0, 0, 0],
 [12, 5, 11, 13, 2, 0, 0, 0, 0, 0, 0, 0],
 [12, 6, 9, 8, 2, 0, 0, 0, 0, 0, 0, 0]]


In [101]:
# Graph inputs. Source side: true sentence lengths and padded token indices.
s_len = tf.placeholder(dtype = tf.int32, shape = [None])
s_indices = tf.placeholder(dtype = tf.int32, shape = [None, source_max_len])
# Target side: true lengths, padded decoder inputs and padded decoder targets.
t_len = tf.placeholder(dtype = tf.int32, shape = [None])
t_input_indices = tf.placeholder(dtype = tf.int32, shape = [None, target_max_len])
t_output_indices = tf.placeholder(dtype = tf.int32, shape = [None, target_max_len])


In [102]:
# Source embedding table initialized to the identity, so each token starts as
# its one-hot row.
# NOTE(review): no `trainable = False` is passed here, unlike the `one_hot`
# variable used in earlier sections - confirm that a trainable table is intended.
s_embedding = tf.eye(num_rows = len(source_dic), dtype = tf.float32)
s_embedding = tf.get_variable(name = 's_embedding', initializer = s_embedding)
s_batch = tf.nn.embedding_lookup(params = s_embedding, ids = s_indices)

# Encoder: a single GRU run over the source batch. Only its final state is
# kept; the per-step outputs are discarded.
enc_cell = tf.contrib.rnn.GRUCell(num_units = 2, activation = tf.nn.tanh)
_, enc_state = tf.nn.dynamic_rnn(cell = enc_cell, inputs = s_batch, sequence_length = s_len, dtype = tf.float32)


In [103]:
# Target embedding table initialized to the identity, so each token starts as
# its one-hot row.
t_embedding = tf.eye(num_rows = len(target_dic), dtype = tf.float32)
t_embedding = tf.get_variable(name = 't_embedding', initializer = t_embedding)
t_batch = tf.nn.embedding_lookup(params = t_embedding, ids = t_input_indices)

# Vector of ones with one entry per example in the batch.
tokens = tf.ones_like(tensor = s_len, dtype = tf.int32)
# Every example is decoded for the full padded length during training, so the
# helper receives target_max_len for each batch element. A broadcast multiply
# replaces the element-by-element tf.map_fn.
tr_tokens = tokens * target_max_len
# Use the actual '<start>' index instead of relying on it happening to be 1.
start_tokens = tokens * target_dic.get('<start>')

# Training decoder: reads the ground-truth decoder inputs, starts from the
# encoder's final state and projects each GRU output to vocabulary-size scores.
tr_helper = tf.contrib.seq2seq.TrainingHelper(inputs = t_batch, sequence_length = tr_tokens)
dec_cell = tf.contrib.rnn.GRUCell(num_units = 2, activation = tf.nn.tanh)
score_cell = tf.contrib.rnn.OutputProjectionWrapper(cell = dec_cell, output_size = len(target_dic))
tr_decoder = tf.contrib.seq2seq.BasicDecoder(cell = score_cell, initial_state = enc_state, helper = tr_helper)


In [104]:
# Unroll the training decoder for at most target_max_len steps; only the
# decoder outputs are kept, the other two return values are discarded.
tr_outputs,_,_= tf.contrib.seq2seq.dynamic_decode(decoder = tr_decoder, impute_finished = True,
                                                  maximum_iterations = target_max_len)

In [105]:
# Mask built from the fed target lengths: 1 for the first t_len steps of each
# row, 0 for the padded remainder.
masking = tf.sequence_mask(lengths = t_len, maxlen = target_max_len, dtype = tf.float32)
# Sequence cross-entropy between the decoder scores and the target indices,
# weighted by the mask.
seq2seq_loss = tf.contrib.seq2seq.sequence_loss(logits = tr_outputs.rnn_output,
                                                targets = t_output_indices, weights = masking)

In [106]:
# Inference decoder: no ground-truth inputs are fed. The greedy helper starts
# from `start_tokens`, looks tokens up in `t_embedding`, and treats the '<end>'
# index as the stop token.
trans_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(embedding = t_embedding,
                                                        start_tokens = start_tokens,
                                                        end_token = target_dic.get('<end>'))
# Reuses the same `score_cell` object and the same encoder state as the
# training decoder.
trans_decoder = tf.contrib.seq2seq.BasicDecoder(cell = score_cell, initial_state = enc_state,
                                                helper = trans_helper)
# Decoding is capped at twice the padded target length.
trans_outputs,_,_ = tf.contrib.seq2seq.dynamic_decode(decoder = trans_decoder, impute_finished = True,
                                                      maximum_iterations = target_max_len * 2)

In [107]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Feed only the first sentence pair; every value is wrapped in a list to form
# a batch of one.
first_pair_feed = {
    s_len: [X_length[0]],
    s_indices: [X_indices[0]],
    t_len: [y_length[0]],
    t_input_indices: [y_input_indices[0]],
}
logits, masks = sess.run([tr_outputs.rnn_output, masking], feed_dict = first_pair_feed)

# The loss additionally needs the decoder targets.
loss_feed = dict(first_pair_feed)
loss_feed[t_output_indices] = [y_target_indices[0]]
loss = sess.run(seq2seq_loss, feed_dict = loss_feed)

In [108]:

# Greedy-decode the first source sentence; only the encoder inputs are fed
# because the inference decoder takes no target-side placeholders.
translations = sess.run(trans_outputs.sample_id, feed_dict = {s_len : [X_length[0]],
                                               s_indices : [X_indices[0]]})

In [109]:
# Training-decoder scores for the first sentence pair.
pprint(logits)


array([[[ 3.06569170e-02,  2.90796943e-02,  2.65586451e-02,
          3.55238467e-02,  2.72423159e-02, -2.31380723e-02,
          4.59234864e-02, -1.49730723e-02, -1.43889021e-02,
          4.43195440e-02,  3.12543707e-03,  2.01934017e-02,
         -4.12015757e-03,  1.88582577e-02],
        [ 3.35243903e-02, -3.22426520e-02, -2.72103995e-02,
          4.89585176e-02,  1.51515007e-04, -5.48993237e-02,
          2.30013207e-02, -5.11967838e-02,  4.26761024e-02,
          9.92388465e-03, -2.64386237e-02,  5.06320745e-02,
          1.63794402e-02, -3.40926908e-02],
        [ 3.29880342e-02, -7.31706433e-03, -5.33414260e-03,
          4.43210527e-02,  1.14459638e-02, -4.27400693e-02,
          3.30073163e-02, -3.71048115e-02,  1.97300129e-02,
          2.44550481e-02, -1.46358609e-02,  3.89402136e-02,
          8.15707352e-03, -1.26926647e-02],
        [-8.30884837e-03,  2.92134397e-02,  2.53850762e-02,
         -1.54850120e-02,  9.78414901e-03,  2.34143529e-02,
          3.31857242e-03,  2

In [110]:
# Loss mask and masked sequence loss for the first sentence pair.
pprint(masks)
pprint(loss)

array([[1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.]], dtype=float32)
2.6351032
