# 빠르게 구현하는 RNN
18.06.24에 백수콘에서 ***"빠르게 구현하는 RNN"***이라는 주제로 발표한 슬라이드의 보충자료입니다.

### Load libraries

In [1]:
import tensorflow as tf
import numpy as np
from pprint import pprint

## Intro

In [2]:
# What if the words of each sentence are fed into an RNN one at a time?
sentences = [
    ['I', 'feel', 'hungry'],
    ['tensorflow', 'is', 'very', 'difficult'],
    ['tensorflow', 'is', 'a', 'framework', 'for', 'deep', 'learning'],
    ['tensorflow', 'is', 'very', 'fast', 'changing'],
]

# The RNN has to unroll over as many steps as each sentence has words.
# --> variable sequence length!
print([len(sentence) for sentence in sentences])

[3, 4, 7, 5]


### Intro : Padding

In [3]:
# Word dictionary: collect the unique words of all sentences, sort them and
# assign consecutive indices, reserving index 0 for the padding token.
word_list = set()
for elm in sentences:
    word_list.update(elm)
word_list = ['<pad>'] + sorted(word_list)  # prepend the meaningless '<pad>' token
word_dic = dict(zip(word_list, range(len(word_list))))
pprint(word_dic)

{'<pad>': 0,
 'I': 1,
 'a': 2,
 'changing': 3,
 'deep': 4,
 'difficult': 5,
 'fast': 6,
 'feel': 7,
 'for': 8,
 'framework': 9,
 'hungry': 10,
 'is': 11,
 'learning': 12,
 'tensorflow': 13,
 'very': 14}


In [4]:
# Sentences shorter than max_len are padded with <pad> up to max_len.
def pad_seq(sequences, max_len, dic):
    """Convert token sequences to index lists right-padded to ``max_len``.

    Args:
        sequences: iterable of token sequences (each must support ``len``).
        max_len: target length of every padded index list.
        dic: mapping from token to index; must contain ``'<pad>'``.

    Returns:
        ``(lengths, padded)`` where ``lengths[i]`` is the unpadded length of
        sequence ``i`` and ``padded[i]`` is its list of indices followed by
        pad indices. Tokens are looked up with ``dic.get``, so a token that is
        missing from a plain dict becomes ``None``. A sequence longer than
        ``max_len`` is returned unpadded and is not truncated.
    """
    pad_idx = dic.get('<pad>')
    lengths, padded = [], []
    for tokens in sequences:
        lengths.append(len(tokens))
        ids = list(map(dic.get, tokens))
        # A non-positive repeat count yields an empty list, i.e. no padding.
        ids.extend([pad_idx] * (max_len - len(ids)))
        padded.append(ids)
    return lengths, padded

In [5]:
max_length = 8
sen_len, sen_indices = pad_seq(sequences = sentences, max_len = max_length, dic = word_dic)
pprint(sen_len)
pprint(sen_indices)

[3, 4, 7, 5]
[[1, 7, 10, 0, 0, 0, 0, 0],
 [13, 11, 14, 5, 0, 0, 0, 0],
 [13, 11, 2, 9, 8, 4, 12, 0],
 [13, 11, 14, 6, 3, 0, 0, 0]]


### Intro : Embedding lookup

In [6]:
seq_len = tf.placeholder(dtype = tf.int32, shape = [None])
seq_indices = tf.placeholder(dtype = tf.int32, shape = [None, max_length])

In [7]:
# Identity matrix of size len(word_dic): row i is the one-hot vector of word index i.
one_hot = np.eye(len(word_dic)).astype(np.float32)
one_hot = tf.get_variable(name='one_hot', initializer = one_hot,
                                   trainable = False) # not trainable: the embedding vectors are not going to be trained
# Replace every word index in seq_indices by its row of one_hot.
seq_batch = tf.nn.embedding_lookup(params = one_hot, ids = seq_indices)

In [8]:
# Evaluate the embedded batch once; the session is closed when the with-block exits.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    tmp = sess.run(seq_batch, feed_dict = {seq_indices : sen_indices})
print(np.shape(sen_indices))
print(np.shape(tmp)) # this shape must be preserved when passing the batch to tf.nn.dynamic_rnn, tf.contrib.seq2seq.TrainingHelper, etc.

(4, 8)
(4, 8, 15)


In [9]:
tmp[0]

array([[0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],
      dtype=float32)

## Many to One

### Many to One : Example data

In [10]:
tf.reset_default_graph()

sentences = [['I', 'feel', 'hungry'],
     ['tensorflow', 'is', 'very', 'difficult'],
     ['tensorflow', 'is', 'a', 'framework', 'for', 'deep', 'learning'],
     ['tensorflow', 'is', 'very', 'fast', 'changing']]
y = [[0.,1.], [0.,1.], [1.,0.], [1.,0.]]
max_length = 8

sen_len, sen_indices = pad_seq(sequences = sentences, max_len = max_length, dic = word_dic)

pprint(sen_len)
pprint(sen_indices)

[3, 4, 7, 5]
[[1, 7, 10, 0, 0, 0, 0, 0],
 [13, 11, 14, 5, 0, 0, 0, 0],
 [13, 11, 2, 9, 8, 4, 12, 0],
 [13, 11, 14, 6, 3, 0, 0, 0]]


### Many to One : Simple

In [11]:
max_length = 8
h_dim = 2
n_of_classes = 2

seq_len = tf.placeholder(dtype = tf.int32, shape = [None])
seq_indices = tf.placeholder(dtype = tf.int32, shape = [None, max_length])
label = tf.placeholder(dtype = tf.float32, shape = [None, 2])

one_hot = np.eye(len(word_dic)).astype(np.float32)
one_hot = tf.get_variable(name='one_hot', initializer = one_hot,
                                   trainable = False)
seq_batch = tf.nn.embedding_lookup(params = one_hot, ids = seq_indices)

In [12]:
gru_cell = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh)
_, state = tf.nn.dynamic_rnn(cell = gru_cell, inputs = seq_batch, sequence_length = seq_len,
                             dtype = tf.float32)
pprint(_)
pprint(state)

<tf.Tensor 'rnn/transpose_1:0' shape=(?, 8, 2) dtype=float32>
<tf.Tensor 'rnn/while/Exit_3:0' shape=(?, 2) dtype=float32>


In [13]:
score = tf.layers.dense(inputs = state, units = n_of_classes)
ce_loss = tf.losses.softmax_cross_entropy(onehot_labels=label,
                                          logits = score)

In [14]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())
pprint(sess.run([_, state], feed_dict = {seq_len : [sen_len[0]], seq_indices : [sen_indices[0]]}))

[array([[[ 0.09504685, -0.02786257],
        [ 0.05727357, -0.08943594],
        [ 0.11590318, -0.1923188 ],
        [ 0.        ,  0.        ],
        [ 0.        ,  0.        ],
        [ 0.        ,  0.        ],
        [ 0.        ,  0.        ],
        [ 0.        ,  0.        ]]], dtype=float32),
 array([[ 0.11590318, -0.1923188 ]], dtype=float32)]


In [15]:
pprint(sess.run(ce_loss, feed_dict = {seq_len : sen_len, seq_indices : sen_indices,
                                      label : y}))

0.6566503


In [16]:
sess.close()

### Many to One : Stacked

In [17]:
tf.reset_default_graph()

In [18]:
max_length = 8
n_of_classes = 2

seq_len = tf.placeholder(dtype = tf.int32, shape = [None])
seq_indices = tf.placeholder(dtype = tf.int32, shape = [None, max_length])
label = tf.placeholder(dtype = tf.float32, shape = [None, 2])
keep_prob = tf.placeholder(dtype = tf.float32)

one_hot = np.eye(len(word_dic)).astype(np.float32)
one_hot = tf.get_variable(name='one_hot', initializer = one_hot,
                                   trainable = False)
seq_batch = tf.nn.embedding_lookup(params = one_hot, ids = seq_indices)

In [19]:
# Build one dropout-wrapped GRU cell per entry of h_dims, then stack them.
h_dims = [2,2]
gru_cells = []
for h_dim in h_dims:
    gru_cell = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh)
    gru_cell = tf.contrib.rnn.DropoutWrapper(cell = gru_cell,
                                             output_keep_prob = keep_prob)
    gru_cells.append(gru_cell)

# The loop has no `break`, so a `for ... else:` clause would always run;
# plain sequencing states the same thing without the misleading construct.
# gru_cells is rebound from the list of cells to the stacked multi-layer cell.
gru_cells = tf.contrib.rnn.MultiRNNCell(cells = gru_cells)

In [20]:
_, state = tf.nn.dynamic_rnn(cell = gru_cells, inputs = seq_batch, sequence_length = seq_len,
                             dtype = tf.float32)
pprint(_)
pprint(state)

<tf.Tensor 'rnn/transpose_1:0' shape=(?, 8, 2) dtype=float32>
(<tf.Tensor 'rnn/while/Exit_3:0' shape=(?, 2) dtype=float32>,
 <tf.Tensor 'rnn/while/Exit_4:0' shape=(?, 2) dtype=float32>)


In [21]:
score = tf.layers.dense(inputs = state[-1], units = n_of_classes)
ce_loss = tf.losses.softmax_cross_entropy(onehot_labels = label, logits = score)

In [22]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())
pprint(sess.run([_, state], feed_dict = {seq_len : [sen_len[0]], seq_indices : [sen_indices[0]],
                                         keep_prob : 1.}))

[array([[[-0.00199929, -0.01550121],
        [ 0.00384277, -0.01441574],
        [ 0.0184861 , -0.00312071],
        [ 0.        ,  0.        ],
        [ 0.        ,  0.        ],
        [ 0.        ,  0.        ],
        [ 0.        ,  0.        ],
        [ 0.        ,  0.        ]]], dtype=float32),
 (array([[-0.10920699, -0.25967512]], dtype=float32),
  array([[ 0.0184861 , -0.00312071]], dtype=float32))]


In [23]:
pprint(sess.run(ce_loss, feed_dict = {seq_len : sen_len, seq_indices : sen_indices,
                                      label : y, keep_prob : 1.}))

0.7151346


In [24]:
sess.close()

### Many to One : Bi-directional

In [25]:
tf.reset_default_graph()

In [26]:
max_length = 8
h_dim = 2
n_of_classes = 2

seq_len = tf.placeholder(dtype = tf.int32, shape = [None])
seq_indices = tf.placeholder(dtype = tf.int32, shape = [None, max_length])
label = tf.placeholder(dtype = tf.float32, shape = [None, 2])

one_hot = np.eye(len(word_dic)).astype(np.float32)
one_hot = tf.get_variable(name='one_hot', initializer = one_hot,
                                   trainable = False)
seq_batch = tf.nn.embedding_lookup(params = one_hot, ids = seq_indices)

In [27]:
gru_fw_cell = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh)
gru_bw_cell = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh)

_, output_states = tf.nn.bidirectional_dynamic_rnn(cell_fw = gru_fw_cell, cell_bw = gru_bw_cell,
                                           inputs = seq_batch, sequence_length = seq_len, dtype = tf.float32)
pprint(_)
pprint(output_states)

(<tf.Tensor 'bidirectional_rnn/fw/fw/transpose_1:0' shape=(?, 8, 2) dtype=float32>,
 <tf.Tensor 'ReverseSequence:0' shape=(?, 8, 2) dtype=float32>)
(<tf.Tensor 'bidirectional_rnn/fw/fw/while/Exit_3:0' shape=(?, 2) dtype=float32>,
 <tf.Tensor 'bidirectional_rnn/bw/bw/while/Exit_3:0' shape=(?, 2) dtype=float32>)


In [28]:
# Concatenate the final state of fw_cell and the final state of bw_cell
# along the feature axis (axis 1).
concat_state = tf.concat(values = [output_states[0],
                                   output_states[-1]],
                         axis = 1)

In [29]:
score = tf.layers.dense(inputs = concat_state, units = n_of_classes)
ce_loss = tf.losses.softmax_cross_entropy(onehot_labels = label, logits = score)

In [30]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())
pprint(sess.run([_, concat_state], feed_dict = {seq_len : [sen_len[0]], seq_indices : [sen_indices[0]]}))

[(array([[[-0.06784903, -0.09155025],
        [-0.05353671, -0.08901785],
        [-0.06768034, -0.2230948 ],
        [ 0.        ,  0.        ],
        [ 0.        ,  0.        ],
        [ 0.        ,  0.        ],
        [ 0.        ,  0.        ],
        [ 0.        ,  0.        ]]], dtype=float32),
  array([[[-0.11015528,  0.0082678 ],
        [ 0.06593657, -0.03157877],
        [ 0.04607821,  0.03097701],
        [ 0.        ,  0.        ],
        [ 0.        ,  0.        ],
        [ 0.        ,  0.        ],
        [ 0.        ,  0.        ],
        [ 0.        ,  0.        ]]], dtype=float32)),
 array([[-0.06768034, -0.2230948 , -0.11015528,  0.0082678 ]],
      dtype=float32)]


In [31]:
# Evaluate the cross-entropy loss on the full example batch.
pprint(sess.run(ce_loss, feed_dict = {seq_len : sen_len, seq_indices : sen_indices,
                                      label : y}))
# Last use of this session in the section: release its resources before the
# default graph is reset for the next example.
sess.close()

0.77225125


### Many to One : Stacked Bi-directional

In [32]:
tf.reset_default_graph()

In [33]:
max_length = 8
n_of_classes = 2

seq_len = tf.placeholder(dtype = tf.int32, shape = [None])
seq_indices = tf.placeholder(dtype = tf.int32, shape = [None, max_length])
label = tf.placeholder(dtype = tf.float32, shape = [None, 2])
keep_prob = tf.placeholder(dtype = tf.float32)

one_hot = np.eye(len(word_dic)).astype(np.float32)
one_hot = tf.get_variable(name='one_hot', initializer = one_hot,
                                   trainable = False)
seq_batch = tf.nn.embedding_lookup(params = one_hot, ids = seq_indices)

In [34]:
h_dims = [2,2]
gru_fw_cells, gru_bw_cells = [], []

# forward
for h_dim in h_dims:
    gru_cell = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh)
    gru_cell = tf.contrib.rnn.DropoutWrapper(cell = gru_cell, output_keep_prob = keep_prob)
    gru_fw_cells.append(gru_cell)
    
# backward
for h_dim in h_dims:
    gru_cell = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh)
    gru_cell = tf.contrib.rnn.DropoutWrapper(cell = gru_cell, output_keep_prob = keep_prob)
    gru_bw_cells.append(gru_cell)

In [35]:
outputs, output_state_fw, output_state_bw = \
tf.contrib.rnn.stack_bidirectional_dynamic_rnn(cells_fw = gru_fw_cells, cells_bw = gru_bw_cells,
                                               inputs = seq_batch, sequence_length = seq_len,
                                               dtype = tf.float32)
pprint(outputs)
pprint(output_state_fw)
pprint(output_state_bw)

<tf.Tensor 'stack_bidirectional_rnn/cell_1/concat:0' shape=(?, 8, 4) dtype=float32>
(<tf.Tensor 'stack_bidirectional_rnn/cell_0/bidirectional_rnn/fw/fw/while/Exit_3:0' shape=(?, 2) dtype=float32>,
 <tf.Tensor 'stack_bidirectional_rnn/cell_1/bidirectional_rnn/fw/fw/while/Exit_3:0' shape=(?, 2) dtype=float32>)
(<tf.Tensor 'stack_bidirectional_rnn/cell_0/bidirectional_rnn/bw/bw/while/Exit_3:0' shape=(?, 2) dtype=float32>,
 <tf.Tensor 'stack_bidirectional_rnn/cell_1/bidirectional_rnn/bw/bw/while/Exit_3:0' shape=(?, 2) dtype=float32>)


In [36]:
concat_state = tf.concat(values=[output_state_fw[-1],output_state_bw[-1]], axis = 1)

In [37]:
score = tf.layers.dense(inputs = concat_state, units = n_of_classes)
ce_loss = tf.losses.softmax_cross_entropy(onehot_labels = label, logits = score)

In [38]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())
pprint(sess.run([outputs, concat_state], feed_dict = {seq_len : [sen_len[0]], seq_indices : [sen_indices[0]],
                                                      keep_prob : 1.}))

[array([[[ 0.01751452, -0.01348923,  0.01666009,  0.01491449],
        [ 0.01305745, -0.0105529 ,  0.00292257,  0.02082924],
        [-0.00403357, -0.00203712,  0.00713446,  0.02249983],
        [ 0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ]]],
      dtype=float32),
 array([[-0.00403357, -0.00203712,  0.01666009,  0.01491449]],
      dtype=float32)]


In [39]:
# Evaluate the cross-entropy loss on the full example batch (dropout disabled).
pprint(sess.run(ce_loss, feed_dict = {seq_len : sen_len, seq_indices : sen_indices,
                                      label : y, keep_prob : 1.}))
# Last use of this session in the section: release its resources before the
# default graph is reset for the next example.
sess.close()

0.69006765


## Many to Many

### Many to Many : Example data

In [40]:
tf.reset_default_graph()

sentences = [['I', 'feel', 'hungry'],
     ['tensorflow', 'is', 'very', 'difficult'],
     ['tensorflow', 'is', 'a', 'framework', 'for', 'deep', 'learning'],
     ['tensorflow', 'is', 'very', 'fast', 'changing']]
pos = [['pronoun', 'verb', 'adjective'],
     ['noun', 'verb', 'adverb', 'adjective'],
     ['noun', 'verb', 'determiner', 'noun', 'preposition', 'adjective', 'noun'],
     ['noun', 'verb', 'adverb', 'adjective', 'verb']]
max_length = 8

In [41]:
# Sentences shorter than max_len are padded with <pad> up to max_len.
def pad_seq(sequences, max_len, dic):
    """Return ``(lengths, index_rows)`` for a batch of token sequences.

    Each token is translated with ``dic.get`` and every row is extended on the
    right with the index of ``'<pad>'`` until it holds ``max_len`` entries.
    Rows that already hold ``max_len`` or more entries are left as they are.
    ``lengths`` records the number of tokens of each sequence before padding.
    """
    seq_len, seq_indices = [], []
    for seq in sequences:
        seq_len.append(len(seq))
        row = [dic.get(token) for token in seq]
        filler = [dic.get('<pad>')]  # index of the meaningless "pad" token
        row = row + filler * (max_len - len(row))
        seq_indices.append(row)
    return seq_len, seq_indices

In [42]:
# Word dictionary: unique words in sorted order, with '<pad>' at index 0.
word_list = set()
for elm in sentences:
    word_list.update(elm)
word_list = ['<pad>'] + sorted(word_list)

word_dic = {token : i for i, token in enumerate(word_list)}

# Part-of-speech dictionary, built the same way from the tag sequences.
pos_list = set()
for elm in pos:
    pos_list.update(elm)
pos_list = ['<pad>'] + sorted(pos_list)

# The loop variable is named `tag` so it does not shadow the `pos` list.
pos_dic = {tag : i for i, tag in enumerate(pos_list)}

print(word_dic)
print(pos_dic)

{'<pad>': 0, 'I': 1, 'a': 2, 'changing': 3, 'deep': 4, 'difficult': 5, 'fast': 6, 'feel': 7, 'for': 8, 'framework': 9, 'hungry': 10, 'is': 11, 'learning': 12, 'tensorflow': 13, 'very': 14}
{'<pad>': 0, 'adjective': 1, 'adverb': 2, 'determiner': 3, 'noun': 4, 'preposition': 5, 'pronoun': 6, 'verb': 7}


In [43]:
sen_len, sen_indices = pad_seq(sequences = sentences, max_len = max_length, dic = word_dic)
_, pos_indices = pad_seq(sequences = pos, max_len = max_length, dic = pos_dic)

pprint(sen_len)
pprint(sen_indices)
pprint(pos_indices)

[3, 4, 7, 5]
[[1, 7, 10, 0, 0, 0, 0, 0],
 [13, 11, 14, 5, 0, 0, 0, 0],
 [13, 11, 2, 9, 8, 4, 12, 0],
 [13, 11, 14, 6, 3, 0, 0, 0]]
[[6, 7, 1, 0, 0, 0, 0, 0],
 [4, 7, 2, 1, 0, 0, 0, 0],
 [4, 7, 3, 4, 5, 1, 4, 0],
 [4, 7, 2, 1, 7, 0, 0, 0]]


### Many to Many : Simple

In [44]:
max_length = 8
h_dim = 2
n_of_classes = len(pos_dic)

seq_len = tf.placeholder(dtype = tf.int32, shape = [None])
seq_indices = tf.placeholder(dtype = tf.int32, shape = [None, max_length])
label = tf.placeholder(dtype = tf.int32, shape = [None, max_length])

one_hot = np.eye(len(word_dic)).astype(np.float32)
one_hot = tf.get_variable(name='one_hot', initializer = one_hot,
                                   trainable = False)
seq_batch = tf.nn.embedding_lookup(params = one_hot, ids = seq_indices)

In [45]:
gru_cell = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh)
score_cell = tf.contrib.rnn.OutputProjectionWrapper(cell = gru_cell, output_size = n_of_classes)
outputs, _ = tf.nn.dynamic_rnn(cell = score_cell, inputs = seq_batch, sequence_length = seq_len,
                             dtype = tf.float32)

pprint(outputs)
pprint(_)

<tf.Tensor 'rnn/transpose_1:0' shape=(?, 8, 8) dtype=float32>
<tf.Tensor 'rnn/while/Exit_3:0' shape=(?, 2) dtype=float32>


In [46]:
# Build the padding mask from the fed `seq_len` placeholder rather than the
# Python list `sen_len`, so the mask always matches the batch in feed_dict
# instead of being fixed to the four example sentences.
masking = tf.sequence_mask(lengths = seq_len,
                           maxlen = max_length, dtype = tf.float32)
# Weighted cross-entropy over the sequence; steps whose mask weight is 0
# (the padded positions) do not contribute to the loss.
seq2seq_loss = tf.contrib.seq2seq.sequence_loss(logits = outputs,
                                                targets = label,
                                                weights = masking)

In [47]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())
pprint(sess.run([outputs, _], feed_dict = {seq_len : [sen_len[0]], seq_indices : [sen_indices[0]]}))

[array([[[ 0.01570132,  0.0524365 , -0.14573137, -0.12667798,
          0.00564757, -0.02671078, -0.09015815, -0.10335645],
        [ 0.0628261 ,  0.10740477, -0.25103313, -0.21593538,
         -0.01752878,  0.03181047, -0.1846405 , -0.15016457],
        [ 0.02495369,  0.07662933, -0.20985961, -0.18227278,
          0.00634775, -0.03336836, -0.13175288, -0.14701241],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ]]],
      dtype=f

In [48]:
# Evaluate the mask and the masked sequence loss on the full example batch.
pprint(sess.run([masking, seq2seq_loss], feed_dict = {seq_len : sen_len, seq_indices : sen_indices,
                                                      label : pos_indices}))
# Last use of this session in the section: release its resources before the
# default graph is reset for the next example.
sess.close()

[array([[1., 1., 1., 0., 0., 0., 0., 0.],
       [1., 1., 1., 1., 0., 0., 0., 0.],
       [1., 1., 1., 1., 1., 1., 1., 0.],
       [1., 1., 1., 1., 1., 0., 0., 0.]], dtype=float32),
 2.10219]


### Many to Many : Stacked

In [49]:
tf.reset_default_graph()

max_length = 8
n_of_classes = len(pos_dic)

seq_len = tf.placeholder(dtype = tf.int32, shape = [None])
seq_indices = tf.placeholder(dtype = tf.int32, shape = [None, max_length])
label = tf.placeholder(dtype = tf.int32, shape = [None, max_length])
keep_prob = tf.placeholder(dtype = tf.float32)

one_hot = np.eye(len(word_dic)).astype(np.float32)
one_hot = tf.get_variable(name='one_hot', initializer = one_hot,
                                   trainable = False)
seq_batch = tf.nn.embedding_lookup(params = one_hot, ids = seq_indices)

In [50]:
# Build one dropout-wrapped GRU cell per entry of h_dims, then stack them.
h_dims = [2,2]
gru_cells = []
for h_dim in h_dims:
    gru_cell = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh)
    gru_cell = tf.contrib.rnn.DropoutWrapper(cell = gru_cell, output_keep_prob = keep_prob)
    gru_cells.append(gru_cell)

# The loop has no `break`, so a `for ... else:` clause would always run;
# plain sequencing states the same thing without the misleading construct.
# gru_cells is rebound from the list of cells to the stacked multi-layer cell.
gru_cells = tf.contrib.rnn.MultiRNNCell(cells = gru_cells)

In [51]:
score_cell = tf.contrib.rnn.OutputProjectionWrapper(cell = gru_cells, output_size = n_of_classes)
outputs, _ = tf.nn.dynamic_rnn(cell = score_cell, inputs = seq_batch, sequence_length = seq_len,
                             dtype = tf.float32)

pprint(outputs)
pprint(_)

<tf.Tensor 'rnn/transpose_1:0' shape=(?, 8, 8) dtype=float32>
(<tf.Tensor 'rnn/while/Exit_3:0' shape=(?, 2) dtype=float32>,
 <tf.Tensor 'rnn/while/Exit_4:0' shape=(?, 2) dtype=float32>)


In [52]:
# Build the padding mask from the fed `seq_len` placeholder rather than the
# Python list `sen_len`, so the mask always matches the batch in feed_dict.
masking = tf.sequence_mask(lengths = seq_len, maxlen = max_length, dtype = tf.float32)
# Weighted cross-entropy; padded steps (mask weight 0) do not contribute.
seq2seq_loss = tf.contrib.seq2seq.sequence_loss(logits = outputs, targets = label, weights = masking)

In [53]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())
pprint(sess.run([outputs, _], feed_dict = {seq_len : [sen_len[0]], seq_indices : [sen_indices[0]],
                                           keep_prob : 1.}))

[array([[[-0.0035896 ,  0.00190467, -0.00314977, -0.00056082,
         -0.00091539, -0.00521373,  0.00357852, -0.00100121],
        [-0.01860922,  0.00316853, -0.01515145, -0.00715531,
         -0.00680878, -0.02094646,  0.01408813, -0.00569285],
        [-0.02975762, -0.00246203, -0.02290624, -0.01621121,
         -0.01320424, -0.02666592,  0.01751654, -0.00966735],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ]]],
      dtype=f

In [54]:
# Evaluate the mask and the masked sequence loss on the full example batch
# (dropout disabled).
pprint(sess.run([masking, seq2seq_loss], feed_dict = {seq_len : sen_len, seq_indices : sen_indices,
                                                      label : pos_indices, keep_prob : 1.}))
# Last use of this session in the section: release its resources before the
# default graph is reset for the next example.
sess.close()

[array([[1., 1., 1., 0., 0., 0., 0., 0.],
       [1., 1., 1., 1., 0., 0., 0., 0.],
       [1., 1., 1., 1., 1., 1., 1., 0.],
       [1., 1., 1., 1., 1., 0., 0., 0.]], dtype=float32),
 2.0782952]


### Many to Many : Bi-directional

In [55]:
tf.reset_default_graph()

max_length = 8
h_dim = 2
n_of_classes = len(pos_dic)

seq_len = tf.placeholder(dtype = tf.int32, shape = [None])
seq_indices = tf.placeholder(dtype = tf.int32, shape = [None, max_length])
label = tf.placeholder(dtype = tf.int32, shape = [None, max_length])

one_hot = np.eye(len(word_dic)).astype(np.float32)
one_hot = tf.get_variable(name='one_hot', initializer = one_hot,
                                   trainable = False)
seq_batch = tf.nn.embedding_lookup(params = one_hot, ids = seq_indices)

In [56]:
gru_fw_cell = tf.contrib.rnn.GRUCell(num_units = 2, activation = tf.nn.tanh)
gru_bw_cell = tf.contrib.rnn.GRUCell(num_units = 2, activation = tf.nn.tanh)

outputs, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw = gru_fw_cell, cell_bw = gru_bw_cell,
                                           inputs = seq_batch, sequence_length = seq_len, dtype = tf.float32)
pprint(outputs)
pprint(_)

(<tf.Tensor 'bidirectional_rnn/fw/fw/transpose_1:0' shape=(?, 8, 2) dtype=float32>,
 <tf.Tensor 'ReverseSequence:0' shape=(?, 8, 2) dtype=float32>)
(<tf.Tensor 'bidirectional_rnn/fw/fw/while/Exit_3:0' shape=(?, 2) dtype=float32>,
 <tf.Tensor 'bidirectional_rnn/bw/bw/while/Exit_3:0' shape=(?, 2) dtype=float32>)


In [57]:
# Join the forward (outputs[0]) and backward (outputs[1]) per-step outputs
# along the last (feature) axis.
concat_outputs = tf.concat([outputs[0], outputs[1]], axis = 2)
# Projection matrix from the concatenated feature size to n_of_classes logits.
weights = tf.get_variable(name = 'weights', shape = (concat_outputs.get_shape()[-1], n_of_classes),
                          initializer = tf.contrib.layers.xavier_initializer())
# Apply the same projection to each element along the first axis of concat_outputs.
# NOTE(review): a single batched projection (e.g. tf.tensordot) looks
# equivalent and would avoid the per-element map - confirm before changing.
score = tf.map_fn(lambda elm : tf.matmul(elm, weights), concat_outputs)
pprint(score)

<tf.Tensor 'map/TensorArrayStack/TensorArrayGatherV3:0' shape=(?, 8, 8) dtype=float32>


In [58]:
# Build the padding mask from the fed `seq_len` placeholder rather than the
# Python list `sen_len`, so the mask always matches the batch in feed_dict.
masking = tf.sequence_mask(lengths = seq_len, maxlen = max_length, dtype = tf.float32)
# Weighted cross-entropy; padded steps (mask weight 0) do not contribute.
seq2seq_loss = tf.contrib.seq2seq.sequence_loss(logits = score, targets = label, weights = masking)

In [59]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())
pprint(sess.run([score, _], feed_dict = {seq_len : [sen_len[0]], seq_indices : [sen_indices[0]]}))

[array([[[ 0.16862816,  0.00404082, -0.11827   ,  0.0535416 ,
         -0.11164413,  0.0408196 ,  0.00550904, -0.16447452],
        [ 0.21764617, -0.04588123, -0.15964638,  0.21997268,
         -0.09319463,  0.08445665,  0.06755111, -0.23474823],
        [ 0.13310257, -0.08929545, -0.08857331,  0.12412636,
         -0.07805191,  0.01264486,  0.02737853, -0.13982151],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ]]],
      dtype=f

In [60]:
# Evaluate the mask and the masked sequence loss on the full example batch.
pprint(sess.run([masking, seq2seq_loss], feed_dict = {seq_len : sen_len, seq_indices : sen_indices,
                                                      label : pos_indices}))
# Last use of this session in the section: release its resources before the
# default graph is reset for the next example.
sess.close()

[array([[1., 1., 1., 0., 0., 0., 0., 0.],
       [1., 1., 1., 1., 0., 0., 0., 0.],
       [1., 1., 1., 1., 1., 1., 1., 0.],
       [1., 1., 1., 1., 1., 0., 0., 0.]], dtype=float32),
 2.0999782]


### Many to Many : Stacked Bi-directional

In [61]:
tf.reset_default_graph()

max_length = 8
n_of_classes = len(pos_dic)

seq_len = tf.placeholder(dtype = tf.int32, shape = [None])
seq_indices = tf.placeholder(dtype = tf.int32, shape = [None, max_length])
label = tf.placeholder(dtype = tf.int32, shape = [None, max_length])
keep_prob = tf.placeholder(dtype = tf.float32)

one_hot = np.eye(len(word_dic)).astype(np.float32)
one_hot = tf.get_variable(name='one_hot', initializer = one_hot,
                                   trainable = False)
seq_batch = tf.nn.embedding_lookup(params = one_hot, ids = seq_indices)

In [62]:
h_dims = [2,2]
gru_fw_cells, gru_bw_cells = [], []

# forward
for h_dim in h_dims:
    gru_cell = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh)
    gru_cell = tf.contrib.rnn.DropoutWrapper(cell = gru_cell, output_keep_prob = keep_prob)
    gru_fw_cells.append(gru_cell)
    
# backward
for h_dim in h_dims:
    gru_cell = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh)
    gru_cell = tf.contrib.rnn.DropoutWrapper(cell = gru_cell, output_keep_prob = keep_prob)
    gru_bw_cells.append(gru_cell)

In [63]:
outputs, output_state_fw, output_state_bw = \
tf.contrib.rnn.stack_bidirectional_dynamic_rnn(cells_fw = gru_fw_cells, cells_bw = gru_bw_cells,
                                               inputs = seq_batch, sequence_length = seq_len,
                                               dtype = tf.float32)
pprint(outputs)
pprint(output_state_fw)
pprint(output_state_bw)

<tf.Tensor 'stack_bidirectional_rnn/cell_1/concat:0' shape=(?, 8, 4) dtype=float32>
(<tf.Tensor 'stack_bidirectional_rnn/cell_0/bidirectional_rnn/fw/fw/while/Exit_3:0' shape=(?, 2) dtype=float32>,
 <tf.Tensor 'stack_bidirectional_rnn/cell_1/bidirectional_rnn/fw/fw/while/Exit_3:0' shape=(?, 2) dtype=float32>)
(<tf.Tensor 'stack_bidirectional_rnn/cell_0/bidirectional_rnn/bw/bw/while/Exit_3:0' shape=(?, 2) dtype=float32>,
 <tf.Tensor 'stack_bidirectional_rnn/cell_1/bidirectional_rnn/bw/bw/while/Exit_3:0' shape=(?, 2) dtype=float32>)


In [64]:
# Projection matrix from the last-axis size of `outputs` to n_of_classes logits.
weights = tf.get_variable(name = 'weights', shape = (outputs.get_shape()[-1], n_of_classes),
                          initializer = tf.contrib.layers.xavier_initializer())
# Apply the same projection to each element along the first axis of outputs.
# NOTE(review): a single batched projection (e.g. tf.tensordot) looks
# equivalent and would avoid the per-element map - confirm before changing.
score = tf.map_fn(lambda elm : tf.matmul(elm, weights), outputs)
pprint(score)

<tf.Tensor 'map/TensorArrayStack/TensorArrayGatherV3:0' shape=(?, 8, 8) dtype=float32>


In [65]:
# Build the padding mask from the fed `seq_len` placeholder rather than the
# Python list `sen_len`, so the mask always matches the batch in feed_dict.
masking = tf.sequence_mask(lengths = seq_len, maxlen = max_length, dtype = tf.float32)
# Weighted cross-entropy; padded steps (mask weight 0) do not contribute.
seq2seq_loss = tf.contrib.seq2seq.sequence_loss(logits = score, targets = label, weights = masking)

In [66]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())
pprint(sess.run(score, feed_dict = {seq_len : [sen_len[0]], seq_indices : [sen_indices[0]],
                                         keep_prob : 1.}))

array([[[ 0.02271244, -0.02384472,  0.01439292, -0.01237436,
         -0.0037775 ,  0.03624248, -0.02230978, -0.01178392],
        [ 0.02258522, -0.01905726,  0.00837328, -0.00484734,
          0.00557474,  0.0180396 ,  0.00872493,  0.00712166],
        [ 0.02025667, -0.02466974,  0.01172541, -0.01174619,
          0.00904868,  0.01962387,  0.01582363,  0.00594005],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ]]],
      dtype=float32)

In [67]:
# Evaluate the mask and the masked sequence loss on the full example batch
# (dropout disabled).
pprint(sess.run([masking, seq2seq_loss], feed_dict = {seq_len : sen_len, seq_indices : sen_indices,
                                                      label : pos_indices, keep_prob : 1.}))
# Last use of this session in the section: release its resources before the
# default graph is reset for the next example.
sess.close()

[array([[1., 1., 1., 0., 0., 0., 0., 0.],
       [1., 1., 1., 1., 0., 0., 0., 0.],
       [1., 1., 1., 1., 1., 1., 1., 0.],
       [1., 1., 1., 1., 1., 0., 0., 0.]], dtype=float32),
 2.0819566]


## Sequence to Sequence

### Sequence to Sequence : Example data

In [68]:
tf.reset_default_graph()

sources = [['I', 'feel', 'hungry'],
     ['tensorflow', 'is', 'very', 'difficult'],
     ['tensorflow', 'is', 'a', 'framework', 'for', 'deep', 'learning'],
     ['tensorflow', 'is', 'very', 'fast', 'changing']]
targets = [['나는', '배가', '고프다'],
           ['텐서플로우는', '매우', '어렵다'],
           ['텐서플로우는', '딥러닝을', '위한', '프레임워크이다'],
           ['텐서플로우는', '매우', '빠르게', '변화한다']]

In [69]:
# Word dictionary for the source sentences: unique words in sorted order,
# with '<pad>' reserved at index 0.
source_words = set()
for elm in sources:
    source_words.update(elm)
source_words = ['<pad>'] + sorted(source_words)

source_dic = dict(zip(source_words, range(len(source_words))))
print(source_dic)
print(len(source_dic))

{'<pad>': 0, 'I': 1, 'a': 2, 'changing': 3, 'deep': 4, 'difficult': 5, 'fast': 6, 'feel': 7, 'for': 8, 'framework': 9, 'hungry': 10, 'is': 11, 'learning': 12, 'tensorflow': 13, 'very': 14}
15


In [70]:
# Word dictionary for the translations: unique words in sorted order, preceded
# by the special tokens. '<start>' and '<end>' mark the beginning and the end
# of a translated sentence.
target_words = set()
for elm in targets:
    target_words.update(elm)
target_words = ['<pad>', '<start>', '<end>'] + sorted(target_words)

target_dic = dict(zip(target_words, range(len(target_words))))
print(target_dic)
print(len(target_dic))

{'<pad>': 0, '<start>': 1, '<end>': 2, '고프다': 3, '나는': 4, '딥러닝을': 5, '매우': 6, '배가': 7, '변화한다': 8, '빠르게': 9, '어렵다': 10, '위한': 11, '텐서플로우는': 12, '프레임워크이다': 13}
14


In [71]:
def pad_seq_enc(sequences, max_len, dic):
    """Index and right-pad encoder input sequences.

    Args:
        sequences: iterable of word sequences (each must support ``len``).
        max_len: number of entries every padded row should reach.
        dic: mapping from word to index; must contain ``'<pad>'``.

    Returns:
        ``(lengths, rows)``: the unpadded length of each sequence and its
        ``dic.get`` indices followed by pad indices. Rows that already have
        ``max_len`` or more entries are neither padded nor truncated.
    """
    lengths, rows = [], []
    for words in sequences:
        lengths.append(len(words))
        ids = list(map(dic.get, words))
        shortfall = max_len - len(ids)  # non-positive -> nothing is appended
        rows.append(ids + shortfall * [dic.get('<pad>')])
    return lengths, rows

In [72]:
def pad_seq_dec(sequences, max_len, dic):
    """Build padded decoder-input and decoder-target index rows.

    For every sequence the decoder input is ``<start> + tokens`` and the
    decoder target is ``tokens + <end>``; both are right-padded with
    ``<pad>`` to exactly ``max_len`` entries.

    Args:
        sequences: iterable of token sequences (e.g. lists of words).
        max_len: width of every returned row; must be at least 1 so that the
            '<start>' / '<end>' marker fits.
        dic: mapping from token to integer index; must contain '<pad>',
            '<start>' and '<end>'. Lookups go through ``dic.get``, so a token
            missing from a plain dict is mapped to ``None``.

    Returns:
        A ``(seq_input_len, seq_input_indices, seq_target_indices)`` tuple.
        ``seq_input_len[i]`` counts the non-padding entries of row ``i``
        (kept tokens plus one marker); it is the same for the input row and
        the target row.

    Raises:
        ValueError: if ``max_len`` is smaller than 1.
    """
    if max_len < 1:
        raise ValueError('max_len must be at least 1 to hold the <start>/<end> token')

    pad_idx = dic.get('<pad>')
    start_idx = dic.get('<start>')
    end_idx = dic.get('<end>')

    seq_input_len = []
    seq_input_indices = []
    seq_target_indices = []
    for seq in sequences:
        # One slot is reserved for the marker, so at most max_len - 1 tokens
        # are kept; this keeps every row exactly max_len wide and guarantees
        # the target row always contains '<end>'.
        token_idx = [dic.get(token) for token in seq[:max_len - 1]]
        padding = (max_len - 1 - len(token_idx)) * [pad_idx]

        seq_input_len.append(len(token_idx) + 1)
        seq_input_indices.append([start_idx] + token_idx + padding)   # decoder input
        seq_target_indices.append(token_idx + [end_idx] + padding)    # decoder output

    return seq_input_len, seq_input_indices, seq_target_indices

In [73]:
# Encoder inputs: pad every source sentence to source_max_len indices and
# keep the unpadded lengths alongside.
source_max_len = 10
X_length, X_indices = pad_seq_enc(sequences = sources, max_len = source_max_len, dic = source_dic)
# Print the per-sentence lengths and the (n_sentences, source_max_len) shape.
print(X_length, np.shape(X_indices))

[3, 4, 7, 5] (4, 10)


In [74]:
# Decoder inputs/targets: y_input_indices rows begin with '<start>',
# y_target_indices rows end with '<end>', both padded to target_max_len.
# y_length counts the non-padding entries (tokens plus one marker).
target_max_len = 12
y_length, y_input_indices, y_target_indices = pad_seq_dec(sequences = targets, max_len = target_max_len,
                                                             dic = target_dic)
pprint(y_length)
pprint(y_input_indices)
pprint(y_target_indices)

[4, 4, 5, 5]
[[1, 4, 7, 3, 0, 0, 0, 0, 0, 0, 0, 0],
 [1, 12, 6, 10, 0, 0, 0, 0, 0, 0, 0, 0],
 [1, 12, 5, 11, 13, 0, 0, 0, 0, 0, 0, 0],
 [1, 12, 6, 9, 8, 0, 0, 0, 0, 0, 0, 0]]
[[4, 7, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0],
 [12, 6, 10, 2, 0, 0, 0, 0, 0, 0, 0, 0],
 [12, 5, 11, 13, 2, 0, 0, 0, 0, 0, 0, 0],
 [12, 6, 9, 8, 2, 0, 0, 0, 0, 0, 0, 0]]


In [75]:
# Graph inputs. The leading None dimension is the batch size.
# s_len / s_indices: source lengths and padded source index rows (encoder side).
s_len = tf.placeholder(dtype = tf.int32, shape = [None])
s_indices = tf.placeholder(dtype = tf.int32, shape = [None, source_max_len])
# t_len: target lengths, used to build the loss mask.
t_len = tf.placeholder(dtype = tf.int32, shape = [None])
# t_input_indices: padded decoder input rows; t_output_indices: padded decoder target rows.
t_input_indices = tf.placeholder(dtype = tf.int32, shape = [None, target_max_len])
t_output_indices = tf.placeholder(dtype = tf.int32, shape = [None, target_max_len])

In [76]:
# Source embedding table: a variable initialised to the identity matrix, so
# each source index initially looks up a one-hot row.
s_embedding = tf.eye(num_rows = len(source_dic), dtype = tf.float32)
s_embedding = tf.get_variable(name = 's_embedding', initializer = s_embedding)
s_batch = tf.nn.embedding_lookup(params = s_embedding, ids = s_indices)

# Encoder: a 2-unit GRU run over the embedded source batch. s_len gives the
# unpadded length of each row. The per-step outputs are discarded; only the
# final state is kept, to initialise the decoder.
enc_cell = tf.contrib.rnn.GRUCell(num_units = 2, activation = tf.nn.tanh)
_, enc_state = tf.nn.dynamic_rnn(cell = enc_cell, inputs = s_batch, sequence_length = s_len, dtype = tf.float32)

In [77]:
# Target embedding table: a variable initialised to the identity matrix, so
# each target index initially looks up a one-hot row.
t_embedding = tf.eye(num_rows = len(target_dic), dtype = tf.float32)
t_embedding = tf.get_variable(name = 't_embedding', initializer = t_embedding)
t_batch = tf.nn.embedding_lookup(params = t_embedding, ids = t_input_indices)

# A vector of ones with one int32 entry per example in the batch (same shape as s_len).
tokens = tf.ones_like(tensor = s_len, dtype = tf.int32)
# Every example is decoded for the full padded width during training; a plain
# broadcast multiply replaces the element-by-element tf.map_fn loop.
tr_tokens = tokens * target_max_len
# Take the start-token id from the vocabulary instead of relying on '<start>'
# happening to have index 1.
start_tokens = tokens * target_dic.get('<start>')

# Training decoder: the ground-truth decoder inputs (t_batch) are fed at every
# step. The GRU output is projected to one score per target vocabulary entry,
# and decoding starts from the encoder's final state.
tr_helper = tf.contrib.seq2seq.TrainingHelper(inputs = t_batch, sequence_length = tr_tokens)
dec_cell = tf.contrib.rnn.GRUCell(num_units = 2, activation = tf.nn.tanh)
score_cell = tf.contrib.rnn.OutputProjectionWrapper(cell = dec_cell, output_size = len(target_dic))
tr_decoder = tf.contrib.seq2seq.BasicDecoder(cell = score_cell, initial_state = enc_state, helper = tr_helper)

In [78]:
# Unroll the training decoder for at most target_max_len steps. Only the
# outputs are kept; the other two returned values are discarded.
tr_outputs,_,_= tf.contrib.seq2seq.dynamic_decode(decoder = tr_decoder, impute_finished = True,
                                                  maximum_iterations = target_max_len)

In [79]:
# Per-position loss weights: 1.0 for the first t_len positions of each row and
# 0.0 for the rest, up to target_max_len.
masking = tf.sequence_mask(lengths = t_len, maxlen = target_max_len, dtype = tf.float32)
# Sequence loss between the training decoder's scores and the target indices,
# weighted by the mask above.
seq2seq_loss = tf.contrib.seq2seq.sequence_loss(logits = tr_outputs.rnn_output,
                                                targets = t_output_indices, weights = masking)

In [80]:
# Inference decoder: GreedyEmbeddingHelper is given the target embedding table,
# the per-example start tokens and the '<end>' index as its end token.
trans_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(embedding = t_embedding,
                                                        start_tokens = start_tokens,
                                                        end_token = target_dic.get('<end>'))
# Reuses the same projected decoder cell (score_cell) and the encoder's final
# state as the training decoder.
trans_decoder = tf.contrib.seq2seq.BasicDecoder(cell = score_cell, initial_state = enc_state,
                                                helper = trans_helper)
# Decoding is capped at twice the padded target width.
trans_outputs,_,_ = tf.contrib.seq2seq.dynamic_decode(decoder = trans_decoder, impute_finished = True,
                                                      maximum_iterations = target_max_len * 2)

In [81]:
# Open a session and initialise all variables.
sess = tf.Session()
sess.run(tf.global_variables_initializer())
# Evaluate the training decoder's scores and the loss mask for the first
# sentence pair only; each feed is wrapped in a list to form a batch of one.
logits, masks = sess.run([tr_outputs.rnn_output,masking], feed_dict = {s_len : [X_length[0]],
                                                         s_indices : [X_indices[0]],
                                                         t_len : [y_length[0]],
                                                         t_input_indices : [y_input_indices[0]]})
# Evaluate the loss for the same example; this additionally needs the target indices.
loss = sess.run(seq2seq_loss, feed_dict = {s_len : [X_length[0]],
                                           s_indices : [X_indices[0]],
                                           t_len : [y_length[0]],
                                           t_input_indices : [y_input_indices[0]],
                                           t_output_indices : [y_target_indices[0]]})

In [82]:
# Run the inference decoder on the first source sentence; only the source-side
# placeholders are fed. sample_id holds the token ids chosen by the decoder.
translations = sess.run(trans_outputs.sample_id, feed_dict = {s_len : [X_length[0]],
                                               s_indices : [X_indices[0]]})

In [83]:
# Training-decoder scores for the first example (one row of vocabulary scores per step).
pprint(logits)

array([[[-0.00770796, -0.01160654,  0.06141101, -0.00119117,
          0.00151106, -0.00418233,  0.04408944, -0.01907253,
          0.06191742,  0.00312654, -0.00581186, -0.02721635,
          0.05599122, -0.03012921],
        [-0.06202612, -0.00614491,  0.06214031, -0.06277049,
          0.02558279, -0.01664988,  0.02347147, -0.03689051,
          0.06652129,  0.04653463,  0.04533478, -0.00508038,
          0.05785662,  0.00203341],
        [-0.09766634, -0.01459097,  0.12218379, -0.09584236,
          0.0395265 , -0.02717485,  0.05562212, -0.06465542,
          0.12906486,  0.0720693 ,  0.06619576, -0.02005081,
          0.11322317, -0.0105706 ],
        [-0.05630375, -0.0053757 ,  0.0554057 , -0.05710275,
          0.02325371, -0.01507437,  0.02053787, -0.03321677,
          0.05938321,  0.04229102,  0.04136585, -0.00411567,
          0.0516084 ,  0.00241266],
        [-0.09801295, -0.01543377,  0.12653415, -0.09570034,
          0.03954508, -0.02742546,  0.05882316, -0.06594183,
  

In [84]:
# Loss mask and masked sequence loss for the first example.
pprint(masks)
pprint(loss)

array([[1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.]], dtype=float32)
2.6709042
