- Many-to-many classification with `tf.contrib.seq2seq.sequence_loss`
- Masking invalid (padding) tokens with `tf.sequence_mask`


In [1]:
import os, sys
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import string
%matplotlib inline

slim = tf.contrib.slim

Instructions for updating:
Use the retry module or similar alternatives.


In [2]:
# Toy corpus: each entry pairs a tokenised sentence with its per-token
# part-of-speech tags (both lists have the same length).
tagged_corpus = [
    (['I', 'feel', 'hungry'],
     ['pronoun', 'verb', 'adjective']),
    (['tensorflow', 'is', 'very', 'difficult'],
     ['noun', 'verb', 'adverb', 'adjective']),
    (['tensorflow', 'is', 'a', 'framework', 'for', 'deep', 'learning'],
     ['noun', 'verb', 'determiner', 'noun', 'preposition', 'adjective', 'noun']),
    (['tensorflow', 'is', 'very', 'fast', 'changing'],
     ['noun', 'verb', 'adverb', 'adjective', 'verb']),
]
sentences = [words for words, _tags in tagged_corpus]
pos = [tags for _words, tags in tagged_corpus]

In [3]:
# Vocabulary: the unique words of all sentences in sorted order, with the
# padding token placed first so that '<pad>' maps to index 0.
word_list = ['<pad>'] + sorted({word for sentence in sentences for word in sentence})

# word -> integer index (position in word_list)
word_dic = dict(zip(word_list, range(len(word_list))))
print(word_dic)

{'deep': 4, 'changing': 3, 'is': 11, 'very': 14, 'difficult': 5, 'learning': 12, 'feel': 7, 'a': 2, 'framework': 9, 'tensorflow': 13, '<pad>': 0, 'hungry': 10, 'fast': 6, 'I': 1, 'for': 8}


In [4]:
# Vocabulary size, counting the '<pad>' entry.
len(word_dic)

15

In [5]:
# Tag set: the unique POS tags in sorted order, with '<pad>' first (index 0).
pos_list = ['<pad>'] + sorted({tag for tags in pos for tag in tags})
print(pos_list)

# tag -> integer index (position in pos_list)
pos_dic = dict(zip(pos_list, range(len(pos_list))))
pos_dic

['<pad>', 'adjective', 'adverb', 'determiner', 'noun', 'preposition', 'pronoun', 'verb']


{'<pad>': 0,
 'adjective': 1,
 'adverb': 2,
 'determiner': 3,
 'noun': 4,
 'preposition': 5,
 'pronoun': 6,
 'verb': 7}

In [6]:
# Inverse of pos_dic: integer index -> tag, used to decode predictions.
pos_idx_to_dic = {idx: tag for tag, idx in pos_dic.items()}
pos_idx_to_dic

{0: '<pad>',
 1: 'adjective',
 2: 'adverb',
 3: 'determiner',
 4: 'noun',
 5: 'preposition',
 6: 'pronoun',
 7: 'verb'}

In [7]:
def pad_seq(sequences, max_len, dic):
    """Convert token sequences into fixed-length rows of integer indices.

    Args:
        sequences: iterable of token sequences (one per sentence).
        max_len: length of every returned row. Shorter sequences are padded
            on the right with the '<pad>' index; longer ones are truncated.
        dic: mapping token -> integer index; must contain '<pad>'.

    Returns:
        (seq_len, seq_indices) where seq_len[i] is the number of real
        (non-padding) tokens kept from sequences[i], never more than max_len,
        and seq_indices[i] is a list of exactly max_len indices.

    Raises:
        KeyError: if '<pad>' or any token is missing from dic.
    """
    pad_idx = dic['<pad>']
    seq_len, seq_indices = [], []
    # Here `seq` is a single sentence.
    for seq in sequences:
        # Truncate first so every row has exactly max_len entries and the
        # reported length never exceeds the row length.
        tokens = list(seq)[:max_len]
        unknown = [word for word in tokens if word not in dic]
        if unknown:
            # Fail loudly instead of silently putting None into the row.
            raise KeyError('tokens missing from dic: {}'.format(unknown))
        seq_idx = [dic[word] for word in tokens]
        seq_len.append(len(seq_idx))
        seq_idx += (max_len - len(seq_idx)) * [pad_idx]
        seq_indices.append(seq_idx)
    return seq_len, seq_indices

In [8]:
# Every sentence is padded to this many time steps.
max_length = 10
# X_length: number of real tokens per sentence; X_indices: padded index rows.
X_length, X_indices = pad_seq(sentences, max_length, word_dic)
print(X_length, np.shape(X_indices))

[3, 4, 7, 5] (4, 10)


In [9]:
# target의 padding 처리
# max_length-len(elm)의 갯수만큼 special symbol, predefined token word <pad>를 붙힌다.
y = [elm + ['<pad>'] * (max_length-len(elm)) for elm in pos]
y

[['pronoun',
  'verb',
  'adjective',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>'],
 ['noun',
  'verb',
  'adverb',
  'adjective',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>'],
 ['noun',
  'verb',
  'determiner',
  'noun',
  'preposition',
  'adjective',
  'noun',
  '<pad>',
  '<pad>',
  '<pad>'],
 ['noun',
  'verb',
  'adverb',
  'adjective',
  'verb',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>']]

In [10]:
# Encode the targets as padded rows of tag indices. They are derived from
# `pos` through the same helper used for the inputs, instead of rebinding `y`
# from itself: re-running this cell therefore gives the same result rather
# than rows of None.
y = pad_seq(sequences=pos, max_len=max_length, dic=pos_dic)[1]
print(np.shape(y))

(4, 10)


In [11]:
# Display the integer-encoded, padded targets (0 is the '<pad>' index).
y

[[6, 7, 1, 0, 0, 0, 0, 0, 0, 0],
 [4, 7, 2, 1, 0, 0, 0, 0, 0, 0],
 [4, 7, 3, 4, 5, 1, 4, 0, 0, 0],
 [4, 7, 2, 1, 7, 0, 0, 0, 0, 0]]

In [22]:
# One-hot encode the targets; the depth follows the tag set size instead of a
# hard-coded 8 so it stays correct if tags are added.
y_one_hot = tf.one_hot(y, len(pos_dic))

In [23]:
# Show the symbolic tensor (its static shape and dtype); nothing is evaluated here.
y_one_hot

<tf.Tensor 'one_hot:0' shape=(4, 10, 8) dtype=float32>

In [12]:
class SimPosRNN:
    """Many-to-many sequence tagger built on a BasicRNNCell.

    Constructing an instance adds the whole model to the default TF graph:
    a non-trainable identity-matrix lookup that one-hot encodes the token
    indices, a dynamic_rnn over a BasicRNNCell wrapped in an
    OutputProjectionWrapper (n_of_classes scores per time step), a
    sequence_loss weighted by a sequence_mask built from X_length, and an
    argmax prediction over the class axis.

    Args:
        X_length: tensor of sequence lengths.
        X_indices: tensor of padded token indices.
        y: tensor of padded target class indices.
        n_of_classes: number of output classes per time step.
        hidden_dim: number of units of the recurrent cell.
        max_len: padded sequence length, used as the mask width.
        word_dic: token -> index mapping; only its size is used.

    Attributes:
        score_cell: the projection-wrapped recurrent cell.
        state: final state returned by dynamic_rnn.
        masks: float32 sequence mask used as loss weights.
        seq2seq_loss: the masked sequence loss tensor.
    """

    def __init__(self, X_length, X_indices, y, n_of_classes, hidden_dim, max_len, word_dic):
        self._X_length, self._X_indices, self._y = X_length, X_indices, y

        with tf.variable_scope('rnn_input'):
            # Identity matrix used as a frozen embedding table, so looking up
            # an index yields its one-hot vector.
            self._one_hot = tf.get_variable(
                name='one_hot_embedding',
                initializer=tf.eye(len(word_dic), dtype=tf.float32),
                trainable=False)
            self._X_batch = tf.nn.embedding_lookup(params=self._one_hot,
                                                   ids=self._X_indices)

        with tf.variable_scope('rnn_cell'):
            base_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=hidden_dim,
                                                    activation=tf.nn.tanh)
            # Project every step's output to class scores.
            self.score_cell = tf.contrib.rnn.OutputProjectionWrapper(
                cell=base_cell, output_size=n_of_classes)
            self._outputs, self.state = tf.nn.dynamic_rnn(
                cell=self.score_cell,
                inputs=self._X_batch,
                sequence_length=self._X_length,
                dtype=tf.float32)

        with tf.variable_scope('rnn_seq2seq_loss'):
            # Mask built from the sequence lengths, used as per-step loss weights.
            self.masks = tf.sequence_mask(lengths=self._X_length,
                                          maxlen=max_len,
                                          dtype=tf.float32)
            self.seq2seq_loss = tf.contrib.seq2seq.sequence_loss(
                logits=self._outputs,
                targets=self._y,
                weights=self.masks)

        with tf.variable_scope('rnn_prediction'):
            self._prediction = tf.argmax(input=self._outputs, axis=2,
                                         output_type=tf.int32)

    def predict(self, sess, X_length, X_indices):
        """Return predicted class indices for the given lengths and indices.

        The values are fed in place of the length/index tensors supplied at
        construction time and the prediction op is evaluated in `sess`.
        """
        return sess.run(self._prediction,
                        feed_dict={self._X_length: X_length,
                                   self._X_indices: X_indices})

```python
pos_rnn.score_cell

<tensorflow.contrib.rnn.python.ops.core_rnn_cell.OutputProjectionWrapper at 0x29afd699eb8>

pos_rnn._outputs
<tf.Tensor 'rnn_cell/rnn/transpose_1:0' shape=(?, 10, 8) dtype=float32>

pos_rnn.state
<tf.Tensor 'rnn_cell/rnn/while/Exit_3:0' shape=(?, 16) dtype=float32>

pos_rnn.masks
<tf.Tensor 'rnn_seq2seq_loss/SequenceMask/Cast_1:0' shape=(?, 10) dtype=float32>
```

In [13]:
class SimPosLSTM:
    """Many-to-many sequence tagger built on a BasicLSTMCell.

    Constructing an instance adds the whole model to the default TF graph:
    a non-trainable identity-matrix lookup that one-hot encodes the token
    indices, a dynamic_rnn over a BasicLSTMCell wrapped in an
    OutputProjectionWrapper (n_of_classes scores per time step), a
    sequence_loss weighted by a sequence_mask built from X_length, and an
    argmax prediction over the class axis.

    Args:
        X_length: tensor of sequence lengths.
        X_indices: tensor of padded token indices.
        y: tensor of padded target class indices.
        n_of_classes: number of output classes per time step.
        hidden_dim: number of units of the recurrent cell.
        max_len: padded sequence length, used as the mask width.
        word_dic: token -> index mapping; only its size is used.

    Attributes:
        states: final state returned by dynamic_rnn.
        seq2seq_loss: the masked sequence loss tensor.
    """

    def __init__(self, X_length, X_indices, y, n_of_classes, hidden_dim, max_len, word_dic):
        self._X_length, self._X_indices, self._y = X_length, X_indices, y

        with tf.variable_scope('lstm_input'):
            # Identity matrix used as a frozen embedding table, so looking up
            # an index yields its one-hot vector.
            self._one_hot = tf.get_variable(
                name='one_hot_embedding',
                initializer=tf.eye(len(word_dic), dtype=tf.float32),
                trainable=False)
            self._X_batch = tf.nn.embedding_lookup(params=self._one_hot,
                                                   ids=self._X_indices)

        with tf.variable_scope('lstm_cell'):
            base_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=hidden_dim,
                                                     activation=tf.nn.tanh)
            # Project every step's output to class scores.
            projected_cell = tf.contrib.rnn.OutputProjectionWrapper(
                cell=base_cell, output_size=n_of_classes)
            self._outputs, self.states = tf.nn.dynamic_rnn(
                cell=projected_cell,
                inputs=self._X_batch,
                sequence_length=self._X_length,
                dtype=tf.float32)

        with tf.variable_scope('lstm_seq2seq_loss'):
            # Mask built from the sequence lengths, used as per-step loss weights.
            step_weights = tf.sequence_mask(lengths=self._X_length,
                                            maxlen=max_len,
                                            dtype=tf.float32)
            self.seq2seq_loss = tf.contrib.seq2seq.sequence_loss(
                logits=self._outputs,
                targets=self._y,
                weights=step_weights)

        with tf.variable_scope('lstm_prediction'):
            self._prediction = tf.argmax(input=self._outputs, axis=2,
                                         output_type=tf.int32)

    def predict(self, sess, X_length, X_indices):
        """Return predicted class indices for the given lengths and indices.

        The values are fed in place of the length/index tensors supplied at
        construction time and the prediction op is evaluated in `sess`.
        """
        return sess.run(self._prediction,
                        feed_dict={self._X_length: X_length,
                                   self._X_indices: X_indices})

```python
pos_rnn._X_batch
<tf.Tensor 'lstm_input/embedding_lookup:0' shape=(?, 10, 15) dtype=float32>

pos_rnn._outputs
<tf.Tensor 'lstm_cell/rnn/transpose_1:0' shape=(?, 10, 8) dtype=float32>

pos_rnn.states
LSTMStateTuple(c=<tf.Tensor 'lstm_cell/rnn/while/Exit_3:0' shape=(?, 16) dtype=float32>, h=<tf.Tensor 'lstm_cell/rnn/while/Exit_4:0' shape=(?, 16) dtype=float32>)
```

In [24]:
class SimPosGRU:
    """Many-to-many sequence tagger built on a GRUCell.

    Constructing an instance adds the whole model to the default TF graph:
    a non-trainable identity-matrix lookup that one-hot encodes the token
    indices, a dynamic_rnn over a GRUCell wrapped in an
    OutputProjectionWrapper (n_of_classes scores per time step), a
    sequence_loss weighted by a sequence_mask built from X_length, and an
    argmax prediction over the class axis.

    Args:
        X_length: tensor of sequence lengths.
        X_indices: tensor of padded token indices.
        y: tensor of padded target class indices.
        n_of_classes: number of output classes per time step.
        hidden_dim: number of units of the recurrent cell.
        max_len: padded sequence length, used as the mask width.
        word_dic: token -> index mapping; only its size is used.

    Attributes:
        states: final state returned by dynamic_rnn.
        seq2seq_loss: the masked sequence loss tensor.
    """

    def __init__(self, X_length, X_indices, y, n_of_classes, hidden_dim, max_len, word_dic):
        self._X_length, self._X_indices, self._y = X_length, X_indices, y

        with tf.variable_scope('gru_input'):
            # Identity matrix used as a frozen embedding table, so looking up
            # an index yields its one-hot vector.
            self._one_hot = tf.get_variable(
                name='one_hot_embedding',
                initializer=tf.eye(len(word_dic), dtype=tf.float32),
                trainable=False)
            self._X_batch = tf.nn.embedding_lookup(params=self._one_hot,
                                                   ids=self._X_indices)

        with tf.variable_scope('gru_cell'):
            base_cell = tf.nn.rnn_cell.GRUCell(num_units=hidden_dim,
                                               activation=tf.nn.tanh)
            # Project every step's output to class scores.
            projected_cell = tf.contrib.rnn.OutputProjectionWrapper(
                cell=base_cell, output_size=n_of_classes)
            self._outputs, self.states = tf.nn.dynamic_rnn(
                cell=projected_cell,
                inputs=self._X_batch,
                sequence_length=self._X_length,
                dtype=tf.float32)

        with tf.variable_scope('gru_seq2seq_loss'):
            # Mask built from the sequence lengths, used as per-step loss weights.
            step_weights = tf.sequence_mask(lengths=self._X_length,
                                            maxlen=max_len,
                                            dtype=tf.float32)
            self.seq2seq_loss = tf.contrib.seq2seq.sequence_loss(
                logits=self._outputs,
                targets=self._y,
                weights=step_weights)

        with tf.variable_scope('gru_prediction'):
            self._prediction = tf.argmax(input=self._outputs, axis=2,
                                         output_type=tf.int32)

    def predict(self, sess, X_length, X_indices):
        """Return predicted class indices for the given lengths and indices.

        The values are fed in place of the length/index tensors supplied at
        construction time and the prediction op is evaluated in `sess`.
        """
        return sess.run(self._prediction,
                        feed_dict={self._X_length: X_length,
                                   self._X_indices: X_indices})

```python
pos_rnn._X_batch
<tf.Tensor 'gru_input/embedding_lookup:0' shape=(?, 10, 15) dtype=float32>

pos_rnn._outputs
<tf.Tensor 'gru_cell/rnn/transpose_1:0' shape=(?, 10, 8) dtype=float32>

pos_rnn.states
<tf.Tensor 'gru_cell/rnn/while/Exit_3:0' shape=(?, 16) dtype=float32>
```

In [14]:
# Training hyper-parameters.
lr = 0.003
epochs = 100
batch_size = 2
# Steps per epoch. The dataset is batched without dropping the remainder, so a
# final partial batch still counts as a step: round up instead of truncating.
total_step = int(np.ceil(len(X_indices) / batch_size))
print(total_step)

2


In [15]:
# One dataset element per sentence: (length, padded token indices, padded tag indices).
tr_dataset = tf.data.Dataset.from_tensor_slices((X_length, X_indices, y))

In [16]:
# Shuffle, then group into mini-batches.
# NOTE: this rebinds tr_dataset from itself, so re-running the cell stacks
# another shuffle/batch on top; re-run the from_tensor_slices cell first.
tr_dataset = tr_dataset.shuffle(buffer_size=20).batch(batch_size=batch_size)
# Initializable iterator: it is re-initialised at the start of every epoch.
tr_iterator = tr_dataset.make_initializable_iterator()
print(tr_iterator)

<tensorflow.python.data.ops.iterator_ops.Iterator object at 0x000001E821FA8860>


In [17]:
# Symbolic tensors for one mini-batch: (lengths, token indices, tag indices).
# The model is built on these, so the training loop needs no feed_dict.
X_length_mb, X_indices_mb, y_mb = tr_iterator.get_next()

In [25]:
# Pick exactly one model; the commented-out lines are the alternative cell
# types (plain RNN / LSTM) with the same constructor arguments.
# The number of classes follows the tag set instead of a hard-coded 8.

# pos_rnn = SimPosRNN(X_length=X_length_mb, X_indices=X_indices_mb, y=y_mb,
#                     n_of_classes=len(pos_dic), hidden_dim=16, max_len=max_length, word_dic=word_dic)

# pos_rnn = SimPosLSTM(X_length=X_length_mb, X_indices=X_indices_mb, y=y_mb,
#                      n_of_classes=len(pos_dic), hidden_dim=16, max_len=max_length, word_dic=word_dic)

pos_rnn = SimPosGRU(X_length=X_length_mb, X_indices=X_indices_mb, y=y_mb,
                    n_of_classes=len(pos_dic), hidden_dim=16, max_len=max_length,
                    word_dic=word_dic)

In [23]:
# Adam on the masked sequence loss; running training_op performs one update step.
opt = tf.train.AdamOptimizer(learning_rate=lr)
training_op = opt.minimize(loss=pos_rnn.seq2seq_loss)

In [24]:
# The session is left open on purpose: the prediction cell below reuses it.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Mean training loss of every epoch.
tr_loss_hist = []

for epoch in range(epochs):
    loss_sum, tr_step = 0, 0

    # Rewind the iterator, then consume mini-batches until it is exhausted.
    sess.run(tr_iterator.initializer)
    while True:
        try:
            _, tr_loss = sess.run(fetches=[training_op, pos_rnn.seq2seq_loss])
        except tf.errors.OutOfRangeError:
            # End of the epoch: no more batches.
            break
        loss_sum += tr_loss
        tr_step += 1

    avg_tr_loss = loss_sum / tr_step
    tr_loss_hist.append(avg_tr_loss)

    # Report every 10th epoch.
    if (epoch + 1) % 10 == 0:
        print('epoch : {:3}, tr_loss : {:.3f}'.format(epoch + 1, avg_tr_loss))

epoch :  10, tr_loss : 1.580
epoch :  20, tr_loss : 1.084
epoch :  30, tr_loss : 0.687
epoch :  40, tr_loss : 0.474
epoch :  50, tr_loss : 0.341
epoch :  60, tr_loss : 0.246
epoch :  70, tr_loss : 0.177
epoch :  80, tr_loss : 0.133
epoch :  90, tr_loss : 0.102
epoch : 100, tr_loss : 0.081


In [25]:
# Predict tags for the whole training set; the fed values replace the
# iterator-backed length/index tensors the model was built on.
yhat = pos_rnn.predict(sess, X_length, X_indices)
yhat

array([[6, 7, 1, 0, 0, 0, 0, 0, 0, 0],
       [4, 7, 2, 1, 0, 0, 0, 0, 0, 0],
       [4, 7, 3, 4, 5, 1, 4, 0, 0, 0],
       [4, 7, 2, 1, 7, 0, 0, 0, 0, 0]])

In [26]:
# Decode predicted indices back to tag names.
# NOTE: this rebinds yhat, so re-run the prediction cell before re-running this one.
yhat = [[pos_idx_to_dic.get(idx) for idx in row] for row in yhat]

In [29]:
# One decoded tag sequence per line.
for tag_row in yhat:
    print(tag_row)

['pronoun', 'verb', 'adjective', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
['noun', 'verb', 'adverb', 'adjective', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
['noun', 'verb', 'determiner', 'noun', 'preposition', 'adjective', 'noun', '<pad>', '<pad>', '<pad>']
['noun', 'verb', 'adverb', 'adjective', 'verb', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
