In [1]:
import tensorflow as tf
import numpy as np
from sklearn.metrics import classification_report

  from ._conv import register_converters as _register_converters


In [2]:
word2idx = {'<pad>': 0}
tag2idx = {'<pad>': 0}
word_idx = 1
tag_idx = 1
x_train = []
y_train = []
x_test = []
y_test = []

for line in open('pos_train.txt'):
    line = line.rstrip()
    if line:
        word, tag, _ = line.split()
        if word not in word2idx:
            word2idx[word] = word_idx
            word_idx += 1
        x_train.append(word2idx[word])
        if tag not in tag2idx:
            tag2idx[tag] = tag_idx
            tag_idx += 1
        y_train.append(tag2idx[tag])
        
word2idx['<unknown>'] = word_idx

for line in open('pos_test.txt'):
    line = line.rstrip()
    if line:
        word, tag, _ = line.split()
        if word in word2idx:
            x_test.append(word2idx[word])
        else:
            x_test.append(word_idx)
        y_test.append(tag2idx[tag])

In [3]:
params = {
    'seq_len': 20,
    'batch_size': 128,
    'hidden_dim': 128,
    'clip_norm': 5.0,
    'text_iter_step': 1,
    'lr': {'start': 5e-3, 'end': 5e-4},
    'n_epoch': 1,
    'display_step': 50,
    'vocab_size': len(word2idx),
    'n_class':tag_idx
}

In [4]:
def iter_seq(x):
    return np.array([x[i: i+params['seq_len']] for i in range(0, len(x)-params['seq_len'], params['text_iter_step'])])

def to_train_seq(*args):
    return [iter_seq(x) for x in args]

def to_test_seq(*args):
    return [np.reshape(x[:(len(x)-len(x)%params['seq_len'])],
        [-1,params['seq_len']]) for x in args]

In [5]:
X_train, Y_train = to_train_seq(x_train, y_train)
X_test, Y_test = to_test_seq(x_test, y_test)
params['lr']['steps'] = len(X_train) // params['batch_size']

In [8]:
class Model:
    def __init__(self):
        self.X = tf.placeholder(tf.int32, [None, params['seq_len']])
        self.Y = tf.placeholder(tf.int32, [None, params['seq_len']])
        
        def rnn_cell():
            return tf.nn.rnn_cell.LSTMCell(params['hidden_dim'],
                                          initializer=tf.orthogonal_initializer())
        
        def clip_grads(loss):
            variables = tf.trainable_variables()
            grads = tf.gradients(loss, variables)
            clipped_grads, _ = tf.clip_by_global_norm(grads, params['clip_norm'])
            return zip(clipped_grads, variables)
        
        self.embedding = tf.Variable(tf.truncated_normal([params['vocab_size'], params['hidden_dim']],
                                                      stddev=1.0 / np.sqrt(params['hidden_dim'])))
        embedded = tf.nn.embedding_lookup(self.embedding, self.X)
        embedded = tf.nn.dropout(embedded,0.1)
        bi_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
            rnn_cell(), rnn_cell(), embedded, dtype=tf.float32)
        x = tf.concat(bi_outputs, -1)
        self.logits = tf.layers.dense(x, params['n_class'])
        log_likelihood, trans_params = tf.contrib.crf.crf_log_likelihood(
        self.logits, self.Y, tf.count_nonzero(self.X, 1))
        self.cost = tf.reduce_mean(-log_likelihood)
        self.global_step = tf.Variable(0, trainable=False)
        self.learning_rate = tf.train.exponential_decay(params['lr']['start'],
                                                        self.global_step, params['lr']['steps'],
                                                        params['lr']['end']/params['lr']['start'])
        self.optimizer = tf.train.AdamOptimizer(self.learning_rate).apply_gradients(clip_grads(self.cost), 
                                                                                    global_step=self.global_step)
        self.crf_decode = tf.contrib.crf.crf_decode(self.logits, 
                                                    trans_params, 
                                                    tf.count_nonzero(self.X, 1))[0]

In [9]:
tf.reset_default_graph()
sess = tf.InteractiveSession()
model = Model()
sess.run(tf.global_variables_initializer())

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [10]:
for i in range(params['n_epoch']):
    total_cost = 0
    for k in range(0,(X_train.shape[0] // params['batch_size'])*params['batch_size'],params['batch_size']):
        batch_x = X_train[k:k+params['batch_size']]
        batch_y = Y_train[k:k+params['batch_size']]
        step, loss, _ = sess.run([model.global_step, model.cost, model.optimizer],
                                feed_dict={model.X:batch_x, model.Y:batch_y})
        if step % params['display_step'] == 0 or step == 1:
            print('epoch %d, step %d, loss %f'%(i+1,step,loss))
        total_cost += loss
    total_cost /= ((X_train.shape[0] // params['batch_size']))
    print('epoch %d, avg loss %f'%(i+1,total_cost))

epoch 1, step 1, loss 75.657814
epoch 1, step 50, loss 28.158131
epoch 1, step 100, loss 18.738087
epoch 1, step 150, loss 15.287540
epoch 1, step 200, loss 16.954556
epoch 1, step 250, loss 10.207080
epoch 1, step 300, loss 9.150373
epoch 1, step 350, loss 12.479515
epoch 1, step 400, loss 9.170988
epoch 1, step 450, loss 8.027289
epoch 1, step 500, loss 8.819963
epoch 1, step 550, loss 7.745811
epoch 1, step 600, loss 9.328333
epoch 1, step 650, loss 7.119045
epoch 1, step 700, loss 4.055964
epoch 1, step 750, loss 3.544483
epoch 1, step 800, loss 7.802958
epoch 1, step 850, loss 7.132889
epoch 1, step 900, loss 7.253132
epoch 1, step 950, loss 7.442185
epoch 1, step 1000, loss 1.702451
epoch 1, step 1050, loss 6.454239
epoch 1, step 1100, loss 4.067487
epoch 1, step 1150, loss 5.580586
epoch 1, step 1200, loss 5.869375
epoch 1, step 1250, loss 4.460003
epoch 1, step 1300, loss 7.007497
epoch 1, step 1350, loss 3.235937
epoch 1, step 1400, loss 4.276751
epoch 1, step 1450, loss 2.533

In [11]:
Y_pred = sess.run(model.crf_decode,feed_dict={model.X:X_test})
print(classification_report(Y_test.ravel(), Y_pred.ravel(), target_names=tag2idx.keys()))

             precision    recall  f1-score   support

        NNP       0.88      0.89      0.89      6639
          #       0.97      0.99      0.98      5070
         WP       0.99      0.99      0.99      4020
        VBG       0.93      0.83      0.88       912
         RB       0.90      0.82      0.86      1354
        RBS       0.80      0.72      0.76      1103
          .       0.99      0.99      0.99      1177
         JJ       0.90      0.88      0.89      1269
         ''       0.82      0.84      0.83      2962
        VBN       0.88      0.85      0.86      3034
       PRP$       0.81      0.89      0.85      4803
         MD       1.00      0.99      1.00      2389
        VBZ       0.99      0.98      0.99      1214
         NN       0.96      0.96      0.96       433
        WP$       1.00      0.99      0.99      1974
        PDT       0.84      0.81      0.82       539
        POS       0.75      0.69      0.72       727
         FW       0.99      0.96      0.97   

  .format(len(labels), len(target_names))
  'precision', 'predicted', average, warn_for)
