In [1]:
import tensorflow as tf
import numpy as np
from sklearn.metrics import classification_report

  from ._conv import register_converters as _register_converters


In [2]:
word2idx = {'<pad>': 0}
tag2idx = {'<pad>': 0}
word_idx = 1
tag_idx = 1
x_train = []
y_train = []
x_test = []
y_test = []

for line in open('pos_train.txt'):
    line = line.rstrip()
    if line:
        word, tag, _ = line.split()
        if word not in word2idx:
            word2idx[word] = word_idx
            word_idx += 1
        x_train.append(word2idx[word])
        if tag not in tag2idx:
            tag2idx[tag] = tag_idx
            tag_idx += 1
        y_train.append(tag2idx[tag])
        
word2idx['<unknown>'] = word_idx

for line in open('pos_test.txt'):
    line = line.rstrip()
    if line:
        word, tag, _ = line.split()
        if word in word2idx:
            x_test.append(word2idx[word])
        else:
            x_test.append(word_idx)
        y_test.append(tag2idx[tag])

In [3]:
params = {
    'seq_len': 20,
    'batch_size': 128,
    'hidden_dim': 128,
    'clip_norm': 5.0,
    'text_iter_step': 1,
    'lr': {'start': 5e-3, 'end': 5e-4},
    'n_epoch': 1,
    'display_step': 50,
    'vocab_size': len(word2idx),
    'n_class':tag_idx,
    'kernel_sizes': [3, 5],
}

In [4]:
def iter_seq(x):
    return np.array([x[i: i+params['seq_len']] for i in range(0, len(x)-params['seq_len'], params['text_iter_step'])])

def to_train_seq(*args):
    return [iter_seq(x) for x in args]

def to_test_seq(*args):
    return [np.reshape(x[:(len(x)-len(x)%params['seq_len'])],
        [-1,params['seq_len']]) for x in args]

In [5]:
X_train, Y_train = to_train_seq(x_train, y_train)
X_test, Y_test = to_test_seq(x_test, y_test)
params['lr']['steps'] = len(X_train) // params['batch_size']

In [10]:
class Model:
    def __init__(self):
        self.X = tf.placeholder(tf.int32, [None, params['seq_len']])
        self.Y = tf.placeholder(tf.int32, [None, params['seq_len']])
        
        def rnn_cell():
            return tf.nn.rnn_cell.GRUCell(params['hidden_dim'],
                                          kernel_initializer=tf.orthogonal_initializer())
        
        def clip_grads(loss):
            variables = tf.trainable_variables()
            grads = tf.gradients(loss, variables)
            clipped_grads, _ = tf.clip_by_global_norm(grads, params['clip_norm'])
            return zip(clipped_grads, variables)
        
        self.embedding = tf.Variable(tf.truncated_normal([params['vocab_size'], params['hidden_dim']],
                                                      stddev=1.0 / np.sqrt(params['hidden_dim'])))
        embedded = tf.nn.embedding_lookup(self.embedding, self.X)
        embedded = tf.nn.dropout(embedded,0.1)
        
        pad = tf.zeros([tf.shape(embedded)[0], 1, params['hidden_dim']])
        for kernel in params['kernel_sizes']:
            n = (kernel - 1) // 2
            _embedded = tf.concat([pad]*n + [embedded] + [pad]*n, 1)
            embedded += tf.layers.conv1d(_embedded, params['hidden_dim'], kernel, activation=tf.nn.relu)
        
        self.logits = tf.layers.dense(embedded, params['n_class'])
        log_likelihood, trans_params = tf.contrib.crf.crf_log_likelihood(
        self.logits, self.Y, tf.count_nonzero(self.X, 1))
        self.cost = tf.reduce_mean(-log_likelihood)
        self.global_step = tf.Variable(0, trainable=False)
        self.learning_rate = tf.train.exponential_decay(params['lr']['start'],
                                                        self.global_step, params['lr']['steps'],
                                                        params['lr']['end']/params['lr']['start'])
        self.optimizer = tf.train.AdamOptimizer(self.learning_rate).apply_gradients(clip_grads(self.cost), 
                                                                                    global_step=self.global_step)
        self.crf_decode = tf.contrib.crf.crf_decode(self.logits, 
                                                    trans_params, 
                                                    tf.count_nonzero(self.X, 1))[0]

In [11]:
tf.reset_default_graph()
sess = tf.InteractiveSession()
model = Model()
sess.run(tf.global_variables_initializer())

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [12]:
for i in range(params['n_epoch']):
    total_cost = 0
    for k in range(0,(X_train.shape[0] // params['batch_size'])*params['batch_size'],params['batch_size']):
        batch_x = X_train[k:k+params['batch_size']]
        batch_y = Y_train[k:k+params['batch_size']]
        step, loss, _ = sess.run([model.global_step, model.cost, model.optimizer],
                                feed_dict={model.X:batch_x, model.Y:batch_y})
        if step % params['display_step'] == 0 or step == 1:
            print('epoch %d, step %d, loss %f'%(i+1,step,loss))
        total_cost += loss
    total_cost /= ((X_train.shape[0] // params['batch_size']))
    print('epoch %d, avg loss %f'%(i+1,total_cost))

epoch 1, step 1, loss 79.439484
epoch 1, step 50, loss 25.554974
epoch 1, step 100, loss 18.444744
epoch 1, step 150, loss 15.207127
epoch 1, step 200, loss 17.540133
epoch 1, step 250, loss 10.940320
epoch 1, step 300, loss 10.688293
epoch 1, step 350, loss 13.418294
epoch 1, step 400, loss 10.662128
epoch 1, step 450, loss 9.231688
epoch 1, step 500, loss 10.172064
epoch 1, step 550, loss 8.602699
epoch 1, step 600, loss 10.297527
epoch 1, step 650, loss 7.816038
epoch 1, step 700, loss 4.592223
epoch 1, step 750, loss 4.128824
epoch 1, step 800, loss 7.665007
epoch 1, step 850, loss 7.643055
epoch 1, step 900, loss 7.236738
epoch 1, step 950, loss 6.907567
epoch 1, step 1000, loss 1.634830
epoch 1, step 1050, loss 6.755506
epoch 1, step 1100, loss 4.212307
epoch 1, step 1150, loss 5.266168
epoch 1, step 1200, loss 5.731796
epoch 1, step 1250, loss 4.186990
epoch 1, step 1300, loss 7.574566
epoch 1, step 1350, loss 3.262103
epoch 1, step 1400, loss 4.344210
epoch 1, step 1450, loss 2

In [13]:
Y_pred = sess.run(model.crf_decode,feed_dict={model.X:X_test})
print(classification_report(Y_test.ravel(), Y_pred.ravel(), target_names=tag2idx.keys()))

             precision    recall  f1-score   support

        VBZ       0.88      0.89      0.89      6639
         ``       0.97      0.98      0.98      5070
        JJR       0.99      0.99      0.99      4020
         VB       0.88      0.86      0.87       912
        PRP       0.90      0.80      0.85      1354
        PDT       0.83      0.69      0.76      1103
        VBG       0.99      0.98      0.99      1177
        JJS       0.91      0.85      0.88      1269
        VBD       0.79      0.83      0.81      2962
         ''       0.89      0.83      0.86      3034
         NN       0.81      0.90      0.85      4803
          $       0.99      0.99      0.99      2389
        POS       0.99      0.97      0.98      1214
          )       0.96      0.95      0.96       433
         DT       0.99      0.99      0.99      1974
          (       0.86      0.84      0.85       539
         UH       0.80      0.67      0.73       727
        SYM       0.98      0.95      0.97   

  .format(len(labels), len(target_names))
  'precision', 'predicted', average, warn_for)
