In [1]:
import tensorflow as tf
from tqdm import tqdm
import numpy as np

In [2]:
with open('test.conll.txt') as fopen:
    corpus = fopen.read().split('\n')
    
with open('dev.conll.txt') as fopen:
    corpus_test = fopen.read().split('\n')

In [3]:
word2idx = {'PAD': 0,'NUM':1,'UNK':2}
tag2idx = {'PAD': 0}
char2idx = {'PAD': 0,'NUM':1,'UNK':2}
word_idx = 3
tag_idx = 1
char_idx = 3

def process_corpus(corpus, until = None):
    global word2idx, tag2idx, char2idx, word_idx, tag_idx, char_idx
    sentences, words, depends, labels = [], [], [], []
    temp_sentence, temp_word, temp_depend, temp_label = [], [], [], []
    for sentence in corpus:
        if len(sentence):
            sentence = sentence.split('\t')
            for c in sentence[1]:
                if c not in char2idx:
                    char2idx[c] = char_idx
                    char_idx += 1
            if sentence[7] not in tag2idx:
                tag2idx[sentence[7]] = tag_idx
                tag_idx += 1
            if sentence[1] not in word2idx:
                word2idx[sentence[1]] = word_idx
                word_idx += 1
            temp_word.append(word2idx[sentence[1]])
            temp_depend.append(int(sentence[6]))
            temp_label.append(tag2idx[sentence[7]])
            temp_sentence.append(sentence[1])
        else:
            words.append(temp_word)
            depends.append(temp_depend)
            labels.append(temp_label)
            sentences.append(temp_sentence)
            temp_word = []
            temp_depend = []
            temp_label = []
            temp_sentence = []
    return sentences[:-1], words[:-1], depends[:-1], labels[:-1]
        
sentences, words, depends, labels = process_corpus(corpus)
sentences_test, words_test, depends_test, labels_test = process_corpus(corpus_test)

In [4]:
from keras.preprocessing.sequence import pad_sequences

Using TensorFlow backend.


In [5]:
words = pad_sequences(words,padding='post')
depends = pad_sequences(depends,padding='post')
labels = pad_sequences(labels,padding='post')

words_test = pad_sequences(words_test,padding='post')
depends_test = pad_sequences(depends_test,padding='post')
labels_test = pad_sequences(labels_test,padding='post')

In [6]:
words_test.shape

(1700, 118)

In [7]:
def generate_char_seq(batch, UNK = 2):
    maxlen_c = max([len(k) for k in batch])
    x = [[len(i) for i in k] for k in batch]
    maxlen = max([j for i in x for j in i])
    temp = np.zeros((len(batch),maxlen_c,maxlen),dtype=np.int32)
    for i in range(len(batch)):
        for k in range(len(batch[i])):
            for no, c in enumerate(batch[i][k]):
                temp[i,k,-1-no] = char2idx.get(c, UNK)
    return temp

In [8]:
idx2word = {idx: tag for tag, idx in word2idx.items()}
idx2tag = {i: w for w, i in tag2idx.items()}

train_X = words
train_Y = labels
train_depends = depends
train_char = generate_char_seq(sentences)

test_X = words_test
test_Y = labels_test
test_depends = depends_test
test_char = generate_char_seq(sentences_test)

In [9]:
class Model:
    def __init__(
        self,
        dim_word,
        dim_char,
        dropout,
        learning_rate,
        hidden_size_char,
        hidden_size_word,
        num_layers,
        maxlen
    ):
        def cells(size, reuse = False):
            return tf.contrib.rnn.DropoutWrapper(
                tf.nn.rnn_cell.LSTMCell(
                    size,
                    initializer = tf.orthogonal_initializer(),
                    reuse = reuse,
                ),
                output_keep_prob = dropout,
            )

        def bahdanau(embedded, size):
            attention_mechanism = tf.contrib.seq2seq.LuongAttention(
                num_units = hidden_size_word, memory = embedded
            )
            return tf.contrib.seq2seq.AttentionWrapper(
                cell = cells(hidden_size_word),
                attention_mechanism = attention_mechanism,
                attention_layer_size = hidden_size_word,
            )

        self.word_ids = tf.placeholder(tf.int32, shape = [None, None])
        self.char_ids = tf.placeholder(tf.int32, shape = [None, None, None])
        self.labels = tf.placeholder(tf.int32, shape = [None, None])
        self.depends = tf.placeholder(tf.int32, shape = [None, None])
        self.maxlen = tf.shape(self.word_ids)[1]
        self.lengths = tf.count_nonzero(self.word_ids, 1)

        self.word_embeddings = tf.Variable(
            tf.truncated_normal(
                [len(word2idx), dim_word], stddev = 1.0 / np.sqrt(dim_word)
            )
        )
        self.char_embeddings = tf.Variable(
            tf.truncated_normal(
                [len(char2idx), dim_char], stddev = 1.0 / np.sqrt(dim_char)
            )
        )

        word_embedded = tf.nn.embedding_lookup(
            self.word_embeddings, self.word_ids
        )
        char_embedded = tf.nn.embedding_lookup(
            self.char_embeddings, self.char_ids
        )
        s = tf.shape(char_embedded)
        char_embedded = tf.reshape(
            char_embedded, shape = [s[0] * s[1], s[-2], dim_char]
        )

        for n in range(num_layers):
            (out_fw, out_bw), (
                state_fw,
                state_bw,
            ) = tf.nn.bidirectional_dynamic_rnn(
                cell_fw = cells(hidden_size_char),
                cell_bw = cells(hidden_size_char),
                inputs = char_embedded,
                dtype = tf.float32,
                scope = 'bidirectional_rnn_char_%d' % (n),
            )
            char_embedded = tf.concat((out_fw, out_bw), 2)
        output = tf.reshape(
            char_embedded[:, -1], shape = [s[0], s[1], 2 * hidden_size_char]
        )
        word_embedded = tf.concat([word_embedded, output], axis = -1)

        for n in range(num_layers):
            (out_fw, out_bw), (
                state_fw,
                state_bw,
            ) = tf.nn.bidirectional_dynamic_rnn(
                cell_fw = bahdanau(word_embedded, hidden_size_word),
                cell_bw = bahdanau(word_embedded, hidden_size_word),
                inputs = word_embedded,
                dtype = tf.float32,
                scope = 'bidirectional_rnn_word_%d' % (n),
            )
            word_embedded = tf.concat((out_fw, out_bw), 2)

        logits = tf.layers.dense(word_embedded, len(idx2tag))
        logits_depends = tf.layers.dense(word_embedded, maxlen)
        log_likelihood, transition_params = tf.contrib.crf.crf_log_likelihood(
            logits, self.labels, self.lengths
        )
        with tf.variable_scope("depends"):
            log_likelihood_depends, transition_params_depends = tf.contrib.crf.crf_log_likelihood(
                logits_depends, self.depends, self.lengths
            )
        self.cost = tf.reduce_mean(-log_likelihood) + tf.reduce_mean(-log_likelihood_depends)
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate = learning_rate
        ).minimize(self.cost)
        
        mask = tf.sequence_mask(self.lengths, maxlen = self.maxlen)
        
        self.tags_seq, _ = tf.contrib.crf.crf_decode(
            logits, transition_params, self.lengths
        )
        self.tags_seq_depends, _ = tf.contrib.crf.crf_decode(
            logits_depends, transition_params_depends, self.lengths
        )

        self.prediction = tf.boolean_mask(self.tags_seq, mask)
        mask_label = tf.boolean_mask(self.labels, mask)
        correct_pred = tf.equal(self.prediction, mask_label)
        correct_index = tf.cast(correct_pred, tf.float32)
        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
        
        self.prediction = tf.boolean_mask(self.tags_seq_depends, mask)
        mask_label = tf.boolean_mask(self.depends, mask)
        correct_pred = tf.equal(self.prediction, mask_label)
        correct_index = tf.cast(correct_pred, tf.float32)
        self.accuracy_depends = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [10]:
tf.reset_default_graph()
sess = tf.InteractiveSession()

dim_word = 128
dim_char = 256
dropout = 1
learning_rate = 1e-3
hidden_size_char = 64
hidden_size_word = 64
num_layers = 2
batch_size = 32

model = Model(dim_word,dim_char,dropout,learning_rate,hidden_size_char,hidden_size_word,num_layers,
             words.shape[1])
sess.run(tf.global_variables_initializer())

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [11]:
import time

for e in range(20):
    lasttime = time.time()
    train_acc, train_loss, test_acc, test_loss, train_acc_depends, test_acc_depends = 0, 0, 0, 0, 0, 0
    pbar = tqdm(
        range(0, len(train_X), batch_size), desc = 'train minibatch loop'
    )
    for i in pbar:
        batch_x = train_X[i : min(i + batch_size, train_X.shape[0])]
        batch_char = train_char[i : min(i + batch_size, train_X.shape[0])]
        batch_y = train_Y[i : min(i + batch_size, train_X.shape[0])]
        batch_depends = train_depends[i : min(i + batch_size, train_X.shape[0])]
        acc_depends, acc, cost, _ = sess.run(
            [model.accuracy_depends, model.accuracy, model.cost, model.optimizer],
            feed_dict = {
                model.word_ids: batch_x,
                model.char_ids: batch_char,
                model.labels: batch_y,
                model.depends: batch_depends
            },
        )
        assert not np.isnan(cost)
        train_loss += cost
        train_acc += acc
        train_acc_depends += acc_depends
        pbar.set_postfix(cost = cost, accuracy = acc, accuracy_depends = acc_depends)
        
    pbar = tqdm(
        range(0, len(test_X), batch_size), desc = 'test minibatch loop'
    )
    for i in pbar:
        batch_x = test_X[i : min(i + batch_size, test_X.shape[0])]
        batch_char = test_char[i : min(i + batch_size, test_X.shape[0])]
        batch_y = test_Y[i : min(i + batch_size, test_X.shape[0])]
        batch_depends = test_depends[i : min(i + batch_size, test_X.shape[0])]
        acc_depends, acc, cost = sess.run(
            [model.accuracy_depends, model.accuracy, model.cost],
            feed_dict = {
                model.word_ids: batch_x,
                model.char_ids: batch_char,
                model.labels: batch_y,
                model.depends: batch_depends
            },
        )
        assert not np.isnan(cost)
        test_loss += cost
        test_acc += acc
        test_acc_depends += acc_depends
        pbar.set_postfix(cost = cost, accuracy = acc, accuracy_depends = acc_depends)
    
    train_loss /= len(train_X) / batch_size
    train_acc /= len(train_X) / batch_size
    train_acc_depends /= len(train_X) / batch_size
    test_loss /= len(test_X) / batch_size
    test_acc /= len(test_X) / batch_size
    test_acc_depends /= len(test_X) / batch_size

    print('time taken:', time.time() - lasttime)
    print(
        'epoch: %d, training loss: %f, training acc: %f, training depends: %f, valid loss: %f, valid acc: %f, valid depends: %f\n'
        % (e, train_loss, train_acc, train_acc_depends, test_loss, test_acc, test_acc_depends)
    )

train minibatch loop: 100%|██████████| 76/76 [00:40<00:00,  2.05it/s, accuracy=0.106, accuracy_depends=0.14, cost=105]   
test minibatch loop: 100%|██████████| 54/54 [00:16<00:00,  3.87it/s, accuracy=0.136, accuracy_depends=0.0455, cost=164]
train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]

time taken: 56.09716725349426
epoch: 0, training loss: 150.300666, training acc: 0.128670, training depends: 0.081010, valid loss: 141.028491, valid acc: 0.147059, valid depends: 0.120955



train minibatch loop: 100%|██████████| 76/76 [00:39<00:00,  2.06it/s, accuracy=0.298, accuracy_depends=0.161, cost=90.5]
test minibatch loop: 100%|██████████| 54/54 [00:15<00:00,  3.83it/s, accuracy=0.309, accuracy_depends=0.118, cost=143] 
train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]

time taken: 55.58958339691162
epoch: 1, training loss: 130.642711, training acc: 0.227523, training depends: 0.128279, valid loss: 124.700015, valid acc: 0.309620, valid depends: 0.129380



train minibatch loop: 100%|██████████| 76/76 [00:39<00:00,  2.08it/s, accuracy=0.517, accuracy_depends=0.195, cost=74.7]
test minibatch loop: 100%|██████████| 54/54 [00:15<00:00,  3.83it/s, accuracy=0.518, accuracy_depends=0.145, cost=121] 
train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]

time taken: 55.429301023483276
epoch: 2, training loss: 109.336289, training acc: 0.440639, training depends: 0.154167, valid loss: 105.495260, valid acc: 0.495497, valid depends: 0.153521



train minibatch loop: 100%|██████████| 76/76 [00:39<00:00,  2.08it/s, accuracy=0.562, accuracy_depends=0.236, cost=67.6]
test minibatch loop: 100%|██████████| 54/54 [00:15<00:00,  3.83it/s, accuracy=0.573, accuracy_depends=0.2, cost=115]   
train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]

time taken: 55.509848833084106
epoch: 3, training loss: 96.279404, training acc: 0.549312, training depends: 0.185349, valid loss: 99.562834, valid acc: 0.568718, valid depends: 0.161316



train minibatch loop: 100%|██████████| 76/76 [00:39<00:00,  2.08it/s, accuracy=0.682, accuracy_depends=0.257, cost=59.7]
test minibatch loop: 100%|██████████| 54/54 [00:15<00:00,  3.84it/s, accuracy=0.627, accuracy_depends=0.2, cost=105]   
train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]

time taken: 55.52307176589966
epoch: 4, training loss: 86.934430, training acc: 0.639515, training depends: 0.214143, valid loss: 90.451283, valid acc: 0.642919, valid depends: 0.181113



train minibatch loop: 100%|██████████| 76/76 [00:39<00:00,  2.09it/s, accuracy=0.747, accuracy_depends=0.274, cost=53]  
test minibatch loop: 100%|██████████| 54/54 [00:15<00:00,  3.88it/s, accuracy=0.673, accuracy_depends=0.209, cost=99.6]
train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]

time taken: 55.530587673187256
epoch: 5, training loss: 79.217252, training acc: 0.707435, training depends: 0.240276, valid loss: 85.398946, valid acc: 0.691384, valid depends: 0.198120



train minibatch loop: 100%|██████████| 76/76 [00:39<00:00,  2.08it/s, accuracy=0.801, accuracy_depends=0.353, cost=48.9]
test minibatch loop: 100%|██████████| 54/54 [00:15<00:00,  3.91it/s, accuracy=0.709, accuracy_depends=0.182, cost=94.8]
train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]

time taken: 55.583584785461426
epoch: 6, training loss: 72.303042, training acc: 0.762662, training depends: 0.274533, valid loss: 82.404467, valid acc: 0.727524, valid depends: 0.189771



train minibatch loop: 100%|██████████| 76/76 [00:39<00:00,  2.09it/s, accuracy=0.805, accuracy_depends=0.377, cost=43.4]
test minibatch loop: 100%|██████████| 54/54 [00:15<00:00,  3.87it/s, accuracy=0.727, accuracy_depends=0.191, cost=90.9]
train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]

time taken: 55.58053755760193
epoch: 7, training loss: 65.055744, training acc: 0.798943, training depends: 0.321437, valid loss: 80.717043, valid acc: 0.746362, valid depends: 0.196639



train minibatch loop: 100%|██████████| 76/76 [00:39<00:00,  2.09it/s, accuracy=0.853, accuracy_depends=0.455, cost=38.9]
test minibatch loop: 100%|██████████| 54/54 [00:15<00:00,  3.86it/s, accuracy=0.745, accuracy_depends=0.209, cost=93.3]
train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]

time taken: 55.67471981048584
epoch: 8, training loss: 58.739642, training acc: 0.827087, training depends: 0.377910, valid loss: 81.661547, valid acc: 0.749696, valid depends: 0.195816



train minibatch loop: 100%|██████████| 76/76 [00:39<00:00,  2.08it/s, accuracy=0.866, accuracy_depends=0.527, cost=35.2]
test minibatch loop: 100%|██████████| 54/54 [00:15<00:00,  3.85it/s, accuracy=0.727, accuracy_depends=0.145, cost=101] 
train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]

time taken: 55.61992311477661
epoch: 9, training loss: 54.076346, training acc: 0.848619, training depends: 0.417288, valid loss: 80.947128, valid acc: 0.767324, valid depends: 0.209349



train minibatch loop: 100%|██████████| 76/76 [00:39<00:00,  2.09it/s, accuracy=0.904, accuracy_depends=0.507, cost=33]  
test minibatch loop: 100%|██████████| 54/54 [00:15<00:00,  3.90it/s, accuracy=0.782, accuracy_depends=0.209, cost=91.2]
train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]

time taken: 55.548739194869995
epoch: 10, training loss: 50.326555, training acc: 0.863248, training depends: 0.458952, valid loss: 79.820367, valid acc: 0.774822, valid depends: 0.222942



train minibatch loop: 100%|██████████| 76/76 [00:39<00:00,  2.06it/s, accuracy=0.911, accuracy_depends=0.558, cost=30.4]
test minibatch loop: 100%|██████████| 54/54 [00:15<00:00,  3.83it/s, accuracy=0.791, accuracy_depends=0.227, cost=89.9]
train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]

time taken: 55.604267597198486
epoch: 11, training loss: 45.569131, training acc: 0.877704, training depends: 0.509152, valid loss: 80.193576, valid acc: 0.779312, valid depends: 0.218611



train minibatch loop: 100%|██████████| 76/76 [00:39<00:00,  2.08it/s, accuracy=0.928, accuracy_depends=0.688, cost=23.6]
test minibatch loop: 100%|██████████| 54/54 [00:15<00:00,  3.85it/s, accuracy=0.791, accuracy_depends=0.145, cost=91.4]
train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]

time taken: 55.518343448638916
epoch: 12, training loss: 41.137693, training acc: 0.893106, training depends: 0.548518, valid loss: 82.710994, valid acc: 0.784206, valid depends: 0.220646



train minibatch loop: 100%|██████████| 76/76 [00:40<00:00,  2.08it/s, accuracy=0.935, accuracy_depends=0.678, cost=24.9]
test minibatch loop: 100%|██████████| 54/54 [00:15<00:00,  3.84it/s, accuracy=0.809, accuracy_depends=0.164, cost=97.1]
train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]

time taken: 55.76531481742859
epoch: 13, training loss: 37.963494, training acc: 0.906679, training depends: 0.583725, valid loss: 82.073511, valid acc: 0.782869, valid depends: 0.243221



train minibatch loop: 100%|██████████| 76/76 [00:39<00:00,  2.10it/s, accuracy=0.942, accuracy_depends=0.733, cost=18.8]
test minibatch loop: 100%|██████████| 54/54 [00:15<00:00,  3.86it/s, accuracy=0.836, accuracy_depends=0.145, cost=101] 
train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]

time taken: 55.35486125946045
epoch: 14, training loss: 34.554710, training acc: 0.917006, training depends: 0.620208, valid loss: 85.657195, valid acc: 0.784520, valid depends: 0.241463



train minibatch loop: 100%|██████████| 76/76 [00:39<00:00,  2.10it/s, accuracy=0.966, accuracy_depends=0.781, cost=15.8]
test minibatch loop: 100%|██████████| 54/54 [00:15<00:00,  3.89it/s, accuracy=0.864, accuracy_depends=0.173, cost=103] 
train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]

time taken: 55.17195224761963
epoch: 15, training loss: 31.398126, training acc: 0.924450, training depends: 0.656583, valid loss: 85.506386, valid acc: 0.793804, valid depends: 0.255798



train minibatch loop: 100%|██████████| 76/76 [00:39<00:00,  2.06it/s, accuracy=0.935, accuracy_depends=0.774, cost=15.4]
test minibatch loop: 100%|██████████| 54/54 [00:15<00:00,  3.83it/s, accuracy=0.818, accuracy_depends=0.164, cost=98.7]
train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]

time taken: 55.265546560287476
epoch: 16, training loss: 28.956760, training acc: 0.932248, training depends: 0.678523, valid loss: 84.795803, valid acc: 0.796438, valid depends: 0.264498



train minibatch loop: 100%|██████████| 76/76 [00:39<00:00,  2.10it/s, accuracy=0.959, accuracy_depends=0.75, cost=16.2] 
test minibatch loop: 100%|██████████| 54/54 [00:15<00:00,  3.90it/s, accuracy=0.8, accuracy_depends=0.164, cost=111]   
train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]

time taken: 55.26459813117981
epoch: 17, training loss: 27.902658, training acc: 0.938587, training depends: 0.685745, valid loss: 90.332116, valid acc: 0.796167, valid depends: 0.239845



train minibatch loop: 100%|██████████| 76/76 [00:39<00:00,  2.08it/s, accuracy=0.959, accuracy_depends=0.856, cost=12.1]
test minibatch loop: 100%|██████████| 54/54 [00:15<00:00,  3.89it/s, accuracy=0.827, accuracy_depends=0.191, cost=102] 
train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]

time taken: 55.02435898780823
epoch: 18, training loss: 24.752691, training acc: 0.943680, training depends: 0.727282, valid loss: 88.909203, valid acc: 0.802451, valid depends: 0.263541



train minibatch loop: 100%|██████████| 76/76 [00:39<00:00,  2.12it/s, accuracy=0.976, accuracy_depends=0.877, cost=9.05]
test minibatch loop: 100%|██████████| 54/54 [00:15<00:00,  3.92it/s, accuracy=0.836, accuracy_depends=0.155, cost=110] 

time taken: 55.076401472091675
epoch: 19, training loss: 21.722709, training acc: 0.951147, training depends: 0.767359, valid loss: 92.559914, valid acc: 0.800110, valid depends: 0.274829






In [12]:
seq, deps = sess.run([model.tags_seq, model.tags_seq_depends],
        feed_dict={model.word_ids:batch_x[:1],
                  model.char_ids:batch_char[:1]})

In [13]:
seq = seq[0]
deps = deps[0]

In [14]:
seq[seq>0]

array([18, 19,  2,  6,  3,  4, 16, 18, 23, 20, 19,  2], dtype=int32)

In [15]:
batch_y[0][seq>0]

array([18, 19,  2,  6,  3,  7, 16, 18, 23, 20, 19,  2], dtype=int32)

In [16]:
deps[seq>0]

array([ 2,  4,  4,  4,  8,  8,  4, 10, 12, 12,  8,  4], dtype=int32)

In [17]:
batch_depends[0][seq>0]

array([ 2,  6,  6,  5,  6,  0,  6, 11, 11, 11,  6,  6], dtype=int32)