In [1]:
import tensorflow as tf
from tqdm import tqdm
import numpy as np

In [2]:
with open('test.conll.txt') as fopen:
    corpus = fopen.read().split('\n')
    
with open('dev.conll.txt') as fopen:
    corpus_test = fopen.read().split('\n')

In [3]:
word2idx = {'PAD': 0,'NUM':1,'UNK':2}
tag2idx = {'PAD': 0}
char2idx = {'PAD': 0}
word_idx = 3
tag_idx = 1
char_idx = 1

def process_corpus(corpus, until = None):
    global word2idx, tag2idx, char2idx, word_idx, tag_idx, char_idx
    words, depends, labels = [], [], []
    temp_word, temp_depend, temp_label = [], [], []
    for sentence in corpus:
        if len(sentence):
            sentence = sentence.split('\t')
            for c in sentence[1]:
                if c not in char2idx:
                    char2idx[c] = char_idx
                    char_idx += 1
            if sentence[7] not in tag2idx:
                tag2idx[sentence[7]] = tag_idx
                tag_idx += 1
            if sentence[1] not in word2idx:
                word2idx[sentence[1]] = word_idx
                word_idx += 1
            temp_word.append(word2idx[sentence[1]])
            temp_depend.append(int(sentence[6]))
            temp_label.append(tag2idx[sentence[7]])
        else:
            words.append(temp_word)
            depends.append(temp_depend)
            labels.append(temp_label)
            temp_word = []
            temp_depend = []
            temp_label = []
    return words[:-1], depends[:-1], labels[:-1]
        
words, depends, labels = process_corpus(corpus)
words_test, depends_test, labels_test = process_corpus(corpus_test)

In [4]:
from keras.preprocessing.sequence import pad_sequences

Using TensorFlow backend.


In [5]:
words = pad_sequences(words,padding='post')
depends = pad_sequences(depends,padding='post')
labels = pad_sequences(labels,padding='post')

words_test = pad_sequences(words_test,padding='post')
depends_test = pad_sequences(depends_test,padding='post')
labels_test = pad_sequences(labels_test,padding='post')

In [6]:
idx2word = {idx: tag for tag, idx in word2idx.items()}
idx2tag = {i: w for w, i in tag2idx.items()}

train_X = words
train_Y = labels
train_depends = depends

test_X = words_test
test_Y = labels_test
test_depends = depends_test

In [7]:
maxlen = max(train_X.shape[1], test_X.shape[1])

train_X = pad_sequences(train_X,padding='post',maxlen=maxlen)
train_Y = pad_sequences(train_Y,padding='post',maxlen=maxlen)
train_depends = pad_sequences(train_depends,padding='post',maxlen=maxlen)

test_X = pad_sequences(test_X,padding='post',maxlen=maxlen)
test_Y = pad_sequences(test_Y,padding='post',maxlen=maxlen)
test_depends = pad_sequences(test_depends,padding='post',maxlen=maxlen)

In [8]:
class Attention:
    def __init__(self,hidden_size):
        self.hidden_size = hidden_size
        self.dense_layer = tf.layers.Dense(hidden_size)
        self.v = tf.random_normal([hidden_size],mean=0,stddev=1/np.sqrt(hidden_size))
        
    def score(self, hidden_tensor, encoder_outputs):
        energy = tf.nn.tanh(self.dense_layer(tf.concat([hidden_tensor,encoder_outputs],2)))
        energy = tf.transpose(energy,[0,2,1])
        batch_size = tf.shape(encoder_outputs)[0]
        v = tf.expand_dims(tf.tile(tf.expand_dims(self.v,0),[batch_size,1]),1)
        energy = tf.matmul(v,energy)
        return tf.squeeze(energy,1)
    
    def __call__(self, hidden, encoder_outputs):
        seq_len = tf.shape(encoder_outputs)[1]
        batch_size = tf.shape(encoder_outputs)[0]
        H = tf.tile(tf.expand_dims(hidden, 1),[1,seq_len,1])
        attn_energies = self.score(H,encoder_outputs)
        return tf.expand_dims(tf.nn.softmax(attn_energies),1)

class Model:
    def __init__(
        self,
        dict_size,
        size_layers,
        learning_rate,
        maxlen,
        num_blocks = 3,
        block_size = 128,
    ):
        self.word_ids = tf.placeholder(tf.int32, shape = [None, maxlen])
        self.labels = tf.placeholder(tf.int32, shape = [None, maxlen])
        self.depends = tf.placeholder(tf.int32, shape = [None, maxlen])
        embeddings = tf.Variable(tf.random_uniform([dict_size, size_layers], -1, 1))
        embedded = tf.nn.embedding_lookup(embeddings, self.word_ids)
        self.attention = Attention(size_layers)
        self.maxlen = tf.shape(self.word_ids)[1]
        self.lengths = tf.count_nonzero(self.word_ids, 1)

        def residual_block(x, size, rate, block):
            with tf.variable_scope(
                'block_%d_%d' % (block, rate), reuse = False
            ):
                attn_weights = self.attention(tf.reduce_sum(x,axis=1), x)
                conv_filter = tf.layers.conv1d(
                    attn_weights,
                    x.shape[2] // 4,
                    kernel_size = size,
                    strides = 1,
                    padding = 'same',
                    dilation_rate = rate,
                    activation = tf.nn.tanh,
                )
                conv_gate = tf.layers.conv1d(
                    x,
                    x.shape[2] // 4,
                    kernel_size = size,
                    strides = 1,
                    padding = 'same',
                    dilation_rate = rate,
                    activation = tf.nn.sigmoid,
                )
                out = tf.multiply(conv_filter, conv_gate)
                out = tf.layers.conv1d(
                    out,
                    block_size,
                    kernel_size = 1,
                    strides = 1,
                    padding = 'same',
                    activation = tf.nn.tanh,
                )
                return tf.add(x, out), out

        forward = tf.layers.conv1d(
            embedded, block_size, kernel_size = 1, strides = 1, padding = 'SAME'
        )
        zeros = tf.zeros_like(forward)
        for i in range(num_blocks):
            for r in [1, 2, 4, 8, 16]:
                forward, s = residual_block(
                    forward, size = 7, rate = r, block = i
                )
                zeros = tf.add(zeros, s)
        logits = tf.layers.conv1d(
            zeros, len(idx2tag), kernel_size = 1, strides = 1, padding = 'SAME'
        )
        logits_depends = tf.layers.conv1d(
            zeros, maxlen, kernel_size = 1, strides = 1, padding = 'SAME'
        )
        log_likelihood, transition_params = tf.contrib.crf.crf_log_likelihood(
            logits, self.labels, self.lengths
        )
        with tf.variable_scope("depends"):
            log_likelihood_depends, transition_params_depends = tf.contrib.crf.crf_log_likelihood(
                logits_depends, self.depends, self.lengths
            )
        self.cost = tf.reduce_mean(-log_likelihood) + tf.reduce_mean(-log_likelihood_depends)
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate = learning_rate
        ).minimize(self.cost)
        
        mask = tf.sequence_mask(self.lengths, maxlen = self.maxlen)
        
        self.tags_seq, _ = tf.contrib.crf.crf_decode(
            logits, transition_params, self.lengths
        )
        self.tags_seq_depends, _ = tf.contrib.crf.crf_decode(
            logits_depends, transition_params_depends, self.lengths
        )

        self.prediction = tf.boolean_mask(self.tags_seq, mask)
        mask_label = tf.boolean_mask(self.labels, mask)
        correct_pred = tf.equal(self.prediction, mask_label)
        correct_index = tf.cast(correct_pred, tf.float32)
        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
        
        self.prediction = tf.boolean_mask(self.tags_seq_depends, mask)
        mask_label = tf.boolean_mask(self.depends, mask)
        correct_pred = tf.equal(self.prediction, mask_label)
        correct_index = tf.cast(correct_pred, tf.float32)
        self.accuracy_depends = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [9]:
tf.reset_default_graph()
sess = tf.InteractiveSession()

dim = 256
dropout = 1
learning_rate = 1e-3
batch_size = 32

model = Model(len(word2idx), dim, learning_rate, maxlen)
sess.run(tf.global_variables_initializer())

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [10]:
import time

for e in range(20):
    lasttime = time.time()
    train_acc, train_loss, test_acc, test_loss, train_acc_depends, test_acc_depends = 0, 0, 0, 0, 0, 0
    pbar = tqdm(
        range(0, len(train_X), batch_size), desc = 'train minibatch loop'
    )
    for i in pbar:
        batch_x = train_X[i : min(i + batch_size, train_X.shape[0])]
        batch_y = train_Y[i : min(i + batch_size, train_X.shape[0])]
        batch_depends = train_depends[i : min(i + batch_size, train_X.shape[0])]
        acc_depends, acc, cost, _ = sess.run(
            [model.accuracy_depends, model.accuracy, model.cost, model.optimizer],
            feed_dict = {
                model.word_ids: batch_x,
                model.labels: batch_y,
                model.depends: batch_depends
            },
        )
        assert not np.isnan(cost)
        train_loss += cost
        train_acc += acc
        train_acc_depends += acc_depends
        pbar.set_postfix(cost = cost, accuracy = acc, accuracy_depends = acc_depends)
        
    pbar = tqdm(
        range(0, len(test_X), batch_size), desc = 'test minibatch loop'
    )
    for i in pbar:
        batch_x = test_X[i : min(i + batch_size, test_X.shape[0])]
        batch_y = test_Y[i : min(i + batch_size, test_X.shape[0])]
        batch_depends = test_depends[i : min(i + batch_size, test_X.shape[0])]
        acc_depends, acc, cost = sess.run(
            [model.accuracy_depends, model.accuracy, model.cost],
            feed_dict = {
                model.word_ids: batch_x,
                model.labels: batch_y,
                model.depends: batch_depends
            },
        )
        assert not np.isnan(cost)
        test_loss += cost
        test_acc += acc
        test_acc_depends += acc_depends
        pbar.set_postfix(cost = cost, accuracy = acc, accuracy_depends = acc_depends)
    
    train_loss /= len(train_X) / batch_size
    train_acc /= len(train_X) / batch_size
    train_acc_depends /= len(train_X) / batch_size
    test_loss /= len(test_X) / batch_size
    test_acc /= len(test_X) / batch_size
    test_acc_depends /= len(test_X) / batch_size

    print('time taken:', time.time() - lasttime)
    print(
        'epoch: %d, training loss: %f, training acc: %f, training depends: %f, valid loss: %f, valid acc: %f, valid depends: %f\n'
        % (e, train_loss, train_acc, train_acc_depends, test_loss, test_acc, test_acc_depends)
    )

train minibatch loop: 100%|██████████| 76/76 [00:17<00:00,  3.64it/s, accuracy=0.497, accuracy_depends=0.0582, cost=96.4]
test minibatch loop: 100%|██████████| 54/54 [00:04<00:00, 10.84it/s, accuracy=0.518, accuracy_depends=0.0636, cost=151]
train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]

time taken: 22.717660427093506
epoch: 0, training loss: 153.183009, training acc: 0.294935, training depends: 0.048782, valid loss: 129.335049, valid acc: 0.491078, valid depends: 0.077788



train minibatch loop: 100%|██████████| 76/76 [00:15<00:00,  5.47it/s, accuracy=0.634, accuracy_depends=0.171, cost=71.4]
test minibatch loop: 100%|██████████| 54/54 [00:04<00:00, 12.90it/s, accuracy=0.755, accuracy_depends=0.145, cost=113] 
train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]

time taken: 20.58572268486023
epoch: 1, training loss: 110.329521, training acc: 0.580832, training depends: 0.113730, valid loss: 100.057401, valid acc: 0.659700, valid depends: 0.141831



train minibatch loop: 100%|██████████| 76/76 [00:15<00:00,  5.48it/s, accuracy=0.726, accuracy_depends=0.226, cost=59.4]
test minibatch loop: 100%|██████████| 54/54 [00:04<00:00, 12.67it/s, accuracy=0.818, accuracy_depends=0.127, cost=97.3]
train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s, accuracy=0.739, accuracy_depends=0.168, cost=79.6]

time taken: 20.490660667419434
epoch: 2, training loss: 88.406940, training acc: 0.712373, training depends: 0.173413, valid loss: 88.115075, valid acc: 0.722415, valid depends: 0.169411



train minibatch loop: 100%|██████████| 76/76 [00:15<00:00,  5.35it/s, accuracy=0.805, accuracy_depends=0.281, cost=50.7]
test minibatch loop: 100%|██████████| 54/54 [00:04<00:00, 11.75it/s, accuracy=0.8, accuracy_depends=0.155, cost=90.2]  
train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]

time taken: 20.47122573852539
epoch: 3, training loss: 76.711710, training acc: 0.783817, training depends: 0.216270, valid loss: 81.675866, valid acc: 0.755237, valid depends: 0.195363



train minibatch loop: 100%|██████████| 76/76 [00:16<00:00,  5.43it/s, accuracy=0.873, accuracy_depends=0.349, cost=43.6]
test minibatch loop: 100%|██████████| 54/54 [00:04<00:00, 12.81it/s, accuracy=0.818, accuracy_depends=0.182, cost=85.8]
train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]

time taken: 20.655714988708496
epoch: 4, training loss: 68.472157, training acc: 0.831365, training depends: 0.259200, valid loss: 77.894161, valid acc: 0.775189, valid depends: 0.222546



train minibatch loop: 100%|██████████| 76/76 [00:15<00:00,  5.49it/s, accuracy=0.938, accuracy_depends=0.38, cost=37.7] 
test minibatch loop: 100%|██████████| 54/54 [00:04<00:00, 11.73it/s, accuracy=0.827, accuracy_depends=0.245, cost=83.4]
train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]

time taken: 20.527581453323364
epoch: 5, training loss: 61.882243, training acc: 0.866012, training depends: 0.299192, valid loss: 75.928109, valid acc: 0.782752, valid depends: 0.242099



train minibatch loop: 100%|██████████| 76/76 [00:15<00:00,  5.48it/s, accuracy=0.962, accuracy_depends=0.469, cost=32.9]
test minibatch loop: 100%|██████████| 54/54 [00:04<00:00, 12.72it/s, accuracy=0.827, accuracy_depends=0.245, cost=82.1]
train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]

time taken: 20.484257698059082
epoch: 6, training loss: 56.255667, training acc: 0.893967, training depends: 0.341305, valid loss: 75.448544, valid acc: 0.786553, valid depends: 0.254019



train minibatch loop: 100%|██████████| 76/76 [00:15<00:00,  5.38it/s, accuracy=0.979, accuracy_depends=0.534, cost=28.7]
test minibatch loop: 100%|██████████| 54/54 [00:04<00:00, 12.78it/s, accuracy=0.827, accuracy_depends=0.236, cost=82.8]
train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]

time taken: 20.61243438720703
epoch: 7, training loss: 51.263502, training acc: 0.917225, training depends: 0.383208, valid loss: 76.300179, valid acc: 0.783883, valid depends: 0.259615



train minibatch loop: 100%|██████████| 76/76 [00:15<00:00,  5.49it/s, accuracy=0.986, accuracy_depends=0.579, cost=25.1]
test minibatch loop: 100%|██████████| 54/54 [00:04<00:00, 13.08it/s, accuracy=0.8, accuracy_depends=0.218, cost=86.5]  
train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]

time taken: 20.4697368144989
epoch: 8, training loss: 46.744222, training acc: 0.934576, training depends: 0.428258, valid loss: 77.713942, valid acc: 0.784674, valid depends: 0.261951



train minibatch loop: 100%|██████████| 76/76 [00:15<00:00,  5.40it/s, accuracy=0.986, accuracy_depends=0.644, cost=21.8]
test minibatch loop: 100%|██████████| 54/54 [00:04<00:00, 12.54it/s, accuracy=0.836, accuracy_depends=0.209, cost=94.3]
train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]

time taken: 20.674675703048706
epoch: 9, training loss: 42.561942, training acc: 0.949874, training depends: 0.470683, valid loss: 81.004866, valid acc: 0.780429, valid depends: 0.263705



train minibatch loop: 100%|██████████| 76/76 [00:15<00:00,  5.46it/s, accuracy=0.986, accuracy_depends=0.705, cost=19.1]
test minibatch loop: 100%|██████████| 54/54 [00:04<00:00, 12.57it/s, accuracy=0.855, accuracy_depends=0.264, cost=89.2]
train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]

time taken: 20.510538816452026
epoch: 10, training loss: 38.979279, training acc: 0.960487, training depends: 0.507692, valid loss: 79.328121, valid acc: 0.784113, valid depends: 0.287815



train minibatch loop: 100%|██████████| 76/76 [00:15<00:00,  5.51it/s, accuracy=0.993, accuracy_depends=0.777, cost=15.1]
test minibatch loop: 100%|██████████| 54/54 [00:04<00:00, 12.50it/s, accuracy=0.836, accuracy_depends=0.3, cost=85.7]  
train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]

time taken: 20.53708791732788
epoch: 11, training loss: 35.629965, training acc: 0.968517, training depends: 0.553509, valid loss: 81.083164, valid acc: 0.784796, valid depends: 0.287922



train minibatch loop: 100%|██████████| 76/76 [00:15<00:00,  5.47it/s, accuracy=0.997, accuracy_depends=0.846, cost=13.3]
test minibatch loop: 100%|██████████| 54/54 [00:04<00:00, 12.81it/s, accuracy=0.818, accuracy_depends=0.291, cost=96.6]
train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]

time taken: 20.565435647964478
epoch: 12, training loss: 32.694344, training acc: 0.974347, training depends: 0.593964, valid loss: 84.288952, valid acc: 0.781696, valid depends: 0.291779



train minibatch loop: 100%|██████████| 76/76 [00:15<00:00,  5.49it/s, accuracy=1, accuracy_depends=0.904, cost=10.5]    
test minibatch loop: 100%|██████████| 54/54 [00:04<00:00, 12.77it/s, accuracy=0.8, accuracy_depends=0.309, cost=97]    
train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]

time taken: 20.49126100540161
epoch: 13, training loss: 28.929201, training acc: 0.978603, training depends: 0.648120, valid loss: 87.836118, valid acc: 0.779976, valid depends: 0.289016



train minibatch loop: 100%|██████████| 76/76 [00:15<00:00,  5.50it/s, accuracy=1, accuracy_depends=0.914, cost=9.43]    
test minibatch loop: 100%|██████████| 54/54 [00:04<00:00, 12.41it/s, accuracy=0.809, accuracy_depends=0.364, cost=94]  
train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]

time taken: 20.524020671844482
epoch: 14, training loss: 26.025858, training acc: 0.981591, training depends: 0.689773, valid loss: 89.180642, valid acc: 0.776449, valid depends: 0.298843



train minibatch loop: 100%|██████████| 76/76 [00:15<00:00,  5.26it/s, accuracy=1, accuracy_depends=0.918, cost=7.58]    
test minibatch loop: 100%|██████████| 54/54 [00:04<00:00, 11.67it/s, accuracy=0.8, accuracy_depends=0.318, cost=99.5]  
train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]

time taken: 20.600938320159912
epoch: 15, training loss: 23.192569, training acc: 0.987144, training depends: 0.726383, valid loss: 94.854855, valid acc: 0.770395, valid depends: 0.286692



train minibatch loop: 100%|██████████| 76/76 [00:15<00:00,  5.36it/s, accuracy=0.997, accuracy_depends=0.955, cost=5.65]
test minibatch loop: 100%|██████████| 54/54 [00:04<00:00, 12.75it/s, accuracy=0.809, accuracy_depends=0.355, cost=102] 
train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]

time taken: 20.59098505973816
epoch: 16, training loss: 20.472838, training acc: 0.992866, training depends: 0.764563, valid loss: 97.303479, valid acc: 0.769268, valid depends: 0.287900



train minibatch loop: 100%|██████████| 76/76 [00:16<00:00,  5.41it/s, accuracy=1, accuracy_depends=0.986, cost=4.23]    
test minibatch loop: 100%|██████████| 54/54 [00:04<00:00, 12.59it/s, accuracy=0.809, accuracy_depends=0.336, cost=104] 
train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]

time taken: 20.69632053375244
epoch: 17, training loss: 17.953982, training acc: 0.995921, training depends: 0.801940, valid loss: 100.876137, valid acc: 0.769323, valid depends: 0.283341



train minibatch loop: 100%|██████████| 76/76 [00:16<00:00,  5.31it/s, accuracy=1, accuracy_depends=0.983, cost=3.82]    
test minibatch loop: 100%|██████████| 54/54 [00:04<00:00, 12.80it/s, accuracy=0.809, accuracy_depends=0.336, cost=102] 
train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]

time taken: 20.720519304275513
epoch: 18, training loss: 15.907509, training acc: 0.998263, training depends: 0.829425, valid loss: 103.454220, valid acc: 0.772234, valid depends: 0.287102



train minibatch loop: 100%|██████████| 76/76 [00:15<00:00,  5.40it/s, accuracy=1, accuracy_depends=0.973, cost=3.46]    
test minibatch loop: 100%|██████████| 54/54 [00:04<00:00, 12.69it/s, accuracy=0.845, accuracy_depends=0.409, cost=101] 

time taken: 20.618144035339355
epoch: 19, training loss: 14.017608, training acc: 0.999365, training depends: 0.851850, valid loss: 108.007175, valid acc: 0.771574, valid depends: 0.283096






In [11]:
seq, deps = sess.run([model.tags_seq, model.tags_seq_depends],
        feed_dict={model.word_ids:batch_x[:1]})

In [12]:
seq = seq[0]
deps = deps[0]

In [13]:
seq[seq>0]

array([18, 19,  2,  6,  3,  7, 16, 18, 23, 20, 19,  2], dtype=int32)

In [14]:
batch_y[0][seq>0]

array([18, 19,  2,  6,  3,  7, 16, 18, 23, 20, 19,  2], dtype=int32)

In [15]:
deps[seq>0]

array([ 2, 14, 11,  5,  6,  0,  3, 11, 11, 11,  6,  6], dtype=int32)

In [16]:
batch_depends[0][seq>0]

array([ 2,  6,  6,  5,  6,  0,  6, 11, 11, 11,  6,  6], dtype=int32)