In [1]:
import tensorflow as tf
from tqdm import tqdm
import numpy as np
import re

In [2]:
with open('gsd-ud-train.conllu.txt') as fopen:
    corpus = fopen.read().split('\n')
    
with open('gsd-ud-test.conllu.txt') as fopen:
    corpus.extend(fopen.read().split('\n'))
    
with open('gsd-ud-dev.conllu.txt') as fopen:
    corpus.extend(fopen.read().split('\n'))

In [3]:
word2idx = {'PAD': 0,'NUM':1,'UNK':2}
tag2idx = {'PAD': 0}
char2idx = {'PAD': 0,'NUM':1,'UNK':2}
word_idx = 3
tag_idx = 1
char_idx = 3

def process_string(string):
    string = re.sub('[^A-Za-z0-9\-\/ ]+', ' ', string).split()
    return [to_title(y.strip()) for y in string]

def to_title(string):
    if string.isupper():
        string = string.title()
    return string

def process_corpus(corpus, until = None):
    global word2idx, tag2idx, char2idx, word_idx, tag_idx, char_idx
    sentences, words, depends, labels = [], [], [], []
    temp_sentence, temp_word, temp_depend, temp_label = [], [], [], []
    for sentence in corpus:
        if len(sentence):
            if sentence[0] == '#':
                continue
            sentence = sentence.split('\t')
            temp = process_string(sentence[1])
            if not len(temp):
                sentence[1] = 'EMPTY'
            sentence[1] = process_string(sentence[1])[0]
            for c in sentence[1]:
                if c not in char2idx:
                    char2idx[c] = char_idx
                    char_idx += 1
            if sentence[7] not in tag2idx:
                tag2idx[sentence[7]] = tag_idx
                tag_idx += 1
            if sentence[1] not in word2idx:
                word2idx[sentence[1]] = word_idx
                word_idx += 1
            temp_word.append(word2idx[sentence[1]])
            temp_depend.append(int(sentence[6]) + 1)
            temp_label.append(tag2idx[sentence[7]])
            temp_sentence.append(sentence[1])
        else:
            words.append(temp_word)
            depends.append(temp_depend)
            labels.append(temp_label)
            sentences.append(temp_sentence)
            temp_word = []
            temp_depend = []
            temp_label = []
            temp_sentence = []
    return sentences[:-1], words[:-1], depends[:-1], labels[:-1]
        
sentences, words, depends, labels = process_corpus(corpus)

In [4]:
from keras.preprocessing.sequence import pad_sequences

Using TensorFlow backend.


In [5]:
words = pad_sequences(words,padding='post')
depends = pad_sequences(depends,padding='post')
labels = pad_sequences(labels,padding='post')
words.shape

(5595, 189)

In [6]:
def generate_char_seq(batch, UNK = 2):
    maxlen_c = max([len(k) for k in batch])
    x = [[len(i) for i in k] for k in batch]
    maxlen = max([j for i in x for j in i])
    temp = np.zeros((len(batch),maxlen_c,maxlen),dtype=np.int32)
    for i in range(len(batch)):
        for k in range(len(batch[i])):
            for no, c in enumerate(batch[i][k][:maxlen][::-1]):
                temp[i,k,-1-no] = char2idx.get(c, UNK)
    return temp

In [7]:
idx2word = {idx: tag for tag, idx in word2idx.items()}
idx2tag = {i: w for w, i in tag2idx.items()}
char = generate_char_seq(sentences)

In [8]:
from sklearn.cross_validation import train_test_split
train_X, test_X, train_Y, test_Y, train_depends, test_depends, train_char, test_char = train_test_split(words,
                                                                           labels,
                                                                           depends,
                                                                           char,
                                                                           test_size=0.1)
train_X = words
train_Y = labels
train_depends = depends
train_char = char



In [9]:
class Model:
    def __init__(
        self,
        dim_word,
        dim_char,
        dropout,
        learning_rate,
        hidden_size_char,
        hidden_size_word,
        num_layers,
        maxlen
    ):
        def cells(size, reuse = False):
            return tf.contrib.rnn.DropoutWrapper(
                tf.nn.rnn_cell.LSTMCell(
                    size,
                    initializer = tf.orthogonal_initializer(),
                    reuse = reuse,
                ),
                output_keep_prob = dropout,
            )

        def bahdanau(embedded, size):
            attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
                num_units = hidden_size_word, memory = embedded
            )
            return tf.contrib.seq2seq.AttentionWrapper(
                cell = cells(hidden_size_word),
                attention_mechanism = attention_mechanism,
                attention_layer_size = hidden_size_word,
            )

        self.word_ids = tf.placeholder(tf.int32, shape = [None, None])
        self.char_ids = tf.placeholder(tf.int32, shape = [None, None, None])
        self.labels = tf.placeholder(tf.int32, shape = [None, None])
        self.depends = tf.placeholder(tf.int32, shape = [None, None])
        self.maxlen = tf.shape(self.word_ids)[1]
        self.lengths = tf.count_nonzero(self.word_ids, 1)

        self.word_embeddings = tf.Variable(
            tf.truncated_normal(
                [len(word2idx), dim_word], stddev = 1.0 / np.sqrt(dim_word)
            )
        )
        self.char_embeddings = tf.Variable(
            tf.truncated_normal(
                [len(char2idx), dim_char], stddev = 1.0 / np.sqrt(dim_char)
            )
        )

        word_embedded = tf.nn.embedding_lookup(
            self.word_embeddings, self.word_ids
        )
        char_embedded = tf.nn.embedding_lookup(
            self.char_embeddings, self.char_ids
        )
        s = tf.shape(char_embedded)
        char_embedded = tf.reshape(
            char_embedded, shape = [s[0] * s[1], s[-2], dim_char]
        )

        for n in range(num_layers):
            (out_fw, out_bw), (
                state_fw,
                state_bw,
            ) = tf.nn.bidirectional_dynamic_rnn(
                cell_fw = cells(hidden_size_char),
                cell_bw = cells(hidden_size_char),
                inputs = char_embedded,
                dtype = tf.float32,
                scope = 'bidirectional_rnn_char_%d' % (n),
            )
            char_embedded = tf.concat((out_fw, out_bw), 2)
        output = tf.reshape(
            char_embedded[:, -1], shape = [s[0], s[1], 2 * hidden_size_char]
        )
        word_embedded = tf.concat([word_embedded, output], axis = -1)

        for n in range(num_layers):
            (out_fw, out_bw), (
                state_fw,
                state_bw,
            ) = tf.nn.bidirectional_dynamic_rnn(
                cell_fw = bahdanau(word_embedded, hidden_size_word),
                cell_bw = bahdanau(word_embedded, hidden_size_word),
                inputs = word_embedded,
                dtype = tf.float32,
                scope = 'bidirectional_rnn_word_%d' % (n),
            )
            word_embedded = tf.concat((out_fw, out_bw), 2)

        logits = tf.layers.dense(word_embedded, len(idx2tag))
        
        tag_embeddings = tf.Variable(
            tf.truncated_normal(
                [len(idx2tag), dim_word], stddev = 1.0 / np.sqrt(dim_word)
            )
        )
        logits_max = tf.argmax(logits,axis=2,output_type=tf.int32)
        lookup_logits = tf.nn.embedding_lookup(
            tag_embeddings, logits_max
        )
        (out_fw, out_bw), _ = tf.nn.bidirectional_dynamic_rnn(
                cell_fw = cells(hidden_size_word),
                cell_bw = cells(hidden_size_word),
                inputs = word_embedded,
                dtype = tf.float32,
                scope = 'bidirectional_rnn_word_%d' % (n),
            )
        
        cast_mask = tf.cast(tf.sequence_mask(self.lengths + 1, maxlen = maxlen), dtype = tf.float32)
        cast_mask = tf.tile(tf.expand_dims(cast_mask,axis=1),[1,self.maxlen,1]) * 10
        
        lookup_logits = tf.concat((out_fw, out_bw), 2)
        logits_depends = tf.layers.dense(lookup_logits, maxlen)
        
        logits_depends = tf.multiply(logits_depends, cast_mask)
        
        log_likelihood, transition_params = tf.contrib.crf.crf_log_likelihood(
            logits, self.labels, self.lengths
        )
        with tf.variable_scope("depends"):
            log_likelihood_depends, transition_params_depends = tf.contrib.crf.crf_log_likelihood(
                logits_depends, self.depends, self.lengths
            )
        self.cost = tf.reduce_mean(-log_likelihood) + tf.reduce_mean(-log_likelihood_depends)
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate = learning_rate
        ).minimize(self.cost)
        
        mask = tf.sequence_mask(self.lengths, maxlen = self.maxlen)
        
        self.tags_seq, _ = tf.contrib.crf.crf_decode(
            logits, transition_params, self.lengths
        )
        self.tags_seq = tf.identity(self.tags_seq, name = 'logits')
        
        self.tags_seq_depends, _ = tf.contrib.crf.crf_decode(
            logits_depends, transition_params_depends, self.lengths
        )
        self.tags_seq_depends = tf.identity(self.tags_seq_depends, name = 'logits_depends')

        self.prediction = tf.boolean_mask(self.tags_seq, mask)
        mask_label = tf.boolean_mask(self.labels, mask)
        correct_pred = tf.equal(self.prediction, mask_label)
        correct_index = tf.cast(correct_pred, tf.float32)
        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
        
        self.prediction = tf.boolean_mask(self.tags_seq_depends, mask)
        mask_label = tf.boolean_mask(self.depends, mask)
        correct_pred = tf.equal(self.prediction, mask_label)
        correct_index = tf.cast(correct_pred, tf.float32)
        self.accuracy_depends = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [10]:
tf.reset_default_graph()
sess = tf.InteractiveSession()

dim_word = 128
dim_char = 256
dropout = 0.9
learning_rate = 1e-3
hidden_size_char = 64
hidden_size_word = 64
num_layers = 2
batch_size = 16

model = Model(dim_word,dim_char,dropout,learning_rate,hidden_size_char,hidden_size_word,num_layers,
             words.shape[1])
sess.run(tf.global_variables_initializer())

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [11]:
import time

for e in range(30):
    lasttime = time.time()
    train_acc, train_loss, test_acc, test_loss, train_acc_depends, test_acc_depends = 0, 0, 0, 0, 0, 0
    pbar = tqdm(
        range(0, len(train_X), batch_size), desc = 'train minibatch loop'
    )
    for i in pbar:
        batch_x = train_X[i : min(i + batch_size, train_X.shape[0])]
        batch_char = train_char[i : min(i + batch_size, train_X.shape[0])]
        batch_y = train_Y[i : min(i + batch_size, train_X.shape[0])]
        batch_depends = train_depends[i : min(i + batch_size, train_X.shape[0])]
        acc_depends, acc, cost, _ = sess.run(
            [model.accuracy_depends, model.accuracy, model.cost, model.optimizer],
            feed_dict = {
                model.word_ids: batch_x,
                model.char_ids: batch_char,
                model.labels: batch_y,
                model.depends: batch_depends
            },
        )
        assert not np.isnan(cost)
        train_loss += cost
        train_acc += acc
        train_acc_depends += acc_depends
        pbar.set_postfix(cost = cost, accuracy = acc, accuracy_depends = acc_depends)
        
    pbar = tqdm(
        range(0, len(test_X), batch_size), desc = 'test minibatch loop'
    )
    for i in pbar:
        batch_x = test_X[i : min(i + batch_size, test_X.shape[0])]
        batch_char = test_char[i : min(i + batch_size, test_X.shape[0])]
        batch_y = test_Y[i : min(i + batch_size, test_X.shape[0])]
        batch_depends = test_depends[i : min(i + batch_size, test_X.shape[0])]
        acc_depends, acc, cost = sess.run(
            [model.accuracy_depends, model.accuracy, model.cost],
            feed_dict = {
                model.word_ids: batch_x,
                model.char_ids: batch_char,
                model.labels: batch_y,
                model.depends: batch_depends
            },
        )
        assert not np.isnan(cost)
        test_loss += cost
        test_acc += acc
        test_acc_depends += acc_depends
        pbar.set_postfix(cost = cost, accuracy = acc, accuracy_depends = acc_depends)
    
    train_loss /= len(train_X) / batch_size
    train_acc /= len(train_X) / batch_size
    train_acc_depends /= len(train_X) / batch_size
    test_loss /= len(test_X) / batch_size
    test_acc /= len(test_X) / batch_size
    test_acc_depends /= len(test_X) / batch_size

    print('time taken:', time.time() - lasttime)
    print(
        'epoch: %d, training loss: %f, training acc: %f, training depends: %f, valid loss: %f, valid acc: %f, valid depends: %f\n'
        % (e, train_loss, train_acc, train_acc_depends, test_loss, test_acc, test_acc_depends)
    )

train minibatch loop: 100%|██████████| 350/350 [07:26<00:00,  1.21s/it, accuracy=0.63, accuracy_depends=0.198, cost=92.1]  
test minibatch loop: 100%|██████████| 35/35 [00:16<00:00,  2.15it/s, accuracy=0.54, accuracy_depends=0.175, cost=76.5] 
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 462.49334955215454
epoch: 0, training loss: 115.272625, training acc: 0.293043, training depends: 0.134907, valid loss: 84.255187, valid acc: 0.572640, valid depends: 0.168858



train minibatch loop: 100%|██████████| 350/350 [07:25<00:00,  1.22s/it, accuracy=0.79, accuracy_depends=0.389, cost=62.1] 
test minibatch loop: 100%|██████████| 35/35 [00:16<00:00,  2.19it/s, accuracy=0.761, accuracy_depends=0.375, cost=53.8]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 461.60836005210876
epoch: 1, training loss: 73.682281, training acc: 0.685202, training depends: 0.251510, valid loss: 57.258023, valid acc: 0.781993, valid depends: 0.344499



train minibatch loop: 100%|██████████| 350/350 [07:24<00:00,  1.21s/it, accuracy=0.832, accuracy_depends=0.466, cost=51.1]
test minibatch loop: 100%|██████████| 35/35 [00:15<00:00,  2.17it/s, accuracy=0.828, accuracy_depends=0.505, cost=42.8]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 460.88442754745483
epoch: 2, training loss: 55.785579, training acc: 0.797478, training depends: 0.376332, valid loss: 46.112198, valid acc: 0.840436, valid depends: 0.438551



train minibatch loop: 100%|██████████| 350/350 [07:24<00:00,  1.21s/it, accuracy=0.847, accuracy_depends=0.481, cost=43.6]
test minibatch loop: 100%|██████████| 35/35 [00:15<00:00,  2.20it/s, accuracy=0.854, accuracy_depends=0.579, cost=36.6]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 460.24800276756287
epoch: 3, training loss: 46.923692, training acc: 0.837538, training depends: 0.464243, valid loss: 39.288552, valid acc: 0.868409, valid depends: 0.507904



train minibatch loop: 100%|██████████| 350/350 [07:24<00:00,  1.21s/it, accuracy=0.863, accuracy_depends=0.553, cost=40.4]
test minibatch loop: 100%|██████████| 35/35 [00:15<00:00,  2.20it/s, accuracy=0.867, accuracy_depends=0.612, cost=31.3]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 460.2187466621399
epoch: 4, training loss: 40.390897, training acc: 0.864786, training depends: 0.533086, valid loss: 33.758809, valid acc: 0.882889, valid depends: 0.591173



train minibatch loop: 100%|██████████| 350/350 [07:24<00:00,  1.21s/it, accuracy=0.882, accuracy_depends=0.588, cost=35.9]
test minibatch loop: 100%|██████████| 35/35 [00:15<00:00,  2.21it/s, accuracy=0.88, accuracy_depends=0.625, cost=28.1] 
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 459.98591589927673
epoch: 5, training loss: 35.797025, training acc: 0.881893, training depends: 0.581874, valid loss: 30.856541, valid acc: 0.894651, valid depends: 0.606799



train minibatch loop: 100%|██████████| 350/350 [07:24<00:00,  1.21s/it, accuracy=0.912, accuracy_depends=0.576, cost=35.5]
test minibatch loop: 100%|██████████| 35/35 [00:15<00:00,  2.21it/s, accuracy=0.883, accuracy_depends=0.693, cost=25.3]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 460.1819784641266
epoch: 6, training loss: 31.916716, training acc: 0.896689, training depends: 0.624354, valid loss: 28.617635, valid acc: 0.905045, valid depends: 0.630784



train minibatch loop: 100%|██████████| 350/350 [07:24<00:00,  1.21s/it, accuracy=0.893, accuracy_depends=0.611, cost=28.9]
test minibatch loop: 100%|██████████| 35/35 [00:15<00:00,  2.20it/s, accuracy=0.896, accuracy_depends=0.725, cost=22.3]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 460.21761178970337
epoch: 7, training loss: 28.880461, training acc: 0.906977, training depends: 0.654192, valid loss: 25.718532, valid acc: 0.920991, valid depends: 0.652286



train minibatch loop: 100%|██████████| 350/350 [07:24<00:00,  1.20s/it, accuracy=0.92, accuracy_depends=0.702, cost=25.4] 
test minibatch loop: 100%|██████████| 35/35 [00:15<00:00,  2.20it/s, accuracy=0.896, accuracy_depends=0.783, cost=18.5]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 460.3104441165924
epoch: 8, training loss: 25.449042, training acc: 0.917977, training depends: 0.700321, valid loss: 22.639463, valid acc: 0.925783, valid depends: 0.711135



train minibatch loop: 100%|██████████| 350/350 [07:24<00:00,  1.21s/it, accuracy=0.924, accuracy_depends=0.698, cost=23.7]
test minibatch loop: 100%|██████████| 35/35 [00:15<00:00,  2.19it/s, accuracy=0.916, accuracy_depends=0.819, cost=16.2]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 460.29917430877686
epoch: 9, training loss: 23.016498, training acc: 0.928991, training depends: 0.723915, valid loss: 18.748300, valid acc: 0.935534, valid depends: 0.764126



train minibatch loop: 100%|██████████| 350/350 [07:24<00:00,  1.21s/it, accuracy=0.947, accuracy_depends=0.756, cost=20.4]
test minibatch loop: 100%|██████████| 35/35 [00:15<00:00,  2.19it/s, accuracy=0.935, accuracy_depends=0.799, cost=15.1]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 460.2450954914093
epoch: 10, training loss: 20.552919, training acc: 0.937257, training depends: 0.750880, valid loss: 17.479696, valid acc: 0.942862, valid depends: 0.775819



train minibatch loop: 100%|██████████| 350/350 [07:24<00:00,  1.21s/it, accuracy=0.943, accuracy_depends=0.775, cost=18]  
test minibatch loop: 100%|██████████| 35/35 [00:15<00:00,  2.21it/s, accuracy=0.926, accuracy_depends=0.832, cost=13.7]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 460.2544288635254
epoch: 11, training loss: 18.106015, training acc: 0.944359, training depends: 0.782148, valid loss: 16.181224, valid acc: 0.948883, valid depends: 0.794360



train minibatch loop: 100%|██████████| 350/350 [07:24<00:00,  1.21s/it, accuracy=0.962, accuracy_depends=0.698, cost=21.8]
test minibatch loop: 100%|██████████| 35/35 [00:15<00:00,  2.19it/s, accuracy=0.932, accuracy_depends=0.832, cost=13.7]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 460.1449451446533
epoch: 12, training loss: 16.952360, training acc: 0.950551, training depends: 0.793759, valid loss: 17.263710, valid acc: 0.949310, valid depends: 0.773519



train minibatch loop: 100%|██████████| 350/350 [07:24<00:00,  1.21s/it, accuracy=0.958, accuracy_depends=0.794, cost=15.4]
test minibatch loop: 100%|██████████| 35/35 [00:15<00:00,  2.18it/s, accuracy=0.951, accuracy_depends=0.854, cost=10.7]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 460.46870493888855
epoch: 13, training loss: 14.983666, training acc: 0.954718, training depends: 0.819102, valid loss: 12.210587, valid acc: 0.959962, valid depends: 0.847287



train minibatch loop: 100%|██████████| 350/350 [07:24<00:00,  1.21s/it, accuracy=0.969, accuracy_depends=0.794, cost=14.7]
test minibatch loop: 100%|██████████| 35/35 [00:15<00:00,  2.20it/s, accuracy=0.968, accuracy_depends=0.867, cost=11.6]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 460.4819724559784
epoch: 14, training loss: 13.309542, training acc: 0.960360, training depends: 0.838933, valid loss: 11.421844, valid acc: 0.961654, valid depends: 0.860090



train minibatch loop: 100%|██████████| 350/350 [07:24<00:00,  1.21s/it, accuracy=0.969, accuracy_depends=0.84, cost=13.1] 
test minibatch loop: 100%|██████████| 35/35 [00:15<00:00,  2.18it/s, accuracy=0.939, accuracy_depends=0.796, cost=14.5]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 460.1923680305481
epoch: 15, training loss: 12.358105, training acc: 0.964398, training depends: 0.848057, valid loss: 15.268088, valid acc: 0.961578, valid depends: 0.785067



train minibatch loop: 100%|██████████| 350/350 [07:24<00:00,  1.22s/it, accuracy=0.954, accuracy_depends=0.828, cost=12.7]
test minibatch loop: 100%|██████████| 35/35 [00:15<00:00,  2.19it/s, accuracy=0.968, accuracy_depends=0.867, cost=7.83]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 460.1567351818085
epoch: 16, training loss: 11.681579, training acc: 0.967151, training depends: 0.855299, valid loss: 10.426277, valid acc: 0.967730, valid depends: 0.867949



train minibatch loop: 100%|██████████| 350/350 [07:24<00:00,  1.21s/it, accuracy=0.977, accuracy_depends=0.878, cost=10.5]
test minibatch loop: 100%|██████████| 35/35 [00:15<00:00,  2.20it/s, accuracy=0.981, accuracy_depends=0.896, cost=7.47]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 460.27422404289246
epoch: 17, training loss: 10.001447, training acc: 0.972149, training depends: 0.878352, valid loss: 8.732660, valid acc: 0.973683, valid depends: 0.893555



train minibatch loop: 100%|██████████| 350/350 [07:24<00:00,  1.21s/it, accuracy=0.973, accuracy_depends=0.874, cost=8.03]
test minibatch loop: 100%|██████████| 35/35 [00:15<00:00,  2.20it/s, accuracy=0.968, accuracy_depends=0.926, cost=7.29]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 460.1948437690735
epoch: 18, training loss: 9.736666, training acc: 0.973757, training depends: 0.879800, valid loss: 8.402309, valid acc: 0.975790, valid depends: 0.892428



train minibatch loop: 100%|██████████| 350/350 [07:24<00:00,  1.21s/it, accuracy=0.966, accuracy_depends=0.866, cost=9.63]
test minibatch loop: 100%|██████████| 35/35 [00:15<00:00,  2.20it/s, accuracy=0.981, accuracy_depends=0.926, cost=6.69]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 460.616051197052
epoch: 19, training loss: 8.879539, training acc: 0.976093, training depends: 0.891309, valid loss: 8.159968, valid acc: 0.976003, valid depends: 0.896873



train minibatch loop: 100%|██████████| 350/350 [07:24<00:00,  1.20s/it, accuracy=0.985, accuracy_depends=0.905, cost=7.13]
test minibatch loop: 100%|██████████| 35/35 [00:15<00:00,  2.21it/s, accuracy=0.984, accuracy_depends=0.939, cost=4.81]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 460.06500816345215
epoch: 20, training loss: 8.002114, training acc: 0.977810, training depends: 0.903407, valid loss: 7.608495, valid acc: 0.978042, valid depends: 0.899588



train minibatch loop: 100%|██████████| 350/350 [07:24<00:00,  1.21s/it, accuracy=0.973, accuracy_depends=0.962, cost=4.98] 
test minibatch loop: 100%|██████████| 35/35 [00:15<00:00,  2.20it/s, accuracy=0.971, accuracy_depends=0.916, cost=6.08]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 460.48553586006165
epoch: 21, training loss: 7.278349, training acc: 0.980459, training depends: 0.912862, valid loss: 6.472738, valid acc: 0.982318, valid depends: 0.914229



train minibatch loop: 100%|██████████| 350/350 [07:24<00:00,  1.21s/it, accuracy=0.989, accuracy_depends=0.947, cost=5.19]
test minibatch loop: 100%|██████████| 35/35 [00:15<00:00,  2.19it/s, accuracy=0.984, accuracy_depends=0.945, cost=3.68]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 460.1815667152405
epoch: 22, training loss: 7.237885, training acc: 0.981749, training depends: 0.909883, valid loss: 5.704089, valid acc: 0.981148, valid depends: 0.928778



train minibatch loop: 100%|██████████| 350/350 [07:24<00:00,  1.21s/it, accuracy=0.981, accuracy_depends=0.908, cost=6.53]
test minibatch loop: 100%|██████████| 35/35 [00:15<00:00,  2.21it/s, accuracy=0.99, accuracy_depends=0.968, cost=3.19] 
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 460.336722612381
epoch: 23, training loss: 7.274213, training acc: 0.981934, training depends: 0.909730, valid loss: 5.923634, valid acc: 0.982440, valid depends: 0.928985



train minibatch loop: 100%|██████████| 350/350 [07:24<00:00,  1.20s/it, accuracy=0.981, accuracy_depends=0.916, cost=7.44] 
test minibatch loop: 100%|██████████| 35/35 [00:15<00:00,  2.20it/s, accuracy=0.984, accuracy_depends=0.948, cost=4.3] 
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 460.70146107673645
epoch: 24, training loss: 6.519620, training acc: 0.983132, training depends: 0.920901, valid loss: 5.349658, valid acc: 0.983545, valid depends: 0.933107



train minibatch loop: 100%|██████████| 350/350 [07:24<00:00,  1.21s/it, accuracy=0.977, accuracy_depends=0.939, cost=5.23] 
test minibatch loop: 100%|██████████| 35/35 [00:15<00:00,  2.20it/s, accuracy=0.994, accuracy_depends=0.945, cost=3.39]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 460.3920404911041
epoch: 25, training loss: 5.670324, training acc: 0.984821, training depends: 0.932336, valid loss: 4.653565, valid acc: 0.985244, valid depends: 0.944571



train minibatch loop: 100%|██████████| 350/350 [07:24<00:00,  1.21s/it, accuracy=0.985, accuracy_depends=0.927, cost=5.02] 
test minibatch loop: 100%|██████████| 35/35 [00:15<00:00,  2.19it/s, accuracy=0.981, accuracy_depends=0.964, cost=3.61]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 460.64840054512024
epoch: 26, training loss: 5.147398, training acc: 0.986262, training depends: 0.938720, valid loss: 4.497323, valid acc: 0.987216, valid depends: 0.943368



train minibatch loop: 100%|██████████| 350/350 [07:24<00:00,  1.21s/it, accuracy=0.989, accuracy_depends=0.962, cost=3.63] 
test minibatch loop: 100%|██████████| 35/35 [00:15<00:00,  2.20it/s, accuracy=1, accuracy_depends=0.945, cost=3.76]    
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 460.3136479854584
epoch: 27, training loss: 4.800829, training acc: 0.986921, training depends: 0.942915, valid loss: 4.161433, valid acc: 0.987297, valid depends: 0.950750



train minibatch loop: 100%|██████████| 350/350 [07:24<00:00,  1.21s/it, accuracy=0.992, accuracy_depends=0.935, cost=5.74]
test minibatch loop: 100%|██████████| 35/35 [00:15<00:00,  2.19it/s, accuracy=0.987, accuracy_depends=0.939, cost=3.87]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 460.34357595443726
epoch: 28, training loss: 5.065082, training acc: 0.987377, training depends: 0.938187, valid loss: 4.745105, valid acc: 0.988303, valid depends: 0.937136



train minibatch loop: 100%|██████████| 350/350 [07:24<00:00,  1.21s/it, accuracy=0.996, accuracy_depends=0.962, cost=3.37] 
test minibatch loop: 100%|██████████| 35/35 [00:15<00:00,  2.06it/s, accuracy=0.994, accuracy_depends=0.961, cost=2.37]

time taken: 460.4004707336426
epoch: 29, training loss: 4.743103, training acc: 0.988216, training depends: 0.942339, valid loss: 4.381818, valid acc: 0.989276, valid depends: 0.942244






In [12]:
seq, deps = sess.run([model.tags_seq, model.tags_seq_depends],
        feed_dict={model.word_ids:batch_x[:1],
                  model.char_ids:batch_char[:1]})

In [13]:
seq = seq[0]
deps = deps[0]

In [14]:
seq[seq>0]

array([ 7,  3,  1,  3,  4,  3, 27, 16,  7, 20,  7, 20, 10], dtype=int32)

In [15]:
batch_y[0][seq>0]

array([ 7,  3,  1,  3,  4,  3, 27, 16,  7, 20,  7, 20, 10], dtype=int32)

In [16]:
deps[seq>0]

array([ 6,  6,  6,  4,  1,  9,  7,  6, 11,  9, 13,  9,  6], dtype=int32)

In [17]:
batch_depends[0][seq>0]

array([ 6,  6,  6,  4,  1,  9,  7,  6, 11,  9, 13,  9,  6], dtype=int32)

In [33]:
string = 'tolong tangkap gambar kami'

def char_str_idx(corpus, dic, UNK = 0):
    maxlen = max([len(i) for i in corpus])
    X = np.zeros((len(corpus), maxlen))
    for i in range(len(corpus)):
        for no, k in enumerate(corpus[i][:maxlen]):
            val = dic[k] if k in dic else UNK
            X[i, no] = val
    return X

def generate_char_seq(batch, UNK = 2):
    maxlen_c = max([len(k) for k in batch])
    x = [[len(i) for i in k] for k in batch]
    maxlen = max([j for i in x for j in i])
    temp = np.zeros((len(batch),maxlen_c,maxlen),dtype=np.int32)
    for i in range(len(batch)):
        for k in range(len(batch[i])):
            for no, c in enumerate(batch[i][k][::-1]):
                temp[i,k,-1-no] = char2idx.get(c, UNK)
    return temp

sequence = process_string(string)[:150]
X_seq = char_str_idx([sequence], word2idx, 2)
X_char_seq = generate_char_seq([sequence])

In [34]:
seq, deps = sess.run([model.tags_seq, model.tags_seq_depends],
        feed_dict={model.word_ids:X_seq,
                  model.char_ids:X_char_seq})

In [37]:
deps - 1

array([[2, 0, 2, 2]], dtype=int32)

In [36]:
sequence

['tolong', 'tangkap', 'gambar', 'kami']

In [22]:
[idx2tag[i] for i in seq[0]]

['advmod', 'csubj', 'compound', 'det']

In [23]:
seq

array([[14, 29, 13,  3]], dtype=int32)

In [24]:
string = []
for i in range(len(seq[0])):
    string.append('%d\t%s\t_\t_\t_\t_\t%d\t%s'%(i+1,sequence[i],deps[0,i],idx2tag[seq[0,i]]))

In [25]:
string

['1\ttolong\t_\t_\t_\t_\t3\tadvmod',
 '2\ttangkap\t_\t_\t_\t_\t1\tcsubj',
 '3\tgambar\t_\t_\t_\t_\t3\tcompound',
 '4\tkami\t_\t_\t_\t_\t3\tdet']

In [26]:
saver = tf.train.Saver(tf.trainable_variables())
saver.save(sess, 'bahdanau-dependency/model.ckpt')

strings = ','.join(
    [
        n.name
        for n in tf.get_default_graph().as_graph_def().node
        if ('Variable' in n.op
        or 'Placeholder' in n.name
        or 'logits' in n.name
        or 'logits_depends' in n.name
        or 'alphas' in n.name)
        and 'Adam' not in n.name
        and 'beta' not in n.name
        and 'OptimizeLoss' not in n.name
        and 'Global_Step' not in n.name
        and 'Epoch_Step' not in n.name
        and 'learning_rate' not in n.name
    ]
)
strings.split(',')

['Placeholder',
 'Placeholder_1',
 'Placeholder_2',
 'Placeholder_3',
 'Variable',
 'Variable_1',
 'bidirectional_rnn_char_0/fw/lstm_cell/kernel',
 'bidirectional_rnn_char_0/fw/lstm_cell/bias',
 'bidirectional_rnn_char_0/bw/lstm_cell/kernel',
 'bidirectional_rnn_char_0/bw/lstm_cell/bias',
 'bidirectional_rnn_char_1/fw/lstm_cell/kernel',
 'bidirectional_rnn_char_1/fw/lstm_cell/bias',
 'bidirectional_rnn_char_1/bw/lstm_cell/kernel',
 'bidirectional_rnn_char_1/bw/lstm_cell/bias',
 'memory_layer/kernel',
 'memory_layer_1/kernel',
 'bidirectional_rnn_word_0/fw/attention_wrapper/lstm_cell/kernel',
 'bidirectional_rnn_word_0/fw/attention_wrapper/lstm_cell/bias',
 'bidirectional_rnn_word_0/fw/attention_wrapper/bahdanau_attention/query_layer/kernel',
 'bidirectional_rnn_word_0/fw/attention_wrapper/bahdanau_attention/attention_v',
 'bidirectional_rnn_word_0/fw/attention_wrapper/attention_layer/kernel',
 'bidirectional_rnn_word_0/bw/attention_wrapper/lstm_cell/kernel',
 'bidirectional_rnn_word_0/

In [27]:
import json
with open('bahdanau-dependency.json','w') as fopen:
    fopen.write(json.dumps({'idx2tag':idx2tag,'idx2word':idx2word,
           'word2idx':word2idx,'tag2idx':tag2idx,'char2idx':char2idx}))

In [38]:
def freeze_graph(model_dir, output_node_names):

    if not tf.gfile.Exists(model_dir):
        raise AssertionError(
            "Export directory doesn't exists. Please specify an export "
            'directory: %s' % model_dir
        )

    checkpoint = tf.train.get_checkpoint_state(model_dir)
    input_checkpoint = checkpoint.model_checkpoint_path

    absolute_model_dir = '/'.join(input_checkpoint.split('/')[:-1])
    output_graph = absolute_model_dir + '/frozen_model.pb'
    clear_devices = True
    with tf.Session(graph = tf.Graph()) as sess:
        saver = tf.train.import_meta_graph(
            input_checkpoint + '.meta', clear_devices = clear_devices
        )
        saver.restore(sess, input_checkpoint)
        output_graph_def = tf.graph_util.convert_variables_to_constants(
            sess,
            tf.get_default_graph().as_graph_def(),
            output_node_names.split(','),
        )
        with tf.gfile.GFile(output_graph, 'wb') as f:
            f.write(output_graph_def.SerializeToString())
        print('%d ops in the final graph.' % len(output_graph_def.node))
        
def load_graph(frozen_graph_filename):
    with tf.gfile.GFile(frozen_graph_filename, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    with tf.Graph().as_default() as graph:
        tf.import_graph_def(graph_def)
    return graph

In [39]:
freeze_graph('bahdanau-dependency', strings)

INFO:tensorflow:Restoring parameters from bahdanau-dependency/model.ckpt
INFO:tensorflow:Froze 45 variables.
INFO:tensorflow:Converted 45 variables to const ops.
2531 ops in the final graph.


In [40]:
g = load_graph('bahdanau-dependency/frozen_model.pb')

In [42]:
word_ids = g.get_tensor_by_name('import/Placeholder:0')
char_ids = g.get_tensor_by_name('import/Placeholder_1:0')
tags_seq = g.get_tensor_by_name('import/logits:0')
depends_seq = g.get_tensor_by_name('import/logits_depends:0')
test_sess = tf.InteractiveSession(graph = g)
seq, deps = test_sess.run([tags_seq, depends_seq],
            feed_dict = {
                word_ids: X_seq,
                char_ids: X_char_seq,
            })

print(seq,deps)

[[14 19 13  3]] [[3 3 1 3]]
