In [1]:
import tensorflow as tf
from tqdm import tqdm
import numpy as np
import re

In [2]:
with open('gsd-ud-train.conllu.txt') as fopen:
    corpus = fopen.read().split('\n')
    
with open('gsd-ud-test.conllu.txt') as fopen:
    corpus.extend(fopen.read().split('\n'))
    
with open('gsd-ud-dev.conllu.txt') as fopen:
    corpus.extend(fopen.read().split('\n'))

In [3]:
word2idx = {'PAD': 0,'NUM':1,'UNK':2}
tag2idx = {'PAD': 0}
char2idx = {'PAD': 0,'NUM':1,'UNK':2}
word_idx = 3
tag_idx = 1
char_idx = 3

def process_string(string):
    string = re.sub('[^A-Za-z0-9\-\/ ]+', ' ', string).split()
    return [to_title(y.strip()) for y in string]

def to_title(string):
    if string.isupper():
        string = string.title()
    return string

def process_corpus(corpus, until = None):
    global word2idx, tag2idx, char2idx, word_idx, tag_idx, char_idx
    sentences, words, depends, labels = [], [], [], []
    temp_sentence, temp_word, temp_depend, temp_label = [], [], [], []
    for sentence in corpus:
        if len(sentence):
            if sentence[0] == '#':
                continue
            sentence = sentence.split('\t')
            temp = process_string(sentence[1])
            if not len(temp):
                sentence[1] = 'EMPTY'
            sentence[1] = process_string(sentence[1])[0]
            for c in sentence[1]:
                if c not in char2idx:
                    char2idx[c] = char_idx
                    char_idx += 1
            if sentence[7] not in tag2idx:
                tag2idx[sentence[7]] = tag_idx
                tag_idx += 1
            if sentence[1] not in word2idx:
                word2idx[sentence[1]] = word_idx
                word_idx += 1
            temp_word.append(word2idx[sentence[1]])
            temp_depend.append(int(sentence[6]) + 1)
            temp_label.append(tag2idx[sentence[7]])
            temp_sentence.append(sentence[1])
        else:
            words.append(temp_word)
            depends.append(temp_depend)
            labels.append(temp_label)
            sentences.append(temp_sentence)
            temp_word = []
            temp_depend = []
            temp_label = []
            temp_sentence = []
    return sentences[:-1], words[:-1], depends[:-1], labels[:-1]
        
sentences, words, depends, labels = process_corpus(corpus)

In [4]:
from keras.preprocessing.sequence import pad_sequences

Using TensorFlow backend.


In [5]:
words = pad_sequences(words,padding='post')
depends = pad_sequences(depends,padding='post')
labels = pad_sequences(labels,padding='post')
words.shape

(5595, 189)

In [6]:
def generate_char_seq(batch, UNK = 2):
    maxlen_c = max([len(k) for k in batch])
    x = [[len(i) for i in k] for k in batch]
    maxlen = max([j for i in x for j in i])
    temp = np.zeros((len(batch),maxlen_c,maxlen),dtype=np.int32)
    for i in range(len(batch)):
        for k in range(len(batch[i])):
            for no, c in enumerate(batch[i][k][:maxlen][::-1]):
                temp[i,k,-1-no] = char2idx.get(c, UNK)
    return temp

In [7]:
idx2word = {idx: tag for tag, idx in word2idx.items()}
idx2tag = {i: w for w, i in tag2idx.items()}
char = generate_char_seq(sentences)

In [8]:
from sklearn.cross_validation import train_test_split
train_X, test_X, train_Y, test_Y, train_depends, test_depends, train_char, test_char = train_test_split(words,
                                                                           labels,
                                                                           depends,
                                                                           char,
                                                                           test_size=0.1)
train_X = words
train_Y = labels
train_depends = depends
train_char = char



In [9]:
class Model:
    def __init__(
        self,
        dim_word,
        dim_char,
        dropout,
        learning_rate,
        hidden_size_char,
        hidden_size_word,
        num_layers,
        maxlen
    ):
        def cells(size, reuse = False):
            return tf.contrib.rnn.DropoutWrapper(
                tf.nn.rnn_cell.LSTMCell(
                    size,
                    initializer = tf.orthogonal_initializer(),
                    reuse = reuse,
                ),
                output_keep_prob = dropout,
            )

        def bahdanau(embedded, size):
            attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
                num_units = hidden_size_word, memory = embedded
            )
            return tf.contrib.seq2seq.AttentionWrapper(
                cell = cells(hidden_size_word),
                attention_mechanism = attention_mechanism,
                attention_layer_size = hidden_size_word,
            )

        self.word_ids = tf.placeholder(tf.int32, shape = [None, None])
        self.char_ids = tf.placeholder(tf.int32, shape = [None, None, None])
        self.labels = tf.placeholder(tf.int32, shape = [None, None])
        self.depends = tf.placeholder(tf.int32, shape = [None, None])
        self.maxlen = tf.shape(self.word_ids)[1]
        self.lengths = tf.count_nonzero(self.word_ids, 1)

        self.word_embeddings = tf.Variable(
            tf.truncated_normal(
                [len(word2idx), dim_word], stddev = 1.0 / np.sqrt(dim_word)
            )
        )
        self.char_embeddings = tf.Variable(
            tf.truncated_normal(
                [len(char2idx), dim_char], stddev = 1.0 / np.sqrt(dim_char)
            )
        )

        word_embedded = tf.nn.embedding_lookup(
            self.word_embeddings, self.word_ids
        )
        char_embedded = tf.nn.embedding_lookup(
            self.char_embeddings, self.char_ids
        )
        s = tf.shape(char_embedded)
        char_embedded = tf.reshape(
            char_embedded, shape = [s[0] * s[1], s[-2], dim_char]
        )

        for n in range(num_layers):
            (out_fw, out_bw), (
                state_fw,
                state_bw,
            ) = tf.nn.bidirectional_dynamic_rnn(
                cell_fw = cells(hidden_size_char),
                cell_bw = cells(hidden_size_char),
                inputs = char_embedded,
                dtype = tf.float32,
                scope = 'bidirectional_rnn_char_%d' % (n),
            )
            char_embedded = tf.concat((out_fw, out_bw), 2)
        output = tf.reshape(
            char_embedded[:, -1], shape = [s[0], s[1], 2 * hidden_size_char]
        )
        word_embedded = tf.concat([word_embedded, output], axis = -1)

        for n in range(num_layers):
            (out_fw, out_bw), (
                state_fw,
                state_bw,
            ) = tf.nn.bidirectional_dynamic_rnn(
                cell_fw = bahdanau(word_embedded, hidden_size_word),
                cell_bw = bahdanau(word_embedded, hidden_size_word),
                inputs = word_embedded,
                dtype = tf.float32,
                scope = 'bidirectional_rnn_word_%d' % (n),
            )
            word_embedded = tf.concat((out_fw, out_bw), 2)

        logits = tf.layers.dense(word_embedded, len(idx2tag))
        
        tag_embeddings = tf.Variable(
            tf.truncated_normal(
                [len(idx2tag), dim_word], stddev = 1.0 / np.sqrt(dim_word)
            )
        )
        logits_max = tf.argmax(logits,axis=2,output_type=tf.int32)
        lookup_logits = tf.nn.embedding_lookup(
            tag_embeddings, logits_max
        )
        (out_fw, out_bw), _ = tf.nn.bidirectional_dynamic_rnn(
                cell_fw = cells(hidden_size_word),
                cell_bw = cells(hidden_size_word),
                inputs = word_embedded,
                dtype = tf.float32,
                scope = 'bidirectional_rnn_word_%d' % (n),
            )
        
        cast_mask = tf.cast(tf.sequence_mask(self.lengths + 1, maxlen = maxlen), dtype = tf.float32)
        cast_mask = tf.tile(tf.expand_dims(cast_mask,axis=1),[1,self.maxlen,1]) * 10
        
        lookup_logits = tf.concat((out_fw, out_bw), 2)
        logits_depends = tf.layers.dense(lookup_logits, maxlen)
        
        logits_depends = tf.multiply(logits_depends, cast_mask)
        
        log_likelihood, transition_params = tf.contrib.crf.crf_log_likelihood(
            logits, self.labels, self.lengths
        )
        with tf.variable_scope("depends"):
            log_likelihood_depends, transition_params_depends = tf.contrib.crf.crf_log_likelihood(
                logits_depends, self.depends, self.lengths
            )
        self.cost = tf.reduce_mean(-log_likelihood) + tf.reduce_mean(-log_likelihood_depends)
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate = learning_rate
        ).minimize(self.cost)
        
        mask = tf.sequence_mask(self.lengths, maxlen = self.maxlen)
        
        self.tags_seq, _ = tf.contrib.crf.crf_decode(
            logits, transition_params, self.lengths
        )
        self.tags_seq = tf.identity(self.tags_seq, name = 'logits')
        
        self.tags_seq_depends, _ = tf.contrib.crf.crf_decode(
            logits_depends, transition_params_depends, self.lengths
        )
        self.tags_seq_depends = tf.identity(self.tags_seq_depends, name = 'logits_depends')

        self.prediction = tf.boolean_mask(self.tags_seq, mask)
        mask_label = tf.boolean_mask(self.labels, mask)
        correct_pred = tf.equal(self.prediction, mask_label)
        correct_index = tf.cast(correct_pred, tf.float32)
        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
        
        self.prediction = tf.boolean_mask(self.tags_seq_depends, mask)
        mask_label = tf.boolean_mask(self.depends, mask)
        correct_pred = tf.equal(self.prediction, mask_label)
        correct_index = tf.cast(correct_pred, tf.float32)
        self.accuracy_depends = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [10]:
tf.reset_default_graph()
sess = tf.InteractiveSession()

dim_word = 128
dim_char = 256
dropout = 0.9
learning_rate = 1e-3
hidden_size_char = 64
hidden_size_word = 64
num_layers = 2
batch_size = 32

model = Model(dim_word,dim_char,dropout,learning_rate,hidden_size_char,hidden_size_word,num_layers,
             words.shape[1])
sess.run(tf.global_variables_initializer())

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [11]:
import time

for e in range(30):
    lasttime = time.time()
    train_acc, train_loss, test_acc, test_loss, train_acc_depends, test_acc_depends = 0, 0, 0, 0, 0, 0
    pbar = tqdm(
        range(0, len(train_X), batch_size), desc = 'train minibatch loop'
    )
    for i in pbar:
        batch_x = train_X[i : min(i + batch_size, train_X.shape[0])]
        batch_char = train_char[i : min(i + batch_size, train_X.shape[0])]
        batch_y = train_Y[i : min(i + batch_size, train_X.shape[0])]
        batch_depends = train_depends[i : min(i + batch_size, train_X.shape[0])]
        acc_depends, acc, cost, _ = sess.run(
            [model.accuracy_depends, model.accuracy, model.cost, model.optimizer],
            feed_dict = {
                model.word_ids: batch_x,
                model.char_ids: batch_char,
                model.labels: batch_y,
                model.depends: batch_depends
            },
        )
        assert not np.isnan(cost)
        train_loss += cost
        train_acc += acc
        train_acc_depends += acc_depends
        pbar.set_postfix(cost = cost, accuracy = acc, accuracy_depends = acc_depends)
        
    pbar = tqdm(
        range(0, len(test_X), batch_size), desc = 'test minibatch loop'
    )
    for i in pbar:
        batch_x = test_X[i : min(i + batch_size, test_X.shape[0])]
        batch_char = test_char[i : min(i + batch_size, test_X.shape[0])]
        batch_y = test_Y[i : min(i + batch_size, test_X.shape[0])]
        batch_depends = test_depends[i : min(i + batch_size, test_X.shape[0])]
        acc_depends, acc, cost = sess.run(
            [model.accuracy_depends, model.accuracy, model.cost],
            feed_dict = {
                model.word_ids: batch_x,
                model.char_ids: batch_char,
                model.labels: batch_y,
                model.depends: batch_depends
            },
        )
        assert not np.isnan(cost)
        test_loss += cost
        test_acc += acc
        test_acc_depends += acc_depends
        pbar.set_postfix(cost = cost, accuracy = acc, accuracy_depends = acc_depends)
    
    train_loss /= len(train_X) / batch_size
    train_acc /= len(train_X) / batch_size
    train_acc_depends /= len(train_X) / batch_size
    test_loss /= len(test_X) / batch_size
    test_acc /= len(test_X) / batch_size
    test_acc_depends /= len(test_X) / batch_size

    print('time taken:', time.time() - lasttime)
    print(
        'epoch: %d, training loss: %f, training acc: %f, training depends: %f, valid loss: %f, valid acc: %f, valid depends: %f\n'
        % (e, train_loss, train_acc, train_acc_depends, test_loss, test_acc, test_acc_depends)
    )

train minibatch loop: 100%|██████████| 175/175 [08:57<00:00,  2.89s/it, accuracy=0.333, accuracy_depends=0.156, cost=104] 
test minibatch loop: 100%|██████████| 18/18 [00:20<00:00,  1.02s/it, accuracy=0.295, accuracy_depends=0.142, cost=121] 
train minibatch loop:   0%|          | 0/175 [00:00<?, ?it/s]

time taken: 558.3575489521027
epoch: 0, training loss: 127.057262, training acc: 0.181174, training depends: 0.114003, valid loss: 115.380958, valid acc: 0.323096, valid depends: 0.148569



train minibatch loop: 100%|██████████| 175/175 [08:50<00:00,  2.89s/it, accuracy=0.66, accuracy_depends=0.224, cost=75.4] 
test minibatch loop: 100%|██████████| 18/18 [00:20<00:00,  1.03s/it, accuracy=0.661, accuracy_depends=0.199, cost=86.4]
train minibatch loop:   0%|          | 0/175 [00:00<?, ?it/s]

time taken: 551.6215944290161
epoch: 1, training loss: 92.544422, training acc: 0.494969, training depends: 0.173707, valid loss: 83.178265, valid acc: 0.665197, valid depends: 0.224705



train minibatch loop: 100%|██████████| 175/175 [08:51<00:00,  2.89s/it, accuracy=0.763, accuracy_depends=0.347, cost=58]  
test minibatch loop: 100%|██████████| 18/18 [00:20<00:00,  1.01s/it, accuracy=0.762, accuracy_depends=0.313, cost=67.5]
train minibatch loop:   0%|          | 0/175 [00:00<?, ?it/s]

time taken: 551.5247929096222
epoch: 2, training loss: 70.121650, training acc: 0.700845, training depends: 0.276474, valid loss: 64.725662, valid acc: 0.792769, valid depends: 0.346397



train minibatch loop: 100%|██████████| 175/175 [08:51<00:00,  2.88s/it, accuracy=0.799, accuracy_depends=0.43, cost=49.5] 
test minibatch loop: 100%|██████████| 18/18 [00:21<00:00,  1.05s/it, accuracy=0.822, accuracy_depends=0.39, cost=57.1] 
train minibatch loop:   0%|          | 0/175 [00:00<?, ?it/s]

time taken: 552.7808158397675
epoch: 3, training loss: 57.159094, training acc: 0.781434, training depends: 0.372488, valid loss: 55.749143, valid acc: 0.847064, valid depends: 0.417716



train minibatch loop: 100%|██████████| 175/175 [08:52<00:00,  2.90s/it, accuracy=0.834, accuracy_depends=0.497, cost=43.7]
test minibatch loop: 100%|██████████| 18/18 [00:20<00:00,  1.02s/it, accuracy=0.845, accuracy_depends=0.447, cost=50.4]
train minibatch loop:   0%|          | 0/175 [00:00<?, ?it/s]

time taken: 552.8781440258026
epoch: 4, training loss: 49.477134, training acc: 0.822382, training depends: 0.443484, valid loss: 48.126305, valid acc: 0.869223, valid depends: 0.494021



train minibatch loop: 100%|██████████| 175/175 [08:52<00:00,  2.90s/it, accuracy=0.835, accuracy_depends=0.572, cost=37.9]
test minibatch loop: 100%|██████████| 18/18 [00:20<00:00,  1.02s/it, accuracy=0.868, accuracy_depends=0.478, cost=46.4]
train minibatch loop:   0%|          | 0/175 [00:00<?, ?it/s]

time taken: 553.6048016548157
epoch: 5, training loss: 43.898549, training acc: 0.843285, training depends: 0.502730, valid loss: 44.958184, valid acc: 0.886983, valid depends: 0.505943



train minibatch loop: 100%|██████████| 175/175 [08:54<00:00,  2.90s/it, accuracy=0.858, accuracy_depends=0.551, cost=35.8]
test minibatch loop: 100%|██████████| 18/18 [00:20<00:00,  1.02s/it, accuracy=0.889, accuracy_depends=0.568, cost=39.8]
train minibatch loop:   0%|          | 0/175 [00:00<?, ?it/s]

time taken: 554.7248446941376
epoch: 6, training loss: 39.451955, training acc: 0.862932, training depends: 0.549447, valid loss: 39.604314, valid acc: 0.890252, valid depends: 0.581912



train minibatch loop: 100%|██████████| 175/175 [08:54<00:00,  2.89s/it, accuracy=0.86, accuracy_depends=0.603, cost=34.4] 
test minibatch loop: 100%|██████████| 18/18 [00:20<00:00,  1.02s/it, accuracy=0.894, accuracy_depends=0.576, cost=37.1]
train minibatch loop:   0%|          | 0/175 [00:00<?, ?it/s]

time taken: 554.9294853210449
epoch: 7, training loss: 36.070555, training acc: 0.873861, training depends: 0.586304, valid loss: 35.843321, valid acc: 0.915011, valid depends: 0.621563



train minibatch loop: 100%|██████████| 175/175 [08:53<00:00,  2.90s/it, accuracy=0.88, accuracy_depends=0.659, cost=29.4] 
test minibatch loop: 100%|██████████| 18/18 [00:20<00:00,  1.02s/it, accuracy=0.912, accuracy_depends=0.558, cost=35.4]
train minibatch loop:   0%|          | 0/175 [00:00<?, ?it/s]

time taken: 554.0591886043549
epoch: 8, training loss: 32.714707, training acc: 0.883740, training depends: 0.624559, valid loss: 34.587814, valid acc: 0.928932, valid depends: 0.606608



train minibatch loop: 100%|██████████| 175/175 [08:52<00:00,  2.91s/it, accuracy=0.893, accuracy_depends=0.614, cost=30.2]
test minibatch loop: 100%|██████████| 18/18 [00:20<00:00,  1.03s/it, accuracy=0.902, accuracy_depends=0.633, cost=30.9]
train minibatch loop:   0%|          | 0/175 [00:00<?, ?it/s]

time taken: 553.5390350818634
epoch: 9, training loss: 29.959949, training acc: 0.894363, training depends: 0.654938, valid loss: 29.847070, valid acc: 0.932327, valid depends: 0.684108



train minibatch loop: 100%|██████████| 175/175 [08:54<00:00,  2.89s/it, accuracy=0.917, accuracy_depends=0.711, cost=23.7]
test minibatch loop: 100%|██████████| 18/18 [00:20<00:00,  1.02s/it, accuracy=0.904, accuracy_depends=0.62, cost=32.4] 
train minibatch loop:   0%|          | 0/175 [00:00<?, ?it/s]

time taken: 555.5344078540802
epoch: 10, training loss: 27.314152, training acc: 0.902848, training depends: 0.684169, valid loss: 29.510490, valid acc: 0.935458, valid depends: 0.673691



train minibatch loop: 100%|██████████| 175/175 [08:54<00:00,  2.90s/it, accuracy=0.903, accuracy_depends=0.737, cost=22.2]
test minibatch loop: 100%|██████████| 18/18 [00:20<00:00,  1.03s/it, accuracy=0.925, accuracy_depends=0.736, cost=24.7]
train minibatch loop:   0%|          | 0/175 [00:00<?, ?it/s]

time taken: 555.2886633872986
epoch: 11, training loss: 25.153192, training acc: 0.911327, training depends: 0.707428, valid loss: 25.020954, valid acc: 0.948164, valid depends: 0.749139



train minibatch loop: 100%|██████████| 175/175 [08:54<00:00,  2.93s/it, accuracy=0.912, accuracy_depends=0.744, cost=20.6]
test minibatch loop: 100%|██████████| 18/18 [00:21<00:00,  1.03s/it, accuracy=0.928, accuracy_depends=0.757, cost=22.8]
train minibatch loop:   0%|          | 0/175 [00:00<?, ?it/s]

time taken: 555.6348447799683
epoch: 12, training loss: 23.113035, training acc: 0.917519, training depends: 0.734917, valid loss: 23.136805, valid acc: 0.951224, valid depends: 0.762697



train minibatch loop: 100%|██████████| 175/175 [08:58<00:00,  2.93s/it, accuracy=0.934, accuracy_depends=0.764, cost=18.8]
test minibatch loop: 100%|██████████| 18/18 [00:20<00:00,  1.03s/it, accuracy=0.92, accuracy_depends=0.778, cost=21.7] 
train minibatch loop:   0%|          | 0/175 [00:00<?, ?it/s]

time taken: 559.2290847301483
epoch: 13, training loss: 20.231450, training acc: 0.924965, training depends: 0.770418, valid loss: 21.125830, valid acc: 0.963642, valid depends: 0.789239



train minibatch loop: 100%|██████████| 175/175 [08:58<00:00,  2.95s/it, accuracy=0.943, accuracy_depends=0.828, cost=15.7]
test minibatch loop: 100%|██████████| 18/18 [00:20<00:00,  1.03s/it, accuracy=0.951, accuracy_depends=0.767, cost=20.4]
train minibatch loop:   0%|          | 0/175 [00:00<?, ?it/s]

time taken: 559.6122906208038
epoch: 14, training loss: 18.605789, training acc: 0.930739, training depends: 0.786606, valid loss: 19.762061, valid acc: 0.963161, valid depends: 0.804913



train minibatch loop: 100%|██████████| 175/175 [08:56<00:00,  2.89s/it, accuracy=0.939, accuracy_depends=0.82, cost=14.1] 
test minibatch loop: 100%|██████████| 18/18 [00:20<00:00,  1.02s/it, accuracy=0.946, accuracy_depends=0.765, cost=19.4]
train minibatch loop:   0%|          | 0/175 [00:00<?, ?it/s]

time taken: 556.6700410842896
epoch: 15, training loss: 18.015131, training acc: 0.936161, training depends: 0.787019, valid loss: 20.387789, valid acc: 0.970386, valid depends: 0.797880



train minibatch loop: 100%|██████████| 175/175 [08:55<00:00,  2.91s/it, accuracy=0.946, accuracy_depends=0.809, cost=14.3]
test minibatch loop: 100%|██████████| 18/18 [00:20<00:00,  1.03s/it, accuracy=0.951, accuracy_depends=0.793, cost=17.6]
train minibatch loop:   0%|          | 0/175 [00:00<?, ?it/s]

time taken: 556.2308747768402
epoch: 16, training loss: 16.398371, training acc: 0.944262, training depends: 0.805975, valid loss: 17.875515, valid acc: 0.977298, valid depends: 0.810636



train minibatch loop: 100%|██████████| 175/175 [08:55<00:00,  2.89s/it, accuracy=0.96, accuracy_depends=0.839, cost=12.3] 
test minibatch loop: 100%|██████████| 18/18 [00:20<00:00,  1.08s/it, accuracy=0.964, accuracy_depends=0.809, cost=15.1]
train minibatch loop:   0%|          | 0/175 [00:00<?, ?it/s]

time taken: 556.5172815322876
epoch: 17, training loss: 14.402380, training acc: 0.950167, training depends: 0.831203, valid loss: 15.801414, valid acc: 0.980971, valid depends: 0.841222



train minibatch loop: 100%|██████████| 175/175 [08:54<00:00,  2.92s/it, accuracy=0.943, accuracy_depends=0.868, cost=11.7]
test minibatch loop: 100%|██████████| 18/18 [00:20<00:00,  1.03s/it, accuracy=0.969, accuracy_depends=0.786, cost=16.7]
train minibatch loop:   0%|          | 0/175 [00:00<?, ?it/s]

time taken: 555.6660966873169
epoch: 18, training loss: 13.564262, training acc: 0.955922, training depends: 0.837798, valid loss: 16.738385, valid acc: 0.988350, valid depends: 0.815528



train minibatch loop: 100%|██████████| 175/175 [08:56<00:00,  2.91s/it, accuracy=0.946, accuracy_depends=0.841, cost=12.8]
test minibatch loop: 100%|██████████| 18/18 [00:20<00:00,  1.02s/it, accuracy=0.951, accuracy_depends=0.809, cost=17]  
train minibatch loop:   0%|          | 0/175 [00:00<?, ?it/s]

time taken: 557.5348522663116
epoch: 19, training loss: 12.664963, training acc: 0.960651, training depends: 0.845649, valid loss: 15.546729, valid acc: 0.987737, valid depends: 0.836474



train minibatch loop: 100%|██████████| 175/175 [08:55<00:00,  2.91s/it, accuracy=0.965, accuracy_depends=0.906, cost=8.91]
test minibatch loop: 100%|██████████| 18/18 [00:20<00:00,  1.02s/it, accuracy=0.956, accuracy_depends=0.842, cost=12.1]
train minibatch loop:   0%|          | 0/175 [00:00<?, ?it/s]

time taken: 556.4496295452118
epoch: 20, training loss: 11.079982, training acc: 0.964616, training depends: 0.867020, valid loss: 12.391548, valid acc: 0.994884, valid depends: 0.876719



train minibatch loop: 100%|██████████| 175/175 [08:55<00:00,  2.93s/it, accuracy=0.962, accuracy_depends=0.901, cost=8.57]
test minibatch loop: 100%|██████████| 18/18 [00:20<00:00,  1.02s/it, accuracy=0.974, accuracy_depends=0.873, cost=11.9]
train minibatch loop:   0%|          | 0/175 [00:00<?, ?it/s]

time taken: 556.6404461860657
epoch: 21, training loss: 10.501838, training acc: 0.967369, training depends: 0.873317, valid loss: 12.322381, valid acc: 0.995629, valid depends: 0.874118



train minibatch loop: 100%|██████████| 175/175 [08:55<00:00,  2.91s/it, accuracy=0.955, accuracy_depends=0.868, cost=11]  
test minibatch loop: 100%|██████████| 18/18 [00:20<00:00,  1.02s/it, accuracy=0.987, accuracy_depends=0.881, cost=9.26]
train minibatch loop:   0%|          | 0/175 [00:00<?, ?it/s]

time taken: 556.3338551521301
epoch: 22, training loss: 9.719140, training acc: 0.968852, training depends: 0.883401, valid loss: 12.303984, valid acc: 0.998993, valid depends: 0.875089



train minibatch loop: 100%|██████████| 175/175 [08:55<00:00,  2.91s/it, accuracy=0.962, accuracy_depends=0.879, cost=8.82]
test minibatch loop: 100%|██████████| 18/18 [00:20<00:00,  1.02s/it, accuracy=0.966, accuracy_depends=0.894, cost=9.35]
train minibatch loop:   0%|          | 0/175 [00:00<?, ?it/s]

time taken: 556.6658236980438
epoch: 23, training loss: 9.652018, training acc: 0.970605, training depends: 0.880603, valid loss: 10.448509, valid acc: 1.000401, valid depends: 0.895559



train minibatch loop: 100%|██████████| 175/175 [08:55<00:00,  2.93s/it, accuracy=0.974, accuracy_depends=0.905, cost=7.22]
test minibatch loop: 100%|██████████| 18/18 [00:20<00:00,  1.03s/it, accuracy=0.969, accuracy_depends=0.855, cost=12.4]
train minibatch loop:   0%|          | 0/175 [00:00<?, ?it/s]

time taken: 556.4406621456146
epoch: 24, training loss: 9.503980, training acc: 0.972713, training depends: 0.880009, valid loss: 13.553925, valid acc: 0.999245, valid depends: 0.860306



train minibatch loop: 100%|██████████| 175/175 [08:56<00:00,  2.92s/it, accuracy=0.976, accuracy_depends=0.887, cost=8.15]
test minibatch loop: 100%|██████████| 18/18 [00:20<00:00,  1.03s/it, accuracy=0.977, accuracy_depends=0.938, cost=7.55]
train minibatch loop:   0%|          | 0/175 [00:00<?, ?it/s]

time taken: 556.8788993358612
epoch: 25, training loss: 8.932340, training acc: 0.974598, training depends: 0.888902, valid loss: 9.621113, valid acc: 1.002071, valid depends: 0.913567



train minibatch loop: 100%|██████████| 175/175 [08:56<00:00,  2.95s/it, accuracy=0.976, accuracy_depends=0.91, cost=6.95] 
test minibatch loop: 100%|██████████| 18/18 [00:20<00:00,  1.03s/it, accuracy=0.984, accuracy_depends=0.922, cost=7.96]
train minibatch loop:   0%|          | 0/175 [00:00<?, ?it/s]

time taken: 557.5436239242554
epoch: 26, training loss: 7.763038, training acc: 0.976600, training depends: 0.906384, valid loss: 9.311355, valid acc: 1.002204, valid depends: 0.916223



train minibatch loop: 100%|██████████| 175/175 [08:54<00:00,  2.90s/it, accuracy=0.972, accuracy_depends=0.941, cost=5.32]
test minibatch loop: 100%|██████████| 18/18 [00:20<00:00,  1.02s/it, accuracy=0.972, accuracy_depends=0.894, cost=9.01]
train minibatch loop:   0%|          | 0/175 [00:00<?, ?it/s]

time taken: 555.3644354343414
epoch: 27, training loss: 7.169568, training acc: 0.978475, training depends: 0.912633, valid loss: 8.532252, valid acc: 1.004756, valid depends: 0.924155



train minibatch loop: 100%|██████████| 175/175 [08:56<00:00,  2.91s/it, accuracy=0.976, accuracy_depends=0.929, cost=5.9] 
test minibatch loop: 100%|██████████| 18/18 [00:20<00:00,  1.02s/it, accuracy=0.972, accuracy_depends=0.93, cost=7.81] 
train minibatch loop:   0%|          | 0/175 [00:00<?, ?it/s]

time taken: 556.7860813140869
epoch: 28, training loss: 7.115701, training acc: 0.980013, training depends: 0.911473, valid loss: 7.503770, valid acc: 1.007342, valid depends: 0.942110



train minibatch loop: 100%|██████████| 175/175 [08:56<00:00,  2.93s/it, accuracy=0.977, accuracy_depends=0.931, cost=5.13]
test minibatch loop: 100%|██████████| 18/18 [00:20<00:00,  1.03s/it, accuracy=0.979, accuracy_depends=0.935, cost=6.98]

time taken: 556.8666005134583
epoch: 29, training loss: 7.283201, training acc: 0.979966, training depends: 0.908750, valid loss: 7.462735, valid acc: 1.008739, valid depends: 0.939194






In [12]:
seq, deps = sess.run([model.tags_seq, model.tags_seq_depends],
        feed_dict={model.word_ids:batch_x[:1],
                  model.char_ids:batch_char[:1]})

In [13]:
seq = seq[0]
deps = deps[0]

In [14]:
tag2idx['root']

4

In [15]:
seq[seq>0]

array([ 7,  8,  9, 10,  5,  6,  7,  8, 21, 15, 13, 21,  4, 12, 16, 12, 18,
        1, 13, 15, 17,  9, 24, 16,  1, 14,  6, 10], dtype=int32)

In [16]:
batch_y[0][seq>0]

array([ 7,  8,  9, 10,  5,  6,  7,  8, 21, 15, 17, 21,  4, 12, 16, 12, 18,
        1, 13, 15, 17,  9, 24, 16,  1, 14,  6, 10], dtype=int32)

In [17]:
deps[seq>0]

array([ 3, 14,  3,  3,  7,  3,  9,  7,  9, 12,  9, 12,  1, 14, 14, 16, 14,
       24, 19, 22, 20, 22, 16, 24, 28, 28, 25, 14], dtype=int32)

In [18]:
batch_depends[0][seq>0]

array([ 3, 14,  3,  3,  7,  3,  9,  7,  9, 12,  9, 12,  1, 14, 14, 16, 24,
       24, 19, 22, 20, 22, 16, 24, 28, 28, 25, 14], dtype=int32)

In [28]:
string = 'tolong tangkap gambar kami'

def char_str_idx(corpus, dic, UNK = 0):
    maxlen = max([len(i) for i in corpus])
    X = np.zeros((len(corpus), maxlen))
    for i in range(len(corpus)):
        for no, k in enumerate(corpus[i][:maxlen]):
            val = dic[k] if k in dic else UNK
            X[i, no] = val
    return X

def generate_char_seq(batch, UNK = 2):
    maxlen_c = max([len(k) for k in batch])
    x = [[len(i) for i in k] for k in batch]
    maxlen = max([j for i in x for j in i])
    temp = np.zeros((len(batch),maxlen_c,maxlen),dtype=np.int32)
    for i in range(len(batch)):
        for k in range(len(batch[i])):
            for no, c in enumerate(batch[i][k][::-1]):
                temp[i,k,-1-no] = char2idx.get(c, UNK)
    return temp

sequence = process_string(string)[:150]
X_seq = char_str_idx([sequence], word2idx, 2)
X_char_seq = generate_char_seq([sequence])

In [31]:
seq, deps = sess.run([model.tags_seq, model.tags_seq_depends],
        feed_dict={model.word_ids:X_seq,
                  model.char_ids:X_char_seq})

In [36]:
deps -1

array([[1, 1, 2, 3]], dtype=int32)

In [33]:
sequence

['tolong', 'tangkap', 'gambar', 'kami']

In [34]:
[idx2tag[i] for i in seq[0]]

['nsubj', 'compound', 'compound', 'det']

In [25]:
string = []
for i in range(len(seq[0])):
    string.append('%d\t%s\t_\t_\t_\t_\t%d\t%s'%(i+1,sequence[i],deps[0,i],idx2tag[seq[0,i]]))

In [26]:
string

['1\ttolong\t_\t_\t_\t_\t5\tnsubj',
 '2\ttangkap\t_\t_\t_\t_\t2\tacl',
 '3\tgambar\t_\t_\t_\t_\t3\tobj',
 '4\tkami\t_\t_\t_\t_\t4\tdet']

In [37]:
saver = tf.train.Saver(tf.trainable_variables())
saver.save(sess, 'luong-dependency/model.ckpt')

strings = ','.join(
    [
        n.name
        for n in tf.get_default_graph().as_graph_def().node
        if ('Variable' in n.op
        or 'Placeholder' in n.name
        or 'logits' in n.name
        or 'logits_depends' in n.name
        or 'alphas' in n.name)
        and 'Adam' not in n.name
        and 'beta' not in n.name
        and 'OptimizeLoss' not in n.name
        and 'Global_Step' not in n.name
        and 'Epoch_Step' not in n.name
        and 'learning_rate' not in n.name
    ]
)
strings.split(',')

['Placeholder',
 'Placeholder_1',
 'Placeholder_2',
 'Placeholder_3',
 'Variable',
 'Variable_1',
 'bidirectional_rnn_char_0/fw/lstm_cell/kernel',
 'bidirectional_rnn_char_0/fw/lstm_cell/bias',
 'bidirectional_rnn_char_0/bw/lstm_cell/kernel',
 'bidirectional_rnn_char_0/bw/lstm_cell/bias',
 'bidirectional_rnn_char_1/fw/lstm_cell/kernel',
 'bidirectional_rnn_char_1/fw/lstm_cell/bias',
 'bidirectional_rnn_char_1/bw/lstm_cell/kernel',
 'bidirectional_rnn_char_1/bw/lstm_cell/bias',
 'memory_layer/kernel',
 'memory_layer_1/kernel',
 'bidirectional_rnn_word_0/fw/attention_wrapper/lstm_cell/kernel',
 'bidirectional_rnn_word_0/fw/attention_wrapper/lstm_cell/bias',
 'bidirectional_rnn_word_0/fw/attention_wrapper/bahdanau_attention/query_layer/kernel',
 'bidirectional_rnn_word_0/fw/attention_wrapper/bahdanau_attention/attention_v',
 'bidirectional_rnn_word_0/fw/attention_wrapper/attention_layer/kernel',
 'bidirectional_rnn_word_0/bw/attention_wrapper/lstm_cell/kernel',
 'bidirectional_rnn_word_0/

In [38]:
import json
with open('luong-dependency.json','w') as fopen:
    fopen.write(json.dumps({'idx2tag':idx2tag,'idx2word':idx2word,
           'word2idx':word2idx,'tag2idx':tag2idx,'char2idx':char2idx}))

In [39]:
def freeze_graph(model_dir, output_node_names):

    if not tf.gfile.Exists(model_dir):
        raise AssertionError(
            "Export directory doesn't exists. Please specify an export "
            'directory: %s' % model_dir
        )

    checkpoint = tf.train.get_checkpoint_state(model_dir)
    input_checkpoint = checkpoint.model_checkpoint_path

    absolute_model_dir = '/'.join(input_checkpoint.split('/')[:-1])
    output_graph = absolute_model_dir + '/frozen_model.pb'
    clear_devices = True
    with tf.Session(graph = tf.Graph()) as sess:
        saver = tf.train.import_meta_graph(
            input_checkpoint + '.meta', clear_devices = clear_devices
        )
        saver.restore(sess, input_checkpoint)
        output_graph_def = tf.graph_util.convert_variables_to_constants(
            sess,
            tf.get_default_graph().as_graph_def(),
            output_node_names.split(','),
        )
        with tf.gfile.GFile(output_graph, 'wb') as f:
            f.write(output_graph_def.SerializeToString())
        print('%d ops in the final graph.' % len(output_graph_def.node))
        
def load_graph(frozen_graph_filename):
    with tf.gfile.GFile(frozen_graph_filename, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    with tf.Graph().as_default() as graph:
        tf.import_graph_def(graph_def)
    return graph

In [40]:
freeze_graph('luong-dependency', strings)

INFO:tensorflow:Restoring parameters from luong-dependency/model.ckpt
INFO:tensorflow:Froze 45 variables.
INFO:tensorflow:Converted 45 variables to const ops.
2531 ops in the final graph.


In [41]:
g = load_graph('luong-dependency/frozen_model.pb')

In [42]:
word_ids = g.get_tensor_by_name('import/Placeholder:0')
char_ids = g.get_tensor_by_name('import/Placeholder_1:0')
tags_seq = g.get_tensor_by_name('import/logits:0')
depends_seq = g.get_tensor_by_name('import/logits_depends:0')
test_sess = tf.InteractiveSession(graph = g)
seq, deps = test_sess.run([tags_seq, depends_seq],
            feed_dict = {
                word_ids: X_seq,
                char_ids: X_char_seq,
            })

print(seq,deps)



[[ 1  6 13  3]] [[2 2 3 4]]
