In [1]:
import tensorflow as tf
from tqdm import tqdm
import numpy as np
import re

In [2]:
with open('id_gsd-ud-train.conllu.txt') as fopen:
    corpus = fopen.read().split('\n')
    
with open('id_gsd-ud-test.conllu.txt') as fopen:
    corpus.extend(fopen.read().split('\n'))
    
with open('id_gsd-ud-dev.conllu.txt') as fopen:
    corpus.extend(fopen.read().split('\n'))

In [3]:
word2idx = {'PAD': 0,'NUM':1,'UNK':2}
tag2idx = {'PAD': 0}
char2idx = {'PAD': 0,'NUM':1,'UNK':2}
word_idx = 3
tag_idx = 1
char_idx = 3

def process_string(string):
    string = re.sub('[^A-Za-z0-9\-\/ ]+', ' ', string).split()
    return [to_title(y.strip()) for y in string]

def to_title(string):
    if string.isupper():
        string = string.title()
    return string

def process_corpus(corpus, until = None):
    global word2idx, tag2idx, char2idx, word_idx, tag_idx, char_idx
    sentences, words, depends, labels = [], [], [], []
    temp_sentence, temp_word, temp_depend, temp_label = [], [], [], []
    for sentence in corpus:
        if len(sentence):
            if sentence[0] == '#':
                continue
            sentence = sentence.split('\t')
            temp = process_string(sentence[1])
            if not len(temp):
                sentence[1] = 'EMPTY'
            sentence[1] = process_string(sentence[1])[0]
            for c in sentence[1]:
                if c not in char2idx:
                    char2idx[c] = char_idx
                    char_idx += 1
            if sentence[7] not in tag2idx:
                tag2idx[sentence[7]] = tag_idx
                tag_idx += 1
            if sentence[1] not in word2idx:
                word2idx[sentence[1]] = word_idx
                word_idx += 1
            temp_word.append(word2idx[sentence[1]])
            temp_depend.append(int(sentence[6]) + 1)
            temp_label.append(tag2idx[sentence[7]])
            temp_sentence.append(sentence[1])
        else:
            words.append(temp_word)
            depends.append(temp_depend)
            labels.append(temp_label)
            sentences.append(temp_sentence)
            temp_word = []
            temp_depend = []
            temp_label = []
            temp_sentence = []
    return sentences[:-1], words[:-1], depends[:-1], labels[:-1]
        
sentences, words, depends, labels = process_corpus(corpus)

In [4]:
from keras.preprocessing.sequence import pad_sequences

Using TensorFlow backend.


In [5]:
words = pad_sequences(words,padding='post')
depends = pad_sequences(depends,padding='post')
labels = pad_sequences(labels,padding='post')
words.shape

(5595, 189)

In [6]:
def generate_char_seq(batch, UNK = 2):
    maxlen_c = max([len(k) for k in batch])
    x = [[len(i) for i in k] for k in batch]
    maxlen = max([j for i in x for j in i])
    temp = np.zeros((len(batch),maxlen_c,maxlen),dtype=np.int32)
    for i in range(len(batch)):
        for k in range(len(batch[i])):
            for no, c in enumerate(batch[i][k][:maxlen][::-1]):
                temp[i,k,-1-no] = char2idx.get(c, UNK)
    return temp

In [7]:
idx2word = {idx: tag for tag, idx in word2idx.items()}
idx2tag = {i: w for w, i in tag2idx.items()}
char = generate_char_seq(sentences)

In [8]:
from sklearn.cross_validation import train_test_split
train_X, test_X, train_Y, test_Y, train_depends, test_depends, train_char, test_char = train_test_split(words,
                                                                           labels,
                                                                           depends,
                                                                           char,
                                                                           test_size=0.1)
train_X = words
train_Y = labels
train_depends = depends
train_char = char



In [9]:
class Model:
    def __init__(
        self,
        dim_word,
        dim_char,
        dropout,
        learning_rate,
        hidden_size_char,
        hidden_size_word,
        num_layers,
        maxlen
    ):
        def cells(size, reuse = False):
            return tf.contrib.rnn.DropoutWrapper(
                tf.nn.rnn_cell.LSTMCell(
                    size,
                    initializer = tf.orthogonal_initializer(),
                    reuse = reuse,
                ),
                output_keep_prob = dropout,
            )
        
        self.word_ids = tf.placeholder(tf.int32, shape = [None, None])
        self.char_ids = tf.placeholder(tf.int32, shape = [None, None, None])
        self.labels = tf.placeholder(tf.int32, shape = [None, None])
        self.depends = tf.placeholder(tf.int32, shape = [None, None])
        self.maxlen = tf.shape(self.word_ids)[1]
        self.lengths = tf.count_nonzero(self.word_ids, 1)

        self.word_embeddings = tf.Variable(
            tf.truncated_normal(
                [len(word2idx), dim_word], stddev = 1.0 / np.sqrt(dim_word)
            )
        )
        self.char_embeddings = tf.Variable(
            tf.truncated_normal(
                [len(char2idx), dim_char], stddev = 1.0 / np.sqrt(dim_char)
            )
        )

        word_embedded = tf.nn.embedding_lookup(
            self.word_embeddings, self.word_ids
        )
        char_embedded = tf.nn.embedding_lookup(
            self.char_embeddings, self.char_ids
        )
        s = tf.shape(char_embedded)
        char_embedded = tf.reshape(
            char_embedded, shape = [s[0] * s[1], s[-2], dim_char]
        )

        for n in range(num_layers):
            (out_fw, out_bw), (
                state_fw,
                state_bw,
            ) = tf.nn.bidirectional_dynamic_rnn(
                cell_fw = cells(hidden_size_char),
                cell_bw = cells(hidden_size_char),
                inputs = char_embedded,
                dtype = tf.float32,
                scope = 'bidirectional_rnn_char_%d' % (n),
            )
            char_embedded = tf.concat((out_fw, out_bw), 2)
        output = tf.reshape(
            char_embedded[:, -1], shape = [s[0], s[1], 2 * hidden_size_char]
        )
        word_embedded = tf.concat([word_embedded, output], axis = -1)

        for n in range(num_layers):
            (out_fw, out_bw), (
                state_fw,
                state_bw,
            ) = tf.nn.bidirectional_dynamic_rnn(
                cell_fw = cells(hidden_size_word),
                cell_bw = cells(hidden_size_word),
                inputs = word_embedded,
                dtype = tf.float32,
                scope = 'bidirectional_rnn_word_%d' % (n),
            )
            word_embedded = tf.concat((out_fw, out_bw), 2)

        logits = tf.layers.dense(word_embedded, len(idx2tag))
        
        tag_embeddings = tf.Variable(
            tf.truncated_normal(
                [len(idx2tag), dim_word], stddev = 1.0 / np.sqrt(dim_word)
            )
        )
        logits_max = tf.argmax(logits,axis=2,output_type=tf.int32)
        lookup_logits = tf.nn.embedding_lookup(
            tag_embeddings, logits_max
        )
        (out_fw, out_bw), _ = tf.nn.bidirectional_dynamic_rnn(
                cell_fw = cells(hidden_size_word),
                cell_bw = cells(hidden_size_word),
                inputs = word_embedded,
                dtype = tf.float32,
                scope = 'bidirectional_rnn_word_%d' % (10),
            )
        
        cast_mask = tf.cast(tf.sequence_mask(self.lengths + 1, maxlen = maxlen), dtype = tf.float32)
        cast_mask = tf.tile(tf.expand_dims(cast_mask,axis=1),[1,self.maxlen,1]) * 10
        
        lookup_logits = tf.concat((out_fw, out_bw), 2)
        logits_depends = tf.layers.dense(lookup_logits, maxlen)
        
        logits_depends = tf.multiply(logits_depends, cast_mask)
        
        log_likelihood, transition_params = tf.contrib.crf.crf_log_likelihood(
            logits, self.labels, self.lengths
        )
        with tf.variable_scope("depends"):
            log_likelihood_depends, transition_params_depends = tf.contrib.crf.crf_log_likelihood(
                logits_depends, self.depends, self.lengths
            )
        self.cost = tf.reduce_mean(-log_likelihood) + tf.reduce_mean(-log_likelihood_depends)
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate = learning_rate
        ).minimize(self.cost)
        
        mask = tf.sequence_mask(self.lengths, maxlen = self.maxlen)
        
        self.tags_seq, _ = tf.contrib.crf.crf_decode(
            logits, transition_params, self.lengths
        )
        self.tags_seq = tf.identity(self.tags_seq, name = 'logits')
        
        self.tags_seq_depends, _ = tf.contrib.crf.crf_decode(
            logits_depends, transition_params_depends, self.lengths
        )
        self.tags_seq_depends = tf.identity(self.tags_seq_depends, name = 'logits_depends')

        self.prediction = tf.boolean_mask(self.tags_seq, mask)
        mask_label = tf.boolean_mask(self.labels, mask)
        correct_pred = tf.equal(self.prediction, mask_label)
        correct_index = tf.cast(correct_pred, tf.float32)
        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
        
        self.prediction = tf.boolean_mask(self.tags_seq_depends, mask)
        mask_label = tf.boolean_mask(self.depends, mask)
        correct_pred = tf.equal(self.prediction, mask_label)
        correct_index = tf.cast(correct_pred, tf.float32)
        self.accuracy_depends = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [10]:
tf.reset_default_graph()
sess = tf.InteractiveSession()

dim_word = 128
dim_char = 256
dropout = 0.9
learning_rate = 1e-3
hidden_size_char = 64
hidden_size_word = 64
num_layers = 2
batch_size = 16

model = Model(dim_word,dim_char,dropout,learning_rate,hidden_size_char,hidden_size_word,num_layers,
             words.shape[1])
sess.run(tf.global_variables_initializer())

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [11]:
import time

for e in range(30):
    lasttime = time.time()
    train_acc, train_loss, test_acc, test_loss, train_acc_depends, test_acc_depends = 0, 0, 0, 0, 0, 0
    pbar = tqdm(
        range(0, len(train_X), batch_size), desc = 'train minibatch loop'
    )
    for i in pbar:
        batch_x = train_X[i : min(i + batch_size, train_X.shape[0])]
        batch_char = train_char[i : min(i + batch_size, train_X.shape[0])]
        batch_y = train_Y[i : min(i + batch_size, train_X.shape[0])]
        batch_depends = train_depends[i : min(i + batch_size, train_X.shape[0])]
        acc_depends, acc, cost, _ = sess.run(
            [model.accuracy_depends, model.accuracy, model.cost, model.optimizer],
            feed_dict = {
                model.word_ids: batch_x,
                model.char_ids: batch_char,
                model.labels: batch_y,
                model.depends: batch_depends
            },
        )
        assert not np.isnan(cost)
        train_loss += cost
        train_acc += acc
        train_acc_depends += acc_depends
        pbar.set_postfix(cost = cost, accuracy = acc, accuracy_depends = acc_depends)
        
    pbar = tqdm(
        range(0, len(test_X), batch_size), desc = 'test minibatch loop'
    )
    for i in pbar:
        batch_x = test_X[i : min(i + batch_size, test_X.shape[0])]
        batch_char = test_char[i : min(i + batch_size, test_X.shape[0])]
        batch_y = test_Y[i : min(i + batch_size, test_X.shape[0])]
        batch_depends = test_depends[i : min(i + batch_size, test_X.shape[0])]
        acc_depends, acc, cost = sess.run(
            [model.accuracy_depends, model.accuracy, model.cost],
            feed_dict = {
                model.word_ids: batch_x,
                model.char_ids: batch_char,
                model.labels: batch_y,
                model.depends: batch_depends
            },
        )
        assert not np.isnan(cost)
        test_loss += cost
        test_acc += acc
        test_acc_depends += acc_depends
        pbar.set_postfix(cost = cost, accuracy = acc, accuracy_depends = acc_depends)
    
    train_loss /= len(train_X) / batch_size
    train_acc /= len(train_X) / batch_size
    train_acc_depends /= len(train_X) / batch_size
    test_loss /= len(test_X) / batch_size
    test_acc /= len(test_X) / batch_size
    test_acc_depends /= len(test_X) / batch_size

    print('time taken:', time.time() - lasttime)
    print(
        'epoch: %d, training loss: %f, training acc: %f, training depends: %f, valid loss: %f, valid acc: %f, valid depends: %f\n'
        % (e, train_loss, train_acc, train_acc_depends, test_loss, test_acc, test_acc_depends)
    )

train minibatch loop: 100%|██████████| 350/350 [04:54<00:00,  1.25it/s, accuracy=0.668, accuracy_depends=0.183, cost=87.1] 
test minibatch loop: 100%|██████████| 35/35 [00:12<00:00,  2.80it/s, accuracy=0.667, accuracy_depends=0.156, cost=106] 
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 307.36950397491455
epoch: 0, training loss: 113.604116, training acc: 0.334303, training depends: 0.132292, valid loss: 89.666181, valid acc: 0.612814, valid depends: 0.180209



train minibatch loop: 100%|██████████| 350/350 [04:52<00:00,  1.27it/s, accuracy=0.824, accuracy_depends=0.279, cost=65.2]
test minibatch loop: 100%|██████████| 35/35 [00:12<00:00,  2.85it/s, accuracy=0.873, accuracy_depends=0.217, cost=78.3]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 304.94627022743225
epoch: 1, training loss: 73.298046, training acc: 0.723462, training depends: 0.224598, valid loss: 66.050747, valid acc: 0.805965, valid depends: 0.260614



train minibatch loop: 100%|██████████| 350/350 [04:49<00:00,  1.25it/s, accuracy=0.851, accuracy_depends=0.401, cost=53.8]
test minibatch loop: 100%|██████████| 35/35 [00:12<00:00,  2.84it/s, accuracy=0.894, accuracy_depends=0.283, cost=66.2]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 302.33295607566833
epoch: 2, training loss: 58.497387, training acc: 0.814718, training depends: 0.310092, valid loss: 54.758951, valid acc: 0.848820, valid depends: 0.352095



train minibatch loop: 100%|██████████| 350/350 [04:49<00:00,  1.25it/s, accuracy=0.859, accuracy_depends=0.45, cost=47]   
test minibatch loop: 100%|██████████| 35/35 [00:12<00:00,  2.84it/s, accuracy=0.885, accuracy_depends=0.398, cost=57.2]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 302.4782962799072
epoch: 3, training loss: 49.618007, training acc: 0.845828, training depends: 0.398147, valid loss: 46.800803, valid acc: 0.866843, valid depends: 0.449136



train minibatch loop: 100%|██████████| 350/350 [04:50<00:00,  1.26it/s, accuracy=0.87, accuracy_depends=0.511, cost=41.3] 
test minibatch loop: 100%|██████████| 35/35 [00:12<00:00,  2.85it/s, accuracy=0.9, accuracy_depends=0.441, cost=50.7]  
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 303.01437997817993
epoch: 4, training loss: 43.798713, training acc: 0.864436, training depends: 0.464102, valid loss: 41.958503, valid acc: 0.882352, valid depends: 0.500184



train minibatch loop: 100%|██████████| 350/350 [04:50<00:00,  1.25it/s, accuracy=0.878, accuracy_depends=0.523, cost=38.3]
test minibatch loop: 100%|██████████| 35/35 [00:12<00:00,  2.84it/s, accuracy=0.894, accuracy_depends=0.491, cost=48.9]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 302.65866446495056
epoch: 5, training loss: 39.696546, training acc: 0.878472, training depends: 0.512806, valid loss: 38.750176, valid acc: 0.891852, valid depends: 0.525846



train minibatch loop: 100%|██████████| 350/350 [04:50<00:00,  1.27it/s, accuracy=0.912, accuracy_depends=0.527, cost=35.9]
test minibatch loop: 100%|██████████| 35/35 [00:12<00:00,  2.85it/s, accuracy=0.91, accuracy_depends=0.428, cost=49.3] 
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 302.6099064350128
epoch: 6, training loss: 36.642407, training acc: 0.890689, training depends: 0.544802, valid loss: 38.512453, valid acc: 0.899887, valid depends: 0.504156



train minibatch loop: 100%|██████████| 350/350 [04:50<00:00,  1.25it/s, accuracy=0.92, accuracy_depends=0.576, cost=32.6] 
test minibatch loop: 100%|██████████| 35/35 [00:12<00:00,  2.84it/s, accuracy=0.912, accuracy_depends=0.566, cost=41.9]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 302.74964022636414
epoch: 7, training loss: 33.838288, training acc: 0.899438, training depends: 0.573574, valid loss: 32.416899, valid acc: 0.908697, valid depends: 0.602199



train minibatch loop: 100%|██████████| 350/350 [04:50<00:00,  1.25it/s, accuracy=0.905, accuracy_depends=0.588, cost=31.3]
test minibatch loop: 100%|██████████| 35/35 [00:12<00:00,  2.84it/s, accuracy=0.925, accuracy_depends=0.441, cost=47.4]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 302.71827507019043
epoch: 8, training loss: 31.085550, training acc: 0.908227, training depends: 0.607838, valid loss: 32.883002, valid acc: 0.914782, valid depends: 0.561821



train minibatch loop: 100%|██████████| 350/350 [04:50<00:00,  1.26it/s, accuracy=0.897, accuracy_depends=0.626, cost=28.1]
test minibatch loop: 100%|██████████| 35/35 [00:12<00:00,  2.85it/s, accuracy=0.91, accuracy_depends=0.536, cost=43.8] 
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 302.8139932155609
epoch: 9, training loss: 29.351552, training acc: 0.914069, training depends: 0.626075, valid loss: 29.849570, valid acc: 0.922865, valid depends: 0.607743



train minibatch loop: 100%|██████████| 350/350 [04:50<00:00,  1.25it/s, accuracy=0.927, accuracy_depends=0.653, cost=26.4]
test minibatch loop: 100%|██████████| 35/35 [00:12<00:00,  2.85it/s, accuracy=0.934, accuracy_depends=0.538, cost=37.8]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 302.7449142932892
epoch: 10, training loss: 27.637502, training acc: 0.919449, training depends: 0.643172, valid loss: 27.538111, valid acc: 0.928473, valid depends: 0.637713



train minibatch loop: 100%|██████████| 350/350 [04:50<00:00,  1.26it/s, accuracy=0.935, accuracy_depends=0.656, cost=24.5]
test minibatch loop: 100%|██████████| 35/35 [00:12<00:00,  2.85it/s, accuracy=0.925, accuracy_depends=0.622, cost=33.7]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 302.79065346717834
epoch: 11, training loss: 25.440289, training acc: 0.925219, training depends: 0.673297, valid loss: 25.778858, valid acc: 0.931724, valid depends: 0.663602



train minibatch loop: 100%|██████████| 350/350 [04:50<00:00,  1.24it/s, accuracy=0.943, accuracy_depends=0.683, cost=22.7]
test minibatch loop: 100%|██████████| 35/35 [00:12<00:00,  2.81it/s, accuracy=0.932, accuracy_depends=0.59, cost=36.3] 
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 302.62933683395386
epoch: 12, training loss: 24.122699, training acc: 0.929413, training depends: 0.687950, valid loss: 26.009962, valid acc: 0.933347, valid depends: 0.651207



train minibatch loop: 100%|██████████| 350/350 [04:50<00:00,  1.26it/s, accuracy=0.943, accuracy_depends=0.645, cost=22.4]
test minibatch loop: 100%|██████████| 35/35 [00:12<00:00,  2.84it/s, accuracy=0.939, accuracy_depends=0.647, cost=31.4]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 302.6395833492279
epoch: 13, training loss: 23.656343, training acc: 0.932355, training depends: 0.686377, valid loss: 22.376074, valid acc: 0.938260, valid depends: 0.719997



train minibatch loop: 100%|██████████| 350/350 [04:50<00:00,  1.26it/s, accuracy=0.939, accuracy_depends=0.656, cost=22.8]
test minibatch loop: 100%|██████████| 35/35 [00:12<00:00,  2.84it/s, accuracy=0.937, accuracy_depends=0.633, cost=31.2]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 302.59862756729126
epoch: 14, training loss: 21.349029, training acc: 0.936764, training depends: 0.725945, valid loss: 21.321049, valid acc: 0.944395, valid depends: 0.716793



train minibatch loop: 100%|██████████| 350/350 [04:50<00:00,  1.26it/s, accuracy=0.943, accuracy_depends=0.729, cost=20.7]
test minibatch loop: 100%|██████████| 35/35 [00:12<00:00,  2.80it/s, accuracy=0.948, accuracy_depends=0.6, cost=33.4]  
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 302.7840073108673
epoch: 15, training loss: 19.482525, training acc: 0.941480, training depends: 0.754640, valid loss: 23.376568, valid acc: 0.942785, valid depends: 0.669307



train minibatch loop: 100%|██████████| 350/350 [04:50<00:00,  1.24it/s, accuracy=0.939, accuracy_depends=0.786, cost=16.8]
test minibatch loop: 100%|██████████| 35/35 [00:12<00:00,  2.84it/s, accuracy=0.939, accuracy_depends=0.706, cost=27.5]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 302.6658065319061
epoch: 16, training loss: 18.540819, training acc: 0.944208, training depends: 0.764462, valid loss: 19.479202, valid acc: 0.947233, valid depends: 0.741194



train minibatch loop: 100%|██████████| 350/350 [04:50<00:00,  1.26it/s, accuracy=0.95, accuracy_depends=0.79, cost=16.3]  
test minibatch loop: 100%|██████████| 35/35 [00:12<00:00,  2.83it/s, accuracy=0.957, accuracy_depends=0.769, cost=24.9]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 302.5469253063202
epoch: 17, training loss: 18.119465, training acc: 0.945945, training depends: 0.767728, valid loss: 17.386115, valid acc: 0.949905, valid depends: 0.793675



train minibatch loop: 100%|██████████| 350/350 [04:49<00:00,  1.25it/s, accuracy=0.947, accuracy_depends=0.798, cost=15.5]
test minibatch loop: 100%|██████████| 35/35 [00:12<00:00,  2.85it/s, accuracy=0.962, accuracy_depends=0.715, cost=24.5]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 302.50203037261963
epoch: 18, training loss: 16.895199, training acc: 0.948845, training depends: 0.786115, valid loss: 17.764167, valid acc: 0.949173, valid depends: 0.784863



train minibatch loop: 100%|██████████| 350/350 [04:50<00:00,  1.25it/s, accuracy=0.954, accuracy_depends=0.813, cost=14.6]
test minibatch loop: 100%|██████████| 35/35 [00:12<00:00,  2.85it/s, accuracy=0.955, accuracy_depends=0.74, cost=24]   
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 302.8649597167969
epoch: 19, training loss: 15.900881, training acc: 0.951792, training depends: 0.798421, valid loss: 16.822653, valid acc: 0.954231, valid depends: 0.788080



train minibatch loop: 100%|██████████| 350/350 [04:50<00:00,  1.26it/s, accuracy=0.969, accuracy_depends=0.87, cost=12.4] 
test minibatch loop: 100%|██████████| 35/35 [00:12<00:00,  2.83it/s, accuracy=0.966, accuracy_depends=0.799, cost=21.7]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 302.54734802246094
epoch: 20, training loss: 14.606359, training acc: 0.954795, training depends: 0.818364, valid loss: 15.647238, valid acc: 0.956492, valid depends: 0.801869



train minibatch loop: 100%|██████████| 350/350 [04:50<00:00,  1.25it/s, accuracy=0.966, accuracy_depends=0.84, cost=10.6] 
test minibatch loop: 100%|██████████| 35/35 [00:12<00:00,  2.82it/s, accuracy=0.964, accuracy_depends=0.83, cost=19.2] 
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 302.83012104034424
epoch: 21, training loss: 13.618865, training acc: 0.958872, training depends: 0.831980, valid loss: 13.762990, valid acc: 0.958590, valid depends: 0.839101



train minibatch loop: 100%|██████████| 350/350 [04:50<00:00,  1.25it/s, accuracy=0.969, accuracy_depends=0.79, cost=12.6] 
test minibatch loop: 100%|██████████| 35/35 [00:12<00:00,  2.84it/s, accuracy=0.968, accuracy_depends=0.817, cost=17.6]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 302.7250463962555
epoch: 22, training loss: 12.954482, training acc: 0.959575, training depends: 0.839193, valid loss: 13.363716, valid acc: 0.961949, valid depends: 0.827933



train minibatch loop: 100%|██████████| 350/350 [04:50<00:00,  1.26it/s, accuracy=0.969, accuracy_depends=0.847, cost=11.5]
test minibatch loop: 100%|██████████| 35/35 [00:12<00:00,  2.82it/s, accuracy=0.959, accuracy_depends=0.81, cost=18.1] 
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 302.57196974754333
epoch: 23, training loss: 12.346895, training acc: 0.962427, training depends: 0.846693, valid loss: 13.538606, valid acc: 0.961974, valid depends: 0.839913



train minibatch loop: 100%|██████████| 350/350 [04:50<00:00,  1.26it/s, accuracy=0.962, accuracy_depends=0.897, cost=9.42]
test minibatch loop: 100%|██████████| 35/35 [00:12<00:00,  2.79it/s, accuracy=0.964, accuracy_depends=0.81, cost=17.4] 
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 302.7886848449707
epoch: 24, training loss: 11.754256, training acc: 0.964245, training depends: 0.852127, valid loss: 12.165086, valid acc: 0.964785, valid depends: 0.854131



train minibatch loop: 100%|██████████| 350/350 [04:50<00:00,  1.25it/s, accuracy=0.977, accuracy_depends=0.901, cost=7.78]
test minibatch loop: 100%|██████████| 35/35 [00:12<00:00,  2.83it/s, accuracy=0.962, accuracy_depends=0.862, cost=14.2]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 302.55317759513855
epoch: 25, training loss: 10.910226, training acc: 0.966214, training depends: 0.866050, valid loss: 11.051731, valid acc: 0.966341, valid depends: 0.869671



train minibatch loop: 100%|██████████| 350/350 [04:49<00:00,  1.26it/s, accuracy=0.985, accuracy_depends=0.893, cost=7.27]
test minibatch loop: 100%|██████████| 35/35 [00:12<00:00,  2.82it/s, accuracy=0.968, accuracy_depends=0.855, cost=15.1]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 302.4679582118988
epoch: 26, training loss: 10.355563, training acc: 0.968222, training depends: 0.872719, valid loss: 10.513800, valid acc: 0.968562, valid depends: 0.875760



train minibatch loop: 100%|██████████| 350/350 [04:49<00:00,  1.25it/s, accuracy=0.966, accuracy_depends=0.882, cost=8.58]
test minibatch loop: 100%|██████████| 35/35 [00:12<00:00,  2.85it/s, accuracy=0.977, accuracy_depends=0.846, cost=13.1]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 302.3526186943054
epoch: 27, training loss: 9.721893, training acc: 0.970057, training depends: 0.882153, valid loss: 10.249874, valid acc: 0.970564, valid depends: 0.876219



train minibatch loop: 100%|██████████| 350/350 [04:50<00:00,  1.25it/s, accuracy=0.973, accuracy_depends=0.87, cost=9.36] 
test minibatch loop: 100%|██████████| 35/35 [00:12<00:00,  2.82it/s, accuracy=0.966, accuracy_depends=0.812, cost=15.4]
train minibatch loop:   0%|          | 0/350 [00:00<?, ?it/s]

time taken: 302.7226359844208
epoch: 28, training loss: 9.570963, training acc: 0.971573, training depends: 0.881705, valid loss: 12.513657, valid acc: 0.969140, valid depends: 0.831850



train minibatch loop: 100%|██████████| 350/350 [04:50<00:00,  1.26it/s, accuracy=0.973, accuracy_depends=0.912, cost=7.45]
test minibatch loop: 100%|██████████| 35/35 [00:12<00:00,  2.85it/s, accuracy=0.977, accuracy_depends=0.855, cost=13.8]

time taken: 302.54565358161926
epoch: 29, training loss: 9.403749, training acc: 0.971946, training depends: 0.882790, valid loss: 9.385898, valid acc: 0.974280, valid depends: 0.889292






In [12]:
seq, deps = sess.run([model.tags_seq, model.tags_seq_depends],
        feed_dict={model.word_ids:batch_x[:1],
                  model.char_ids:batch_char[:1]})

In [13]:
seq = seq[0]
deps = deps[0]

In [14]:
seq[seq>0]

array([ 1,  2, 10,  1,  4,  7, 21,  8, 12,  7, 20, 10, 18, 11, 15, 17, 21,
       10, 21, 20, 10, 17, 15, 17, 21, 10, 21, 20, 15, 17,  7,  8, 15, 17,
       21, 10, 21, 20, 10,  3,  1, 13,  3,  9, 14, 22,  7,  8, 21,  7,  7,
       21, 20, 10], dtype=int32)

In [15]:
batch_y[0][seq>0]

array([ 1,  2, 10,  1,  4,  7, 21,  8, 12,  7, 20, 10, 18, 11, 15, 17, 21,
       10, 21, 20, 10, 17, 15, 17, 21, 10, 21, 20, 15, 17,  7,  8, 15, 17,
       21, 10, 21, 20, 10,  3,  1, 13,  3,  9, 14, 22,  7,  8, 21,  7,  7,
       21, 20, 10], dtype=int32)

In [16]:
deps[seq>0]

array([ 6,  6,  6,  6,  1,  9,  9,  6,  9, 12,  9,  9, 15,  9, 17, 15, 21,
       21, 21, 15, 23, 21, 25, 23, 29, 29, 29, 24, 31, 16, 35, 31, 37, 33,
       39, 40, 40, 32,  7, 43, 47, 42, 43, 44, 47,  6, 49, 49, 49, 49, 53,
       53, 53, 33], dtype=int32)

In [17]:
batch_depends[0][seq>0]

array([ 6,  6,  6,  6,  1,  9,  9,  6,  9, 12,  9,  9, 15,  9, 17, 15, 21,
       21, 21, 15, 23, 15, 25, 23, 29, 29, 29, 23, 31, 15, 33, 31, 35, 33,
       39, 39, 39, 33,  6, 42, 47, 42, 42, 44, 47,  6, 49, 47, 49, 54, 54,
       54, 49, 47], dtype=int32)

In [18]:
string = 'tolong tangkap gambar kami'

def char_str_idx(corpus, dic, UNK = 0):
    maxlen = max([len(i) for i in corpus])
    X = np.zeros((len(corpus), maxlen))
    for i in range(len(corpus)):
        for no, k in enumerate(corpus[i][:maxlen]):
            val = dic[k] if k in dic else UNK
            X[i, no] = val
    return X

def generate_char_seq(batch, UNK = 2):
    maxlen_c = max([len(k) for k in batch])
    x = [[len(i) for i in k] for k in batch]
    maxlen = max([j for i in x for j in i])
    temp = np.zeros((len(batch),maxlen_c,maxlen),dtype=np.int32)
    for i in range(len(batch)):
        for k in range(len(batch[i])):
            for no, c in enumerate(batch[i][k][::-1]):
                temp[i,k,-1-no] = char2idx.get(c, UNK)
    return temp

sequence = process_string(string)[:150]
X_seq = char_str_idx([sequence], word2idx, 2)
X_char_seq = generate_char_seq([sequence])

In [19]:
seq, deps = sess.run([model.tags_seq, model.tags_seq_depends],
        feed_dict={model.word_ids:X_seq,
                  model.char_ids:X_char_seq})

In [20]:
deps - 1

array([[2, 0, 2, 3]], dtype=int32)

In [21]:
sequence

['tolong', 'tangkap', 'gambar', 'kami']

In [22]:
[idx2tag[i] for i in seq[0]]

['nsubj', 'root', 'compound', 'det']

In [23]:
seq

array([[ 1,  4, 13,  3]], dtype=int32)

In [24]:
string = []
for i in range(len(seq[0])):
    string.append('%d\t%s\t_\t_\t_\t_\t%d\t%s'%(i+1,sequence[i],deps[0,i],idx2tag[seq[0,i]]))

In [25]:
string

['1\ttolong\t_\t_\t_\t_\t3\tnsubj',
 '2\ttangkap\t_\t_\t_\t_\t1\troot',
 '3\tgambar\t_\t_\t_\t_\t3\tcompound',
 '4\tkami\t_\t_\t_\t_\t4\tdet']

In [26]:
saver = tf.train.Saver(tf.trainable_variables())
saver.save(sess, 'concat-dependency/model.ckpt')

strings = ','.join(
    [
        n.name
        for n in tf.get_default_graph().as_graph_def().node
        if ('Variable' in n.op
        or 'Placeholder' in n.name
        or 'logits' in n.name
        or 'logits_depends' in n.name
        or 'alphas' in n.name)
        and 'Adam' not in n.name
        and 'beta' not in n.name
        and 'OptimizeLoss' not in n.name
        and 'Global_Step' not in n.name
        and 'Epoch_Step' not in n.name
        and 'learning_rate' not in n.name
    ]
)
strings.split(',')

['Placeholder',
 'Placeholder_1',
 'Placeholder_2',
 'Placeholder_3',
 'Variable',
 'Variable_1',
 'bidirectional_rnn_char_0/fw/lstm_cell/kernel',
 'bidirectional_rnn_char_0/fw/lstm_cell/bias',
 'bidirectional_rnn_char_0/bw/lstm_cell/kernel',
 'bidirectional_rnn_char_0/bw/lstm_cell/bias',
 'bidirectional_rnn_char_1/fw/lstm_cell/kernel',
 'bidirectional_rnn_char_1/fw/lstm_cell/bias',
 'bidirectional_rnn_char_1/bw/lstm_cell/kernel',
 'bidirectional_rnn_char_1/bw/lstm_cell/bias',
 'bidirectional_rnn_word_0/fw/lstm_cell/kernel',
 'bidirectional_rnn_word_0/fw/lstm_cell/bias',
 'bidirectional_rnn_word_0/bw/lstm_cell/kernel',
 'bidirectional_rnn_word_0/bw/lstm_cell/bias',
 'bidirectional_rnn_word_1/fw/lstm_cell/kernel',
 'bidirectional_rnn_word_1/fw/lstm_cell/bias',
 'bidirectional_rnn_word_1/bw/lstm_cell/kernel',
 'bidirectional_rnn_word_1/bw/lstm_cell/bias',
 'dense/kernel',
 'dense/bias',
 'Variable_2',
 'bidirectional_rnn_word_10/fw/lstm_cell/kernel',
 'bidirectional_rnn_word_10/fw/lstm_c

In [27]:
import json
with open('concat-dependency.json','w') as fopen:
    fopen.write(json.dumps({'idx2tag':idx2tag,'idx2word':idx2word,
           'word2idx':word2idx,'tag2idx':tag2idx,'char2idx':char2idx}))

In [28]:
def freeze_graph(model_dir, output_node_names):

    if not tf.gfile.Exists(model_dir):
        raise AssertionError(
            "Export directory doesn't exists. Please specify an export "
            'directory: %s' % model_dir
        )

    checkpoint = tf.train.get_checkpoint_state(model_dir)
    input_checkpoint = checkpoint.model_checkpoint_path

    absolute_model_dir = '/'.join(input_checkpoint.split('/')[:-1])
    output_graph = absolute_model_dir + '/frozen_model.pb'
    clear_devices = True
    with tf.Session(graph = tf.Graph()) as sess:
        saver = tf.train.import_meta_graph(
            input_checkpoint + '.meta', clear_devices = clear_devices
        )
        saver.restore(sess, input_checkpoint)
        output_graph_def = tf.graph_util.convert_variables_to_constants(
            sess,
            tf.get_default_graph().as_graph_def(),
            output_node_names.split(','),
        )
        with tf.gfile.GFile(output_graph, 'wb') as f:
            f.write(output_graph_def.SerializeToString())
        print('%d ops in the final graph.' % len(output_graph_def.node))
        
def load_graph(frozen_graph_filename):
    with tf.gfile.GFile(frozen_graph_filename, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    with tf.Graph().as_default() as graph:
        tf.import_graph_def(graph_def)
    return graph

In [29]:
freeze_graph('concat-dependency', strings)

INFO:tensorflow:Restoring parameters from concat-dependency/model.ckpt
INFO:tensorflow:Froze 29 variables.
INFO:tensorflow:Converted 29 variables to const ops.
2135 ops in the final graph.


In [30]:
g = load_graph('concat-dependency/frozen_model.pb')

In [31]:
word_ids = g.get_tensor_by_name('import/Placeholder:0')
char_ids = g.get_tensor_by_name('import/Placeholder_1:0')
tags_seq = g.get_tensor_by_name('import/logits:0')
depends_seq = g.get_tensor_by_name('import/logits_depends:0')
test_sess = tf.InteractiveSession(graph = g)
seq, deps = test_sess.run([tags_seq, depends_seq],
            feed_dict = {
                word_ids: X_seq,
                char_ids: X_char_seq,
            })

print(seq,deps)



[[ 1  4 13  3]] [[3 1 3 4]]


In [33]:
[n.name for n in g.as_graph_def().node][::-1]

['import/logits_depends',
 'import/cond_3/Merge',
 'import/cond_3/ReverseSequence_1',
 'import/cond_3/concat',
 'import/cond_3/concat/axis',
 'import/cond_3/Squeeze_2',
 'import/cond_3/rnn_1/transpose_1',
 'import/cond_3/rnn_1/concat_2',
 'import/cond_3/rnn_1/concat_2/axis',
 'import/cond_3/rnn_1/concat_2/values_0',
 'import/cond_3/rnn_1/range_1',
 'import/cond_3/rnn_1/range_1/delta',
 'import/cond_3/rnn_1/range_1/start',
 'import/cond_3/rnn_1/Rank_1',
 'import/cond_3/rnn_1/TensorArrayStack/TensorArrayGatherV3',
 'import/cond_3/rnn_1/TensorArrayStack/range',
 'import/cond_3/rnn_1/TensorArrayStack/range/delta',
 'import/cond_3/rnn_1/TensorArrayStack/range/start',
 'import/cond_3/rnn_1/TensorArrayStack/TensorArraySizeV3',
 'import/cond_3/rnn_1/while/Exit_2',
 'import/cond_3/rnn_1/while/NextIteration_3',
 'import/cond_3/rnn_1/while/NextIteration_2',
 'import/cond_3/rnn_1/while/NextIteration_1',
 'import/cond_3/rnn_1/while/NextIteration',
 'import/cond_3/rnn_1/while/add_1',
 'import/cond_3

In [51]:
q = g.get_tensor_by_name('import/transitions:0')
w = g.get_tensor_by_name('import/Variable:0')
e = g.get_tensor_by_name('import/depends/transitions:0')

In [52]:
r = test_sess.run([q,w,e])

In [54]:
r[1].shape

(21707, 128)

In [55]:
r[0].shape

(32, 32)

In [59]:
sorted(list(idx2tag.values()))

['PAD',
 'acl',
 'advcl',
 'advmod',
 'amod',
 'appos',
 'aux',
 'case',
 'cc',
 'ccomp',
 'compound',
 'compound:plur',
 'conj',
 'cop',
 'csubj',
 'csubj:pass',
 'dep',
 'det',
 'fixed',
 'flat',
 'iobj',
 'mark',
 'nmod',
 'nsubj',
 'nsubj:pass',
 'nummod',
 'obj',
 'obl',
 'parataxis',
 'punct',
 'root',
 'xcomp']