In [1]:
import tensorflow as tf
from tqdm import tqdm
import numpy as np
import re

In [2]:
with open('id_gsd-ud-train.conllu.txt') as fopen:
    corpus = fopen.read().split('\n')
    
with open('id_gsd-ud-test.conllu.txt') as fopen:
    corpus.extend(fopen.read().split('\n'))
    
with open('id_gsd-ud-dev.conllu.txt') as fopen:
    corpus.extend(fopen.read().split('\n'))

In [3]:
word2idx = {'PAD': 0,'NUM':1,'UNK':2}
tag2idx = {'PAD': 0}
char2idx = {'PAD': 0,'NUM':1,'UNK':2}
word_idx = 3
tag_idx = 1
char_idx = 3

def process_string(string):
    string = re.sub('[^A-Za-z0-9\-\/ ]+', ' ', string).split()
    return [to_title(y.strip()) for y in string]

def to_title(string):
    if string.isupper():
        string = string.title()
    return string

def process_corpus(corpus, until = None):
    global word2idx, tag2idx, char2idx, word_idx, tag_idx, char_idx
    sentences, words, depends, labels, pos = [], [], [], [], []
    temp_sentence, temp_word, temp_depend, temp_label, temp_pos = [], [], [], [], []
    for sentence in corpus:
        if len(sentence):
            if sentence[0] == '#':
                continue
            sentence = sentence.split('\t')
            temp = process_string(sentence[1])
            if not len(temp):
                sentence[1] = 'EMPTY'
            sentence[1] = process_string(sentence[1])[0]
            for c in sentence[1]:
                if c not in char2idx:
                    char2idx[c] = char_idx
                    char_idx += 1
            if sentence[7] not in tag2idx:
                tag2idx[sentence[7]] = tag_idx
                tag_idx += 1
            if sentence[1] not in word2idx:
                word2idx[sentence[1]] = word_idx
                word_idx += 1
            temp_word.append(word2idx[sentence[1]])
            temp_depend.append(int(sentence[6]) + 1)
            temp_label.append(tag2idx[sentence[7]])
            temp_sentence.append(sentence[1])
            temp_pos.append(sentence[3])
        else:
            words.append(temp_word)
            depends.append(temp_depend)
            labels.append(temp_label)
            sentences.append(temp_sentence)
            pos.append(temp_pos)
            temp_word = []
            temp_depend = []
            temp_label = []
            temp_sentence = []
            temp_pos = []
    return sentences[:-1], words[:-1], depends[:-1], labels[:-1], pos[:-1]
        
sentences, words, depends, labels, pos = process_corpus(corpus)

In [4]:
import json

with open('augmented.json') as fopen:
    augmented = json.load(fopen)

In [5]:
def parse_XY(texts):
    global word2idx, tag2idx, char2idx, word_idx, tag_idx, char_idx
    outside, sentences = [], []
    for no, text in enumerate(texts):
        s = process_string(text)
        sentences.append(s)
        inside = []
        for w in s:
            for c in w:
                if c not in char2idx:
                    char2idx[c] = char_idx
                    char_idx += 1
            
            if w not in word2idx:
                word2idx[w] = word_idx
                word_idx += 1
                
            inside.append(word2idx[w])
        outside.append(inside)
    return outside, sentences

In [6]:
text_augmented = []
for a in augmented:
    text_augmented.extend(a[0])
    depends.extend(a[1])
    labels.extend(a[2])

In [7]:
outside, new_sentences = parse_XY(text_augmented)

In [8]:
from keras.preprocessing.sequence import pad_sequences

Using TensorFlow backend.


In [9]:
words.extend(outside)
sentences.extend(new_sentences)

In [10]:
len(words), len(depends), len(labels), len(sentences)

(50365, 50365, 50365, 50365)

In [11]:
def generate_char_seq(batch, UNK = 2):
    maxlen_c = max([len(k) for k in batch])
    x = [[len(i) for i in k] for k in batch]
    maxlen = max([j for i in x for j in i])
    temp = np.zeros((len(batch),maxlen_c,maxlen),dtype=np.int32)
    for i in range(len(batch)):
        for k in range(len(batch[i])):
            for no, c in enumerate(batch[i][k][:maxlen][::-1]):
                temp[i,k,-1-no] = char2idx.get(c, UNK)
    return temp

In [12]:
idx2word = {idx: tag for tag, idx in word2idx.items()}
idx2tag = {i: w for w, i in tag2idx.items()}
char = generate_char_seq(sentences)

In [13]:
words = pad_sequences(words,padding='post')
depends = pad_sequences(depends,padding='post')
labels = pad_sequences(labels,padding='post')
words.shape

(50365, 189)

In [14]:
from sklearn.cross_validation import train_test_split
train_X, test_X, train_Y, test_Y, train_depends, test_depends, train_char, test_char = train_test_split(
                                                                           words,
                                                                           labels,
                                                                           depends,
                                                                           char,
                                                                           test_size=0.1)



In [15]:
class Model:
    def __init__(
        self,
        dim_word,
        dim_char,
        dropout,
        learning_rate,
        hidden_size_char,
        hidden_size_word,
        num_layers,
        maxlen
    ):
        def cells(size, reuse = False):
            return tf.contrib.rnn.DropoutWrapper(
                tf.nn.rnn_cell.LSTMCell(
                    size,
                    initializer = tf.orthogonal_initializer(),
                    reuse = reuse,
                ),
                output_keep_prob = dropout,
            )

        def luong(embedded, size):
            attention_mechanism = tf.contrib.seq2seq.LuongAttention(
                num_units = hidden_size_word, memory = embedded
            )
            return tf.contrib.seq2seq.AttentionWrapper(
                cell = cells(hidden_size_word),
                attention_mechanism = attention_mechanism,
                attention_layer_size = hidden_size_word,
            )
        self.word_ids = tf.placeholder(tf.int32, shape = [None, None])
        self.char_ids = tf.placeholder(tf.int32, shape = [None, None, None])
        self.labels = tf.placeholder(tf.int32, shape = [None, None])
        self.depends = tf.placeholder(tf.int32, shape = [None, None])
        self.maxlen = tf.shape(self.word_ids)[1]
        self.lengths = tf.count_nonzero(self.word_ids, 1)

        self.word_embeddings = tf.Variable(
            tf.truncated_normal(
                [len(word2idx), dim_word], stddev = 1.0 / np.sqrt(dim_word)
            )
        )
        self.char_embeddings = tf.Variable(
            tf.truncated_normal(
                [len(char2idx), dim_char], stddev = 1.0 / np.sqrt(dim_char)
            )
        )

        word_embedded = tf.nn.embedding_lookup(
            self.word_embeddings, self.word_ids
        )
        char_embedded = tf.nn.embedding_lookup(
            self.char_embeddings, self.char_ids
        )
        s = tf.shape(char_embedded)
        char_embedded = tf.reshape(
            char_embedded, shape = [s[0] * s[1], s[-2], dim_char]
        )
        for n in range(num_layers):
            (out_fw, out_bw), (
                state_fw,
                state_bw,
            ) = tf.nn.bidirectional_dynamic_rnn(
                cell_fw = cells(hidden_size_char),
                cell_bw = cells(hidden_size_char),
                inputs = char_embedded,
                dtype = tf.float32,
                scope = 'bidirectional_rnn_char_%d' % (n),
            )
            char_embedded = tf.concat((out_fw, out_bw), 2)
        output = tf.reshape(
            char_embedded[:, -1], shape = [s[0], s[1], 2 * hidden_size_char]
        )
        word_embedded = tf.concat([word_embedded, output], axis = -1)

        for n in range(num_layers):
            (out_fw, out_bw), (
                state_fw,
                state_bw,
            ) = tf.nn.bidirectional_dynamic_rnn(
                cell_fw = luong(word_embedded, hidden_size_word),
                cell_bw = luong(word_embedded, hidden_size_word),
                inputs = word_embedded,
                dtype = tf.float32,
                scope = 'bidirectional_rnn_word_%d' % (n),
            )
            word_embedded = tf.concat((out_fw, out_bw), 2)

        logits = tf.layers.dense(word_embedded, len(idx2tag))
        tag_embeddings = tf.Variable(
            tf.truncated_normal(
                [len(idx2tag), dim_word], stddev = 1.0 / np.sqrt(dim_word)
            )
        )
        logits_max = tf.argmax(logits,axis=2,output_type=tf.int32)
        lookup_logits = tf.nn.embedding_lookup(
            tag_embeddings, logits_max
        )
        (out_fw, out_bw), _ = tf.nn.bidirectional_dynamic_rnn(
                cell_fw = cells(hidden_size_word),
                cell_bw = cells(hidden_size_word),
                inputs = word_embedded,
                dtype = tf.float32,
                scope = 'bidirectional_rnn_word_%d' % (n),
            )
        
        cast_mask = tf.cast(tf.sequence_mask(self.lengths + 1, maxlen = maxlen), dtype = tf.float32)
        cast_mask = tf.tile(tf.expand_dims(cast_mask,axis=1),[1,self.maxlen,1]) * 10
        
        lookup_logits = tf.concat((out_fw, out_bw), 2)
        logits_depends = tf.layers.dense(lookup_logits, maxlen)
        
        logits_depends = tf.multiply(logits_depends, cast_mask)
        
        log_likelihood, transition_params = tf.contrib.crf.crf_log_likelihood(
            logits, self.labels, self.lengths
        )
        with tf.variable_scope("depends"):
            log_likelihood_depends, transition_params_depends = tf.contrib.crf.crf_log_likelihood(
                logits_depends, self.depends, self.lengths
            )
        self.cost = tf.reduce_mean(-log_likelihood) + tf.reduce_mean(-log_likelihood_depends)
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate = learning_rate
        ).minimize(self.cost)
        
        mask = tf.sequence_mask(self.lengths, maxlen = self.maxlen)
        
        self.tags_seq, _ = tf.contrib.crf.crf_decode(
            logits, transition_params, self.lengths
        )
        self.tags_seq = tf.identity(self.tags_seq, name = 'logits')
        
        self.tags_seq_depends, _ = tf.contrib.crf.crf_decode(
            logits_depends, transition_params_depends, self.lengths
        )
        self.tags_seq_depends = tf.identity(self.tags_seq_depends, name = 'logits_depends')

        self.prediction = tf.boolean_mask(self.tags_seq, mask)
        mask_label = tf.boolean_mask(self.labels, mask)
        correct_pred = tf.equal(self.prediction, mask_label)
        correct_index = tf.cast(correct_pred, tf.float32)
        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
        
        self.prediction = tf.boolean_mask(self.tags_seq_depends, mask)
        mask_label = tf.boolean_mask(self.depends, mask)
        correct_pred = tf.equal(self.prediction, mask_label)
        correct_index = tf.cast(correct_pred, tf.float32)
        self.accuracy_depends = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [16]:
tf.reset_default_graph()
sess = tf.InteractiveSession()

dim_word = 128
dim_char = 256
dropout = 0.85
learning_rate = 1e-3
hidden_size_char = 128
hidden_size_word = 64
num_layers = 2
batch_size = 16

model = Model(dim_word,dim_char,dropout,learning_rate,hidden_size_char,hidden_size_word,num_layers,
             words.shape[1])
sess.run(tf.global_variables_initializer())


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [17]:
import time

for e in range(10):
    lasttime = time.time()
    train_acc, train_loss, test_acc, test_loss, train_acc_depends, test_acc_depends = 0, 0, 0, 0, 0, 0
    pbar = tqdm(
        range(0, len(train_X), batch_size), desc = 'train minibatch loop'
    )
    for i in pbar:
        batch_x = train_X[i : min(i + batch_size, train_X.shape[0])]
        batch_char = train_char[i : min(i + batch_size, train_X.shape[0])]
        batch_y = train_Y[i : min(i + batch_size, train_X.shape[0])]
        batch_depends = train_depends[i : min(i + batch_size, train_X.shape[0])]
        acc_depends, acc, cost, _ = sess.run(
            [model.accuracy_depends, model.accuracy, model.cost, model.optimizer],
            feed_dict = {
                model.word_ids: batch_x,
                model.char_ids: batch_char,
                model.labels: batch_y,
                model.depends: batch_depends
            },
        )
        assert not np.isnan(cost)
        train_loss += cost
        train_acc += acc
        train_acc_depends += acc_depends
        pbar.set_postfix(cost = cost, accuracy = acc, accuracy_depends = acc_depends)
    pbar = tqdm(
        range(0, len(test_X), batch_size), desc = 'test minibatch loop'
    )
    for i in pbar:
        batch_x = test_X[i : min(i + batch_size, test_X.shape[0])]
        batch_char = test_char[i : min(i + batch_size, test_X.shape[0])]
        batch_y = test_Y[i : min(i + batch_size, test_X.shape[0])]
        batch_depends = test_depends[i : min(i + batch_size, test_X.shape[0])]
        acc_depends, acc, cost = sess.run(
            [model.accuracy_depends, model.accuracy, model.cost],
            feed_dict = {
                model.word_ids: batch_x,
                model.char_ids: batch_char,
                model.labels: batch_y,
                model.depends: batch_depends
            },
        )
        assert not np.isnan(cost)
        test_loss += cost
        test_acc += acc
        test_acc_depends += acc_depends
        pbar.set_postfix(cost = cost, accuracy = acc, accuracy_depends = acc_depends)
    
    train_loss /= len(train_X) / batch_size
    train_acc /= len(train_X) / batch_size
    train_acc_depends /= len(train_X) / batch_size
    test_loss /= len(test_X) / batch_size
    test_acc /= len(test_X) / batch_size
    test_acc_depends /= len(test_X) / batch_size

    print('time taken:', time.time() - lasttime)
    print(
        'epoch: %d, training loss: %f, training acc: %f, training depends: %f, valid loss: %f, valid acc: %f, valid depends: %f\n'
        % (e, train_loss, train_acc, train_acc_depends, test_loss, test_acc, test_acc_depends)
    )

train minibatch loop: 100%|██████████| 2833/2833 [1:12:52<00:00,  1.54s/it, accuracy=0.875, accuracy_depends=0.693, cost=24.4]
test minibatch loop: 100%|██████████| 315/315 [03:10<00:00,  1.75it/s, accuracy=0.829, accuracy_depends=0.665, cost=29]  
train minibatch loop:   0%|          | 0/2833 [00:00<?, ?it/s]

time taken: 4563.180447816849
epoch: 0, training loss: 61.783961, training acc: 0.686706, training depends: 0.425950, valid loss: 39.344299, valid acc: 0.825150, valid depends: 0.600830



train minibatch loop: 100%|██████████| 2833/2833 [1:12:42<00:00,  1.54s/it, accuracy=0.922, accuracy_depends=0.851, cost=13.2]
test minibatch loop: 100%|██████████| 315/315 [03:09<00:00,  1.74it/s, accuracy=0.894, accuracy_depends=0.8, cost=16]    
train minibatch loop:   0%|          | 0/2833 [00:00<?, ?it/s]

time taken: 4551.800651788712
epoch: 1, training loss: 28.065757, training acc: 0.868640, training depends: 0.720747, valid loss: 24.269384, valid acc: 0.883178, valid depends: 0.773814



train minibatch loop: 100%|██████████| 2833/2833 [1:12:40<00:00,  1.54s/it, accuracy=0.973, accuracy_depends=0.939, cost=5.88]
test minibatch loop: 100%|██████████| 315/315 [03:09<00:00,  1.75it/s, accuracy=0.935, accuracy_depends=0.914, cost=9.33]
train minibatch loop:   0%|          | 0/2833 [00:00<?, ?it/s]

time taken: 4549.277266025543
epoch: 2, training loss: 16.749653, training acc: 0.919367, training depends: 0.835569, valid loss: 17.442899, valid acc: 0.911510, valid depends: 0.851503



train minibatch loop: 100%|██████████| 2833/2833 [1:12:38<00:00,  1.54s/it, accuracy=0.983, accuracy_depends=0.949, cost=3.36]
test minibatch loop: 100%|██████████| 315/315 [03:09<00:00,  1.74it/s, accuracy=0.939, accuracy_depends=0.935, cost=6.91]
train minibatch loop:   0%|          | 0/2833 [00:00<?, ?it/s]

time taken: 4547.3607811927795
epoch: 3, training loss: 11.579865, training acc: 0.945103, training depends: 0.885029, valid loss: 14.599322, valid acc: 0.926866, valid depends: 0.878660



train minibatch loop: 100%|██████████| 2833/2833 [1:12:39<00:00,  1.53s/it, accuracy=0.99, accuracy_depends=0.956, cost=3.35] 
test minibatch loop: 100%|██████████| 315/315 [03:09<00:00,  1.73it/s, accuracy=0.955, accuracy_depends=0.959, cost=6.04]
train minibatch loop:   0%|          | 0/2833 [00:00<?, ?it/s]

time taken: 4548.768400430679
epoch: 4, training loss: 7.585491, training acc: 0.963065, training depends: 0.925449, valid loss: 12.118104, valid acc: 0.938473, valid depends: 0.908994



train minibatch loop: 100%|██████████| 2833/2833 [1:12:38<00:00,  1.54s/it, accuracy=0.997, accuracy_depends=0.959, cost=1.85] 
test minibatch loop: 100%|██████████| 315/315 [03:09<00:00,  1.75it/s, accuracy=0.935, accuracy_depends=0.943, cost=6.83]
train minibatch loop:   0%|          | 0/2833 [00:00<?, ?it/s]

time taken: 4548.058335542679
epoch: 5, training loss: 6.261957, training acc: 0.971101, training depends: 0.938693, valid loss: 11.085619, valid acc: 0.946185, valid depends: 0.923939



train minibatch loop: 100%|██████████| 2833/2833 [1:12:43<00:00,  1.55s/it, accuracy=0.99, accuracy_depends=0.973, cost=1.6]   
test minibatch loop: 100%|██████████| 315/315 [03:09<00:00,  1.73it/s, accuracy=0.951, accuracy_depends=0.955, cost=5.28]
train minibatch loop:   0%|          | 0/2833 [00:00<?, ?it/s]

time taken: 4553.394236087799
epoch: 6, training loss: 4.545035, training acc: 0.978248, training depends: 0.955501, valid loss: 10.506786, valid acc: 0.949097, valid depends: 0.932812



train minibatch loop: 100%|██████████| 2833/2833 [1:12:42<00:00,  1.54s/it, accuracy=0.997, accuracy_depends=0.976, cost=1.69] 
test minibatch loop: 100%|██████████| 315/315 [03:09<00:00,  1.73it/s, accuracy=0.951, accuracy_depends=0.959, cost=4.65]
train minibatch loop:   0%|          | 0/2833 [00:00<?, ?it/s]

time taken: 4552.490092277527
epoch: 7, training loss: 3.915957, training acc: 0.982050, training depends: 0.961163, valid loss: 10.037659, valid acc: 0.952602, valid depends: 0.937374



train minibatch loop: 100%|██████████| 2833/2833 [1:12:38<00:00,  1.54s/it, accuracy=0.983, accuracy_depends=0.929, cost=4.88] 
test minibatch loop: 100%|██████████| 315/315 [03:09<00:00,  1.72it/s, accuracy=0.931, accuracy_depends=0.922, cost=9.41]
train minibatch loop:   0%|          | 0/2833 [00:00<?, ?it/s]

time taken: 4547.812601804733
epoch: 8, training loss: 3.559482, training acc: 0.984057, training depends: 0.964962, valid loss: 17.300933, valid acc: 0.924944, valid depends: 0.872288



train minibatch loop: 100%|██████████| 2833/2833 [1:12:46<00:00,  1.54s/it, accuracy=1, accuracy_depends=0.99, cost=1.06]      
test minibatch loop: 100%|██████████| 315/315 [03:09<00:00,  1.75it/s, accuracy=0.947, accuracy_depends=0.988, cost=2.97]

time taken: 4555.395034074783
epoch: 9, training loss: 3.126417, training acc: 0.985561, training depends: 0.969058, valid loss: 9.102047, valid acc: 0.957782, valid depends: 0.949280






In [18]:
def pred2label(pred):
    out = []
    for pred_i in pred:
        out_i = []
        for p in pred_i:
            out_i.append(idx2tag[p])
        out.append(out_i)
    return out

In [19]:
seq, deps = sess.run([model.tags_seq, model.tags_seq_depends],
            feed_dict = {
                model.word_ids: batch_x,
                model.char_ids: batch_char,
            },
)

In [20]:
real_Y, predict_Y, real_depends, predict_depends = [], [], [], []

pbar = tqdm(
    range(0, len(test_X), batch_size), desc = 'validation minibatch loop'
)
for i in pbar:
    batch_x = test_X[i : min(i + batch_size, test_X.shape[0])]
    batch_char = test_char[i : min(i + batch_size, test_X.shape[0])]
    batch_y = test_Y[i : min(i + batch_size, test_X.shape[0])]
    batch_depends = test_depends[i : min(i + batch_size, test_X.shape[0])]
    seq, deps = sess.run([model.tags_seq, model.tags_seq_depends],
            feed_dict = {
                model.word_ids: batch_x,
                model.char_ids: batch_char,
            },
    )
    predicted = pred2label(seq)
    real = pred2label(batch_y)
    predict_Y.extend(predicted)
    real_Y.extend(real)
    
    real_depends.extend(batch_depends.tolist())
    predict_depends.extend(deps.tolist())

validation minibatch loop: 100%|██████████| 315/315 [03:04<00:00,  1.75it/s]


In [21]:
from sklearn.metrics import classification_report
print(classification_report(np.array(real_Y).ravel(), np.array(predict_Y).ravel(), digits = 4))

               precision    recall  f1-score   support

          PAD     1.0000    1.0000    1.0000    840905
          acl     0.9249    0.9392    0.9320      3094
        advcl     0.8952    0.8478    0.8709      1209
       advmod     0.9629    0.9475    0.9551      4952
         amod     0.9288    0.9246    0.9267      4218
        appos     0.9535    0.9204    0.9367      2426
          aux     1.0000    1.0000    1.0000         1
         case     0.9796    0.9795    0.9796     10991
           cc     0.9686    0.9739    0.9713      3298
        ccomp     0.8426    0.8501    0.8463       447
     compound     0.9170    0.9477    0.9321      6787
compound:plur     0.9428    0.9744    0.9584       626
         conj     0.9539    0.9581    0.9560      4251
          cop     0.9625    0.9809    0.9716       993
        csubj     0.9655    0.8750    0.9180        32
   csubj:pass     1.0000    0.9167    0.9565        12
          dep     0.8905    0.8320    0.8603       518
         

In [22]:
from sklearn.metrics import classification_report
print(classification_report(np.array(real_depends).ravel(), 
                            np.array(predict_depends).ravel(), digits = 4))

             precision    recall  f1-score   support

          0     1.0000    1.0000    1.0000    840905
          1     0.9709    0.9726    0.9717      5037
          2     0.9310    0.9534    0.9420      4271
          3     0.9543    0.9485    0.9514      5148
          4     0.9587    0.9514    0.9551      6220
          5     0.9471    0.9631    0.9550      5984
          6     0.9593    0.9585    0.9589      5827
          7     0.9597    0.9554    0.9576      5789
          8     0.9657    0.9527    0.9592      5559
          9     0.9548    0.9517    0.9532      5088
         10     0.9565    0.9492    0.9528      4427
         11     0.9458    0.9631    0.9544      4280
         12     0.9584    0.9540    0.9562      3910
         13     0.9481    0.9586    0.9533      3791
         14     0.9385    0.9563    0.9473      3272
         15     0.9577    0.9389    0.9482      3306
         16     0.9383    0.9560    0.9471      3023
         17     0.9629    0.9417    0.9522   

In [23]:
string = 'tolong tangkap gambar kami'

def char_str_idx(corpus, dic, UNK = 0):
    maxlen = max([len(i) for i in corpus])
    X = np.zeros((len(corpus), maxlen))
    for i in range(len(corpus)):
        for no, k in enumerate(corpus[i][:maxlen]):
            val = dic[k] if k in dic else UNK
            X[i, no] = val
    return X

def generate_char_seq(batch, UNK = 2):
    maxlen_c = max([len(k) for k in batch])
    x = [[len(i) for i in k] for k in batch]
    maxlen = max([j for i in x for j in i])
    temp = np.zeros((len(batch),maxlen_c,maxlen),dtype=np.int32)
    for i in range(len(batch)):
        for k in range(len(batch[i])):
            for no, c in enumerate(batch[i][k][::-1]):
                temp[i,k,-1-no] = char2idx.get(c, UNK)
    return temp

sequence = process_string(string)
sequence

['tolong', 'tangkap', 'gambar', 'kami']

In [24]:
X_seq = char_str_idx([sequence], word2idx, 2)
X_char_seq = generate_char_seq([sequence])

In [25]:
X_char_seq.shape

(1, 4, 7)

In [26]:
seq, deps = sess.run([model.tags_seq, model.tags_seq_depends],
        feed_dict={model.word_ids:X_seq,
                  model.char_ids:X_char_seq})

In [27]:
deps[0] - 1

array([2, 0, 2, 1], dtype=int32)

In [28]:
[idx2tag[i] for i in seq[0]]

['parataxis', 'compound', 'compound', 'det']

In [29]:
saver = tf.train.Saver(tf.trainable_variables())
saver.save(sess, 'luong-dependency/model.ckpt')

strings = ','.join(
    [
        n.name
        for n in tf.get_default_graph().as_graph_def().node
        if ('Variable' in n.op
        or 'Placeholder' in n.name
        or 'logits' in n.name
        or 'logits_depends' in n.name
        or 'alphas' in n.name)
        and 'Adam' not in n.name
        and 'beta' not in n.name
        and 'OptimizeLoss' not in n.name
        and 'Global_Step' not in n.name
        and 'Epoch_Step' not in n.name
        and 'learning_rate' not in n.name
    ]
)
strings.split(',')

['Placeholder',
 'Placeholder_1',
 'Placeholder_2',
 'Placeholder_3',
 'Variable',
 'Variable_1',
 'bidirectional_rnn_char_0/fw/lstm_cell/kernel',
 'bidirectional_rnn_char_0/fw/lstm_cell/bias',
 'bidirectional_rnn_char_0/bw/lstm_cell/kernel',
 'bidirectional_rnn_char_0/bw/lstm_cell/bias',
 'bidirectional_rnn_char_1/fw/lstm_cell/kernel',
 'bidirectional_rnn_char_1/fw/lstm_cell/bias',
 'bidirectional_rnn_char_1/bw/lstm_cell/kernel',
 'bidirectional_rnn_char_1/bw/lstm_cell/bias',
 'memory_layer/kernel',
 'memory_layer_1/kernel',
 'bidirectional_rnn_word_0/fw/attention_wrapper/lstm_cell/kernel',
 'bidirectional_rnn_word_0/fw/attention_wrapper/lstm_cell/bias',
 'bidirectional_rnn_word_0/fw/attention_wrapper/attention_layer/kernel',
 'bidirectional_rnn_word_0/bw/attention_wrapper/lstm_cell/kernel',
 'bidirectional_rnn_word_0/bw/attention_wrapper/lstm_cell/bias',
 'bidirectional_rnn_word_0/bw/attention_wrapper/attention_layer/kernel',
 'memory_layer_2/kernel',
 'memory_layer_3/kernel',
 'bidi

In [30]:
import json
with open('luong-dependency.json','w') as fopen:
    fopen.write(json.dumps({'idx2tag':idx2tag,'idx2word':idx2word,
           'word2idx':word2idx,'tag2idx':tag2idx,'char2idx':char2idx}))

In [31]:
def freeze_graph(model_dir, output_node_names):

    if not tf.gfile.Exists(model_dir):
        raise AssertionError(
            "Export directory doesn't exists. Please specify an export "
            'directory: %s' % model_dir
        )

    checkpoint = tf.train.get_checkpoint_state(model_dir)
    input_checkpoint = checkpoint.model_checkpoint_path

    absolute_model_dir = '/'.join(input_checkpoint.split('/')[:-1])
    output_graph = absolute_model_dir + '/frozen_model.pb'
    clear_devices = True
    with tf.Session(graph = tf.Graph()) as sess:
        saver = tf.train.import_meta_graph(
            input_checkpoint + '.meta', clear_devices = clear_devices
        )
        saver.restore(sess, input_checkpoint)
        output_graph_def = tf.graph_util.convert_variables_to_constants(
            sess,
            tf.get_default_graph().as_graph_def(),
            output_node_names.split(','),
        )
        with tf.gfile.GFile(output_graph, 'wb') as f:
            f.write(output_graph_def.SerializeToString())
        print('%d ops in the final graph.' % len(output_graph_def.node))
        
def load_graph(frozen_graph_filename):
    with tf.gfile.GFile(frozen_graph_filename, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    with tf.Graph().as_default() as graph:
        tf.import_graph_def(graph_def)
    return graph

In [32]:
freeze_graph('luong-dependency', strings)

INFO:tensorflow:Restoring parameters from luong-dependency/model.ckpt
INFO:tensorflow:Froze 37 variables.
INFO:tensorflow:Converted 37 variables to const ops.
2491 ops in the final graph.


In [33]:
g = load_graph('luong-dependency/frozen_model.pb')

In [38]:
word_ids = g.get_tensor_by_name('import/Placeholder:0')
char_ids = g.get_tensor_by_name('import/Placeholder_1:0')
tags_seq = g.get_tensor_by_name('import/logits:0')
depends_seq = g.get_tensor_by_name('import/logits_depends:0')
test_sess = tf.InteractiveSession(graph = g)
seq, deps = test_sess.run([tags_seq, depends_seq],
            feed_dict = {
                word_ids: X_seq,
                char_ids: X_char_seq,
            })

print(seq,deps - 1)
[idx2tag[i] for i in seq[0]]

[[ 1 13 13  3]] [[3 1 0 1]]


['nsubj', 'compound', 'compound', 'det']