In [1]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '2'

In [2]:
with open('../Malaya-Dataset/dependency/gsd-ud-train.conllu.txt') as fopen:
    corpus = fopen.read().split('\n')
    
with open('../Malaya-Dataset/dependency/gsd-ud-test.conllu.txt') as fopen:
    corpus.extend(fopen.read().split('\n'))
    
with open('../Malaya-Dataset/dependency/gsd-ud-dev.conllu.txt') as fopen:
    corpus.extend(fopen.read().split('\n'))

In [3]:
import xlnet
import numpy as np
import tensorflow as tf
from tqdm import tqdm
import model_utils
import pickle
import json
pad_sequences = tf.keras.preprocessing.sequence.pad_sequences




In [8]:
import sentencepiece as spm
from prepro_utils import preprocess_text, encode_ids

sp_model = spm.SentencePieceProcessor()
sp_model.Load('alxlnet-base/sp10m.cased.v9.model')

def tokenize_fn(text):
    text = preprocess_text(text, lower= False)
    return encode_ids(sp_model, text)

In [9]:
SEG_ID_A   = 0
SEG_ID_B   = 1
SEG_ID_CLS = 2
SEG_ID_SEP = 3
SEG_ID_PAD = 4

special_symbols = {
    "<unk>"  : 0,
    "<s>"    : 1,
    "</s>"   : 2,
    "<cls>"  : 3,
    "<sep>"  : 4,
    "<pad>"  : 5,
    "<mask>" : 6,
    "<eod>"  : 7,
    "<eop>"  : 8,
}

VOCAB_SIZE = 32000
UNK_ID = special_symbols["<unk>"]
CLS_ID = special_symbols["<cls>"]
SEP_ID = special_symbols["<sep>"]
MASK_ID = special_symbols["<mask>"]
EOD_ID = special_symbols["<eod>"]

In [10]:
tag2idx = {'PAD': 0, 'X': 1}
tag_idx = 2

def process_corpus(corpus, until = None):
    global word2idx, tag2idx, char2idx, word_idx, tag_idx, char_idx
    sentences, words, depends, labels, pos, sequences = [], [], [], [], [], []
    temp_sentence, temp_word, temp_depend, temp_label, temp_pos = [], [], [], [], []
    segments, masks = [], []
    first_time = True
    for sentence in corpus:
        try:
            if len(sentence):
                if sentence[0] == '#':
                    continue
                if first_time:
                    print(sentence)
                    first_time = False
                sentence = sentence.split('\t')
                if sentence[7] not in tag2idx:
                    tag2idx[sentence[7]] = tag_idx
                    tag_idx += 1
                temp_word.append(sentence[1])
                temp_depend.append(int(sentence[6]) + 1)
                temp_label.append(tag2idx[sentence[7]])
                temp_sentence.append(sentence[1])
                temp_pos.append(sentence[3])
            else:
                if len(temp_sentence) < 2 or len(temp_word) != len(temp_label):
                    temp_word = []
                    temp_depend = []
                    temp_label = []
                    temp_sentence = []
                    temp_pos = []
                    continue
                bert_tokens = []
                labels_ = []
                depends_ = []
                seq_ = []
                for no, orig_token in enumerate(temp_word):
                    labels_.append(temp_label[no])
                    depends_.append(temp_depend[no])
                    t = tokenize_fn(orig_token)
                    bert_tokens.extend(t)
                    labels_.extend([1] * (len(t) - 1))
                    depends_.extend([0] * (len(t) - 1))
                    seq_.append(no + 1)
                bert_tokens.extend([4, 3])
                labels_.extend([0, 0])
                depends_.extend([0, 0])
                segment = [0] * (len(bert_tokens) - 1) + [SEG_ID_CLS]
                input_mask = [0] * len(segment)
                words.append(bert_tokens)
                depends.append(depends_)
                labels.append(labels_)
                sentences.append(bert_tokens)
                pos.append(temp_pos)
                sequences.append(seq_)
                segments.append(segment)
                masks.append(input_mask)
                temp_word = []
                temp_depend = []
                temp_label = []
                temp_sentence = []
                temp_pos = []
        except Exception as e:
            print(e, sentence)
    return sentences[:-1], words[:-1], depends[:-1], labels[:-1], pos[:-1], sequences[:-1], segments[:-1], masks[:-1]

In [11]:
sentences, words, depends, labels, _, _, segments, masks = process_corpus(corpus)

1	Sembungan	sembungan	PROPN	X--	_	4	nsubj	_	MorphInd=^sembungan<x>_X--$


In [12]:
import json

with open('../Malaya-Dataset/dependency/augmented-dependency.json') as fopen:
    augmented = json.load(fopen)

In [13]:
text_augmented, depends_augmented, labels_augmented = [], [], []

for a in augmented:
    text_augmented.extend(a[0])
    depends_augmented.extend(a[1])
    labels_augmented.extend((np.array(a[2]) + 1).tolist())

In [14]:
def parse_XY(texts, depends, labels):
    outside, sentences, outside_depends, outside_labels = [], [], [], []
    segments, masks = [], []
    for no, text in enumerate(texts):
        temp_depend = depends[no]
        temp_label = labels[no]
        s = text.split()
        sentences.append(s)
        bert_tokens = []
        labels_ = []
        depends_ = []
        for no, orig_token in enumerate(s):
            labels_.append(temp_label[no])
            depends_.append(temp_depend[no])
            t = tokenize_fn(orig_token)
            bert_tokens.extend(t)
            labels_.extend([1] * (len(t) - 1))
            depends_.extend([0] * (len(t) - 1))
        bert_tokens.extend([4, 3])
        labels_.extend([0, 0])
        depends_.extend([0, 0])
        segment = [0] * (len(bert_tokens) - 1) + [SEG_ID_CLS]
        input_mask = [0] * len(segment)
        outside.append(bert_tokens)
        outside_depends.append(depends_)
        outside_labels.append(labels_)
        segments.append(segment)
        masks.append(input_mask)
    return outside, sentences, outside_depends, outside_labels, segments, masks

In [15]:
outside, _, outside_depends, outside_labels, outside_segments, outside_masks = parse_XY(text_augmented, 
                                                       depends_augmented, 
                                                       labels_augmented)

In [16]:
words.extend(outside)
depends.extend(outside_depends)
labels.extend(outside_labels)
segments.extend(outside_segments)
masks.extend(outside_masks)

In [17]:
idx2tag = {v:k for k, v in tag2idx.items()}
idx2tag

{0: 'PAD',
 1: 'X',
 2: 'nsubj',
 3: 'cop',
 4: 'det',
 5: 'root',
 6: 'nsubj:pass',
 7: 'acl',
 8: 'case',
 9: 'obl',
 10: 'flat',
 11: 'punct',
 12: 'appos',
 13: 'amod',
 14: 'compound',
 15: 'advmod',
 16: 'cc',
 17: 'obj',
 18: 'conj',
 19: 'mark',
 20: 'advcl',
 21: 'nmod',
 22: 'nummod',
 23: 'dep',
 24: 'xcomp',
 25: 'ccomp',
 26: 'parataxis',
 27: 'compound:plur',
 28: 'fixed',
 29: 'aux',
 30: 'csubj',
 31: 'iobj',
 32: 'csubj:pass'}

In [18]:
from sklearn.model_selection import train_test_split

words_train, words_test, depends_train, depends_test, labels_train, labels_test, \
segments_train, segments_test, masks_train, masks_test \
= train_test_split(words, depends, labels, segments, masks, test_size = 0.2)

In [19]:
train_X = words_train
train_Y = labels_train
train_depends = depends_train

test_X = words_test
test_Y = labels_test
test_depends = depends_test

In [21]:
import xlnet
import tensorflow as tf
import numpy as np

kwargs = dict(
      is_training=True,
      use_tpu=False,
      use_bfloat16=False,
      dropout=0.1,
      dropatt=0.1,
      init='normal',
      init_range=0.1,
      init_std=0.05,
      clamp_len=-1)

xlnet_parameters = xlnet.RunConfig(**kwargs)
xlnet_config = xlnet.XLNetConfig(json_path='alxlnet-base/config.json')

In [22]:
epoch = 15
batch_size = 16
warmup_proportion = 0.1
num_train_steps = int(len(train_X) / batch_size * epoch)
num_warmup_steps = int(num_train_steps * warmup_proportion)
print(num_train_steps, num_warmup_steps)

training_parameters = dict(
      decay_method = 'poly',
      train_steps = num_train_steps,
      learning_rate = 2e-5,
      warmup_steps = num_warmup_steps,
      min_lr_ratio = 0.0,
      weight_decay = 0.00,
      adam_epsilon = 1e-8,
      num_core_per_host = 1,
      lr_layer_decay_rate = 1,
      use_tpu=False,
      use_bfloat16=False,
      dropout=0.0,
      dropatt=0.0,
      init='normal',
      init_range=0.1,
      init_std=0.02,
      clip = 1.0,
      clamp_len=-1,)

37770 3777


In [23]:
class Parameter:
    def __init__(self, decay_method, warmup_steps, weight_decay, adam_epsilon, 
                num_core_per_host, lr_layer_decay_rate, use_tpu, learning_rate, train_steps,
                min_lr_ratio, clip, **kwargs):
        self.decay_method = decay_method
        self.warmup_steps = warmup_steps
        self.weight_decay = weight_decay
        self.adam_epsilon = adam_epsilon
        self.num_core_per_host = num_core_per_host
        self.lr_layer_decay_rate = lr_layer_decay_rate
        self.use_tpu = use_tpu
        self.learning_rate = learning_rate
        self.train_steps = train_steps
        self.min_lr_ratio = min_lr_ratio
        self.clip = clip
        
training_parameters = Parameter(**training_parameters)

In [24]:
class BiAAttention:
    def __init__(self, input_size_encoder, input_size_decoder, num_labels):
        self.input_size_encoder = input_size_encoder
        self.input_size_decoder = input_size_decoder
        self.num_labels = num_labels
        
        self.W_d = tf.get_variable("W_d", shape=[self.num_labels, self.input_size_decoder],
           initializer=tf.contrib.layers.xavier_initializer())
        self.W_e = tf.get_variable("W_e", shape=[self.num_labels, self.input_size_encoder],
           initializer=tf.contrib.layers.xavier_initializer())
        self.U = tf.get_variable("U", shape=[self.num_labels, self.input_size_decoder, self.input_size_encoder],
           initializer=tf.contrib.layers.xavier_initializer())
        
    def forward(self, input_d, input_e, mask_d=None, mask_e=None):
        batch = tf.shape(input_d)[0]
        length_decoder = tf.shape(input_d)[1]
        length_encoder = tf.shape(input_e)[1]
        out_d = tf.expand_dims(tf.matmul(self.W_d, tf.transpose(input_d, [0, 2, 1])), 3)
        out_e = tf.expand_dims(tf.matmul(self.W_e, tf.transpose(input_e, [0, 2, 1])), 2)
        output = tf.matmul(tf.expand_dims(input_d, 1), self.U)
        output = tf.matmul(output, tf.transpose(tf.expand_dims(input_e, 1), [0, 1, 3, 2]))
        
        output = output + out_d + out_e
        
        if mask_d is not None:
            d = tf.expand_dims(tf.expand_dims(mask_d, 1), 3)
            e = tf.expand_dims(tf.expand_dims(mask_e, 1), 2)
            output = output * d * e
            
        return output
    
class BiLinear:
    def __init__(self, left_features, right_features, out_features):
        self.left_features = left_features
        self.right_features = right_features
        self.out_features = out_features
        
        self.U = tf.get_variable("U-bi", shape=[out_features, left_features, right_features],
           initializer=tf.contrib.layers.xavier_initializer())
        self.W_l = tf.get_variable("Wl", shape=[out_features, left_features],
           initializer=tf.contrib.layers.xavier_initializer())
        self.W_r = tf.get_variable("Wr", shape=[out_features, right_features],
           initializer=tf.contrib.layers.xavier_initializer())
    
    def forward(self, input_left, input_right):
        left_size = tf.shape(input_left)
        output_shape = tf.concat([left_size[:-1], [self.out_features]], axis = 0)
        batch = tf.cast(tf.reduce_prod(left_size[:-1]), tf.int32)
        input_left = tf.reshape(input_left, (batch, self.left_features))
        input_right = tf.reshape(input_right, (batch, self.right_features))
        tiled = tf.tile(tf.expand_dims(input_left, axis = 0), (self.out_features,1,1))
        output = tf.transpose(tf.reduce_sum(tf.matmul(tiled, self.U), axis = 2))
        output = output + tf.matmul(input_left, tf.transpose(self.W_l))\
        + tf.matmul(input_right, tf.transpose(self.W_r))
        
        return tf.reshape(output, output_shape)


    
    def decode(self, input_word, input_char, mask, leading_symbolic=0):
        out_arc, out_type, _ = self.forward(input_word, input_char, mask)
        batch = tf.shape(out_arc)[0]
        max_len = tf.shape(out_arc)[1]
        sec_max_len = tf.shape(out_arc)[2]
        out_arc = out_arc + tf.linalg.diag(tf.fill([max_len], -np.inf))
        minus_mask = tf.expand_dims(tf.cast(1 - mask, tf.bool), axis = 2)
        minus_mask = tf.tile(minus_mask, [1, 1, sec_max_len])
        out_arc = tf.where(minus_mask, tf.fill(tf.shape(out_arc), -np.inf), out_arc)
        heads = tf.argmax(out_arc, axis = 1)
        type_h, type_c = out_type
        batch = tf.shape(type_h)[0]
        max_len = tf.shape(type_h)[1]
        batch_index = tf.range(0, batch)
        t = tf.cast(tf.transpose(heads), tf.int32)
        broadcasted = tf.broadcast_to(batch_index, tf.shape(t))
        concatenated = tf.transpose(tf.concat([tf.expand_dims(broadcasted, axis = 0), 
                                               tf.expand_dims(t, axis = 0)], axis = 0))
        type_h = tf.gather_nd(type_h, concatenated)
        out_type = self.bilinear.forward(type_h, type_c)
        out_type = out_type[:, :, leading_symbolic:]
        types = tf.argmax(out_type, axis = 2)
        return heads, types
    
class Model:
    def __init__(
        self,
        learning_rate,
        hidden_size_word,
        cov = 0.0):
        
        self.words = tf.placeholder(tf.int32, (None, None))
        self.segment_ids = tf.placeholder(tf.int32, [None, None])
        self.input_masks = tf.placeholder(tf.float32, [None, None])
        self.heads = tf.placeholder(tf.int32, (None, None))
        self.types = tf.placeholder(tf.int32, (None, None))
        self.switch = tf.placeholder(tf.bool, None)
        self.mask = tf.cast(tf.math.not_equal(self.words, 0), tf.float32)
        self.maxlen = tf.shape(self.words)[1]
        self.lengths = tf.count_nonzero(self.words, 1)
        mask = self.mask
        heads = self.heads
        types = self.types
        
        self.arc_h = tf.layers.Dense(hidden_size_word)
        self.arc_c = tf.layers.Dense(hidden_size_word)
        self.attention = BiAAttention(hidden_size_word, hidden_size_word, 1)

        self.type_h = tf.layers.Dense(hidden_size_word)
        self.type_c = tf.layers.Dense(hidden_size_word)
        self.bilinear = BiLinear(hidden_size_word, hidden_size_word, len(tag2idx))
        
        xlnet_model = xlnet.XLNetModel(
            xlnet_config=xlnet_config,
            run_config=xlnet_parameters,
            input_ids=tf.transpose(self.words, [1, 0]),
            seg_ids=tf.transpose(self.segment_ids, [1, 0]),
            input_mask=tf.transpose(self.input_masks, [1, 0]))
        output_layer = xlnet_model.get_sequence_output()
        output_layer = tf.transpose(output_layer, [1, 0, 2])
        
        arc_h = tf.nn.elu(self.arc_h(output_layer))
        arc_c = tf.nn.elu(self.arc_c(output_layer))
        
        type_h = tf.nn.elu(self.type_h(output_layer))
        type_c = tf.nn.elu(self.type_c(output_layer))
        
        out_arc = tf.squeeze(self.attention.forward(arc_h, arc_c, mask_d=self.mask, 
                                                    mask_e=self.mask), axis = 1)
        
        batch = tf.shape(out_arc)[0]
        max_len = tf.shape(out_arc)[1]
        sec_max_len = tf.shape(out_arc)[2]
        batch_index = tf.range(0, batch)
        
        decode_arc = out_arc + tf.linalg.diag(tf.fill([max_len], -np.inf))
        minus_mask = tf.expand_dims(tf.cast(1 - mask, tf.bool), axis = 2)
        minus_mask = tf.tile(minus_mask, [1, 1, sec_max_len])
        decode_arc = tf.where(minus_mask, tf.fill(tf.shape(decode_arc), -np.inf), decode_arc)
        self.heads_seq = tf.argmax(decode_arc, axis = 1)
        self.heads_seq = tf.identity(self.heads_seq, name = 'heads_seq')
        
        t = tf.cast(tf.transpose(self.heads_seq), tf.int32)
        broadcasted = tf.broadcast_to(batch_index, tf.shape(t))
        concatenated = tf.transpose(tf.concat([tf.expand_dims(broadcasted, axis = 0), 
                                               tf.expand_dims(t, axis = 0)], axis = 0))
        type_h = tf.gather_nd(type_h, concatenated)
        out_type = self.bilinear.forward(type_h, type_c)
        self.tags_seq = tf.argmax(out_type, axis = 2)
        self.tags_seq = tf.identity(self.tags_seq, name = 'tags_seq')
        
        log_likelihood, transition_params = tf.contrib.crf.crf_log_likelihood(
            out_type, self.types, self.lengths
        )
        crf_loss = tf.reduce_mean(-log_likelihood)
        self.logits, _ = tf.contrib.crf.crf_decode(
            out_type, transition_params, self.lengths
        )
        self.logits = tf.identity(self.logits, name = 'logits')
        
        batch = tf.shape(out_arc)[0]
        max_len = tf.shape(out_arc)[1]
        batch_index = tf.range(0, batch)
        t = tf.transpose(heads)
        broadcasted = tf.broadcast_to(batch_index, tf.shape(t))
        concatenated = tf.transpose(tf.concat([tf.expand_dims(broadcasted, axis = 0), 
                                               tf.expand_dims(t, axis = 0)], axis = 0))
        type_h = tf.gather_nd(type_h, concatenated)
        out_type = self.bilinear.forward(type_h, type_c)
        minus_inf = -1e8
        minus_mask = (1 - mask) * minus_inf
        out_arc = out_arc + tf.expand_dims(minus_mask, axis = 2) + tf.expand_dims(minus_mask, axis = 1)
        loss_arc = tf.nn.log_softmax(out_arc, dim=1)
        loss_type = tf.nn.log_softmax(out_type, dim=2)
        loss_arc = loss_arc * tf.expand_dims(mask, axis = 2) * tf.expand_dims(mask, axis = 1)
        loss_type = loss_type * tf.expand_dims(mask, axis = 2)
        num = tf.reduce_sum(mask) - tf.cast(batch, tf.float32)
        child_index = tf.tile(tf.expand_dims(tf.range(0, max_len), 1), [1, batch])
        t = tf.transpose(heads)
        broadcasted = tf.broadcast_to(batch_index, tf.shape(t))
        concatenated = tf.transpose(tf.concat([tf.expand_dims(broadcasted, axis = 0),
                                               tf.expand_dims(t, axis = 0),
                                               tf.expand_dims(child_index, axis = 0)], axis = 0))
        loss_arc = tf.gather_nd(loss_arc, concatenated)
        loss_arc = tf.transpose(loss_arc, [1, 0])
        
        t = tf.transpose(types)
        broadcasted = tf.broadcast_to(batch_index, tf.shape(t))
        concatenated = tf.transpose(tf.concat([tf.expand_dims(broadcasted, axis = 0),
                                               tf.expand_dims(child_index, axis = 0),
                                               tf.expand_dims(t, axis = 0)], axis = 0))
        loss_type = tf.gather_nd(loss_type, concatenated)
        loss_type = tf.transpose(loss_type, [1, 0])
        cost = (tf.reduce_sum(-loss_arc) / num) + (tf.reduce_sum(-loss_type) / num)
        
        self.cost = tf.cond(self.switch, lambda: cost + crf_loss, lambda: cost)
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate = learning_rate
        ).minimize(self.cost)
        
        mask = tf.sequence_mask(self.lengths, maxlen = self.maxlen)
        
        self.prediction = tf.boolean_mask(self.logits, mask)
        mask_label = tf.boolean_mask(self.types, mask)
        correct_pred = tf.equal(tf.cast(self.prediction, tf.int32), mask_label)
        correct_index = tf.cast(correct_pred, tf.float32)
        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
        
        self.prediction = tf.cast(tf.boolean_mask(self.heads_seq, mask), tf.int32)
        mask_label = tf.boolean_mask(self.heads, mask)
        correct_pred = tf.equal(self.prediction, mask_label)
        correct_index = tf.cast(correct_pred, tf.float32)
        self.accuracy_depends = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [25]:
tf.reset_default_graph()
sess = tf.InteractiveSession()

learning_rate = 2e-5
hidden_size_word = 128

model = Model(learning_rate, hidden_size_word)
sess.run(tf.global_variables_initializer())

Instructions for updating:
reduction_indices is deprecated, use axis instead
The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.




INFO:tensorflow:memory input None
INFO:tensorflow:Use float type <dtype: 'float32'>
Instructions for updating:
Use keras.layers.dropout instead.
Instructions for updating:
Please use `layer.__call__` method instead.
Instructions for updating:
Use keras.layers.Dense instead.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
dim is deprecated, use axis instead


In [26]:
import collections
import re

def get_assignment_map_from_checkpoint(tvars, init_checkpoint):
    """Compute the union of the current variables and checkpoint variables."""
    assignment_map = {}
    initialized_variable_names = {}

    name_to_variable = collections.OrderedDict()
    for var in tvars:
        name = var.name
        m = re.match('^(.*):\\d+$', name)
        if m is not None:
            name = m.group(1)
        name_to_variable[name] = var

    init_vars = tf.train.list_variables(init_checkpoint)

    assignment_map = collections.OrderedDict()
    for x in init_vars:
        (name, var) = (x[0], x[1])
        if name not in name_to_variable:
            continue
        assignment_map[name] = name_to_variable[name]
        initialized_variable_names[name] = 1
        initialized_variable_names[name + ':0'] = 1

    return (assignment_map, initialized_variable_names)

In [27]:
tvars = tf.trainable_variables()
checkpoint = 'alxlnet-base/model.ckpt'
assignment_map, initialized_variable_names = get_assignment_map_from_checkpoint(tvars, 
                                                                                checkpoint)

In [28]:
saver = tf.train.Saver(var_list = assignment_map)
saver.restore(sess, checkpoint)

INFO:tensorflow:Restoring parameters from alxlnet-base/model.ckpt


In [29]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

batch_x = train_X[:5]
batch_x = pad_sequences(batch_x,padding='post')
batch_y = train_Y[:5]
batch_y = pad_sequences(batch_y,padding='post')
batch_depends = train_depends[:5]
batch_depends = pad_sequences(batch_depends,padding='post')
batch_segments = segments_train[:5]
batch_segments = pad_sequences(batch_segments, padding='post', value = 4)
batch_masks = masks_train[:5]
batch_masks = pad_sequences(batch_masks, padding='post', value = 1)

In [30]:
sess.run([model.accuracy, model.accuracy_depends, model.cost],
        feed_dict = {model.words: batch_x,
                model.types: batch_y,
                model.heads: batch_depends,
                model.segment_ids: batch_segments,
                model.input_masks: batch_masks,
                model.switch: False})

[0.028846154, 0.014423077, 44.482525]

In [32]:
tags_seq, heads = sess.run(
    [model.logits, model.heads_seq],
    feed_dict = {
        model.words: batch_x,
        model.segment_ids: batch_segments,
        model.input_masks: batch_masks
    },
)
tags_seq[0], heads[0], batch_depends[0]

(array([ 2,  4, 22,  7, 26, 12, 22,  3, 28,  8, 23, 14, 13, 28, 29,  8, 22,
        17, 29, 22, 19, 22, 28, 16,  8, 28,  3,  1, 16,  3, 30, 23, 22, 16,
        16,  8,  8, 22, 32, 32, 12, 16, 16, 16, 22, 23, 22,  7, 23,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0], dtype=int32),
 array([11, 22, 11, 19, 34, 35, 47, 19, 39, 10, 34, 24, 34, 43, 10, 10, 11,
        18, 34, 22, 23, 22, 43, 10, 37, 41, 19, 34, 10, 19, 10, 34, 11,  2,
        27, 10, 37, 11,  7, 11, 19, 25, 31, 25, 42, 34, 22, 34, 34,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0]),
 array([ 9,  0,  0,  2,  0,  3,  0,  0,  0,  3,  0,  9,  9,  7,  1, 11,  9,
        11, 12,  0, 11,  0,  0, 11, 15,  0,  0, 16, 15,  0, 15, 19, 20,  0,
         0,  0,  0,  0, 19,  0, 19,  0,  0,  9,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0], dtype=int3

In [33]:
from tqdm import tqdm

epoch = 20
for e in range(epoch):
    train_acc, train_loss = [], []
    test_acc, test_loss = [], []
    train_acc_depends, test_acc_depends = [], []
    
    pbar = tqdm(
        range(0, len(train_X), batch_size), desc = 'train minibatch loop'
    )
    for i in pbar:
        index = min(i + batch_size, len(train_X))
        batch_x = train_X[i: index]
        batch_x = pad_sequences(batch_x,padding='post')
        batch_y = train_Y[i: index]
        batch_y = pad_sequences(batch_y,padding='post')
        batch_depends = train_depends[i: index]
        batch_depends = pad_sequences(batch_depends,padding='post')
        batch_segments = segments_train[i: index]
        batch_segments = pad_sequences(batch_segments, padding='post', value = 4)
        batch_masks = masks_train[i: index]
        batch_masks = pad_sequences(batch_masks, padding='post', value = 1)
        
        acc_depends, acc, cost, _ = sess.run(
            [model.accuracy_depends, model.accuracy, model.cost, model.optimizer],
            feed_dict = {
                model.words: batch_x,
                model.types: batch_y,
                model.heads: batch_depends,
                model.segment_ids: batch_segments,
                model.input_masks: batch_masks,
                model.switch: False
            },
        )
        train_loss.append(cost)
        train_acc.append(acc)
        train_acc_depends.append(acc_depends)
        pbar.set_postfix(cost = cost, accuracy = acc, accuracy_depends = acc_depends)
        
    pbar = tqdm(
        range(0, len(test_X), batch_size), desc = 'test minibatch loop'
    )
    for i in pbar:
        index = min(i + batch_size, len(test_X))
        batch_x = test_X[i: index]
        batch_x = pad_sequences(batch_x,padding='post')
        batch_y = test_Y[i: index]
        batch_y = pad_sequences(batch_y,padding='post')
        batch_depends = test_depends[i: index]
        batch_depends = pad_sequences(batch_depends,padding='post')
        batch_segments = segments_test[i: index]
        batch_segments = pad_sequences(batch_segments, padding='post', value = 4)
        batch_masks = masks_test[i: index]
        batch_masks = pad_sequences(batch_masks, padding='post', value = 1)
        
        acc_depends, acc, cost = sess.run(
            [model.accuracy_depends, model.accuracy, model.cost],
            feed_dict = {
                model.words: batch_x,
                model.types: batch_y,
                model.heads: batch_depends,
                model.segment_ids: batch_segments,
                model.input_masks: batch_masks,
                model.switch: False
            },
        )
        test_loss.append(cost)
        test_acc.append(acc)
        test_acc_depends.append(acc_depends)
        pbar.set_postfix(cost = cost, accuracy = acc, accuracy_depends = acc_depends)
    
    
    print(
    'epoch: %d, training loss: %f, training acc: %f, training depends: %f, valid loss: %f, valid acc: %f, valid depends: %f\n'
    % (e, np.mean(train_loss), 
       np.mean(train_acc), 
       np.mean(train_acc_depends), 
       np.mean(test_loss), 
       np.mean(test_acc), 
       np.mean(test_acc_depends)
    ))

train minibatch loop: 100%|██████████| 2519/2519 [14:07<00:00,  2.97it/s, accuracy=0.875, accuracy_depends=0.75, cost=0.65]  
test minibatch loop: 100%|██████████| 630/630 [01:55<00:00,  5.45it/s, accuracy=0.844, accuracy_depends=0.543, cost=2]   
train minibatch loop:   0%|          | 0/2519 [00:00<?, ?it/s]

epoch: 0, training loss: 3.291615, training acc: 0.697776, training depends: 0.453986, valid loss: 1.953125, valid acc: 0.845485, valid depends: 0.548856



train minibatch loop: 100%|██████████| 2519/2519 [14:09<00:00,  2.96it/s, accuracy=1, accuracy_depends=1, cost=0.113]        
test minibatch loop: 100%|██████████| 630/630 [01:57<00:00,  5.37it/s, accuracy=0.87, accuracy_depends=0.656, cost=1.52]  
train minibatch loop:   0%|          | 0/2519 [00:00<?, ?it/s]

epoch: 1, training loss: 1.659280, training acc: 0.863677, training depends: 0.603499, valid loss: 1.461601, valid acc: 0.876069, valid depends: 0.648819



train minibatch loop: 100%|██████████| 2519/2519 [14:15<00:00,  2.94it/s, accuracy=1, accuracy_depends=1, cost=0.00758]      
test minibatch loop: 100%|██████████| 630/630 [01:58<00:00,  5.31it/s, accuracy=0.905, accuracy_depends=0.711, cost=1.27] 
train minibatch loop:   0%|          | 0/2519 [00:00<?, ?it/s]

epoch: 2, training loss: 1.285719, training acc: 0.884337, training depends: 0.684235, valid loss: 1.193979, valid acc: 0.889046, valid depends: 0.707937



train minibatch loop: 100%|██████████| 2519/2519 [14:28<00:00,  2.90it/s, accuracy=1, accuracy_depends=1, cost=0.0161]       
test minibatch loop: 100%|██████████| 630/630 [01:58<00:00,  5.30it/s, accuracy=0.902, accuracy_depends=0.717, cost=1.12] 
train minibatch loop:   0%|          | 0/2519 [00:00<?, ?it/s]

epoch: 3, training loss: 1.071584, training acc: 0.895355, training depends: 0.731697, valid loss: 1.047562, valid acc: 0.898181, valid depends: 0.740972



train minibatch loop: 100%|██████████| 2519/2519 [14:22<00:00,  2.92it/s, accuracy=1, accuracy_depends=1, cost=0.00245]      
test minibatch loop: 100%|██████████| 630/630 [02:00<00:00,  5.24it/s, accuracy=0.905, accuracy_depends=0.737, cost=1]    
train minibatch loop:   0%|          | 0/2519 [00:00<?, ?it/s]

epoch: 4, training loss: 0.929319, training acc: 0.904813, training depends: 0.762597, valid loss: 0.926313, valid acc: 0.906362, valid depends: 0.768899



test minibatch loop: 100%|██████████| 630/630 [02:01<00:00,  5.18it/s, accuracy=0.899, accuracy_depends=0.766, cost=0.924]08]
train minibatch loop:   0%|          | 0/2519 [00:00<?, ?it/s]

epoch: 5, training loss: 0.823146, training acc: 0.912222, training depends: 0.785236, valid loss: 0.848220, valid acc: 0.910573, valid depends: 0.788035



train minibatch loop: 100%|██████████| 2519/2519 [14:33<00:00,  2.88it/s, accuracy=1, accuracy_depends=1, cost=0.00253]      
test minibatch loop: 100%|██████████| 630/630 [02:00<00:00,  5.23it/s, accuracy=0.91, accuracy_depends=0.789, cost=0.822] 
train minibatch loop:   0%|          | 0/2519 [00:00<?, ?it/s]

epoch: 6, training loss: 0.740302, training acc: 0.920387, training depends: 0.802050, valid loss: 0.783450, valid acc: 0.919229, valid depends: 0.800596



train minibatch loop: 100%|██████████| 2519/2519 [14:30<00:00,  2.89it/s, accuracy=1, accuracy_depends=0.938, cost=0.156]    
test minibatch loop: 100%|██████████| 630/630 [02:00<00:00,  5.21it/s, accuracy=0.899, accuracy_depends=0.789, cost=0.807]
train minibatch loop:   0%|          | 0/2519 [00:00<?, ?it/s]

epoch: 7, training loss: 0.669474, training acc: 0.926937, training depends: 0.817003, valid loss: 0.746666, valid acc: 0.922263, valid depends: 0.809512



train minibatch loop: 100%|██████████| 2519/2519 [14:29<00:00,  2.90it/s, accuracy=1, accuracy_depends=1, cost=0.0024]       
test minibatch loop: 100%|██████████| 630/630 [02:00<00:00,  5.23it/s, accuracy=0.925, accuracy_depends=0.789, cost=0.775]
train minibatch loop:   0%|          | 0/2519 [00:00<?, ?it/s]

epoch: 8, training loss: 0.612354, training acc: 0.932819, training depends: 0.828322, valid loss: 0.719806, valid acc: 0.927458, valid depends: 0.808264



train minibatch loop: 100%|██████████| 2519/2519 [14:36<00:00,  2.88it/s, accuracy=1, accuracy_depends=1, cost=0.00618]      
test minibatch loop: 100%|██████████| 630/630 [02:00<00:00,  5.21it/s, accuracy=0.919, accuracy_depends=0.78, cost=0.781] 
train minibatch loop:   0%|          | 0/2519 [00:00<?, ?it/s]

epoch: 9, training loss: 0.558708, training acc: 0.938466, training depends: 0.838968, valid loss: 0.658846, valid acc: 0.931531, valid depends: 0.827488



train minibatch loop: 100%|██████████| 2519/2519 [14:29<00:00,  2.90it/s, accuracy=1, accuracy_depends=1, cost=0.00506]      
test minibatch loop: 100%|██████████| 630/630 [02:00<00:00,  5.22it/s, accuracy=0.928, accuracy_depends=0.815, cost=0.78] 
train minibatch loop:   0%|          | 0/2519 [00:00<?, ?it/s]

epoch: 10, training loss: 0.515627, training acc: 0.943391, training depends: 0.846884, valid loss: 0.629950, valid acc: 0.936099, valid depends: 0.833015



train minibatch loop: 100%|██████████| 2519/2519 [14:29<00:00,  2.90it/s, accuracy=1, accuracy_depends=1, cost=0.00351]      
test minibatch loop: 100%|██████████| 630/630 [02:00<00:00,  5.22it/s, accuracy=0.925, accuracy_depends=0.853, cost=0.656]
train minibatch loop:   0%|          | 0/2519 [00:00<?, ?it/s]

epoch: 11, training loss: 0.475926, training acc: 0.948360, training depends: 0.854740, valid loss: 0.598152, valid acc: 0.938958, valid depends: 0.838362



train minibatch loop: 100%|██████████| 2519/2519 [14:28<00:00,  2.90it/s, accuracy=1, accuracy_depends=1, cost=0.00395]      
test minibatch loop: 100%|██████████| 630/630 [02:00<00:00,  5.23it/s, accuracy=0.922, accuracy_depends=0.818, cost=0.586]
train minibatch loop:   0%|          | 0/2519 [00:00<?, ?it/s]

epoch: 12, training loss: 0.439909, training acc: 0.952905, training depends: 0.861413, valid loss: 0.560686, valid acc: 0.939924, valid depends: 0.851370



train minibatch loop: 100%|██████████| 2519/2519 [14:33<00:00,  2.89it/s, accuracy=1, accuracy_depends=1, cost=0.00072]      
test minibatch loop: 100%|██████████| 630/630 [02:01<00:00,  5.19it/s, accuracy=0.931, accuracy_depends=0.861, cost=0.543]
train minibatch loop:   0%|          | 0/2519 [00:00<?, ?it/s]

epoch: 13, training loss: 0.407941, training acc: 0.956605, training depends: 0.867787, valid loss: 0.543810, valid acc: 0.944994, valid depends: 0.855204



train minibatch loop: 100%|██████████| 2519/2519 [14:28<00:00,  2.90it/s, accuracy=1, accuracy_depends=1, cost=0.00081]      
test minibatch loop: 100%|██████████| 630/630 [02:00<00:00,  5.22it/s, accuracy=0.945, accuracy_depends=0.847, cost=0.578]
train minibatch loop:   0%|          | 0/2519 [00:00<?, ?it/s]

epoch: 14, training loss: 0.379358, training acc: 0.959959, training depends: 0.873444, valid loss: 0.514084, valid acc: 0.948265, valid depends: 0.860735



train minibatch loop: 100%|██████████| 2519/2519 [14:28<00:00,  2.90it/s, accuracy=1, accuracy_depends=1, cost=0.000883]      
test minibatch loop: 100%|██████████| 630/630 [02:01<00:00,  5.19it/s, accuracy=0.948, accuracy_depends=0.87, cost=0.487] 
train minibatch loop:   0%|          | 0/2519 [00:00<?, ?it/s]

epoch: 15, training loss: 0.352141, training acc: 0.963349, training depends: 0.878968, valid loss: 0.509567, valid acc: 0.950678, valid depends: 0.861329



train minibatch loop: 100%|██████████| 2519/2519 [14:30<00:00,  2.89it/s, accuracy=1, accuracy_depends=1, cost=0.000291]      
test minibatch loop: 100%|██████████| 630/630 [01:57<00:00,  5.36it/s, accuracy=0.951, accuracy_depends=0.844, cost=0.497]
train minibatch loop:   0%|          | 0/2519 [00:00<?, ?it/s]

epoch: 16, training loss: 0.331996, training acc: 0.967374, training depends: 0.882138, valid loss: 0.480076, valid acc: 0.952791, valid depends: 0.865909



train minibatch loop: 100%|██████████| 2519/2519 [14:11<00:00,  2.96it/s, accuracy=1, accuracy_depends=1, cost=0.000665]      
test minibatch loop: 100%|██████████| 630/630 [01:57<00:00,  5.37it/s, accuracy=0.954, accuracy_depends=0.853, cost=0.52] 
train minibatch loop:   0%|          | 0/2519 [00:00<?, ?it/s]

epoch: 17, training loss: 0.310162, training acc: 0.969735, training depends: 0.886819, valid loss: 0.514947, valid acc: 0.955326, valid depends: 0.852492



train minibatch loop: 100%|██████████| 2519/2519 [14:15<00:00,  2.95it/s, accuracy=1, accuracy_depends=1, cost=6.9e-5]        
test minibatch loop: 100%|██████████| 630/630 [02:02<00:00,  5.15it/s, accuracy=0.945, accuracy_depends=0.853, cost=0.601]
train minibatch loop:   0%|          | 0/2519 [00:00<?, ?it/s]

epoch: 18, training loss: 0.293612, training acc: 0.972500, training depends: 0.889618, valid loss: 0.454646, valid acc: 0.955980, valid depends: 0.875600



train minibatch loop: 100%|██████████| 2519/2519 [14:38<00:00,  2.87it/s, accuracy=1, accuracy_depends=1, cost=0.000245]      
test minibatch loop: 100%|██████████| 630/630 [02:01<00:00,  5.16it/s, accuracy=0.951, accuracy_depends=0.896, cost=0.451]

epoch: 19, training loss: 0.273301, training acc: 0.974312, training depends: 0.894809, valid loss: 0.463847, valid acc: 0.958968, valid depends: 0.871426






In [34]:
from tqdm import tqdm

epoch = 5
for e in range(epoch):
    train_acc, train_loss = [], []
    test_acc, test_loss = [], []
    train_acc_depends, test_acc_depends = [], []
    
    pbar = tqdm(
        range(0, len(train_X), batch_size), desc = 'train minibatch loop'
    )
    for i in pbar:
        index = min(i + batch_size, len(train_X))
        batch_x = train_X[i: index]
        batch_x = pad_sequences(batch_x,padding='post')
        batch_y = train_Y[i: index]
        batch_y = pad_sequences(batch_y,padding='post')
        batch_depends = train_depends[i: index]
        batch_depends = pad_sequences(batch_depends,padding='post')
        batch_segments = segments_train[i: index]
        batch_segments = pad_sequences(batch_segments, padding='post', value = 4)
        batch_masks = masks_train[i: index]
        batch_masks = pad_sequences(batch_masks, padding='post', value = 1)
        
        acc_depends, acc, cost, _ = sess.run(
            [model.accuracy_depends, model.accuracy, model.cost, model.optimizer],
            feed_dict = {
                model.words: batch_x,
                model.types: batch_y,
                model.heads: batch_depends,
                model.segment_ids: batch_segments,
                model.input_masks: batch_masks,
                model.switch: True
            },
        )
        train_loss.append(cost)
        train_acc.append(acc)
        train_acc_depends.append(acc_depends)
        pbar.set_postfix(cost = cost, accuracy = acc, accuracy_depends = acc_depends)
        
    pbar = tqdm(
        range(0, len(test_X), batch_size), desc = 'test minibatch loop'
    )
    for i in pbar:
        index = min(i + batch_size, len(test_X))
        batch_x = test_X[i: index]
        batch_x = pad_sequences(batch_x,padding='post')
        batch_y = test_Y[i: index]
        batch_y = pad_sequences(batch_y,padding='post')
        batch_depends = test_depends[i: index]
        batch_depends = pad_sequences(batch_depends,padding='post')
        batch_segments = segments_test[i: index]
        batch_segments = pad_sequences(batch_segments, padding='post', value = 4)
        batch_masks = masks_test[i: index]
        batch_masks = pad_sequences(batch_masks, padding='post', value = 1)
        
        acc_depends, acc, cost = sess.run(
            [model.accuracy_depends, model.accuracy, model.cost],
            feed_dict = {
                model.words: batch_x,
                model.types: batch_y,
                model.heads: batch_depends,
                model.segment_ids: batch_segments,
                model.input_masks: batch_masks,
                model.switch: True
            },
        )
        test_loss.append(cost)
        test_acc.append(acc)
        test_acc_depends.append(acc_depends)
        pbar.set_postfix(cost = cost, accuracy = acc, accuracy_depends = acc_depends)
    
    
    print(
    'epoch: %d, training loss: %f, training acc: %f, training depends: %f, valid loss: %f, valid acc: %f, valid depends: %f\n'
    % (e, np.mean(train_loss), 
       np.mean(train_acc), 
       np.mean(train_acc_depends), 
       np.mean(test_loss), 
       np.mean(test_acc), 
       np.mean(test_acc_depends)
    ))

train minibatch loop: 100%|██████████| 2519/2519 [14:37<00:00,  2.87it/s, accuracy=1, accuracy_depends=1, cost=0.00701]      
test minibatch loop: 100%|██████████| 630/630 [02:02<00:00,  5.15it/s, accuracy=0.962, accuracy_depends=0.853, cost=4.4] 
train minibatch loop:   0%|          | 0/2519 [00:00<?, ?it/s]

epoch: 0, training loss: 3.009300, training acc: 0.978004, training depends: 0.877064, valid loss: 5.160714, valid acc: 0.968942, valid depends: 0.854963



train minibatch loop: 100%|██████████| 2519/2519 [14:37<00:00,  2.87it/s, accuracy=1, accuracy_depends=1, cost=0.00202]      
test minibatch loop: 100%|██████████| 630/630 [02:02<00:00,  5.14it/s, accuracy=0.98, accuracy_depends=0.887, cost=3.67] 
train minibatch loop:   0%|          | 0/2519 [00:00<?, ?it/s]

epoch: 1, training loss: 1.917773, training acc: 0.986471, training depends: 0.877437, valid loss: 4.625864, valid acc: 0.972671, valid depends: 0.856262



train minibatch loop: 100%|██████████| 2519/2519 [14:41<00:00,  2.86it/s, accuracy=1, accuracy_depends=1, cost=0.00113]      
test minibatch loop: 100%|██████████| 630/630 [02:02<00:00,  5.15it/s, accuracy=0.974, accuracy_depends=0.85, cost=5.15]  
train minibatch loop:   0%|          | 0/2519 [00:00<?, ?it/s]

epoch: 2, training loss: 1.547230, training acc: 0.989546, training depends: 0.875326, valid loss: 4.631369, valid acc: 0.974594, valid depends: 0.853452



train minibatch loop: 100%|██████████| 2519/2519 [14:37<00:00,  2.87it/s, accuracy=1, accuracy_depends=1, cost=0.000275]     
test minibatch loop: 100%|██████████| 630/630 [02:01<00:00,  5.17it/s, accuracy=0.983, accuracy_depends=0.876, cost=3.34] 
train minibatch loop:   0%|          | 0/2519 [00:00<?, ?it/s]

epoch: 3, training loss: 1.307442, training acc: 0.991479, training depends: 0.874090, valid loss: 4.400067, valid acc: 0.975908, valid depends: 0.854123



train minibatch loop: 100%|██████████| 2519/2519 [14:39<00:00,  2.86it/s, accuracy=1, accuracy_depends=1, cost=0.000533]     
test minibatch loop: 100%|██████████| 630/630 [02:01<00:00,  5.17it/s, accuracy=0.986, accuracy_depends=0.855, cost=3.28] 

epoch: 4, training loss: 1.156469, training acc: 0.992754, training depends: 0.873411, valid loss: 4.205251, valid acc: 0.977972, valid depends: 0.853291






In [35]:
saver = tf.train.Saver(tf.trainable_variables())
saver.save(sess, 'alxlnet-base-dependency/model.ckpt')

'alxlnet-base-dependency/model.ckpt'

In [36]:
kwargs = dict(
      is_training=False,
      use_tpu=False,
      use_bfloat16=False,
      dropout=0.0,
      dropatt=0.0,
      init='normal',
      init_range=0.1,
      init_std=0.05,
      clamp_len=-1)

xlnet_parameters = xlnet.RunConfig(**kwargs)
xlnet_config = xlnet.XLNetConfig(json_path='alxlnet-base/config.json')

In [37]:
learning_rate = 2e-5
hidden_size_word = 128

tf.reset_default_graph()
sess = tf.InteractiveSession()
model = Model(learning_rate, hidden_size_word)
sess.run(tf.global_variables_initializer())

INFO:tensorflow:memory input None
INFO:tensorflow:Use float type <dtype: 'float32'>




In [38]:
saver = tf.train.Saver(tf.trainable_variables())
saver.restore(sess, 'alxlnet-base-dependency/model.ckpt')

INFO:tensorflow:Restoring parameters from alxlnet-base-dependency/model.ckpt


In [39]:
def pred2label(pred):
    out = []
    for pred_i in pred:
        out_i = []
        for p in pred_i:
            out_i.append(idx2tag[p])
        out.append(out_i)
    return out

In [40]:
def evaluate(heads_pred, types_pred, heads, types, lengths,
             symbolic_root=False, symbolic_end=False):
    batch_size, _ = heads_pred.shape
    ucorr = 0.
    lcorr = 0.
    total = 0.
    ucomplete_match = 0.
    lcomplete_match = 0.

    corr_root = 0.
    total_root = 0.
    start = 1 if symbolic_root else 0
    end = 1 if symbolic_end else 0
    for i in range(batch_size):
        ucm = 1.
        lcm = 1.
        for j in range(start, lengths[i] - end):

            total += 1
            if heads[i, j] == heads_pred[i, j]:
                ucorr += 1
                if types[i, j] == types_pred[i, j]:
                    lcorr += 1
                else:
                    lcm = 0
            else:
                ucm = 0
                lcm = 0

            if heads[i, j] == 0:
                total_root += 1
                corr_root += 1 if heads_pred[i, j] == 0 else 0

        ucomplete_match += ucm
        lcomplete_match += lcm
    
    return ucorr / total, lcorr / total, corr_root / total_root

In [41]:
arcs, types, roots = [], [], []
real_Y, predict_Y = [], []

for i in tqdm(range(0, len(test_X), batch_size)):
    index = min(i + batch_size, len(test_X))
    batch_x = test_X[i: index]
    batch_x = pad_sequences(batch_x,padding='post')
    batch_y = test_Y[i: index]
    batch_y = pad_sequences(batch_y,padding='post')
    batch_depends = test_depends[i: index]
    batch_depends = pad_sequences(batch_depends,padding='post')
    batch_segments = segments_test[i: index]
    batch_segments = pad_sequences(batch_segments, padding='post', value = 4)
    batch_masks = masks_test[i: index]
    batch_masks = pad_sequences(batch_masks, padding='post', value = 1)
    
    tags_seq, heads = sess.run(
        [model.logits, model.heads_seq],
        feed_dict = {
            model.words: batch_x,
            model.segment_ids: batch_segments,
            model.input_masks: batch_masks
        },
    )
    
    arc_accuracy, type_accuracy, root_accuracy = evaluate(heads - 1, tags_seq, batch_depends - 1, batch_y, 
            np.count_nonzero(batch_x, axis = 1))
    arcs.append(arc_accuracy)
    types.append(type_accuracy)
    roots.append(root_accuracy)
    predicted = pred2label(tags_seq)
    real = pred2label(batch_y)
    predict_Y.extend(predicted)
    real_Y.extend(real)

100%|██████████| 630/630 [01:59<00:00,  5.27it/s]


In [42]:
temp_real_Y = []
for r in real_Y:
    temp_real_Y.extend(r)
    
temp_predict_Y = []
for r in predict_Y:
    temp_predict_Y.extend(r)

In [43]:
from sklearn.metrics import classification_report
print(classification_report(temp_real_Y, temp_predict_Y, digits = 5))

               precision    recall  f1-score   support

          PAD    0.99999   1.00000   0.99999    644667
            X    0.99998   0.99999   0.99998    144988
          acl    0.95995   0.96137   0.96066      6058
        advcl    0.91687   0.93839   0.92751      2386
       advmod    0.97160   0.97620   0.97389      9496
         amod    0.95264   0.94761   0.95012      8342
        appos    0.97560   0.97638   0.97599      4995
          aux    1.00000   1.00000   1.00000         6
         case    0.99147   0.98685   0.98916     21680
           cc    0.97523   0.99377   0.98441      6418
        ccomp    0.95249   0.90112   0.92610       890
     compound    0.95478   0.95656   0.95567     13399
compound:plur    0.97575   0.98067   0.97821      1190
         conj    0.96575   0.98929   0.97738      8494
          cop    0.98201   0.98708   0.98454      1935
        csubj    1.00000   0.90476   0.95000        42
   csubj:pass    0.91667   0.91667   0.91667        12
         

In [44]:
print('arc accuracy:', np.mean(arcs))
print('types accuracy:', np.mean(types))
print('root accuracy:', np.mean(roots))

arc accuracy: 0.8943757029483008
types accuracy: 0.88690168487317
root accuracy: 0.9425595238095238


In [45]:
strings = ','.join(
    [
        n.name
        for n in tf.get_default_graph().as_graph_def().node
        if ('Variable' in n.op
        or 'Placeholder' in n.name
        or '_seq' in n.name
        or 'logits' in n.name
        or 'alphas' in n.name
        or 'self/Softmax' in n.name)
        and 'Adam' not in n.name
        and 'beta' not in n.name
        and 'global_step' not in n.name
        and 'adam' not in n.name
        and 'gradients/bert' not in n.name
    ]
)
strings.split(',')

['Placeholder',
 'Placeholder_1',
 'Placeholder_2',
 'Placeholder_3',
 'Placeholder_4',
 'Placeholder_5',
 'W_d',
 'W_e',
 'U',
 'U-bi',
 'Wl',
 'Wr',
 'model/transformer/r_w_bias',
 'model/transformer/r_r_bias',
 'model/transformer/word_embedding/lookup_table',
 'model/transformer/word_embedding/lookup_table_2',
 'model/transformer/r_s_bias',
 'model/transformer/seg_embed',
 'model/transformer/layer_shared/rel_attn/q/kernel',
 'model/transformer/layer_shared/rel_attn/k/kernel',
 'model/transformer/layer_shared/rel_attn/v/kernel',
 'model/transformer/layer_shared/rel_attn/r/kernel',
 'model/transformer/layer_shared/rel_attn/o/kernel',
 'model/transformer/layer_shared/rel_attn/LayerNorm/gamma',
 'model/transformer/layer_shared/ff/layer_1/kernel',
 'model/transformer/layer_shared/ff/layer_1/bias',
 'model/transformer/layer_shared/ff/layer_2/kernel',
 'model/transformer/layer_shared/ff/layer_2/bias',
 'model/transformer/layer_shared/ff/LayerNorm/gamma',
 'dense/kernel',
 'dense/bias',
 'd

In [46]:
def freeze_graph(model_dir, output_node_names):

    if not tf.gfile.Exists(model_dir):
        raise AssertionError(
            "Export directory doesn't exists. Please specify an export "
            'directory: %s' % model_dir
        )

    checkpoint = tf.train.get_checkpoint_state(model_dir)
    input_checkpoint = checkpoint.model_checkpoint_path

    absolute_model_dir = '/'.join(input_checkpoint.split('/')[:-1])
    output_graph = absolute_model_dir + '/frozen_model.pb'
    clear_devices = True
    with tf.Session(graph = tf.Graph()) as sess:
        saver = tf.train.import_meta_graph(
            input_checkpoint + '.meta', clear_devices = clear_devices
        )
        saver.restore(sess, input_checkpoint)
        output_graph_def = tf.graph_util.convert_variables_to_constants(
            sess,
            tf.get_default_graph().as_graph_def(),
            output_node_names.split(','),
        )
        with tf.gfile.GFile(output_graph, 'wb') as f:
            f.write(output_graph_def.SerializeToString())
        print('%d ops in the final graph.' % len(output_graph_def.node))

In [47]:
freeze_graph('alxlnet-base-dependency', strings)

INFO:tensorflow:Restoring parameters from alxlnet-base-dependency/model.ckpt
Instructions for updating:
Use `tf.compat.v1.graph_util.convert_variables_to_constants`
Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`
INFO:tensorflow:Froze 34 variables.
INFO:tensorflow:Converted 34 variables to const ops.
7923 ops in the final graph.


In [48]:
import boto3

bucketName = 'huseinhouse-storage'
Key = 'alxlnet-base-dependency/frozen_model.pb'
outPutname = "v34/dependency/alxlnet-base-dependency.pb"

s3 = boto3.client('s3')

s3.upload_file(Key,bucketName,outPutname)