In [2]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '2'

In [3]:
from albert import modeling
from albert import optimization
from albert import tokenization
import tensorflow as tf
import numpy as np




In [4]:
tokenizer = tokenization.FullTokenizer(
      vocab_file='albert-tiny-2020-04-17/sp10m.cased.v10.vocab', do_lower_case=False,
      spm_model_file='albert-tiny-2020-04-17/sp10m.cased.v10.model')


INFO:tensorflow:loading sentence piece model


In [5]:
albert_config = modeling.AlbertConfig.from_json_file('albert-tiny-2020-04-17/config.json')
albert_config




<albert.modeling.AlbertConfig at 0x7f5f8410c438>

In [6]:
with open('../Malaya-Dataset/dependency/gsd-ud-train.conllu.txt') as fopen:
    corpus = fopen.read().split('\n')
    
with open('../Malaya-Dataset/dependency/gsd-ud-test.conllu.txt') as fopen:
    corpus.extend(fopen.read().split('\n'))
    
with open('../Malaya-Dataset/dependency/gsd-ud-dev.conllu.txt') as fopen:
    corpus.extend(fopen.read().split('\n'))

In [7]:
tag2idx = {'PAD': 0, 'X': 1}
tag_idx = 2

def process_corpus(corpus, until = None):
    global word2idx, tag2idx, char2idx, word_idx, tag_idx, char_idx
    sentences, words, depends, labels, pos, sequences = [], [], [], [], [], []
    temp_sentence, temp_word, temp_depend, temp_label, temp_pos = [], [], [], [], []
    first_time = True
    for sentence in corpus:
        try:
            if len(sentence):
                if sentence[0] == '#':
                    continue
                if first_time:
                    print(sentence)
                    first_time = False
                sentence = sentence.split('\t')
                if sentence[7] not in tag2idx:
                    tag2idx[sentence[7]] = tag_idx
                    tag_idx += 1
                temp_word.append(sentence[1])
                temp_depend.append(int(sentence[6]) + 1)
                temp_label.append(tag2idx[sentence[7]])
                temp_sentence.append(sentence[1])
                temp_pos.append(sentence[3])
            else:
                if len(temp_sentence) < 2 or len(temp_word) != len(temp_label):
                    temp_word = []
                    temp_depend = []
                    temp_label = []
                    temp_sentence = []
                    temp_pos = []
                    continue
                bert_tokens = ['[CLS]']
                labels_ = [0]
                depends_ = [0]
                seq_ = []
                for no, orig_token in enumerate(temp_word):
                    labels_.append(temp_label[no])
                    depends_.append(temp_depend[no])
                    t = tokenizer.tokenize(orig_token)
                    bert_tokens.extend(t)
                    labels_.extend([1] * (len(t) - 1))
                    depends_.extend([0] * (len(t) - 1))
                    seq_.append(no + 1)
                bert_tokens.append('[SEP]')
                labels_.append(0)
                depends_.append(0)
                words.append(tokenizer.convert_tokens_to_ids(bert_tokens))
                depends.append(depends_)
                labels.append(labels_)
                sentences.append(bert_tokens)
                pos.append(temp_pos)
                sequences.append(seq_)
                temp_word = []
                temp_depend = []
                temp_label = []
                temp_sentence = []
                temp_pos = []
        except Exception as e:
            print(e, sentence)
    return sentences[:-1], words[:-1], depends[:-1], labels[:-1], pos[:-1], sequences[:-1]

In [8]:
sentences, words, depends, labels, _, _ = process_corpus(corpus)

1	Sembungan	sembungan	PROPN	X--	_	4	nsubj	_	MorphInd=^sembungan<x>_X--$


In [9]:
import json

with open('../Malaya-Dataset/dependency/augmented-dependency.json') as fopen:
    augmented = json.load(fopen)

In [10]:
text_augmented, depends_augmented, labels_augmented = [], [], []

for a in augmented:
    text_augmented.extend(a[0])
    depends_augmented.extend(a[1])
    labels_augmented.extend((np.array(a[2]) + 1).tolist())

In [11]:
def parse_XY(texts, depends, labels):
    outside, sentences, outside_depends, outside_labels = [], [], [], []
    for no, text in enumerate(texts):
        temp_depend = depends[no]
        temp_label = labels[no]
        s = text.split()
        sentences.append(s)
        bert_tokens = ['[CLS]']
        labels_ = [0]
        depends_ = [0]
        for no, orig_token in enumerate(s):
            labels_.append(temp_label[no])
            depends_.append(temp_depend[no])
            t = tokenizer.tokenize(orig_token)
            bert_tokens.extend(t)
            labels_.extend([1] * (len(t) - 1))
            depends_.extend([0] * (len(t) - 1))
        bert_tokens.append('[SEP]')
        labels_.append(0)
        depends_.append(0)
        outside.append(tokenizer.convert_tokens_to_ids(bert_tokens))
        outside_depends.append(depends_)
        outside_labels.append(labels_)
    return outside, sentences, outside_depends, outside_labels

In [12]:
outside, _, outside_depends, outside_labels = parse_XY(text_augmented, 
                                                       depends_augmented, 
                                                       labels_augmented)

In [13]:
words.extend(outside)
depends.extend(outside_depends)
labels.extend(outside_labels)

In [14]:
idx2tag = {v:k for k, v in tag2idx.items()}

In [15]:
from sklearn.model_selection import train_test_split

words_train, words_test, depends_train, depends_test, labels_train, labels_test \
= train_test_split(words, depends, labels, test_size = 0.2)

In [16]:
train_X = words_train
train_Y = labels_train
train_depends = depends_train

test_X = words_test
test_Y = labels_test
test_depends = depends_test

In [17]:
BERT_INIT_CHKPNT = 'albert-tiny-2020-04-17/model.ckpt-1000000'

In [18]:
epoch = 30
batch_size = 32
warmup_proportion = 0.1
num_train_steps = int(len(train_X) / batch_size * epoch)
num_warmup_steps = int(num_train_steps * warmup_proportion)

In [19]:
class BiAAttention:
    def __init__(self, input_size_encoder, input_size_decoder, num_labels):
        self.input_size_encoder = input_size_encoder
        self.input_size_decoder = input_size_decoder
        self.num_labels = num_labels
        
        self.W_d = tf.get_variable("W_d", shape=[self.num_labels, self.input_size_decoder],
           initializer=tf.contrib.layers.xavier_initializer())
        self.W_e = tf.get_variable("W_e", shape=[self.num_labels, self.input_size_encoder],
           initializer=tf.contrib.layers.xavier_initializer())
        self.U = tf.get_variable("U", shape=[self.num_labels, self.input_size_decoder, self.input_size_encoder],
           initializer=tf.contrib.layers.xavier_initializer())
        
    def forward(self, input_d, input_e, mask_d=None, mask_e=None):
        batch = tf.shape(input_d)[0]
        length_decoder = tf.shape(input_d)[1]
        length_encoder = tf.shape(input_e)[1]
        out_d = tf.expand_dims(tf.matmul(self.W_d, tf.transpose(input_d, [0, 2, 1])), 3)
        out_e = tf.expand_dims(tf.matmul(self.W_e, tf.transpose(input_e, [0, 2, 1])), 2)
        output = tf.matmul(tf.expand_dims(input_d, 1), self.U)
        output = tf.matmul(output, tf.transpose(tf.expand_dims(input_e, 1), [0, 1, 3, 2]))
        
        output = output + out_d + out_e
        
        if mask_d is not None:
            d = tf.expand_dims(tf.expand_dims(mask_d, 1), 3)
            e = tf.expand_dims(tf.expand_dims(mask_e, 1), 2)
            output = output * d * e
            
        return output
    
class BiLinear:
    def __init__(self, left_features, right_features, out_features):
        self.left_features = left_features
        self.right_features = right_features
        self.out_features = out_features
        
        self.U = tf.get_variable("U-bi", shape=[out_features, left_features, right_features],
           initializer=tf.contrib.layers.xavier_initializer())
        self.W_l = tf.get_variable("Wl", shape=[out_features, left_features],
           initializer=tf.contrib.layers.xavier_initializer())
        self.W_r = tf.get_variable("Wr", shape=[out_features, right_features],
           initializer=tf.contrib.layers.xavier_initializer())
    
    def forward(self, input_left, input_right):
        left_size = tf.shape(input_left)
        output_shape = tf.concat([left_size[:-1], [self.out_features]], axis = 0)
        batch = tf.cast(tf.reduce_prod(left_size[:-1]), tf.int32)
        input_left = tf.reshape(input_left, (batch, self.left_features))
        input_right = tf.reshape(input_right, (batch, self.right_features))
        tiled = tf.tile(tf.expand_dims(input_left, axis = 0), (self.out_features,1,1))
        output = tf.transpose(tf.reduce_sum(tf.matmul(tiled, self.U), axis = 2))
        output = output + tf.matmul(input_left, tf.transpose(self.W_l))\
        + tf.matmul(input_right, tf.transpose(self.W_r))
        
        return tf.reshape(output, output_shape)
    
class Model:
    def __init__(
        self,
        learning_rate,
        hidden_size_word,
        training = True,
        cov = 0.0):
        
        self.words = tf.placeholder(tf.int32, (None, None))
        self.heads = tf.placeholder(tf.int32, (None, None))
        self.types = tf.placeholder(tf.int32, (None, None))
        self.switch = tf.placeholder(tf.bool, None)
        self.mask = tf.cast(tf.math.not_equal(self.words, 0), tf.float32)
        self.maxlen = tf.shape(self.words)[1]
        self.lengths = tf.count_nonzero(self.words, 1)
        mask = self.mask
        heads = self.heads
        types = self.types
        
        self.arc_h = tf.layers.Dense(hidden_size_word)
        self.arc_c = tf.layers.Dense(hidden_size_word)
        self.attention = BiAAttention(hidden_size_word, hidden_size_word, 1)

        self.type_h = tf.layers.Dense(hidden_size_word)
        self.type_c = tf.layers.Dense(hidden_size_word)
        self.bilinear = BiLinear(hidden_size_word, hidden_size_word, len(tag2idx))
        
        model = modeling.AlbertModel(
            config=albert_config,
            is_training=training,
            input_ids=self.words,
            use_one_hot_embeddings=False)
        output_layer = model.get_sequence_output()
        
        arc_h = tf.nn.elu(self.arc_h(output_layer))
        arc_c = tf.nn.elu(self.arc_c(output_layer))
        
        type_h = tf.nn.elu(self.type_h(output_layer))
        type_c = tf.nn.elu(self.type_c(output_layer))
        
        out_arc = tf.squeeze(self.attention.forward(arc_h, arc_c, mask_d=self.mask, 
                                                    mask_e=self.mask), axis = 1)
        self.out_arc = out_arc
        
        batch = tf.shape(out_arc)[0]
        max_len = tf.shape(out_arc)[1]
        sec_max_len = tf.shape(out_arc)[2]
        batch_index = tf.range(0, batch)
        
        decode_arc = out_arc + tf.linalg.diag(tf.fill([max_len], -np.inf))
        minus_mask = tf.expand_dims(tf.cast(1 - mask, tf.bool), axis = 2)
        minus_mask = tf.tile(minus_mask, [1, 1, sec_max_len])
        decode_arc = tf.where(minus_mask, tf.fill(tf.shape(decode_arc), -np.inf), decode_arc)
        self.decode_arc = decode_arc
        self.heads_seq = tf.argmax(decode_arc, axis = 1)
        self.heads_seq = tf.identity(self.heads_seq, name = 'heads_seq')
        
        t = tf.cast(tf.transpose(self.heads_seq), tf.int32)
        broadcasted = tf.broadcast_to(batch_index, tf.shape(t))
        concatenated = tf.transpose(tf.concat([tf.expand_dims(broadcasted, axis = 0), 
                                               tf.expand_dims(t, axis = 0)], axis = 0))
        type_h = tf.gather_nd(type_h, concatenated)
        out_type = self.bilinear.forward(type_h, type_c)
        self.tags_seq = tf.argmax(out_type, axis = 2)
        self.tags_seq = tf.identity(self.tags_seq, name = 'tags_seq')
        
        log_likelihood, transition_params = tf.contrib.crf.crf_log_likelihood(
            out_type, self.types, self.lengths
        )
        crf_loss = tf.reduce_mean(-log_likelihood)
        self.logits, _ = tf.contrib.crf.crf_decode(
            out_type, transition_params, self.lengths
        )
        self.logits = tf.identity(self.logits, name = 'logits')
        
        batch = tf.shape(out_arc)[0]
        max_len = tf.shape(out_arc)[1]
        batch_index = tf.range(0, batch)
        t = tf.transpose(heads)
        broadcasted = tf.broadcast_to(batch_index, tf.shape(t))
        concatenated = tf.transpose(tf.concat([tf.expand_dims(broadcasted, axis = 0), 
                                               tf.expand_dims(t, axis = 0)], axis = 0))
        type_h = tf.gather_nd(type_h, concatenated)
        out_type = self.bilinear.forward(type_h, type_c)
        minus_inf = -1e8
        minus_mask = (1 - mask) * minus_inf
        out_arc = out_arc + tf.expand_dims(minus_mask, axis = 2) + tf.expand_dims(minus_mask, axis = 1)
        loss_arc = tf.nn.log_softmax(out_arc, dim=1)
        loss_type = tf.nn.log_softmax(out_type, dim=2)
        loss_arc = loss_arc * tf.expand_dims(mask, axis = 2) * tf.expand_dims(mask, axis = 1)
        loss_type = loss_type * tf.expand_dims(mask, axis = 2)
        num = tf.reduce_sum(mask) - tf.cast(batch, tf.float32)
        child_index = tf.tile(tf.expand_dims(tf.range(0, max_len), 1), [1, batch])
        t = tf.transpose(heads)
        broadcasted = tf.broadcast_to(batch_index, tf.shape(t))
        concatenated = tf.transpose(tf.concat([tf.expand_dims(broadcasted, axis = 0),
                                               tf.expand_dims(t, axis = 0),
                                               tf.expand_dims(child_index, axis = 0)], axis = 0))
        loss_arc = tf.gather_nd(loss_arc, concatenated)
        loss_arc = tf.transpose(loss_arc, [1, 0])
        
        t = tf.transpose(types)
        broadcasted = tf.broadcast_to(batch_index, tf.shape(t))
        concatenated = tf.transpose(tf.concat([tf.expand_dims(broadcasted, axis = 0),
                                               tf.expand_dims(child_index, axis = 0),
                                               tf.expand_dims(t, axis = 0)], axis = 0))
        loss_type = tf.gather_nd(loss_type, concatenated)
        loss_type = tf.transpose(loss_type, [1, 0])
        cost = (tf.reduce_sum(-loss_arc) / num) + (tf.reduce_sum(-loss_type) / num)
        
        self.cost = tf.cond(self.switch, lambda: cost + crf_loss, lambda: cost)
        self.optimizer = optimization.create_optimizer(self.cost, learning_rate, 
                                                       num_train_steps, num_warmup_steps, False)
        
        mask = tf.sequence_mask(self.lengths, maxlen = self.maxlen)
        
        self.prediction = tf.boolean_mask(self.logits, mask)
        mask_label = tf.boolean_mask(self.types, mask)
        correct_pred = tf.equal(tf.cast(self.prediction, tf.int32), mask_label)
        correct_index = tf.cast(correct_pred, tf.float32)
        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
        
        self.prediction = tf.cast(tf.boolean_mask(self.heads_seq, mask), tf.int32)
        mask_label = tf.boolean_mask(self.heads, mask)
        correct_pred = tf.equal(self.prediction, mask_label)
        correct_index = tf.cast(correct_pred, tf.float32)
        self.accuracy_depends = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [20]:
tf.reset_default_graph()
sess = tf.InteractiveSession()

learning_rate = 2e-5
hidden_size_word = 128

model = Model(learning_rate, hidden_size_word)
sess.run(tf.global_variables_initializer())

Instructions for updating:
reduction_indices is deprecated, use axis instead
The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.




Instructions for updating:
Use keras.layers.Dense instead.
Instructions for updating:
Please use `layer.__call__` method instead.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
dim is deprecated, use axis instead


INFO:tensorflow:++++++ warmup starts at step 0, for 3777 steps ++++++
INFO:tensorflow:using adamw



In [21]:
var_lists = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope = 'bert')
saver = tf.train.Saver(var_list = var_lists)
saver.restore(sess, BERT_INIT_CHKPNT)

INFO:tensorflow:Restoring parameters from albert-tiny-2020-04-17/model.ckpt-1000000


In [22]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

batch_x = train_X[:5]
batch_x = pad_sequences(batch_x,padding='post')
batch_y = train_Y[:5]
batch_y = pad_sequences(batch_y,padding='post')
batch_depends = train_depends[:5]
batch_depends = pad_sequences(batch_depends,padding='post')

In [23]:
from tqdm import tqdm

epoch = 20

for e in range(epoch):
    train_acc, train_loss = [], []
    test_acc, test_loss = [], []
    train_acc_depends, test_acc_depends = [], []
    
    pbar = tqdm(
        range(0, len(train_X), batch_size), desc = 'train minibatch loop'
    )
    for i in pbar:
        index = min(i + batch_size, len(train_X))
        batch_x = train_X[i: index]
        batch_x = pad_sequences(batch_x,padding='post')
        batch_y = train_Y[i: index]
        batch_y = pad_sequences(batch_y,padding='post')
        batch_depends = train_depends[i: index]
        batch_depends = pad_sequences(batch_depends,padding='post')
        
        acc_depends, acc, cost, _ = sess.run(
            [model.accuracy_depends, model.accuracy, model.cost, model.optimizer],
            feed_dict = {
                model.words: batch_x,
                model.types: batch_y,
                model.heads: batch_depends,
                model.switch: False
            },
        )
        train_loss.append(cost)
        train_acc.append(acc)
        train_acc_depends.append(acc_depends)
        pbar.set_postfix(cost = cost, accuracy = acc, accuracy_depends = acc_depends)
        
    pbar = tqdm(
        range(0, len(test_X), batch_size), desc = 'test minibatch loop'
    )
    for i in pbar:
        index = min(i + batch_size, len(test_X))
        batch_x = test_X[i: index]
        batch_x = pad_sequences(batch_x,padding='post')
        batch_y = test_Y[i: index]
        batch_y = pad_sequences(batch_y,padding='post')
        batch_depends = test_depends[i: index]
        batch_depends = pad_sequences(batch_depends,padding='post')
        
        acc_depends, acc, cost = sess.run(
            [model.accuracy_depends, model.accuracy, model.cost],
            feed_dict = {
                model.words: batch_x,
                model.types: batch_y,
                model.heads: batch_depends,
                model.switch: False
            },
        )
        test_loss.append(cost)
        test_acc.append(acc)
        test_acc_depends.append(acc_depends)
        pbar.set_postfix(cost = cost, accuracy = acc, accuracy_depends = acc_depends)
    
    
    print(
    'epoch: %d, training loss: %f, training acc: %f, training depends: %f, valid loss: %f, valid acc: %f, valid depends: %f\n'
    % (e, np.mean(train_loss), 
       np.mean(train_acc), 
       np.mean(train_acc_depends), 
       np.mean(test_loss), 
       np.mean(test_acc), 
       np.mean(test_acc_depends)
    ))

train minibatch loop: 100%|██████████| 1260/1260 [04:52<00:00,  4.31it/s, accuracy=0.438, accuracy_depends=0.75, cost=1.91]  
test minibatch loop: 100%|██████████| 315/315 [00:54<00:00,  5.83it/s, accuracy=0.257, accuracy_depends=0.502, cost=2.65]
train minibatch loop:   0%|          | 0/1260 [00:00<?, ?it/s]

epoch: 0, training loss: 4.816708, training acc: 0.169616, training depends: 0.458381, valid loss: 2.643911, valid acc: 0.272960, valid depends: 0.511708



train minibatch loop: 100%|██████████| 1260/1260 [04:57<00:00,  4.24it/s, accuracy=0.688, accuracy_depends=0.812, cost=1.48]
test minibatch loop: 100%|██████████| 315/315 [00:57<00:00,  5.51it/s, accuracy=0.52, accuracy_depends=0.534, cost=2.12] 
train minibatch loop:   0%|          | 0/1260 [00:00<?, ?it/s]

epoch: 1, training loss: 2.339175, training acc: 0.435387, training depends: 0.530496, valid loss: 2.102147, valid acc: 0.550210, valid depends: 0.540273



train minibatch loop: 100%|██████████| 1260/1260 [05:07<00:00,  4.10it/s, accuracy=0.688, accuracy_depends=0.75, cost=1.01] 
test minibatch loop: 100%|██████████| 315/315 [00:56<00:00,  5.59it/s, accuracy=0.576, accuracy_depends=0.593, cost=1.77]
train minibatch loop:   0%|          | 0/1260 [00:00<?, ?it/s]

epoch: 2, training loss: 1.898127, training acc: 0.583338, training depends: 0.571907, valid loss: 1.739949, valid acc: 0.603071, valid depends: 0.593208



train minibatch loop: 100%|██████████| 1260/1260 [05:05<00:00,  4.12it/s, accuracy=0.812, accuracy_depends=0.812, cost=0.676]
test minibatch loop: 100%|██████████| 315/315 [00:56<00:00,  5.59it/s, accuracy=0.621, accuracy_depends=0.618, cost=1.63]
train minibatch loop:   0%|          | 0/1260 [00:00<?, ?it/s]

epoch: 3, training loss: 1.638650, training acc: 0.631884, training depends: 0.612456, valid loss: 1.568366, valid acc: 0.638094, valid depends: 0.624893



train minibatch loop: 100%|██████████| 1260/1260 [05:05<00:00,  4.12it/s, accuracy=0.875, accuracy_depends=0.938, cost=0.35]
test minibatch loop: 100%|██████████| 315/315 [00:56<00:00,  5.60it/s, accuracy=0.697, accuracy_depends=0.616, cost=1.58]
train minibatch loop:   0%|          | 0/1260 [00:00<?, ?it/s]

epoch: 4, training loss: 1.489354, training acc: 0.722123, training depends: 0.640292, valid loss: 1.474930, valid acc: 0.717230, valid depends: 0.640443



train minibatch loop: 100%|██████████| 1260/1260 [04:55<00:00,  4.27it/s, accuracy=0.812, accuracy_depends=0.938, cost=0.221]
test minibatch loop: 100%|██████████| 315/315 [00:53<00:00,  5.87it/s, accuracy=0.767, accuracy_depends=0.642, cost=1.51]
train minibatch loop:   0%|          | 0/1260 [00:00<?, ?it/s]

epoch: 5, training loss: 1.384911, training acc: 0.771040, training depends: 0.661793, valid loss: 1.381199, valid acc: 0.780014, valid depends: 0.664027



train minibatch loop: 100%|██████████| 1260/1260 [04:46<00:00,  4.40it/s, accuracy=0.75, accuracy_depends=0.938, cost=0.15] 
test minibatch loop: 100%|██████████| 315/315 [00:51<00:00,  6.10it/s, accuracy=0.771, accuracy_depends=0.652, cost=1.47]
train minibatch loop:   0%|          | 0/1260 [00:00<?, ?it/s]

epoch: 6, training loss: 1.304746, training acc: 0.789611, training depends: 0.678879, valid loss: 1.339804, valid acc: 0.782671, valid depends: 0.670663



train minibatch loop: 100%|██████████| 1260/1260 [04:44<00:00,  4.43it/s, accuracy=0.812, accuracy_depends=0.938, cost=0.12] 
test minibatch loop: 100%|██████████| 315/315 [00:52<00:00,  6.01it/s, accuracy=0.789, accuracy_depends=0.684, cost=1.42]
train minibatch loop:   0%|          | 0/1260 [00:00<?, ?it/s]

epoch: 7, training loss: 1.238359, training acc: 0.802682, training depends: 0.693803, valid loss: 1.275019, valid acc: 0.802976, valid depends: 0.688489



train minibatch loop: 100%|██████████| 1260/1260 [04:46<00:00,  4.40it/s, accuracy=0.812, accuracy_depends=0.938, cost=0.0916]
test minibatch loop: 100%|██████████| 315/315 [00:52<00:00,  5.98it/s, accuracy=0.791, accuracy_depends=0.693, cost=1.39] 
train minibatch loop:   0%|          | 0/1260 [00:00<?, ?it/s]

epoch: 8, training loss: 1.188670, training acc: 0.814486, training depends: 0.703934, valid loss: 1.232425, valid acc: 0.812130, valid depends: 0.698801



train minibatch loop: 100%|██████████| 1260/1260 [04:53<00:00,  4.30it/s, accuracy=0.812, accuracy_depends=0.938, cost=0.0607]
test minibatch loop: 100%|██████████| 315/315 [00:53<00:00,  5.90it/s, accuracy=0.805, accuracy_depends=0.705, cost=1.37] 
train minibatch loop:   0%|          | 0/1260 [00:00<?, ?it/s]

epoch: 9, training loss: 1.142564, training acc: 0.827337, training depends: 0.714674, valid loss: 1.207361, valid acc: 0.825732, valid depends: 0.705253



train minibatch loop: 100%|██████████| 1260/1260 [04:59<00:00,  4.21it/s, accuracy=0.812, accuracy_depends=0.938, cost=0.0462]
test minibatch loop: 100%|██████████| 315/315 [00:53<00:00,  5.84it/s, accuracy=0.817, accuracy_depends=0.701, cost=1.34] 
train minibatch loop:   0%|          | 0/1260 [00:00<?, ?it/s]

epoch: 10, training loss: 1.102756, training acc: 0.834300, training depends: 0.723572, valid loss: 1.174121, valid acc: 0.831028, valid depends: 0.713229



train minibatch loop: 100%|██████████| 1260/1260 [04:59<00:00,  4.21it/s, accuracy=0.875, accuracy_depends=0.938, cost=0.033]
test minibatch loop: 100%|██████████| 315/315 [00:53<00:00,  5.86it/s, accuracy=0.819, accuracy_depends=0.707, cost=1.32] 
train minibatch loop:   0%|          | 0/1260 [00:00<?, ?it/s]

epoch: 11, training loss: 1.068813, training acc: 0.838724, training depends: 0.731223, valid loss: 1.150037, valid acc: 0.836768, valid depends: 0.718662



train minibatch loop: 100%|██████████| 1260/1260 [04:59<00:00,  4.20it/s, accuracy=0.812, accuracy_depends=0.938, cost=0.0252]
test minibatch loop: 100%|██████████| 315/315 [00:53<00:00,  5.91it/s, accuracy=0.824, accuracy_depends=0.712, cost=1.31] 
train minibatch loop:   0%|          | 0/1260 [00:00<?, ?it/s]

epoch: 12, training loss: 1.035727, training acc: 0.843621, training depends: 0.739361, valid loss: 1.132964, valid acc: 0.839806, valid depends: 0.722642



train minibatch loop: 100%|██████████| 1260/1260 [04:58<00:00,  4.22it/s, accuracy=0.875, accuracy_depends=0.938, cost=0.0201]
test minibatch loop: 100%|██████████| 315/315 [00:53<00:00,  5.85it/s, accuracy=0.831, accuracy_depends=0.717, cost=1.29] 
train minibatch loop:   0%|          | 0/1260 [00:00<?, ?it/s]

epoch: 13, training loss: 1.005898, training acc: 0.847731, training depends: 0.746342, valid loss: 1.114029, valid acc: 0.844609, valid depends: 0.727709



train minibatch loop: 100%|██████████| 1260/1260 [04:59<00:00,  4.21it/s, accuracy=0.875, accuracy_depends=0.938, cost=0.019]
test minibatch loop: 100%|██████████| 315/315 [00:54<00:00,  5.83it/s, accuracy=0.837, accuracy_depends=0.723, cost=1.28] 
train minibatch loop:   0%|          | 0/1260 [00:00<?, ?it/s]

epoch: 14, training loss: 0.981429, training acc: 0.850139, training depends: 0.751663, valid loss: 1.097263, valid acc: 0.847585, valid depends: 0.731702



train minibatch loop: 100%|██████████| 1260/1260 [04:58<00:00,  4.21it/s, accuracy=0.938, accuracy_depends=0.938, cost=0.0136]
test minibatch loop: 100%|██████████| 315/315 [00:53<00:00,  5.87it/s, accuracy=0.841, accuracy_depends=0.725, cost=1.25] 
train minibatch loop:   0%|          | 0/1260 [00:00<?, ?it/s]

epoch: 15, training loss: 0.958836, training acc: 0.854603, training depends: 0.757034, valid loss: 1.085392, valid acc: 0.850718, valid depends: 0.734842



train minibatch loop: 100%|██████████| 1260/1260 [04:55<00:00,  4.26it/s, accuracy=0.938, accuracy_depends=0.938, cost=0.0127]
test minibatch loop: 100%|██████████| 315/315 [00:53<00:00,  5.92it/s, accuracy=0.842, accuracy_depends=0.719, cost=1.27] 
train minibatch loop:   0%|          | 0/1260 [00:00<?, ?it/s]

epoch: 16, training loss: 0.937661, training acc: 0.856612, training depends: 0.762016, valid loss: 1.090674, valid acc: 0.853433, valid depends: 0.732561



train minibatch loop: 100%|██████████| 1260/1260 [04:45<00:00,  4.41it/s, accuracy=0.938, accuracy_depends=0.938, cost=0.00851]
test minibatch loop: 100%|██████████| 315/315 [00:52<00:00,  6.01it/s, accuracy=0.842, accuracy_depends=0.73, cost=1.26]  
train minibatch loop:   0%|          | 0/1260 [00:00<?, ?it/s]

epoch: 17, training loss: 0.919759, training acc: 0.859255, training depends: 0.765594, valid loss: 1.063900, valid acc: 0.855051, valid depends: 0.740344



train minibatch loop: 100%|██████████| 1260/1260 [04:45<00:00,  4.41it/s, accuracy=0.938, accuracy_depends=0.938, cost=0.00658]
test minibatch loop: 100%|██████████| 315/315 [00:52<00:00,  6.00it/s, accuracy=0.843, accuracy_depends=0.726, cost=1.25] 
train minibatch loop:   0%|          | 0/1260 [00:00<?, ?it/s]

epoch: 18, training loss: 0.901599, training acc: 0.861192, training depends: 0.770272, valid loss: 1.058046, valid acc: 0.856491, valid depends: 0.741880



train minibatch loop: 100%|██████████| 1260/1260 [04:46<00:00,  4.41it/s, accuracy=0.938, accuracy_depends=0.938, cost=0.00555]
test minibatch loop: 100%|██████████| 315/315 [00:52<00:00,  5.99it/s, accuracy=0.848, accuracy_depends=0.723, cost=1.26] 

epoch: 19, training loss: 0.886771, training acc: 0.863076, training depends: 0.773270, valid loss: 1.073614, valid acc: 0.858486, valid depends: 0.735672






In [24]:
from tqdm import tqdm

epoch = 5

for e in range(epoch):
    train_acc, train_loss = [], []
    test_acc, test_loss = [], []
    train_acc_depends, test_acc_depends = [], []
    
    pbar = tqdm(
        range(0, len(train_X), batch_size), desc = 'train minibatch loop'
    )
    for i in pbar:
        index = min(i + batch_size, len(train_X))
        batch_x = train_X[i: index]
        batch_x = pad_sequences(batch_x,padding='post')
        batch_y = train_Y[i: index]
        batch_y = pad_sequences(batch_y,padding='post')
        batch_depends = train_depends[i: index]
        batch_depends = pad_sequences(batch_depends,padding='post')
        
        acc_depends, acc, cost, _ = sess.run(
            [model.accuracy_depends, model.accuracy, model.cost, model.optimizer],
            feed_dict = {
                model.words: batch_x,
                model.types: batch_y,
                model.heads: batch_depends,
                model.switch: True
            },
        )
        train_loss.append(cost)
        train_acc.append(acc)
        train_acc_depends.append(acc_depends)
        pbar.set_postfix(cost = cost, accuracy = acc, accuracy_depends = acc_depends)
        
    pbar = tqdm(
        range(0, len(test_X), batch_size), desc = 'test minibatch loop'
    )
    for i in pbar:
        index = min(i + batch_size, len(test_X))
        batch_x = test_X[i: index]
        batch_x = pad_sequences(batch_x,padding='post')
        batch_y = test_Y[i: index]
        batch_y = pad_sequences(batch_y,padding='post')
        batch_depends = test_depends[i: index]
        batch_depends = pad_sequences(batch_depends,padding='post')
        
        acc_depends, acc, cost = sess.run(
            [model.accuracy_depends, model.accuracy, model.cost],
            feed_dict = {
                model.words: batch_x,
                model.types: batch_y,
                model.heads: batch_depends,
                model.switch: True
            },
        )
        test_loss.append(cost)
        test_acc.append(acc)
        test_acc_depends.append(acc_depends)
        pbar.set_postfix(cost = cost, accuracy = acc, accuracy_depends = acc_depends)
    
    
    print(
    'epoch: %d, training loss: %f, training acc: %f, training depends: %f, valid loss: %f, valid acc: %f, valid depends: %f\n'
    % (e, np.mean(train_loss), 
       np.mean(train_acc), 
       np.mean(train_acc_depends), 
       np.mean(test_loss), 
       np.mean(test_acc), 
       np.mean(test_acc_depends)
    ))

train minibatch loop: 100%|██████████| 1260/1260 [04:46<00:00,  4.40it/s, accuracy=1, accuracy_depends=0.938, cost=0.195]   
test minibatch loop: 100%|██████████| 315/315 [00:52<00:00,  6.00it/s, accuracy=0.884, accuracy_depends=0.713, cost=19.2]
train minibatch loop:   0%|          | 0/1260 [00:00<?, ?it/s]

epoch: 0, training loss: 14.238445, training acc: 0.899972, training depends: 0.760470, valid loss: 15.305143, valid acc: 0.895161, valid depends: 0.726821



train minibatch loop: 100%|██████████| 1260/1260 [04:42<00:00,  4.46it/s, accuracy=1, accuracy_depends=0.938, cost=0.176]   
test minibatch loop: 100%|██████████| 315/315 [00:52<00:00,  6.05it/s, accuracy=0.888, accuracy_depends=0.705, cost=18.6]
train minibatch loop:   0%|          | 0/1260 [00:00<?, ?it/s]

epoch: 1, training loss: 13.270262, training acc: 0.907359, training depends: 0.749328, valid loss: 14.915302, valid acc: 0.898323, valid depends: 0.718794



train minibatch loop: 100%|██████████| 1260/1260 [04:54<00:00,  4.28it/s, accuracy=1, accuracy_depends=0.938, cost=0.16]    
test minibatch loop: 100%|██████████| 315/315 [00:50<00:00,  6.21it/s, accuracy=0.889, accuracy_depends=0.701, cost=18.3]
train minibatch loop:   0%|          | 0/1260 [00:00<?, ?it/s]

epoch: 2, training loss: 12.749146, training acc: 0.911169, training depends: 0.742163, valid loss: 14.635544, valid acc: 0.900516, valid depends: 0.714716



train minibatch loop: 100%|██████████| 1260/1260 [04:36<00:00,  4.55it/s, accuracy=1, accuracy_depends=0.938, cost=0.145]   
test minibatch loop: 100%|██████████| 315/315 [00:50<00:00,  6.25it/s, accuracy=0.891, accuracy_depends=0.695, cost=18.1]
train minibatch loop:   0%|          | 0/1260 [00:00<?, ?it/s]

epoch: 3, training loss: 12.350799, training acc: 0.914132, training depends: 0.737238, valid loss: 14.427675, valid acc: 0.902087, valid depends: 0.711193



train minibatch loop: 100%|██████████| 1260/1260 [04:36<00:00,  4.57it/s, accuracy=1, accuracy_depends=0.938, cost=0.128]   
test minibatch loop: 100%|██████████| 315/315 [00:50<00:00,  6.22it/s, accuracy=0.891, accuracy_depends=0.693, cost=17.9]

epoch: 4, training loss: 12.030086, training acc: 0.916556, training depends: 0.733690, valid loss: 14.261791, valid acc: 0.903387, valid depends: 0.708722






In [25]:
saver = tf.train.Saver(tf.trainable_variables())
saver.save(sess, 'albert-tiny-dependency/model.ckpt')

'albert-tiny-dependency/model.ckpt'

In [26]:
tf.reset_default_graph()
sess = tf.InteractiveSession()

learning_rate = 2e-5
hidden_size_word = 128

model = Model(learning_rate, hidden_size_word, training = False)

sess.run(tf.global_variables_initializer())
saver = tf.train.Saver(tf.trainable_variables())
saver.restore(sess, 'albert-tiny-dependency/model.ckpt')



INFO:tensorflow:++++++ warmup starts at step 0, for 3777 steps ++++++
INFO:tensorflow:using adamw
INFO:tensorflow:Restoring parameters from albert-tiny-dependency/model.ckpt


In [27]:
def pred2label(pred):
    out = []
    for pred_i in pred:
        out_i = []
        for p in pred_i:
            out_i.append(idx2tag[p])
        out.append(out_i)
    return out

In [28]:
def evaluate(heads_pred, types_pred, heads, types, lengths,
             symbolic_root=False, symbolic_end=False):
    batch_size, _ = heads_pred.shape
    ucorr = 0.
    lcorr = 0.
    total = 0.
    ucomplete_match = 0.
    lcomplete_match = 0.

    corr_root = 0.
    total_root = 0.
    start = 1 if symbolic_root else 0
    end = 1 if symbolic_end else 0
    for i in range(batch_size):
        ucm = 1.
        lcm = 1.
        for j in range(start, lengths[i] - end):

            total += 1
            if heads[i, j] == heads_pred[i, j]:
                ucorr += 1
                if types[i, j] == types_pred[i, j]:
                    lcorr += 1
                else:
                    lcm = 0
            else:
                ucm = 0
                lcm = 0

            if heads[i, j] == 0:
                total_root += 1
                corr_root += 1 if heads_pred[i, j] == 0 else 0

        ucomplete_match += ucm
        lcomplete_match += lcm
    
    return ucorr / total, lcorr / total, corr_root / total_root

In [29]:
arcs, types, roots = [], [], []
real_Y, predict_Y = [], []

for i in tqdm(range(0, len(test_X), batch_size)):
    index = min(i + batch_size, len(test_X))
    batch_x = test_X[i: index]
    batch_x = pad_sequences(batch_x,padding='post')
    batch_y = test_Y[i: index]
    batch_y = pad_sequences(batch_y,padding='post')
    batch_depends = test_depends[i: index]
    batch_depends = pad_sequences(batch_depends,padding='post')
    
    tags_seq, heads = sess.run(
        [model.logits, model.heads_seq],
        feed_dict = {
            model.words: batch_x,
        },
    )
    
    arc_accuracy, type_accuracy, root_accuracy = evaluate(heads - 1, tags_seq, batch_depends - 1, batch_y, 
            np.count_nonzero(batch_x, axis = 1))
    arcs.append(arc_accuracy)
    types.append(type_accuracy)
    roots.append(root_accuracy)
    predicted = pred2label(tags_seq)
    real = pred2label(batch_y)
    predict_Y.extend(predicted)
    real_Y.extend(real)

100%|██████████| 315/315 [00:54<00:00,  5.80it/s]


In [30]:
temp_real_Y = []
for r in real_Y:
    temp_real_Y.extend(r)
    
temp_predict_Y = []
for r in predict_Y:
    temp_predict_Y.extend(r)

In [31]:
from sklearn.metrics import classification_report
print(classification_report(temp_real_Y, temp_predict_Y, digits = 5))

  _warn_prf(average, modifier, msg_start, len(result))


               precision    recall  f1-score   support

          PAD    1.00000   1.00000   1.00000    901404
            X    0.99997   0.99998   0.99997    158217
          acl    0.74523   0.72259   0.73374      6056
        advcl    0.44763   0.44416   0.44589      2319
       advmod    0.80839   0.80245   0.80541      9537
         amod    0.74481   0.69167   0.71726      8144
        appos    0.71137   0.68084   0.69577      4963
          aux    0.00000   0.00000   0.00000         9
         case    0.90625   0.93745   0.92159     21056
           cc    0.92435   0.90888   0.91655      6453
        ccomp    0.32162   0.13918   0.19429       855
     compound    0.76535   0.75323   0.75924     13008
compound:plur    0.76103   0.77066   0.76581      1186
         conj    0.79454   0.78507   0.78978      8640
          cop    0.87581   0.90736   0.89130      1943
        csubj    0.66667   0.04082   0.07692        49
   csubj:pass    0.00000   0.00000   0.00000        18
         

In [32]:
print('arc accuracy:', np.mean(arcs))
print('types accuracy:', np.mean(types))
print('root accuracy:', np.mean(roots))

arc accuracy: 0.7087220659183397
types accuracy: 0.6735055899028873
root accuracy: 0.8178452380952382


In [33]:
strings = ','.join(
    [
        n.name
        for n in tf.get_default_graph().as_graph_def().node
        if ('Variable' in n.op
        or 'Placeholder' in n.name
        or '_seq' in n.name
        or 'alphas' in n.name
        or 'logits' in n.name
        or 'self/Softmax' in n.name)
        and 'Adam' not in n.name
        and 'beta' not in n.name
        and 'global_step' not in n.name
        and 'adam' not in n.name
        and 'gradients/bert' not in n.name
    ]
)
strings.split(',')

['Placeholder',
 'Placeholder_1',
 'Placeholder_2',
 'Placeholder_3',
 'W_d',
 'W_e',
 'U',
 'U-bi',
 'Wl',
 'Wr',
 'bert/embeddings/word_embeddings',
 'bert/embeddings/token_type_embeddings',
 'bert/embeddings/position_embeddings',
 'bert/embeddings/LayerNorm/gamma',
 'bert/encoder/embedding_hidden_mapping_in/kernel',
 'bert/encoder/embedding_hidden_mapping_in/bias',
 'bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel',
 'bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias',
 'bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel',
 'bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias',
 'bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel',
 'bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias',
 'bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel',
 'bert/encoder/transformer/group_0/inner_group_0/attention_1/output/de

In [34]:
def freeze_graph(model_dir, output_node_names):

    if not tf.gfile.Exists(model_dir):
        raise AssertionError(
            "Export directory doesn't exists. Please specify an export "
            'directory: %s' % model_dir
        )

    checkpoint = tf.train.get_checkpoint_state(model_dir)
    input_checkpoint = checkpoint.model_checkpoint_path

    absolute_model_dir = '/'.join(input_checkpoint.split('/')[:-1])
    output_graph = absolute_model_dir + '/frozen_model.pb'
    clear_devices = True
    with tf.Session(graph = tf.Graph()) as sess:
        saver = tf.train.import_meta_graph(
            input_checkpoint + '.meta', clear_devices = clear_devices
        )
        saver.restore(sess, input_checkpoint)
        output_graph_def = tf.graph_util.convert_variables_to_constants(
            sess,
            tf.get_default_graph().as_graph_def(),
            output_node_names.split(','),
        )
        with tf.gfile.GFile(output_graph, 'wb') as f:
            f.write(output_graph_def.SerializeToString())
        print('%d ops in the final graph.' % len(output_graph_def.node))

In [35]:
freeze_graph('albert-tiny-dependency', strings)

INFO:tensorflow:Restoring parameters from albert-tiny-dependency/model.ckpt
Instructions for updating:
Use `tf.compat.v1.graph_util.convert_variables_to_constants`
Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`
INFO:tensorflow:Froze 40 variables.
INFO:tensorflow:Converted 40 variables to const ops.
1730 ops in the final graph.


In [36]:
string = 'husein makan ayam'

import re

def entities_textcleaning(string, lowering = False):
    """
    use by entities recognition, pos recognition and dependency parsing
    """
    string = re.sub('[^A-Za-z0-9\-\/() ]+', ' ', string)
    string = re.sub(r'[ ]+', ' ', string).strip()
    original_string = string.split()
    if lowering:
        string = string.lower()
    string = [
        (original_string[no], word.title() if word.isupper() else word)
        for no, word in enumerate(string.split())
        if len(word)
    ]
    return [s[0] for s in string], [s[1] for s in string]

def parse_X(left):
    bert_tokens = ['[CLS]']
    for no, orig_token in enumerate(left):
        t = tokenizer.tokenize(orig_token)
        bert_tokens.extend(t)
    bert_tokens.append("[SEP]")
    return tokenizer.convert_tokens_to_ids(bert_tokens), bert_tokens

sequence = entities_textcleaning(string)[1]
parsed_sequence, bert_sequence = parse_X(sequence)

In [37]:
def merge_sentencepiece_tokens_tagging(x, y):
    new_paired_tokens = []
    n_tokens = len(x)
    rejected = ['[CLS]', '[SEP]']

    i = 0

    while i < n_tokens:

        current_token, current_label = x[i], y[i]
        if not current_token.startswith('▁') and current_token not in rejected:
            previous_token, previous_label = new_paired_tokens.pop()
            merged_token = previous_token
            merged_label = [previous_label]
            while (
                not current_token.startswith('▁')
                and current_token not in rejected
            ):
                merged_token = merged_token + current_token.replace('▁', '')
                merged_label.append(current_label)
                i = i + 1
                current_token, current_label = x[i], y[i]
            merged_label = merged_label[0]
            new_paired_tokens.append((merged_token, merged_label))

        else:
            new_paired_tokens.append((current_token, current_label))
            i = i + 1

    words = [
        i[0].replace('▁', '')
        for i in new_paired_tokens
        if i[0] not in rejected
    ]
    labels = [i[1] for i in new_paired_tokens if i[0] not in rejected]
    return words, labels

In [38]:
import boto3

bucketName = 'huseinhouse-storage'
Key = 'albert-tiny-dependency/frozen_model.pb'
outPutname = "v34/dependency/albert-tiny-dependency.pb"

s3 = boto3.client('s3')

s3.upload_file(Key,bucketName,outPutname)