In [1]:
# !wget https://raw.githubusercontent.com/UniversalDependencies/UD_English-EWT/master/en_ewt-ud-dev.conllu
# !wget https://raw.githubusercontent.com/UniversalDependencies/UD_English-EWT/master/en_ewt-ud-train.conllu
# !wget https://raw.githubusercontent.com/UniversalDependencies/UD_English-EWT/master/en_ewt-ud-test.conllu

In [2]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

In [3]:
import malaya
import re
from malaya.texts._text_functions import split_into_sentences
from malaya.texts import _regex
import numpy as np
import itertools

tokenizer = malaya.preprocessing._tokenizer
splitter = split_into_sentences

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [4]:
def is_number_regex(s):
    if re.match("^\d+?\.\d+?$", s) is None:
        return s.isdigit()
    return True

def preprocessing(w):
    if is_number_regex(w):
        return '<NUM>'
    elif re.match(_regex._money, w):
        return '<MONEY>'
    elif re.match(_regex._date, w):
        return '<DATE>'
    elif re.match(_regex._expressions['email'], w):
        return '<EMAIL>'
    elif re.match(_regex._expressions['url'], w):
        return '<URL>'
    else:
        w = ''.join(''.join(s)[:2] for _, s in itertools.groupby(w))
        return w

In [5]:
word2idx = {'PAD': 0,'UNK':1, '_ROOT': 2}
tag2idx = {'PAD': 0, '_<ROOT>': 1}
char2idx = {'PAD': 0,'UNK':1, '_ROOT': 2}
word_idx = 3
tag_idx = 2
char_idx = 3

special_tokens = ['<NUM>', '<MONEY>', '<DATE>', '<URL>', '<EMAIL>']

for t in special_tokens:
    word2idx[t] = word_idx
    word_idx += 1
    char2idx[t] = char_idx
    char_idx += 1
    
word2idx, char2idx

({'PAD': 0,
  'UNK': 1,
  '_ROOT': 2,
  '<NUM>': 3,
  '<MONEY>': 4,
  '<DATE>': 5,
  '<URL>': 6,
  '<EMAIL>': 7},
 {'PAD': 0,
  'UNK': 1,
  '_ROOT': 2,
  '<NUM>': 3,
  '<MONEY>': 4,
  '<DATE>': 5,
  '<URL>': 6,
  '<EMAIL>': 7})

In [6]:
PAD = "_PAD"
PAD_POS = "_PAD_POS"
PAD_TYPE = "_<PAD>"
PAD_CHAR = "_PAD_CHAR"
ROOT = "_ROOT"
ROOT_POS = "_ROOT_POS"
ROOT_TYPE = "_<ROOT>"
ROOT_CHAR = "_ROOT_CHAR"
END = "_END"
END_POS = "_END_POS"
END_TYPE = "_<END>"
END_CHAR = "_END_CHAR"

def process_corpus(corpus, until = None):
    global word2idx, tag2idx, char2idx, word_idx, tag_idx, char_idx
    sentences, words, depends, labels, pos, chars = [], [], [], [], [], []
    temp_sentence, temp_word, temp_depend, temp_label, temp_pos = [], [], [], [], []
    first_time = True
    for sentence in corpus:
        try:
            if len(sentence):
                if sentence[0] == '#':
                    continue
                if first_time:
                    print(sentence)
                    first_time = False
                sentence = sentence.split('\t')
                for c in sentence[1]:
                    if c not in char2idx:
                        char2idx[c] = char_idx
                        char_idx += 1
                if sentence[7] not in tag2idx:
                    tag2idx[sentence[7]] = tag_idx
                    tag_idx += 1
                sentence[1] = preprocessing(sentence[1])
                if sentence[1] not in word2idx:
                    word2idx[sentence[1]] = word_idx
                    word_idx += 1
                temp_word.append(word2idx[sentence[1]])
                temp_depend.append(int(sentence[6]))
                temp_label.append(tag2idx[sentence[7]])
                temp_sentence.append(sentence[1])
                temp_pos.append(sentence[3])
            else:
                if len(temp_sentence) < 2 or len(temp_word) != len(temp_label):
                    temp_word = []
                    temp_depend = []
                    temp_label = []
                    temp_sentence = []
                    temp_pos = []
                    continue
                words.append([word2idx['_ROOT']] + temp_word)
                depends.append([0] + temp_depend)
                labels.append([tag2idx['_<ROOT>']] + temp_label)
                sentences.append([ROOT] + temp_sentence)
                pos.append([ROOT_POS] + temp_pos)
                char_ = [[char2idx['_ROOT']]]
                for w in temp_sentence:
                    if w in char2idx:
                        char_.append([char2idx[w]])
                    else:
                        char_.append([char2idx[c] for c in w])
                chars.append(char_)
                temp_word = []
                temp_depend = []
                temp_label = []
                temp_sentence = []
                temp_pos = []
        except Exception as e:
            print(e, sentence)
    return sentences[:-1], words[:-1], depends[:-1], labels[:-1], pos[:-1], chars[:-1]

In [7]:
def _obtain_child_index_for_left2right(heads):
    child_ids = [[] for _ in range(len(heads))]
    # skip the symbolic root.
    for child in range(1, len(heads)):
        head = heads[child]
        child_ids[head].append(child)
    return child_ids


def _obtain_child_index_for_inside_out(heads):
    child_ids = [[] for _ in range(len(heads))]
    for head in range(len(heads)):
        # first find left children inside-out
        for child in reversed(range(1, head)):
            if heads[child] == head:
                child_ids[head].append(child)
        # second find right children inside-out
        for child in range(head + 1, len(heads)):
            if heads[child] == head:
                child_ids[head].append(child)
    return child_ids


def _obtain_child_index_for_depth(heads, reverse):
    def calc_depth(head):
        children = child_ids[head]
        max_depth = 0
        for child in children:
            depth = calc_depth(child)
            child_with_depth[head].append((child, depth))
            max_depth = max(max_depth, depth + 1)
        child_with_depth[head] = sorted(child_with_depth[head], key=lambda x: x[1], reverse=reverse)
        return max_depth

    child_ids = _obtain_child_index_for_left2right(heads)
    child_with_depth = [[] for _ in range(len(heads))]
    calc_depth(0)
    return [[child for child, depth in child_with_depth[head]] for head in range(len(heads))]


def _generate_stack_inputs(heads, types, prior_order):
    if prior_order == 'deep_first':
        child_ids = _obtain_child_index_for_depth(heads, True)
    elif prior_order == 'shallow_first':
        child_ids = _obtain_child_index_for_depth(heads, False)
    elif prior_order == 'left2right':
        child_ids = _obtain_child_index_for_left2right(heads)
    elif prior_order == 'inside_out':
        child_ids = _obtain_child_index_for_inside_out(heads)
    else:
        raise ValueError('Unknown prior order: %s' % prior_order)

    stacked_heads = []
    children = []
    siblings = []
    stacked_types = []
    skip_connect = []
    prev = [0 for _ in range(len(heads))]
    sibs = [0 for _ in range(len(heads))]
    stack = [0]
    position = 1
    while len(stack) > 0:
        head = stack[-1]
        stacked_heads.append(head)
        siblings.append(sibs[head])
        child_id = child_ids[head]
        skip_connect.append(prev[head])
        prev[head] = position
        if len(child_id) == 0:
            children.append(head)
            sibs[head] = 0
            stacked_types.append(tag2idx['PAD'])
            stack.pop()
        else:
            child = child_id.pop(0)
            children.append(child)
            sibs[head] = child
            stack.append(child)
            stacked_types.append(types[child])
        position += 1

    return stacked_heads, children, siblings, stacked_types, skip_connect

In [8]:
with open('en_ewt-ud-dev.conllu') as fopen:
    dev = fopen.read().split('\n')

sentences_dev, words_dev, depends_dev, labels_dev, _, seq_dev = process_corpus(dev)

1	From	from	ADP	IN	_	3	case	3:case	_
invalid literal for int() with base 10: '_' ['10.1', 'has', 'have', 'VERB', 'VBZ', '_', '_', '_', '8:parataxis', 'CopyOf=-1']
invalid literal for int() with base 10: '_' ['21.1', 'has', 'have', 'VERB', 'VBZ', '_', '_', '_', '16:conj:and', 'CopyOf=-1']


In [9]:
stacked_heads_test, children_test, siblings_test, stacked_types_test = [], [], [], []
for i in range(len(sentences_dev)):
    stacked_heads, children, siblings, stacked_types, _ = _generate_stack_inputs(depends_dev[i], 
                                                                                 labels_dev[i], 'deep_first')
    stacked_heads_test.append(stacked_heads)
    children_test.append(children)
    siblings_test.append(siblings)
    stacked_types_test.append(stacked_types)

In [10]:
with open('en_ewt-ud-test.conllu') as fopen:
    test = fopen.read().split('\n')

sentences_test, words_test, depends_test, labels_test, _, seq_test = process_corpus(test)


1	What	what	PRON	WP	PronType=Int	0	root	0:root	_
invalid literal for int() with base 10: '_' ['24.1', 'left', 'left', 'VERB', 'VBN', 'Tense=Past|VerbForm=Part', '_', '_', '6:parataxis', 'CopyOf=6']


In [11]:
for i in range(len(sentences_test)):
    stacked_heads, children, siblings, stacked_types, _ = _generate_stack_inputs(depends_test[i], 
                                                                                 labels_test[i], 'deep_first')
    stacked_heads_test.append(stacked_heads)
    children_test.append(children)
    siblings_test.append(siblings)
    stacked_types_test.append(stacked_types)
    
sentences_test.extend(sentences_dev)
words_test.extend(words_dev)
depends_test.extend(depends_dev)
labels_test.extend(labels_dev)
seq_test.extend(seq_dev)

In [12]:
with open('en_ewt-ud-train.conllu') as fopen:
    train = fopen.read().split('\n')

sentences_train, words_train, depends_train, labels_train, _, _ = process_corpus(train)

stacked_heads_train, children_train, siblings_train, stacked_types_train = [], [], [], []
for i in range(len(sentences_train)):
    stacked_heads, children, siblings, stacked_types, _ = _generate_stack_inputs(depends_train[i], 
                                                                                 labels_train[i], 'deep_first')
    stacked_heads_train.append(stacked_heads)
    children_train.append(children)
    siblings_train.append(siblings)
    stacked_types_train.append(stacked_types)

1	Al	Al	PROPN	NNP	Number=Sing	0	root	0:root	SpaceAfter=No
invalid literal for int() with base 10: '_' ['8.1', 'reported', 'report', 'VERB', 'VBN', 'Tense=Past|VerbForm=Part|Voice=Pass', '_', '_', '5:conj:and', 'CopyOf=5']
invalid literal for int() with base 10: '_' ['22.1', 'used', 'use', 'VERB', 'VBN', 'Tense=Past|VerbForm=Part', '_', '_', '13:advcl:with|17:conj:and', 'CopyOf=17']
invalid literal for int() with base 10: '_' ['22.1', 'used', 'use', 'VERB', 'VBN', 'Tense=Past|VerbForm=Part', '_', '_', '13:advcl:with|17:conj:and', 'CopyOf=17']
invalid literal for int() with base 10: '_' ['11.1', 'called', 'call', 'VERB', 'VBN', 'Tense=Past|VerbForm=Part|Voice=Pass', '_', '_', '3:conj:and', 'CopyOf=3']
invalid literal for int() with base 10: '_' ['14.1', 'is', 'be', 'VERB', 'VBZ', '_', '_', '_', '1:conj:and', 'CopyOf=1']
invalid literal for int() with base 10: '_' ['20.1', 'reflect', 'reflect', 'VERB', 'VBP', 'Mood=Ind|Tense=Pres|VerbForm=Fin', '_', '_', '7:acl:relcl|9:conj', 'CopyOf=9']


In [13]:
len(sentences_train), len(sentences_test)

(12000, 3824)

In [14]:
idx2word = {v:k for k, v in word2idx.items()}
idx2tag = {v:k for k, v in tag2idx.items()}
len(idx2word)

21974

In [15]:
import tensorflow as tf

In [16]:
from enum import Enum

class PriorOrder(Enum):
    DEPTH = 0
    INSIDE_OUT = 1
    LEFT2RIGTH = 2

class BiAAttention:
    def __init__(self, input_size_encoder, input_size_decoder, num_labels):
        self.input_size_encoder = input_size_encoder
        self.input_size_decoder = input_size_decoder
        self.num_labels = num_labels
        
        self.W_d = tf.get_variable("W_d", shape=[self.num_labels, self.input_size_decoder],
           initializer=tf.contrib.layers.xavier_initializer())
        self.W_e = tf.get_variable("W_e", shape=[self.num_labels, self.input_size_encoder],
           initializer=tf.contrib.layers.xavier_initializer())
        self.U = tf.get_variable("U", shape=[self.num_labels, self.input_size_decoder, self.input_size_encoder],
           initializer=tf.contrib.layers.xavier_initializer())
        
    def forward(self, input_d, input_e, mask_d=None, mask_e=None):
        batch = tf.shape(input_d)[0]
        length_decoder = tf.shape(input_d)[1]
        length_encoder = tf.shape(input_e)[1]
        out_d = tf.expand_dims(tf.matmul(self.W_d, tf.transpose(input_d, [0, 2, 1])), 3)
        out_e = tf.expand_dims(tf.matmul(self.W_e, tf.transpose(input_e, [0, 2, 1])), 2)
        output = tf.matmul(tf.expand_dims(input_d, 1), self.U)
        output = tf.matmul(output, tf.transpose(tf.expand_dims(input_e, 1), [0, 1, 3, 2]))
        
        output = output + out_d + out_e
        
        if mask_d is not None:
            d = tf.expand_dims(tf.expand_dims(mask_d, 1), 3)
            e = tf.expand_dims(tf.expand_dims(mask_e, 1), 2)
            output = output * d * e
            
        return output
    
class BiLinear:
    def __init__(self, left_features, right_features, out_features):
        self.left_features = left_features
        self.right_features = right_features
        self.out_features = out_features
        
        self.U = tf.get_variable("U-bi", shape=[out_features, left_features, right_features],
           initializer=tf.contrib.layers.xavier_initializer())
        self.W_l = tf.get_variable("Wl", shape=[out_features, left_features],
           initializer=tf.contrib.layers.xavier_initializer())
        self.W_r = tf.get_variable("Wr", shape=[out_features, right_features],
           initializer=tf.contrib.layers.xavier_initializer())
    
    def forward(self, input_left, input_right):
        left_size = tf.shape(input_left)
        output_shape = tf.concat([left_size[:-1], [self.out_features]], axis = 0)
        batch = tf.cast(tf.reduce_prod(left_size[:-1]), tf.int32)
        input_left = tf.reshape(input_left, (batch, self.left_features))
        input_right = tf.reshape(input_right, (batch, self.right_features))
        tiled = tf.tile(tf.expand_dims(input_left, axis = 0), (self.out_features,1,1))
        output = tf.transpose(tf.reduce_sum(tf.matmul(tiled, self.U), axis = 2))
        output = output + tf.matmul(input_left, tf.transpose(self.W_l))\
        + tf.matmul(input_right, tf.transpose(self.W_r))
        
        return tf.reshape(output, output_shape)

class StackPointer:
    def __init__(self, word_dim, num_words, char_dim, num_chars, num_filters, kernel_size,
                 input_size_decoder, hidden_size, layers,
                 num_labels, arc_space, type_space):
        
        def cells(size, reuse=False):
            return tf.nn.rnn_cell.LSTMCell(size,
                                           initializer=tf.orthogonal_initializer(),reuse=reuse,
                                           state_is_tuple=False)
        
        self.word_embedd = tf.Variable(tf.random_uniform([num_words, word_dim], -1, 1))
        self.char_embedd = tf.Variable(tf.random_uniform([num_chars, char_dim], -1, 1))
        self.conv1d = tf.layers.Conv1D(num_filters, kernel_size, 1, padding='VALID')
        self.num_labels = num_labels
        self.prior_order = PriorOrder.DEPTH
        self.char_dim = char_dim
        self.layers = layers
        self.encoder = tf.nn.rnn_cell.MultiRNNCell([cells(hidden_size) for _ in range(layers)],
                                                   state_is_tuple=False)
        self.encoder_char = tf.nn.rnn_cell.MultiRNNCell([cells(hidden_size) for _ in range(layers)],
                                                   state_is_tuple=False)
        self.decoder = tf.nn.rnn_cell.MultiRNNCell([cells(hidden_size) for _ in range(layers)],
                                                   state_is_tuple=False)
        self.hidden_size = hidden_size
        self.arc_space = arc_space
        
        
        self.src_dense = tf.layers.Dense(hidden_size)
        self.hx_dense = tf.layers.Dense(hidden_size)

        self.arc_h = tf.layers.Dense(arc_space)
        self.arc_c = tf.layers.Dense(arc_space)
        self.attention = BiAAttention(arc_space, arc_space, 1)

        self.type_h = tf.layers.Dense(type_space)
        self.type_c = tf.layers.Dense(type_space)
        self.bilinear = BiLinear(type_space, type_space, self.num_labels)
        
    def encode(self, input_word, input_char):
        word = tf.nn.embedding_lookup(self.word_embedd, input_word)
        char = tf.nn.embedding_lookup(self.char_embedd, input_char)
        s = tf.shape(char)
        char = tf.reshape(
            char, shape = [s[0] * s[1], s[-2], self.char_dim]
        )
        output, _ = tf.nn.dynamic_rnn(self.encoder_char, char, dtype = tf.float32,
                                      scope = 'encoder-char')
        output = tf.reshape(
            output[:, -1], shape = [s[0], s[1], self.hidden_size]
        )
        word_embedded = tf.concat([word, output], axis = -1)
        output, hn = tf.nn.dynamic_rnn(self.encoder, word_embedded, dtype = tf.float32,
                                      scope = 'encoder')
        return output, hn
    
    def decode(self, output_encoder, heads, heads_stack, siblings, hn):
        batch = tf.shape(output_encoder)[0]
        batch_index = tf.range(0, batch)
        t = tf.transpose(heads_stack)
        broadcasted = tf.broadcast_to(batch_index, tf.shape(t))
        concatenated = tf.transpose(tf.concat([tf.expand_dims(broadcasted, axis = 0), 
                                               tf.expand_dims(t, axis = 0)], axis = 0))
        src_encoding = tf.gather_nd(output_encoder, concatenated)
        
        mask_sibs = tf.expand_dims(tf.cast(tf.not_equal(siblings, 0), tf.float32), axis = 2)
        t = tf.transpose(siblings)
        broadcasted = tf.broadcast_to(batch_index, tf.shape(t))
        concatenated = tf.transpose(tf.concat([tf.expand_dims(broadcasted, axis = 0), 
                                               tf.expand_dims(t, axis = 0)], axis = 0))
        output_enc_sibling = tf.gather_nd(output_encoder, concatenated) * mask_sibs
        src_encoding = src_encoding + output_enc_sibling
        
        t = tf.transpose(heads_stack)
        broadcasted = tf.broadcast_to(batch_index, tf.shape(t))
        concatenated = tf.transpose(tf.concat([tf.expand_dims(broadcasted, axis = 0), 
                                       tf.expand_dims(t, axis = 0)],axis = 0))
        g = tf.transpose(tf.gather_nd(heads, concatenated))
        broadcasted = tf.broadcast_to(batch_index, tf.shape(g))
        concatenated = tf.transpose(tf.concat([tf.expand_dims(broadcasted, axis = 0), 
                                               tf.expand_dims(g, axis = 0)],axis = 0))
        output_enc_gpar = tf.gather_nd(output_encoder, concatenated)
        src_encoding = src_encoding + output_enc_gpar
        
        src_encoding = tf.nn.elu(self.src_dense(src_encoding))
        output, hn = tf.nn.dynamic_rnn(self.decoder, src_encoding, dtype = tf.float32,
                                      initial_state = hn,
                                      scope = 'decoder')
        return output, hn
    
    def loss(self, input_word, input_char, 
             heads, stacked_heads, children, siblings, stacked_types,
             mask_e, mask_d,
             label_smooth = 1.0):
        
        output_enc, hn_enc = self.encode(input_word, input_char)
        arc_c = tf.nn.elu(self.arc_c(output_enc))
        type_c = tf.nn.elu(self.type_c(output_enc))
        
        output_dec, _ = self.decode(output_enc, heads, stacked_heads, siblings, hn_enc)
        arc_h = tf.nn.elu(self.arc_h(output_dec))
        type_h = tf.nn.elu(self.type_h(output_dec))
        
        max_len_d = tf.shape(arc_h)[1]
        
        out_arc = tf.squeeze(self.attention.forward(arc_h, arc_c, mask_d=mask_d, mask_e=mask_e), axis = 1)
        batch = tf.shape(arc_c)[0]
        max_len_e = tf.shape(arc_c)[1]
        batch_index = tf.range(0, batch)
        
        t = tf.transpose(children)
        broadcasted = tf.broadcast_to(batch_index, tf.shape(t))
        concatenated = tf.transpose(tf.concat([tf.expand_dims(broadcasted, axis = 0), 
                                               tf.expand_dims(t, axis = 0)], axis = 0))
        type_c = tf.gather_nd(type_c, concatenated)
        out_type = self.bilinear.forward(type_h, type_c)
        print(out_arc.shape,out_type.shape)
        
        minus_inf = -1e8
        minus_mask_d = (1 - mask_d) * minus_inf
        minus_mask_e = (1 - mask_e) * minus_inf
        
        out_arc = out_arc + tf.expand_dims(minus_mask_d, 2) + tf.expand_dims(minus_mask_e, 1)
        loss_arc = tf.nn.log_softmax(out_arc, axis = 2)
        loss_type = tf.nn.log_softmax(out_type, axis = 2)
        coverage = tf.cumsum(tf.exp(loss_arc), axis = 1)
        
        mask_leaf = tf.cast(tf.equal(children, stacked_heads), tf.float32)
        mask_non_leaf = (1.0 - mask_leaf)
        
        mask_d_2 = tf.expand_dims(mask_d, 2)
        mask_e_1 = tf.expand_dims(mask_e, 1)
        
        loss_arc = loss_arc * mask_d_2 * mask_e_1
        coverage = coverage * mask_d_2 * mask_e_1
        loss_type = loss_type * mask_d_2
        mask_leaf = mask_leaf * mask_d
        mask_non_leaf = mask_non_leaf * mask_d
        num_leaf = tf.reduce_sum(mask_leaf)
        num_non_leaf = tf.reduce_sum(mask_non_leaf)
        head_index = tf.tile(tf.expand_dims(tf.range(0, max_len_d), 1), [1, batch])
        
        t = tf.transpose(children)
        broadcasted = tf.broadcast_to(batch_index, tf.shape(t))
        concatenated = tf.transpose(tf.concat([tf.expand_dims(broadcasted, axis = 0),
                                               tf.expand_dims(head_index, axis = 0),
                                               tf.expand_dims(t, axis = 0)], axis = 0))
        loss_arc = tf.gather_nd(loss_arc, concatenated)
        
        t = tf.transpose(stacked_types)
        broadcasted = tf.broadcast_to(batch_index, tf.shape(t))
        concatenated = tf.transpose(tf.concat([tf.expand_dims(broadcasted, axis = 0),
                                               tf.expand_dims(head_index, axis = 0),
                                               tf.expand_dims(t, axis = 0)], axis = 0))
        loss_type = tf.gather_nd(loss_type, concatenated)
        
        loss_arc_leaf = loss_arc * mask_leaf
        loss_arc_non_leaf = loss_arc * mask_non_leaf

        loss_type_leaf = loss_type * mask_leaf
        loss_type_non_leaf = loss_type * mask_non_leaf
        
        loss_cov = tf.clip_by_value(coverage - 2.0, 0.0, 100.0)
        
        return (tf.reduce_sum(-loss_arc_leaf) / num_leaf, 
                tf.reduce_sum(-loss_arc_non_leaf) / num_non_leaf,
                tf.reduce_sum(-loss_type_leaf) / num_leaf, 
                tf.reduce_sum(-loss_type_non_leaf) / num_non_leaf,
                tf.reduce_sum(loss_cov) / (num_leaf + num_non_leaf), 
                num_leaf, 
                num_non_leaf)
    
class Model:
    def __init__(self, learning_rate = 1e-3, cov = 0.0):
        self.stackpointer = StackPointer(word_dim = 128, 
                            num_words = len(word2idx), 
                            char_dim = 128, 
                            num_chars = len(char2idx), 
                            num_filters = 128, 
                            kernel_size = 3,
                            input_size_decoder = 256, 
                            hidden_size = 256, 
                            layers = 1,
                            num_labels = len(tag2idx), 
                            arc_space = 128, 
                            type_space = 128)
        self.words = tf.placeholder(tf.int32, (None, None))
        self.chars = tf.placeholder(tf.int32, (None, None, None))
        self.heads = tf.placeholder(tf.int32, (None, None))
        self.stacked_heads = tf.placeholder(tf.int32, (None, None))
        self.siblings = tf.placeholder(tf.int32, (None, None))
        self.childrens = tf.placeholder(tf.int32, (None, None))
        self.stacked_types = tf.placeholder(tf.int32, (None, None))
        self.mask_e = tf.placeholder(tf.float32, (None, None))
        self.mask_d = tf.placeholder(tf.float32, (None, None))
        loss_arc_leaf, loss_arc_non_leaf, \
        loss_type_leaf, loss_type_non_leaf, \
        loss_cov, num_leaf, num_non_leaf = self.stackpointer.loss(self.words, self.chars, self.heads, 
                               self.stacked_heads, self.childrens, 
                               self.siblings, self.stacked_types,
                               self.mask_e, self.mask_d)
        loss_arc = loss_arc_leaf + loss_arc_non_leaf
        loss_type = loss_type_leaf + loss_type_non_leaf
        self.cost = loss_arc + loss_type + cov * loss_cov
        self.optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(self.cost)
        
        self.encode_output, self.encode_hidden = self.stackpointer.encode(self.words, self.chars)
        self.encode_arc_c = tf.nn.elu(self.stackpointer.arc_c(self.encode_output))
        self.type_c = tf.nn.elu(self.stackpointer.type_c(self.encode_output))
        
        self.src_encoding = tf.placeholder(tf.float32, (None, self.stackpointer.hidden_size))
        self.arc_c = tf.placeholder(tf.float32, (None, self.stackpointer.arc_space))
        self.hx = tf.placeholder(tf.float32, (None, 
                                              self.stackpointer.hidden_size * 2 * self.stackpointer.layers))      
        
        src_encoding = tf.nn.elu(self.stackpointer.src_dense(self.src_encoding))
        output_dec, hx = self.stackpointer.decoder(src_encoding, self.hx)
        arc_h = tf.nn.elu(self.stackpointer.arc_h(tf.expand_dims(output_dec, axis = 1)))
        type_h = tf.nn.elu(self.stackpointer.type_h(output_dec))
        out_arc = self.stackpointer.attention.forward(arc_h, tf.expand_dims(self.arc_c, 0))
        out_arc = tf.squeeze(tf.squeeze(out_arc, axis = 1), axis = 1)
        self.hyp_scores = tf.nn.log_softmax(out_arc, axis = 1)
        self.type_h = type_h
        self.decode_hidden = hx
        
        self.holder_type_h = tf.placeholder(tf.float32, (None, self.stackpointer.arc_space))
        self.holder_type_c = tf.placeholder(tf.float32, (None, self.stackpointer.arc_space))
        
        out_type = self.stackpointer.bilinear.forward(self.holder_type_h, self.holder_type_c)
        self.hyp_type_scores = tf.nn.log_softmax(out_type, axis = 1)

In [17]:
tf.reset_default_graph()
sess = tf.InteractiveSession()
model = Model()
sess.run(tf.global_variables_initializer())

Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
This class is equivalent as tf.keras.layers.StackedRNNCells, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
(?, ?, ?) (?, ?, 52)
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [18]:
train_X = words_train
train_Y = labels_train
train_depends = depends_train
train_char = sentences_train

test_X = words_test
test_Y = labels_test
test_depends = depends_test
test_char = sentences_test

In [19]:
prior_order = model.stackpointer.prior_order

def decode_sentence(output_enc, arc_c, type_c, hx, beam, length, ordered, leading_symbolic):
    def valid_hyp(base_id, child_id, head):
        if constraints[base_id, child_id]:
            return False
        elif not ordered or prior_order == PriorOrder.DEPTH or child_orders[base_id, head] == 0:
            return True
        elif prior_order == PriorOrder.LEFT2RIGTH:
            return child_id > child_orders[base_id, head]
        else:
            if child_id < head:
                return child_id < child_orders[base_id, head] < head
            else:
                return child_id > child_orders[base_id, head]
        
    length = output_enc.shape[0] if length is None else length
            
    stacked_heads = [[0] for _ in range(beam)]
    grand_parents = [[0] for _ in range(beam)]
    siblings = [[0] for _ in range(beam)]
    children = np.zeros((beam, 2 * length - 1))
    stacked_types = np.zeros((beam, 2 * length - 1))
    
    children = np.zeros((beam, 2 * length - 1))
    stacked_types = np.zeros((beam, 2 * length - 1))
    hypothesis_scores = [0]
    constraints = np.zeros([beam, length], dtype=np.bool)
    constraints[:, 0] = True
    child_orders = np.zeros([beam, length], dtype=np.int64)

    new_stacked_heads = [[] for _ in range(beam)]
    new_grand_parents = [[] for _ in range(beam)]
    new_siblings = [[] for _ in range(beam)]
    new_skip_connects = [[] for _ in range(beam)]
    new_children = np.zeros((beam, 2 * length - 1))
    new_stacked_types = np.zeros((beam, 2 * length - 1))
    num_hyp = 1
    num_step = 2 * length - 1
    for t in range(num_step):
        heads = np.array([stacked_heads[i][-1] for i in range(num_hyp)])
        gpars = np.array([grand_parents[i][-1] for i in range(num_hyp)])
        sibs = np.array([siblings[i].pop() for i in range(num_hyp)])
        src_encoding = output_enc[heads]
        mask_sibs = np.expand_dims((np.array(sibs) != 0).astype(np.float32), axis = 1)
        output_enc_sibling = output_enc[sibs] * mask_sibs
        src_encoding = src_encoding + output_enc_sibling
        output_enc_gpar = output_enc[gpars]
        src_encoding = src_encoding + output_enc_gpar
        hyp_scores, type_h, hx = sess.run([model.hyp_scores, model.type_h, model.decode_hidden],
                                     feed_dict = {model.src_encoding: src_encoding,
                                                  model.arc_c: arc_c,
                                                  model.hx: hx})
        
        new_hypothesis_scores = np.expand_dims(hypothesis_scores[:num_hyp], axis = 1) + hyp_scores
        new_hypothesis_scores = new_hypothesis_scores.reshape((-1))
        hyp_index = np.argsort(new_hypothesis_scores)[::-1]
        new_hypothesis_scores = np.sort(new_hypothesis_scores)[::-1]
        base_index = (hyp_index // length)
        child_index = hyp_index % length
        cc = 0
        ids = []
        new_constraints = np.zeros([beam, length], dtype=np.bool)
        new_child_orders = np.zeros([beam, length], dtype=np.int64)
        for id_ in range(num_hyp * length):
            base_id = base_index[id_]
            if base_id:
                ids.append(id_)
                continue
            child_id = child_index[id_]
            head = heads[base_id]
            new_hyp_score = new_hypothesis_scores[id_]
            if child_id == head:
                if head != 0 or t + 1 == num_step:
                    new_constraints[cc] = constraints[base_id]
                    new_child_orders[cc] = child_orders[base_id]

                    new_stacked_heads[cc] = [stacked_heads[base_id][i] for i in range(len(stacked_heads[base_id]))]
                    new_stacked_heads[cc].pop()

                    new_grand_parents[cc] = [grand_parents[base_id][i] for i in range(len(grand_parents[base_id]))]
                    new_grand_parents[cc].pop()

                    new_siblings[cc] = [siblings[base_id][i] for i in range(len(siblings[base_id]))]

                    new_children[cc] = children[base_id]
                    new_children[cc, t] = child_id

                    hypothesis_scores[cc] = new_hyp_score
                    ids.append(id_)
                    cc += 1
            elif valid_hyp(base_id, child_id, head):
                new_constraints[cc] = constraints[base_id]
                new_constraints[cc, child_id] = True

                new_child_orders[cc] = child_orders[base_id]
                new_child_orders[cc, head] = child_id

                new_stacked_heads[cc] = [stacked_heads[base_id][i] for i in range(len(stacked_heads[base_id]))]
                new_stacked_heads[cc].append(child_id)

                new_grand_parents[cc] = [grand_parents[base_id][i] for i in range(len(grand_parents[base_id]))]
                new_grand_parents[cc].append(head)

                new_siblings[cc] = [siblings[base_id][i] for i in range(len(siblings[base_id]))]
                new_siblings[cc].append(child_id)
                new_siblings[cc].append(0)

                new_children[cc] = children[base_id]
                new_children[cc, t] = child_id

                hypothesis_scores[cc] = new_hyp_score
                ids.append(id_)
                cc += 1
                    
            if cc == beam:
                break
            
        num_hyp = len(ids)
        if num_hyp == 0:
            return None
        else:
            index = np.array(ids)
        base_index = base_index[index]
        child_index = child_index[index]
        hyp_type_scores = sess.run(model.hyp_type_scores,
                                  feed_dict = {
                                      model.holder_type_h: type_h[base_index],
                                      model.holder_type_c: type_c[child_index]
                                  })
        hyp_types = np.argmax(hyp_type_scores, axis = 1)
        hyp_type_scores = np.max(hyp_type_scores, axis = 1)
        hypothesis_scores[:num_hyp] = hypothesis_scores[:num_hyp] + hyp_type_scores

        for i in range(num_hyp):
            base_id = base_index[i]
            new_stacked_types[i] = stacked_types[base_id]
            new_stacked_types[i, t] = hyp_types[i]

        stacked_heads = [[new_stacked_heads[i][j] for j in range(len(new_stacked_heads[i]))] for i in range(num_hyp)]
        grand_parents = [[new_grand_parents[i][j] for j in range(len(new_grand_parents[i]))] for i in range(num_hyp)]
        siblings = [[new_siblings[i][j] for j in range(len(new_siblings[i]))] for i in range(num_hyp)]
        constraints = new_constraints
        child_orders = new_child_orders
        children = np.copy(new_children)
        stacked_types = np.copy(new_stacked_types)
        
    children = children[0].astype(np.int32)
    stacked_types = stacked_types[0].astype(np.int32)
    heads = np.zeros(length, dtype=np.int32)
    types = np.zeros(length, dtype=np.int32)
    stack = [0]
    for i in range(num_step):
        head = stack[-1]
        child = children[i]
        type_ = stacked_types[i]
        if child != head:
            heads[child] = head
            types[child] = type_
            stack.append(child)
        else:
            stacked_types[i] = 0
            stack.pop()

    return heads, types, length, children, stacked_types   
        
def decode(input_word, input_char, length = None, beam = 1, leading_symbolic=0, ordered=True):
    
    arc_c, type_c, output, hn = sess.run([model.encode_arc_c, model.type_c, 
                                          model.encode_output, model.encode_hidden],
            feed_dict = {model.words: input_word, model.chars: input_char})
    batch, max_len_e, _ = output.shape

    heads = np.zeros([batch, max_len_e], dtype=np.int32)
    types = np.zeros([batch, max_len_e], dtype=np.int32)

    children = np.zeros([batch, 2 * max_len_e - 1], dtype=np.int32)
    stack_types = np.zeros([batch, 2 * max_len_e - 1], dtype=np.int32)
    
    for b in range(batch):
        sent_len = None if length is None else length[b]
        preds = decode_sentence(output[b], arc_c[b], type_c[b], [hn[b]], 
                                beam, sent_len, ordered, leading_symbolic)
        if preds is None:
            preds = decode_sentence(output[b], arc_c[b], type_c[b], [hn[b]], beam, 
                                         sent_len, False, leading_symbolic)
        hids, tids, sent_len, chids, stids = preds
        heads[b, :sent_len] = hids
        types[b, :sent_len] = tids

        children[b, :2 * sent_len - 1] = chids
        stack_types[b, :2 * sent_len - 1] = stids

    return heads, types, children, stack_types

In [20]:
def generate_char_seq(batch, UNK = 2):
    maxlen_c = max([len(k) for k in batch])
    x = [[len(i) for i in k] for k in batch]
    maxlen = max([j for i in x for j in i])
    temp = np.zeros((len(batch),maxlen_c,maxlen),dtype=np.int32)
    for i in range(len(batch)):
        for k in range(len(batch[i])):
            for no, c in enumerate(batch[i][k]):
                temp[i,k,-1-no] = char2idx.get(c, UNK)
    return temp

In [33]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

batch_x = train_X[:5]
batch_x = pad_sequences(batch_x,padding='post')
batch_char = train_char[:5]
batch_char = generate_char_seq(batch_char)
batch_y = train_Y[:5]
batch_y = pad_sequences(batch_y,padding='post')
batch_depends = train_depends[:5]
batch_depends = pad_sequences(batch_depends,padding='post')

batch_stacked_heads = stacked_heads_train[:5]
batch_stacked_heads = pad_sequences(batch_stacked_heads,padding='post')
batch_children = children_train[:5]
batch_children = pad_sequences(batch_children,padding='post')
batch_siblings = siblings_train[:5]
batch_siblings = pad_sequences(batch_siblings,padding='post')
batch_stacked_types = stacked_types_train[:5]
batch_stacked_types = pad_sequences(batch_stacked_types,padding='post')
batch_e = np.zeros(batch_x.shape)
batch_d = np.zeros(batch_stacked_heads.shape)
nonzero = np.count_nonzero(batch_x, axis = 1)

for no, i in enumerate(nonzero):
    batch_e[no,:i] = 1.0
for no, i in enumerate(nonzero * 2 - 1):
    batch_d[no,:i] = 1.0
    
batch_x.shape, batch_stacked_heads.shape

((5, 37), (5, 73))

In [22]:
feed_dict = {model.words: batch_x,
             model.chars: batch_char,
             model.heads: batch_depends,
             model.stacked_heads: batch_stacked_heads,
             model.childrens: batch_children,
             model.siblings: batch_siblings,
             model.stacked_types: batch_stacked_types,
             model.mask_e: batch_e,
             model.mask_d: batch_d}
sess.run(model.cost, feed_dict = feed_dict)

14.264593

In [34]:
%%time
decode(batch_x, batch_char)

CPU times: user 2.27 s, sys: 251 ms, total: 2.52 s
Wall time: 1.32 s


(array([[ 0,  0,  1,  0,  1,  6,  1,  1,  7,  0,  0, 12,  0,  0, 15,  8,
         18, 18,  7, 21, 21, 18, 23, 21,  1, 28, 28, 28, 21,  1, 34, 34,
         31, 34,  0, 34,  0],
        [ 0, 10,  3, 10,  7,  7,  7,  3, 10, 10,  0, 10, 10, 14, 10, 16,
         14, 10, 10, 23,  0,  0, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
         36, 36, 36, 36, 21],
        [ 0,  0,  1,  4,  5,  1,  9,  9,  9,  5,  9, 13, 13,  9, 13, 16,
         14,  1,  0,  0, 25,  0,  0, 25, 25, 22,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0],
        [ 0,  6,  3,  1,  6,  6,  0,  9,  9,  6, 12, 12,  9, 15, 15, 12,
          6,  0, 17,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0],
        [ 0,  2,  6,  4,  2,  6,  0, 10, 10, 10,  6,  6, 16, 16, 16, 17,
          6, 16, 17, 18, 18, 22, 17, 27, 27, 27, 27, 22, 31, 31, 31, 27,
         35, 35, 35, 22,  6]], dtype=int32),
 array([[ 0,  5,  7,  8,  7, 13, 35, 28, 10,  8, 44,  7, 38,  7,  3, 35,
          2,  3,  4,  2,  3, 14,

In [24]:
from tqdm import tqdm

batch_size = 32
epoch = 15

for e in range(epoch):
    test_loss, train_loss = [], []
    
    pbar = tqdm(
        range(0, len(train_X), batch_size), desc = 'train minibatch loop'
    )
    for i in pbar:
        index = min(i + batch_size, len(train_X))
        batch_x = train_X[i: index]
        batch_x = pad_sequences(batch_x,padding='post')
        batch_char = train_char[i: index]
        batch_char = generate_char_seq(batch_char)
        batch_y = train_Y[i: index]
        batch_y = pad_sequences(batch_y,padding='post')
        batch_depends = train_depends[i: index]
        batch_depends = pad_sequences(batch_depends,padding='post')

        batch_stacked_heads = stacked_heads_train[i: index]
        batch_stacked_heads = pad_sequences(batch_stacked_heads,padding='post')
        batch_children = children_train[i: index]
        batch_children = pad_sequences(batch_children,padding='post')
        batch_siblings = siblings_train[i: index]
        batch_siblings = pad_sequences(batch_siblings,padding='post')
        batch_stacked_types = stacked_types_train[i: index]
        batch_stacked_types = pad_sequences(batch_stacked_types,padding='post')
        batch_e = np.zeros(batch_x.shape)
        batch_d = np.zeros(batch_stacked_heads.shape)
        nonzero = np.count_nonzero(batch_x, axis = 1)

        for no, i in enumerate(nonzero):
            batch_e[no,:i] = 1.0
        for no, i in enumerate(nonzero * 2 - 1):
            batch_d[no,:i] = 1.0
        
        feed_dict = {model.words: batch_x,
             model.chars: batch_char,
             model.heads: batch_depends,
             model.stacked_heads: batch_stacked_heads,
             model.childrens: batch_children,
             model.siblings: batch_siblings,
             model.stacked_types: batch_stacked_types,
             model.mask_e: batch_e,
             model.mask_d: batch_d}
        cost, _ = sess.run([model.cost, model.optimizer], feed_dict = feed_dict)
        train_loss.append(cost)
        pbar.set_postfix(cost = cost)
        
    pbar = tqdm(
        range(0, len(test_X), batch_size), desc = 'test minibatch loop'
    )
    for i in pbar:
        index = min(i + batch_size, len(test_X))
        batch_x = test_X[i: index]
        batch_x = pad_sequences(batch_x,padding='post')
        batch_char = test_char[i: index]
        batch_char = generate_char_seq(batch_char)
        batch_y = test_Y[i: index]
        batch_y = pad_sequences(batch_y,padding='post')
        batch_depends = test_depends[i: index]
        batch_depends = pad_sequences(batch_depends,padding='post')

        batch_stacked_heads = stacked_heads_test[i: index]
        batch_stacked_heads = pad_sequences(batch_stacked_heads,padding='post')
        batch_children = children_test[i: index]
        batch_children = pad_sequences(batch_children,padding='post')
        batch_siblings = siblings_test[i: index]
        batch_siblings = pad_sequences(batch_siblings,padding='post')
        batch_stacked_types = stacked_types_test[i: index]
        batch_stacked_types = pad_sequences(batch_stacked_types,padding='post')
        batch_e = np.zeros(batch_x.shape)
        batch_d = np.zeros(batch_stacked_heads.shape)
        nonzero = np.count_nonzero(batch_x, axis = 1)

        for no, i in enumerate(nonzero):
            batch_e[no,:i] = 1.0
        for no, i in enumerate(nonzero * 2 - 1):
            batch_d[no,:i] = 1.0
        
        feed_dict = {model.words: batch_x,
             model.chars: batch_char,
             model.heads: batch_depends,
             model.stacked_heads: batch_stacked_heads,
             model.childrens: batch_children,
             model.siblings: batch_siblings,
             model.stacked_types: batch_stacked_types,
             model.mask_e: batch_e,
             model.mask_d: batch_d}
        cost = sess.run(model.cost, feed_dict = feed_dict)
        test_loss.append(cost)
        pbar.set_postfix(cost = cost)
    
    print(
    'epoch: %d, training loss: %f, valid loss: %f\n'
    % (e, np.mean(train_loss), np.mean(test_loss)))
        

train minibatch loop: 100%|██████████| 375/375 [01:01<00:00,  6.11it/s, cost=2.97]
test minibatch loop: 100%|██████████| 120/120 [00:06<00:00, 19.70it/s, cost=12.1]
train minibatch loop:   0%|          | 1/375 [00:00<01:01,  6.08it/s, cost=3.28]

epoch: 0, training loss: 5.157737, valid loss: 11.861909



train minibatch loop: 100%|██████████| 375/375 [01:00<00:00,  6.15it/s, cost=2.01]
test minibatch loop: 100%|██████████| 120/120 [00:06<00:00, 18.53it/s, cost=13.8]
train minibatch loop:   0%|          | 1/375 [00:00<01:07,  5.52it/s, cost=2.31]

epoch: 1, training loss: 2.576627, valid loss: 13.340673



train minibatch loop: 100%|██████████| 375/375 [01:00<00:00,  6.17it/s, cost=1.55] 
test minibatch loop: 100%|██████████| 120/120 [00:06<00:00, 19.33it/s, cost=15.4]
train minibatch loop:   0%|          | 1/375 [00:00<01:02,  5.99it/s, cost=1.77]

epoch: 2, training loss: 1.922838, valid loss: 14.725556



train minibatch loop: 100%|██████████| 375/375 [01:01<00:00,  6.11it/s, cost=1.36] 
test minibatch loop: 100%|██████████| 120/120 [00:06<00:00, 19.20it/s, cost=16.4]
train minibatch loop:   0%|          | 1/375 [00:00<01:05,  5.70it/s, cost=1.47]

epoch: 3, training loss: 1.529883, valid loss: 15.789502



train minibatch loop: 100%|██████████| 375/375 [01:00<00:00,  6.15it/s, cost=1.12] 
test minibatch loop: 100%|██████████| 120/120 [00:06<00:00, 19.20it/s, cost=17.9]
train minibatch loop:   0%|          | 1/375 [00:00<01:03,  5.88it/s, cost=1.2]

epoch: 4, training loss: 1.266019, valid loss: 17.307760



train minibatch loop: 100%|██████████| 375/375 [01:00<00:00,  6.16it/s, cost=1.02] 
test minibatch loop: 100%|██████████| 120/120 [00:06<00:00, 18.87it/s, cost=19.5]
train minibatch loop:   0%|          | 1/375 [00:00<01:03,  5.93it/s, cost=1.06]

epoch: 5, training loss: 1.066313, valid loss: 19.008535



train minibatch loop: 100%|██████████| 375/375 [01:00<00:00,  6.16it/s, cost=0.878]
test minibatch loop: 100%|██████████| 120/120 [00:06<00:00, 19.69it/s, cost=21.3]
train minibatch loop:   0%|          | 1/375 [00:00<01:02,  6.03it/s, cost=0.895]

epoch: 6, training loss: 0.908035, valid loss: 20.994354



train minibatch loop: 100%|██████████| 375/375 [01:01<00:00,  6.13it/s, cost=0.748]
test minibatch loop: 100%|██████████| 120/120 [00:06<00:00, 18.86it/s, cost=22.6]
train minibatch loop:   0%|          | 1/375 [00:00<01:02,  6.03it/s, cost=0.771]

epoch: 7, training loss: 0.780265, valid loss: 22.426714



train minibatch loop: 100%|██████████| 375/375 [01:01<00:00,  6.11it/s, cost=0.636]
test minibatch loop: 100%|██████████| 120/120 [00:06<00:00, 19.22it/s, cost=24.4]
train minibatch loop:   0%|          | 1/375 [00:00<01:03,  5.93it/s, cost=0.615]

epoch: 8, training loss: 0.687402, valid loss: 24.419289



train minibatch loop: 100%|██████████| 375/375 [01:00<00:00,  6.17it/s, cost=0.628]
test minibatch loop: 100%|██████████| 120/120 [00:06<00:00, 18.42it/s, cost=26.8]
train minibatch loop:   0%|          | 1/375 [00:00<01:05,  5.75it/s, cost=0.546]

epoch: 9, training loss: 0.609938, valid loss: 26.764641



train minibatch loop: 100%|██████████| 375/375 [01:00<00:00,  6.18it/s, cost=0.613]
test minibatch loop: 100%|██████████| 120/120 [00:06<00:00, 17.83it/s, cost=28.2]
train minibatch loop:   0%|          | 1/375 [00:00<01:06,  5.65it/s, cost=0.52]

epoch: 10, training loss: 0.525183, valid loss: 28.478970



train minibatch loop: 100%|██████████| 375/375 [01:00<00:00,  6.15it/s, cost=0.538]
test minibatch loop: 100%|██████████| 120/120 [00:06<00:00, 18.67it/s, cost=31.2]
train minibatch loop:   0%|          | 1/375 [00:00<01:06,  5.62it/s, cost=0.484]

epoch: 11, training loss: 0.459827, valid loss: 31.322876



train minibatch loop: 100%|██████████| 375/375 [01:00<00:00,  6.17it/s, cost=0.512] 
test minibatch loop: 100%|██████████| 120/120 [00:06<00:00, 19.11it/s, cost=32.4]
train minibatch loop:   0%|          | 1/375 [00:00<01:06,  5.59it/s, cost=0.367]

epoch: 12, training loss: 0.400364, valid loss: 33.366253



train minibatch loop: 100%|██████████| 375/375 [01:01<00:00,  6.14it/s, cost=0.413] 
test minibatch loop: 100%|██████████| 120/120 [00:06<00:00, 18.76it/s, cost=34.1]
train minibatch loop:   0%|          | 1/375 [00:00<01:02,  5.95it/s, cost=0.316]

epoch: 13, training loss: 0.357156, valid loss: 34.881569



train minibatch loop: 100%|██████████| 375/375 [01:00<00:00,  6.16it/s, cost=0.331] 
test minibatch loop: 100%|██████████| 120/120 [00:06<00:00, 18.91it/s, cost=36.8]

epoch: 14, training loss: 0.307119, valid loss: 37.149876






In [25]:
def evaluate(heads_pred, types_pred, heads, types, lengths,
             symbolic_root=False, symbolic_end=False):
    batch_size, _ = heads_pred.shape
    ucorr = 0.
    lcorr = 0.
    total = 0.
    ucomplete_match = 0.
    lcomplete_match = 0.

    corr_root = 0.
    total_root = 0.
    start = 1 if symbolic_root else 0
    end = 1 if symbolic_end else 0
    for i in range(batch_size):
        ucm = 1.
        lcm = 1.
        for j in range(start, lengths[i] - end):

            total += 1
            if heads[i, j] == heads_pred[i, j]:
                ucorr += 1
                if types[i, j] == types_pred[i, j]:
                    lcorr += 1
                else:
                    lcm = 0
            else:
                ucm = 0
                lcm = 0

            if heads[i, j] == 0:
                total_root += 1
                corr_root += 1 if heads_pred[i, j] == 0 else 0

        ucomplete_match += ucm
        lcomplete_match += lcm
    
    return ucorr / total, lcorr / total, corr_root / total_root

In [27]:
heads, types, _, _ = decode(batch_x, batch_char)
arc_accuracy, type_accuracy, root_accuracy = evaluate(heads, types, batch_depends, batch_y, 
                                                      np.count_nonzero(batch_x, axis = 1))
arc_accuracy, type_accuracy, root_accuracy

(0.6045627376425855, 0.5209125475285171, 0.90625)

In [36]:
arcs, types, roots = [], [], []

for i in range(0, len(test_X), 5):
    index = min(i + 5, len(test_X))
    batch_x = test_X[i: index]
    batch_x = pad_sequences(batch_x,padding='post')
    batch_char = test_char[i: index]
    batch_char = generate_char_seq(batch_char)
    batch_y = test_Y[i: index]
    batch_y = pad_sequences(batch_y,padding='post')
    batch_depends = test_depends[i: index]
    batch_depends = pad_sequences(batch_depends,padding='post')
    
    heads, tags_seq, _, _ = decode(batch_x, batch_char)
    
    arc_accuracy, type_accuracy, root_accuracy = evaluate(heads, tags_seq, batch_depends, batch_y, 
                                                      np.count_nonzero(batch_x, axis = 1))
    arcs.append(arc_accuracy)
    types.append(type_accuracy)
    roots.append(root_accuracy)

In [37]:
print('arc accuracy:', np.mean(arcs))
print('types accuracy:', np.mean(types))
print('root accuracy:', np.mean(roots))

arc accuracy: 0.6188156085110088
types accuracy: 0.482035002661857
root accuracy: 0.8939869281045753
