In [2]:
import sys, os, _pickle as pickle
import tensorflow as tf
import numpy as np
import nltk
from sklearn.metrics import f1_score

In [3]:
data_dir = '../data'
ckpt_dir = '../checkpoint'
word_embd_dir = '../checkpoint/word_embd'
model_dir = '../checkpoint/modelv5'

In [4]:
word_embd_dim = 100
pos_embd_dim = 25
dep_embd_dim = 25
word_vocab_size = 400001
pos_vocab_size = 10
dep_vocab_size = 21
relation_classes = 19
word_state_size = 100
other_state_size = 50
batch_size = 10
channels = 3
lambda_l2 = 0.00001
max_len_path = 10

In [5]:
with tf.name_scope("input"):
    path_length = tf.placeholder(tf.int32, shape=[2, batch_size], name="path1_length")
    word_ids = tf.placeholder(tf.int32, shape=[2, batch_size, max_len_path], name="word_ids")
    pos_ids = tf.placeholder(tf.int32, [2, batch_size, max_len_path], name="pos_ids")
    dep_ids = tf.placeholder(tf.int32, [2, batch_size, max_len_path], name="dep_ids")
    y = tf.placeholder(tf.int32, [batch_size], name="y")

In [6]:
with tf.name_scope("word_embedding"):
    W = tf.Variable(tf.constant(0.0, shape=[word_vocab_size, word_embd_dim]), name="W")
    embedding_placeholder = tf.placeholder(tf.float32,[word_vocab_size, word_embd_dim])
    embedding_init = W.assign(embedding_placeholder)
    embedded_word = tf.nn.embedding_lookup(W, word_ids)
    word_embedding_saver = tf.train.Saver({"word_embedding/W": W})

with tf.name_scope("pos_embedding"):
    W = tf.Variable(tf.random_uniform([pos_vocab_size, pos_embd_dim]), name="W")
    embedded_pos = tf.nn.embedding_lookup(W, pos_ids)
    pos_embedding_saver = tf.train.Saver({"pos_embedding/W": W})

with tf.name_scope("dep_embedding"):
    W = tf.Variable(tf.random_uniform([dep_vocab_size, dep_embd_dim]), name="W")
    embedded_dep = tf.nn.embedding_lookup(W, dep_ids)
    dep_embedding_saver = tf.train.Saver({"dep_embedding/W": W})

In [7]:
with tf.name_scope("word_dropout"):
    embedded_word_drop = tf.nn.dropout(embedded_word, 0.3)

In [8]:
word_hidden_state = tf.zeros([batch_size, word_state_size], name='word_hidden_state')
word_cell_state = tf.zeros([batch_size, word_state_size], name='word_cell_state')
word_init_state = tf.contrib.rnn.LSTMStateTuple(word_hidden_state, word_cell_state)

other_hidden_states = tf.zeros([channels-1, batch_size, other_state_size], name="hidden_state")
other_cell_states = tf.zeros([channels-1, batch_size, other_state_size], name="cell_state")

other_init_states = [tf.contrib.rnn.LSTMStateTuple(other_hidden_states[i], other_cell_states[i]) for i in range(channels-1)]

with tf.variable_scope("word_lstm1"):
    cell = tf.contrib.rnn.BasicLSTMCell(word_state_size)
    state_series, current_state = tf.nn.dynamic_rnn(cell, embedded_word_drop[0], sequence_length=path_length[0], initial_state=word_init_state)
    state_series_word1 = tf.reduce_max(state_series, axis=1)

with tf.variable_scope("word_lstm2"):
    cell = tf.contrib.rnn.BasicLSTMCell(word_state_size)
    state_series, current_state = tf.nn.dynamic_rnn(cell, embedded_word_drop[1], sequence_length=path_length[1], initial_state=word_init_state)
    state_series_word2 = tf.reduce_max(state_series, axis=1)

with tf.variable_scope("pos_lstm1"):
    cell = tf.contrib.rnn.BasicLSTMCell(other_state_size)
    state_series, current_state = tf.nn.dynamic_rnn(cell, embedded_pos[0], sequence_length=path_length[0],initial_state=other_init_states[0])
    state_series_pos1 = tf.reduce_max(state_series, axis=1)

with tf.variable_scope("pos_lstm2"):
    cell = tf.contrib.rnn.BasicLSTMCell(other_state_size)
    state_series, current_state = tf.nn.dynamic_rnn(cell, embedded_pos[1], sequence_length=path_length[1],initial_state=other_init_states[0])
    state_series_pos2 = tf.reduce_max(state_series, axis=1)

with tf.variable_scope("dep_lstm1"):
    cell = tf.contrib.rnn.BasicLSTMCell(other_state_size)
    state_series, current_state = tf.nn.dynamic_rnn(cell, embedded_dep[0], sequence_length=path_length[0], initial_state=other_init_states[1])
    state_series_dep1 = tf.reduce_max(state_series, axis=1)

with tf.variable_scope("dep_lstm2"):
    cell = tf.contrib.rnn.BasicLSTMCell(other_state_size)
    state_series, current_state = tf.nn.dynamic_rnn(cell, embedded_dep[1], sequence_length=path_length[1], initial_state=other_init_states[1])
    state_series_dep2 = tf.reduce_max(state_series, axis=1)

state_series1 = tf.concat([state_series_word1, state_series_pos1, state_series_dep1], 1)
state_series2 = tf.concat([state_series_word2, state_series_pos2, state_series_dep2], 1)

state_series = tf.concat([state_series1, state_series2], 1)

In [12]:
with tf.name_scope("hidden_layer"):
    W = tf.Variable(tf.truncated_normal([400, 100], -0.1, 0.1), name="W")
    b = tf.Variable(tf.zeros([100]), name="b")
    y_hidden_layer = tf.matmul(state_series, W) + b

with tf.name_scope("dropout"):
    y_hidden_layer_drop = tf.nn.dropout(y_hidden_layer, 0.3)

with tf.name_scope("softmax_layer"):
    W = tf.Variable(tf.truncated_normal([100, relation_classes], -0.1, 0.1), name="W")
    b = tf.Variable(tf.zeros([relation_classes]), name="b")
    logits = tf.matmul(y_hidden_layer_drop, W) + b
    predictions = tf.argmax(logits, 1)

tv_all = tf.trainable_variables()
tv_regu = []
non_reg = ["word_embedding/W:0","pos_embedding/W:0",'dep_embedding/W:0',"global_step:0",'hidden_layer/b:0','softmax_layer/b:0']
for t in tv_all:
    if t.name not in non_reg:
        if(t.name.find('biases')==-1):
            tv_regu.append(t)

with tf.name_scope("loss"):
    l2_loss = lambda_l2 * tf.reduce_sum([ tf.nn.l2_loss(v) for v in tv_regu ])
    loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y))
    total_loss = loss + l2_loss

global_step = tf.Variable(0, name="global_step")

optimizer = tf.train.AdamOptimizer(0.001).minimize(total_loss, global_step=global_step)

In [16]:
f = open(data_dir + '/vocab.pkl', 'rb')
vocab = pickle.load(f)
f.close()

word2id = dict((w, i) for i,w in enumerate(vocab))
id2word = dict((i, w) for i,w in enumerate(vocab))

unknown_token = "UNKNOWN_TOKEN"
word2id[unknown_token] = word_vocab_size -1
id2word[word_vocab_size-1] = unknown_token

pos_tags_vocab = []
for line in open(data_dir + '/pos_tags.txt'):
        pos_tags_vocab.append(line.strip())

dep_vocab = []
for line in open(data_dir + '/dependency_types.txt'):
    dep_vocab.append(line.strip())

relation_vocab = []
for line in open(data_dir + '/relation_types.txt'):
    relation_vocab.append(line.strip())


rel2id = dict((w, i) for i,w in enumerate(relation_vocab))
id2rel = dict((i, w) for i,w in enumerate(relation_vocab))

pos_tag2id = dict((w, i) for i,w in enumerate(pos_tags_vocab))
id2pos_tag = dict((i, w) for i,w in enumerate(pos_tags_vocab))

dep2id = dict((w, i) for i,w in enumerate(dep_vocab))
id2dep = dict((i, w) for i,w in enumerate(dep_vocab))

pos_tag2id['OTH'] = 9
id2pos_tag[9] = 'OTH'

dep2id['OTH'] = 20
id2dep[20] = 'OTH'

JJ_pos_tags = ['JJ', 'JJR', 'JJS']
NN_pos_tags = ['NN', 'NNS', 'NNP', 'NNPS']
RB_pos_tags = ['RB', 'RBR', 'RBS']
PRP_pos_tags = ['PRP', 'PRP$']
VB_pos_tags = ['VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ']
_pos_tags = ['CC', 'CD', 'DT', 'IN']

def pos_tag(x):
    if x in JJ_pos_tags:
        return pos_tag2id['JJ']
    if x in NN_pos_tags:
        return pos_tag2id['NN']
    if x in RB_pos_tags:
        return pos_tag2id['RB']
    if x in PRP_pos_tags:
        return pos_tag2id['PRP']
    if x in VB_pos_tags:
        return pos_tag2id['VB']
    if x in _pos_tags:
        return pos_tag2id[x]
    else:
        return 9

In [21]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())
saver = tf.train.Saver()

In [19]:
# model = tf.train.latest_checkpoint(model_dir)
# saver.restore(sess, model)

In [17]:
# f = open('data/word_embedding', 'rb')
# word_embedding = pickle.load(f)
# f.close()
# sess.run(embedding_init, feed_dict={embedding_placeholder:word_embedding})
# word_embedding_saver.save(sess, word_embd_dir + '/word_embd')

In [22]:
latest_embd = tf.train.latest_checkpoint(word_embd_dir)
word_embedding_saver.restore(sess, latest_embd)

In [32]:
f = open(data_dir + '/train_paths', 'rb')
word_p1, word_p2, dep_p1, dep_p2, pos_p1, pos_p2 = pickle.load(f)
f.close()

relations = []
for line in open(data_dir + '/train_relations.txt'):
    relations.append(line.strip().split()[1])

length = len(word_p1)
num_batches = int(length/batch_size)

for i in range(length):
    for j, word in enumerate(word_p1[i]):
        word = word.lower()
        word_p1[i][j] = word if word in word2id else unknown_token 
    for k, word in enumerate(word_p2[i]):
        word = word.lower()
        word_p2[i][k] = word if word in word2id else unknown_token 
    for l, d in enumerate(dep_p1[i]):
        dep_p1[i][l] = d if d in dep2id else 'OTH'
    for m, d in enumerate(dep_p2[i]):
        dep_p2[i][m] = d if d in dep2id else 'OTH'

word_p1_ids = np.ones([length, max_len_path],dtype=int)
word_p2_ids = np.ones([length, max_len_path],dtype=int)
pos_p1_ids = np.ones([length, max_len_path],dtype=int)
pos_p2_ids = np.ones([length, max_len_path],dtype=int)
dep_p1_ids = np.ones([length, max_len_path],dtype=int)
dep_p2_ids = np.ones([length, max_len_path],dtype=int)
rel_ids = np.array([rel2id[rel] for rel in relations])
path1_len = np.array([len(w) for w in word_p1], dtype=int)
path2_len = np.array([len(w) for w in word_p2])

for i in range(length):
    for j, w in enumerate(word_p1[i]):
        word_p1_ids[i][j] = word2id[w]
    for j, w in enumerate(word_p2[i]):
        word_p2_ids[i][j] = word2id[w]
    for j, w in enumerate(pos_p1[i]):
        pos_p1_ids[i][j] = pos_tag(w)
    for j, w in enumerate(pos_p2[i]):
        pos_p2_ids[i][j] = pos_tag(w)
    for j, w in enumerate(dep_p1[i]):
        dep_p1_ids[i][j] = dep2id[w]
    for j, w in enumerate(dep_p2[i]):
        dep_p2_ids[i][j] = dep2id[w]

In [33]:
num_epochs = 20
for i in range(num_epochs):
    for j in range(num_batches):
        path_dict = [path1_len[j*batch_size:(j+1)*batch_size], path2_len[j*batch_size:(j+1)*batch_size]]
        word_dict = [word_p1_ids[j*batch_size:(j+1)*batch_size], word_p2_ids[j*batch_size:(j+1)*batch_size]]
        pos_dict = [pos_p1_ids[j*batch_size:(j+1)*batch_size], pos_p2_ids[j*batch_size:(j+1)*batch_size]]
        dep_dict = [dep_p1_ids[j*batch_size:(j+1)*batch_size], dep_p2_ids[j*batch_size:(j+1)*batch_size]]
        y_dict = rel_ids[j*batch_size:(j+1)*batch_size]
        
        feed_dict = {
            path_length:path_dict,
            word_ids:word_dict,
            pos_ids:pos_dict,
            dep_ids:dep_dict,
            y:y_dict}
        _, loss, step = sess.run([optimizer, total_loss, global_step], feed_dict)
        if step%10==0:
            print("Step:", step, "loss:",loss)
        if step % 1000 == 0:
            saver.save(sess, model_dir + '/model')
            print("Saved Model")

Step: 32010 loss: 0.279179
Step: 32020 loss: 0.450493
Step: 32030 loss: 0.67678
Step: 32040 loss: 0.530038
Step: 32050 loss: 0.18954
Step: 32060 loss: 0.345747
Step: 32070 loss: 0.399547
Step: 32080 loss: 0.307604
Step: 32090 loss: 0.211703
Step: 32100 loss: 0.0819739
Step: 32110 loss: 1.1188
Step: 32120 loss: 0.82525
Step: 32130 loss: 0.202954
Step: 32140 loss: 0.0546259
Step: 32150 loss: 0.1748
Step: 32160 loss: 0.652707
Step: 32170 loss: 0.0409834
Step: 32180 loss: 1.07589
Step: 32190 loss: 0.835989
Step: 32200 loss: 0.306376
Step: 32210 loss: 0.318982
Step: 32220 loss: 0.604181
Step: 32230 loss: 0.468826
Step: 32240 loss: 0.15929
Step: 32250 loss: 1.04704
Step: 32260 loss: 0.175985
Step: 32270 loss: 0.191001
Step: 32280 loss: 0.0813737
Step: 32290 loss: 0.124923
Step: 32300 loss: 0.0491613
Step: 32310 loss: 0.139159
Step: 32320 loss: 0.0837461
Step: 32330 loss: 0.264151
Step: 32340 loss: 0.361466
Step: 32350 loss: 0.361493
Step: 32360 loss: 0.524457
Step: 32370 loss: 0.0660119
Step

Step: 35040 loss: 0.479934
Step: 35050 loss: 0.0341393
Step: 35060 loss: 0.215971
Step: 35070 loss: 0.646517
Step: 35080 loss: 0.262261
Step: 35090 loss: 0.0584262
Step: 35100 loss: 0.0881525
Step: 35110 loss: 0.297986
Step: 35120 loss: 0.207136
Step: 35130 loss: 1.25428
Step: 35140 loss: 1.44878
Step: 35150 loss: 0.375991
Step: 35160 loss: 0.0596189
Step: 35170 loss: 0.712018
Step: 35180 loss: 0.750539
Step: 35190 loss: 0.18919
Step: 35200 loss: 0.0566818
Step: 35210 loss: 0.0729581
Step: 35220 loss: 0.222028
Step: 35230 loss: 0.498812
Step: 35240 loss: 0.496733
Step: 35250 loss: 0.0773993
Step: 35260 loss: 0.0732535
Step: 35270 loss: 0.0512249
Step: 35280 loss: 0.412121
Step: 35290 loss: 0.247884
Step: 35300 loss: 0.0792624
Step: 35310 loss: 0.299243
Step: 35320 loss: 1.06095
Step: 35330 loss: 0.139944
Step: 35340 loss: 0.126545
Step: 35350 loss: 0.062601
Step: 35360 loss: 0.452786
Step: 35370 loss: 0.0500685
Step: 35380 loss: 0.888264
Step: 35390 loss: 0.173906
Step: 35400 loss: 0.0

Step: 38060 loss: 0.286757
Step: 38070 loss: 0.166176
Step: 38080 loss: 0.295198
Step: 38090 loss: 0.145652
Step: 38100 loss: 0.526205
Step: 38110 loss: 0.179935
Step: 38120 loss: 0.255303
Step: 38130 loss: 0.206683
Step: 38140 loss: 0.316885
Step: 38150 loss: 0.0736747
Step: 38160 loss: 0.450026
Step: 38170 loss: 0.657418
Step: 38180 loss: 0.433982
Step: 38190 loss: 0.639218
Step: 38200 loss: 0.110932
Step: 38210 loss: 0.288301
Step: 38220 loss: 0.119983
Step: 38230 loss: 0.356939
Step: 38240 loss: 0.141395
Step: 38250 loss: 0.0782554
Step: 38260 loss: 0.0788708
Step: 38270 loss: 0.0932686
Step: 38280 loss: 0.244754
Step: 38290 loss: 0.100126
Step: 38300 loss: 0.0432423
Step: 38310 loss: 0.424576
Step: 38320 loss: 0.07325
Step: 38330 loss: 0.349547
Step: 38340 loss: 0.726019
Step: 38350 loss: 0.171024
Step: 38360 loss: 0.253216
Step: 38370 loss: 0.800708
Step: 38380 loss: 0.671277
Step: 38390 loss: 0.296961
Step: 38400 loss: 0.4337
Step: 38410 loss: 0.0595922
Step: 38420 loss: 0.49495

Step: 41070 loss: 0.173165
Step: 41080 loss: 0.584465
Step: 41090 loss: 0.280949
Step: 41100 loss: 0.0586651
Step: 41110 loss: 0.0715683
Step: 41120 loss: 0.065528
Step: 41130 loss: 0.902982
Step: 41140 loss: 0.259082
Step: 41150 loss: 0.642926
Step: 41160 loss: 0.157603
Step: 41170 loss: 0.215129
Step: 41180 loss: 0.429703
Step: 41190 loss: 0.0657954
Step: 41200 loss: 0.390801
Step: 41210 loss: 0.153557
Step: 41220 loss: 0.23228
Step: 41230 loss: 0.0738138
Step: 41240 loss: 0.071583
Step: 41250 loss: 0.0857569
Step: 41260 loss: 0.129131
Step: 41270 loss: 0.27342
Step: 41280 loss: 0.232289
Step: 41290 loss: 0.0739835
Step: 41300 loss: 0.173283
Step: 41310 loss: 0.347164
Step: 41320 loss: 0.0608985
Step: 41330 loss: 0.258351
Step: 41340 loss: 0.264683
Step: 41350 loss: 0.554065
Step: 41360 loss: 0.124283
Step: 41370 loss: 0.480248
Step: 41380 loss: 0.130078
Step: 41390 loss: 0.479999
Step: 41400 loss: 0.12899
Step: 41410 loss: 0.0416332
Step: 41420 loss: 0.137996
Step: 41430 loss: 0.092

Step: 44080 loss: 0.128725
Step: 44090 loss: 0.300282
Step: 44100 loss: 0.0374196
Step: 44110 loss: 0.148153
Step: 44120 loss: 0.910608
Step: 44130 loss: 0.0519017
Step: 44140 loss: 0.0320734
Step: 44150 loss: 0.0778119
Step: 44160 loss: 0.884712
Step: 44170 loss: 0.0930497
Step: 44180 loss: 0.98539
Step: 44190 loss: 0.0657582
Step: 44200 loss: 0.0381521
Step: 44210 loss: 0.204958
Step: 44220 loss: 0.126904
Step: 44230 loss: 0.361081
Step: 44240 loss: 0.126165
Step: 44250 loss: 0.204496
Step: 44260 loss: 0.197386
Step: 44270 loss: 0.0427434
Step: 44280 loss: 0.102043
Step: 44290 loss: 0.79481
Step: 44300 loss: 0.0462335
Step: 44310 loss: 0.0454056
Step: 44320 loss: 0.160095
Step: 44330 loss: 0.208157
Step: 44340 loss: 0.126203
Step: 44350 loss: 0.436138
Step: 44360 loss: 0.323827
Step: 44370 loss: 0.379715
Step: 44380 loss: 0.210826
Step: 44390 loss: 0.200252
Step: 44400 loss: 0.672188
Step: 44410 loss: 0.0782018
Step: 44420 loss: 0.34085
Step: 44430 loss: 0.0445239
Step: 44440 loss: 0

Step: 47090 loss: 0.48607
Step: 47100 loss: 0.0445481
Step: 47110 loss: 0.107045
Step: 47120 loss: 0.157856
Step: 47130 loss: 0.689522
Step: 47140 loss: 0.356236
Step: 47150 loss: 0.0575687
Step: 47160 loss: 0.880571
Step: 47170 loss: 0.225042
Step: 47180 loss: 1.22775
Step: 47190 loss: 0.126019
Step: 47200 loss: 0.45015
Step: 47210 loss: 0.175434
Step: 47220 loss: 0.341063
Step: 47230 loss: 0.623115
Step: 47240 loss: 0.909092
Step: 47250 loss: 0.066878
Step: 47260 loss: 0.11269
Step: 47270 loss: 0.162067
Step: 47280 loss: 0.0739465
Step: 47290 loss: 0.214058
Step: 47300 loss: 0.165262
Step: 47310 loss: 0.0769133
Step: 47320 loss: 0.740466
Step: 47330 loss: 0.0842959
Step: 47340 loss: 0.0396902
Step: 47350 loss: 0.14395
Step: 47360 loss: 0.0829715
Step: 47370 loss: 0.0411171
Step: 47380 loss: 0.691197
Step: 47390 loss: 0.451377
Step: 47400 loss: 0.19287
Step: 47410 loss: 0.0343534
Step: 47420 loss: 0.19025
Step: 47430 loss: 0.452724
Step: 47440 loss: 0.0943098
Step: 47450 loss: 0.38645

In [34]:
# training accuracy
all_predictions = []
for j in range(num_batches):
    path_dict = [path1_len[j*batch_size:(j+1)*batch_size], path2_len[j*batch_size:(j+1)*batch_size]]
    word_dict = [word_p1_ids[j*batch_size:(j+1)*batch_size], word_p2_ids[j*batch_size:(j+1)*batch_size]]
    pos_dict = [pos_p1_ids[j*batch_size:(j+1)*batch_size], pos_p2_ids[j*batch_size:(j+1)*batch_size]]
    dep_dict = [dep_p1_ids[j*batch_size:(j+1)*batch_size], dep_p2_ids[j*batch_size:(j+1)*batch_size]]
    y_dict = rel_ids[j*batch_size:(j+1)*batch_size]

    feed_dict = {
        path_length:path_dict,
        word_ids:word_dict,
        pos_ids:pos_dict,
        dep_ids:dep_dict,
        y:y_dict}
    batch_predictions = sess.run(predictions, feed_dict)
    all_predictions.append(batch_predictions)

y_pred = []
for i in range(num_batches):
    for pred in all_predictions[i]:
        y_pred.append(pred)

count = 0
for i in range(batch_size*num_batches):
    count += y_pred[i]==rel_ids[i]
accuracy = count/(batch_size*num_batches) * 100

print("training accuracy", accuracy)

training accuracy 92.2125


In [35]:
f = open(data_dir + '/test_paths', 'rb')
word_p1, word_p2, dep_p1, dep_p2, pos_p1, pos_p2 = pickle.load(f)
f.close()

relations = []
for line in open(data_dir + '/test_relations.txt'):
    relations.append(line.strip().split()[0])

length = len(word_p1)
num_batches = int(length/batch_size)

for i in range(length):
    for j, word in enumerate(word_p1[i]):
        word = word.lower()
        word_p1[i][j] = word if word in word2id else unknown_token 
    for k, word in enumerate(word_p2[i]):
        word = word.lower()
        word_p2[i][k] = word if word in word2id else unknown_token 
    for l, d in enumerate(dep_p1[i]):
        dep_p1[i][l] = d if d in dep2id else 'OTH'
    for m, d in enumerate(dep_p2[i]):
        dep_p2[i][m] = d if d in dep2id else 'OTH'

word_p1_ids = np.ones([length, max_len_path],dtype=int)
word_p2_ids = np.ones([length, max_len_path],dtype=int)
pos_p1_ids = np.ones([length, max_len_path],dtype=int)
pos_p2_ids = np.ones([length, max_len_path],dtype=int)
dep_p1_ids = np.ones([length, max_len_path],dtype=int)
dep_p2_ids = np.ones([length, max_len_path],dtype=int)
rel_ids = np.array([rel2id[rel] for rel in relations])
path1_len = np.array([len(w) for w in word_p1], dtype=int)
path2_len = np.array([len(w) for w in word_p2])

for i in range(length):
    for j, w in enumerate(word_p1[i]):
        word_p1_ids[i][j] = word2id[w]
    for j, w in enumerate(word_p2[i]):
        word_p2_ids[i][j] = word2id[w]
    for j, w in enumerate(pos_p1[i]):
        pos_p1_ids[i][j] = pos_tag(w)
    for j, w in enumerate(pos_p2[i]):
        pos_p2_ids[i][j] = pos_tag(w)
    for j, w in enumerate(dep_p1[i]):
        dep_p1_ids[i][j] = dep2id[w]
    for j, w in enumerate(dep_p2[i]):
        dep_p2_ids[i][j] = dep2id[w]

In [36]:
# test 
all_predictions = []
for j in range(num_batches):
    path_dict = [path1_len[j*batch_size:(j+1)*batch_size], path2_len[j*batch_size:(j+1)*batch_size]]
    word_dict = [word_p1_ids[j*batch_size:(j+1)*batch_size], word_p2_ids[j*batch_size:(j+1)*batch_size]]
    pos_dict = [pos_p1_ids[j*batch_size:(j+1)*batch_size], pos_p2_ids[j*batch_size:(j+1)*batch_size]]
    dep_dict = [dep_p1_ids[j*batch_size:(j+1)*batch_size], dep_p2_ids[j*batch_size:(j+1)*batch_size]]
    y_dict = rel_ids[j*batch_size:(j+1)*batch_size]

    feed_dict = {
        path_length:path_dict,
        word_ids:word_dict,
        pos_ids:pos_dict,
        dep_ids:dep_dict,
        y:y_dict}
    batch_predictions = sess.run(predictions, feed_dict)
    all_predictions.append(batch_predictions)

y_pred = []
for i in range(num_batches):
    for pred in all_predictions[i]:
        y_pred.append(pred)

count = 0
for i in range(batch_size*num_batches):
    count += y_pred[i]==rel_ids[i]
accuracy = count/(batch_size*num_batches) * 100

print("test accuracy", accuracy)

test accuracy 62.2878228782
