### TransNet: Translation-Based Network Representation Learning for Social Relation Extraction

This is an example implementation of TransNet using the TensorFlow library.

In [1]:
import numpy as np
import random
import tensorflow as tf
from input_data import read_data_sets

# Load the data sets once and keep the two vocabulary sizes at module level:
# entity_total sizes the node embedding tables and tag_total is the width of
# every relation vector fed to the graph.
aminer = read_data_sets()
entity_total, tag_total = aminer.entity_total, aminer.tag_total

In [2]:
# Parameters

# --- optimisation schedule ---
learning_rate = 0.001    # step size given to both Adam optimizers
warm_up_epochs = 40      # autoencoder-only epochs run before joint training
epochs = 200             # joint training epochs
batch_size = 200         # examples requested per training batch
eval_batch_size = 2000   # examples requested per test batch
display_step = 5         # evaluate on the test split every `display_step` epochs

# --- model / loss ---
gamma = 1                # margin of the hinge loss max(pos - neg + gamma, 0)
alpha = 0.5              # weight of the reconstruction loss in the joint loss
l2_lambda = 0.001        # weight of the L2 penalty on the autoencoder parameters
beta = 50.0              # forwarded unchanged to the batch generators; its exact
                         # effect is defined in input_data -- TODO confirm
keep_prob = 0.5          # keep probability of the dropout applied to encoded relations
rep_size = 64            # width of node embeddings and of encoded relations

# Cut-offs k for which hits@k is evaluated.
hits_k = list(range(1, 11))

In [3]:
#input
# Graph inputs for one batch; the leading (None) dimension is the batch size.
# Naming: pos_* feeds the positive example, neg_* the negative one.
#   *_h, *_t : integer ids used to index the node embedding tables
#   *_r      : relation vector of width tag_total
#   *_br     : element-wise weights multiplied into the reconstruction error

# positive examples
pos_h = tf.placeholder(dtype=tf.int32, shape=[None])
pos_t = tf.placeholder(dtype=tf.int32, shape=[None])
pos_r = tf.placeholder(dtype=tf.float32, shape=[None, tag_total])
pos_br = tf.placeholder(dtype=tf.float32, shape=[None, tag_total])

# negative examples
neg_h = tf.placeholder(dtype=tf.int32, shape=[None])
neg_t = tf.placeholder(dtype=tf.int32, shape=[None])
neg_r = tf.placeholder(dtype=tf.float32, shape=[None, tag_total])
neg_br = tf.placeholder(dtype=tf.float32, shape=[None, tag_total])

In [4]:
#embedding
# Two independent, randomly initialised embedding tables with one row per
# entity. `lookup` below reads 'int_embeddings' for head nodes and
# 'adv_embeddings' for tail nodes.
node_lookup = {}
node_lookup['int_embeddings'] = tf.Variable(
    tf.random_normal([entity_total, rep_size]))
node_lookup['adv_embeddings'] = tf.Variable(
    tf.random_normal([entity_total, rep_size]))

def lookup(pos_head, pos_tail, neg_head, neg_tail, lookup):
    """Fetch L2-normalised embeddings for the heads and tails of a batch.

    Heads are read from ``lookup['int_embeddings']`` and tails from
    ``lookup['adv_embeddings']``; every fetched row is normalised along
    axis 1.

    Note: the ``lookup`` parameter shadows this function's own name inside
    the body; here it always refers to the dict of embedding tables.

    Args:
        pos_head, pos_tail: ids of the positive examples' heads / tails.
        neg_head, neg_tail: ids of the negative examples' heads / tails.
        lookup: dict holding the 'int_embeddings' and 'adv_embeddings' tables.

    Returns:
        Tuple ``(pos_head_e, pos_tail_e, neg_head_e, neg_tail_e)``.
    """
    def unit_rows(table_key, ids):
        # Gather the requested rows, then scale each one to unit L2 norm.
        rows = tf.nn.embedding_lookup(lookup[table_key], ids)
        return tf.nn.l2_normalize(rows, 1)

    return (
        unit_rows('int_embeddings', pos_head),
        unit_rows('adv_embeddings', pos_tail),
        unit_rows('int_embeddings', neg_head),
        unit_rows('adv_embeddings', neg_tail),
    )

# Normalised head / tail embeddings of the positive and the negative batch.
(pos_h_e, pos_t_e,
 neg_h_e, neg_t_e) = lookup(pos_h, pos_t, neg_h, neg_t, node_lookup)

In [5]:
#autoencoder
# Randomly initialised parameters of the relation autoencoder:
# the encoder maps tag_total -> rep_size, the decoder maps rep_size -> tag_total.
relation_weights = {}
relation_weights['encoder_w'] = tf.Variable(tf.random_normal([tag_total, rep_size]))
relation_weights['decoder_w'] = tf.Variable(tf.random_normal([rep_size, tag_total]))

relation_biases = {}
relation_biases['encoder_b'] = tf.Variable(tf.random_normal([rep_size]))
relation_biases['decoder_b'] = tf.Variable(tf.random_normal([tag_total]))

def autoencoder(W,B,x):
    """Encode ``x`` with a tanh layer plus dropout, then decode with a sigmoid layer.

    Dropout is applied unconditionally, using the module-level ``keep_prob``.

    Args:
        W: dict with the 'encoder_w' and 'decoder_w' weight matrices.
        B: dict with the 'encoder_b' and 'decoder_b' bias vectors.
        x: batch of inputs to encode.

    Returns:
        Tuple ``(rep, decode_x)``: the encoding after dropout and its
        reconstruction.
    """
    hidden = tf.nn.tanh(tf.matmul(x, W['encoder_w']) + B['encoder_b'])
    rep = tf.nn.dropout(hidden, keep_prob)
    pre_activation = tf.matmul(rep, W['decoder_w']) + B['decoder_b']
    return rep, tf.nn.sigmoid(pre_activation)

# Both batches go through the same autoencoder parameters: positive first,
# then negative.
pos_r_rep, pos_r_dec = autoencoder(W=relation_weights, B=relation_biases, x=pos_r)
neg_r_rep, neg_r_dec = autoencoder(W=relation_weights, B=relation_biases, x=neg_r)

In [6]:
# loss
def _weighted_abs_error(decoded, target, weights):
    """Sum of |decoded - target| after element-wise multiplication by `weights`."""
    return tf.reduce_sum(tf.abs(tf.multiply(decoded - target, weights)))


def _translation_distance(head, relation, tail):
    """Per-example L1 norm of head + relation - tail, kept as a column."""
    return tf.reduce_sum(tf.abs(head + relation - tail), 1, keep_dims = True)


# L2 penalty over every autoencoder parameter.
relation_ae_l2_loss = (tf.nn.l2_loss(relation_weights['encoder_w'])
                       + tf.nn.l2_loss(relation_weights['decoder_w'])
                       + tf.nn.l2_loss(relation_biases['encoder_b'])
                       + tf.nn.l2_loss(relation_biases['decoder_b']))

# Reconstruction error of the positive and the negative relation vectors.
relation_loss = (_weighted_abs_error(pos_r_dec, pos_r, pos_br)
                 + _weighted_abs_error(neg_r_dec, neg_r, neg_br))

# Warm-up objective: positive reconstruction error plus the L2 penalty only.
relation_pos_r_loss = (_weighted_abs_error(pos_r_dec, pos_r, pos_br)
                       + l2_lambda * relation_ae_l2_loss)

# Margin-based translation loss between positive and negative examples.
pos = _translation_distance(pos_h_e, pos_r_rep, pos_t_e)
neg = _translation_distance(neg_h_e, neg_r_rep, neg_t_e)
trans_loss = tf.reduce_sum(tf.maximum(pos - neg + gamma, 0))

# Joint objective used after the warm-up stage.
loss = trans_loss + alpha * relation_loss + l2_lambda * relation_ae_l2_loss

relation_optimizer = tf.train.AdamOptimizer(learning_rate).minimize(relation_pos_r_loss)
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)
# Initializing the variables (built last so the optimizers' own variables
# are included)
init = tf.global_variables_initializer()

In [7]:
# evaluate
# Total of all entries of the fed relation vectors (the recall denominator).
relation_sum = tf.reduce_sum(pos_r)

# Predict a relation from the node embeddings alone: decode (tail - head)
# with the autoencoder's decoder layer.
pos_r_minus = pos_t_e - pos_h_e
pos_r_minus_dec = tf.nn.sigmoid(
    tf.matmul(pos_r_minus, relation_weights['decoder_w']) + relation_biases['decoder_b'])


def _hits_at(k):
    """Sum of `pos_r` over the k highest-scoring predicted tags of each example."""
    best = tf.nn.top_k(pos_r_minus_dec, k=k).indices
    # Collapse the k one-hot rows of each example into one indicator vector.
    predicted = tf.reduce_sum(tf.one_hot(best, tag_total), 1)
    return tf.reduce_sum(tf.multiply(predicted, pos_r))


# One scalar op per cut-off, in the same order as hits_k.
hits = [_hits_at(k) for k in hits_k]
    

In [8]:
# Launch the graph
def _log(*parts):
    """Print `parts` separated by single spaces.

    Emits the same text as a Python 2 ``print a, b, c`` statement while
    remaining valid syntax under Python 3.
    """
    print(' '.join(str(part) for part in parts))


sess = tf.Session()
sess.run(init)
total_batch = int(aminer.train.num_examples // batch_size)
# Test batches are requested with eval_batch_size, so the number of batches
# per evaluation pass must be derived from that size (deriving it from
# batch_size ran roughly eval_batch_size / batch_size times too many batches).
# At least one batch is always run so a small test split is still evaluated.
test_total_batch = max(1, int(aminer.test.num_examples // eval_batch_size))

# initialize relation: train only the autoencoder on positive relation vectors
_log("Starting warm-up relation")
for epoch in range(warm_up_epochs):
    # loop over all batches
    sum_loss = 0.0
    for batch_idx in range(total_batch):
        vecs, bs = aminer.train.next_autoencoder_batch(batch_size, beta)
        _, cur_loss = sess.run([relation_optimizer, relation_pos_r_loss],
                               feed_dict={pos_r: vecs, pos_br: bs})
        sum_loss += cur_loss
    _log('Warm-up relation epoch: ', epoch, 'sum of loss', sum_loss)

# joint training of node embeddings and the relation autoencoder
for epoch in range(epochs):
    sum_loss = 0.0
    for batch_idx in range(total_batch):
        (pos_h_batch, pos_t_batch, pos_r_batch, pos_b_batch,
         neg_h_batch, neg_t_batch, neg_r_batch, neg_b_batch) = \
            aminer.train.next_batch(batch_size, beta)
        train_feed = {
            pos_h: pos_h_batch, pos_t: pos_t_batch,
            pos_r: pos_r_batch, pos_br: pos_b_batch,
            neg_h: neg_h_batch, neg_t: neg_t_batch,
            neg_r: neg_r_batch, neg_br: neg_b_batch,
        }
        _, cur_loss = sess.run([optimizer, loss], feed_dict=train_feed)
        sum_loss += cur_loss
    _log('Train TransNet epoch: ', epoch, 'sum of loss', sum_loss)

    if epoch % display_step == 0:
        _log('Evaluating...')
        hits_ = [0] * len(hits_k)   # accumulated hits for each cut-off k
        p_ = [0] * len(hits_k)      # accumulated precision denominators
        all_count = 0.0             # accumulated relation_sum (recall denominator)
        for batch_idx in range(test_total_batch):
            pos_h_batch, pos_t_batch, pos_r_batch = \
                aminer.test.next_test_batch(eval_batch_size)
            cur_hits, cur_sum = sess.run([hits, relation_sum],
                                         feed_dict={pos_h: pos_h_batch,
                                                    pos_t: pos_t_batch,
                                                    pos_r: pos_r_batch})
            hits_ = [total + cur for total, cur in zip(hits_, cur_hits)]
            # A top-k prediction emits k tags per example, so the precision
            # denominator grows by k * (examples in this batch). The real
            # cut-offs from hits_k are used rather than list positions, so
            # the metric stays correct if hits_k is not exactly 1..n.
            p_ = [total + len(pos_r_batch) * float(k)
                  for total, k in zip(p_, hits_k)]
            all_count += cur_sum
        r = [hit / all_count for hit in hits_]
        p_new = [hit / denom for hit, denom in zip(hits_, p_)]
        _log('Recall', r)
        _log('Precision', p_new)


Starting warm-up relation
Warm-up relation epoch:  0 sum of loss 37099769.3789
Warm-up relation epoch:  1 sum of loss 14118158.5811
Warm-up relation epoch:  2 sum of loss 8842641.18115
Warm-up relation epoch:  3 sum of loss 6534123.71191
Warm-up relation epoch:  4 sum of loss 5151475.13013
Warm-up relation epoch:  5 sum of loss 4277016.4248
Warm-up relation epoch:  6 sum of loss 3668801.74219
Warm-up relation epoch:  7 sum of loss 3286632.22778
Warm-up relation epoch:  8 sum of loss 2984517.66162
Warm-up relation epoch:  9 sum of loss 2792923.52893
Warm-up relation epoch:  10 sum of loss 2634050.89917
Warm-up relation epoch:  11 sum of loss 2515474.53491
Warm-up relation epoch:  12 sum of loss 2434859.34949
Warm-up relation epoch:  13 sum of loss 2323524.02661
Warm-up relation epoch:  14 sum of loss 2236894.51965
Warm-up relation epoch:  15 sum of loss 2147938.34827
Warm-up relation epoch:  16 sum of loss 2084366.06323
Warm-up relation epoch:  17 sum of loss 2045124.78101
Warm-up relat

In [12]:
# Test warm-up (disabled): hits@k of the autoencoder's own reconstruction on one training batch
# relation_hits = []
# for k in hits_k:
#     relation_topk = tf.nn.top_k(pos_r_dec, k=k).indices
#     relation_pred = tf.reduce_sum(tf.one_hot(relation_topk, tag_total), 1)
#     relation_hits.append(tf.reduce_sum(tf.multiply(relation_pred, pos_r)))
# vvvvv, bbbb = aminer.train.next_autoencoder_batch(10000, 50.0)
# cur_hits = sess.run(relation_hits, feed_dict={pos_r: vvvvv, pos_br: bbbb})
# print cur_hits

[9678.0, 16767.0, 22359.0, 26240.0, 29231.0, 31645.0, 33138.0, 34099.0, 34667.0, 35039.0]
