In [1]:
from tqdm import tqdm
import tensorflow as tf
import numpy as np
import pprint

import os, sys
sys.path.append(os.path.dirname(os.getcwd()))

from data import WN18
from model.metrics import evaluate_rank

In [2]:
class Config:
    seed = 21
    n_epochs = 10
    batch_size = 128
    embed_dim = 150

In [3]:
"""
e: entity
s: subject
p: predicate
o: object
"""

def read_triples(path):
    triples = []
    with open(path, 'rt') as f:
        for line in f.readlines():
            s, p, o = line.split()
            triples += [(s.strip(), p.strip(), o.strip())]
    return triples


def load_triple():
    WN18.download()
    triples_tr = read_triples('../data/WN18/wn18/train.txt')
    triples_va = read_triples('../data/WN18/wn18/valid.txt')
    triples_te = read_triples('../data/WN18/wn18/test.txt')
    
    triples_all = triples_tr + triples_va + triples_te
    
    return triples_all, triples_tr, triples_va, triples_te


def build_vocab(triples):
    params = {}
    
    e_set = {s for (s, p, o) in triples} | {o for (s, p, o) in triples}
    p_set = {p for (s, p, o) in triples}

    params['e_vocab_size'] = len(e_set)
    params['p_vocab_size'] = len(p_set)

    e2idx = {e: idx for idx, e in enumerate(sorted(e_set))}
    p2idx = {p: idx for idx, p in enumerate(sorted(p_set))}
    
    return e2idx, p2idx, params


def build_train_data(triples_tr, e2idx, p2idx):
    x_s = np.array([e2idx[s] for (s, p, o) in triples_tr], dtype=np.int32)
    x_p = np.array([p2idx[p] for (s, p, o) in triples_tr], dtype=np.int32)
    x_o = np.array([e2idx[o] for (s, p, o) in triples_tr], dtype=np.int32)

    x = {'s': x_s,
         'p': x_p,
         'o': x_o}
    y = np.ones([len(x_s)], dtype=np.float32)
    
    return x, y


def train_input_fn(triples_tr, e2idx, p2idx, random_state, params):
    x, y = build_train_data(triples_tr, e2idx, p2idx)
    s, p, o = x['s'], x['p'], x['o']
    
    s_ = random_state.choice(params['e_vocab_size'], s.shape)
    o_ = random_state.choice(params['e_vocab_size'], o.shape)
    
    x_ = {
        's': np.concatenate([s, s_, s]),
        'p': np.concatenate([p, p, p]),
        'o': np.concatenate([o, o, o_])}
    y_ = np.concatenate([y, np.zeros([2*len(y)], dtype=np.float32)])
    
    return tf.estimator.inputs.numpy_input_fn(x = x_,
                                              y = y_,
                                              batch_size = Config.batch_size,
                                              num_epochs = 1,
                                              shuffle = True)

In [4]:
def forward(features, params):
    e_embed = tf.get_variable('e_embed',
                              [params['e_vocab_size'], Config.embed_dim],
                              initializer=tf.contrib.layers.xavier_initializer())
    p_embed = tf.get_variable('p_embed',
                              [params['p_vocab_size'], Config.embed_dim],
                              initializer=tf.contrib.layers.xavier_initializer())
    
    s = tf.nn.embedding_lookup(e_embed, features['s'])
    p = tf.nn.embedding_lookup(p_embed, features['p'])
    o = tf.nn.embedding_lookup(e_embed, features['o'])
    
    logits = tf.reduce_sum(s * p * o, [1])
    
    return logits
    
    
def model_fn(features, labels, mode, params):
    logits = forward(features, params)
    
    if mode == tf.estimator.ModeKeys.TRAIN:
        tf.logging.info('\n'+pprint.pformat(tf.trainable_variables()))
        
        loss_op = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits,
                                                                         labels=labels))
        
        train_op = tf.train.AdamOptimizer().minimize(
            loss_op, global_step=tf.train.get_global_step())
        
        return tf.estimator.EstimatorSpec(mode = mode,
                                          loss = loss_op,
                                          train_op = train_op)
    
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode, predictions = tf.sigmoid(logits))

In [5]:
random_state = np.random.RandomState(Config.seed)
triples_all, triples_tr, triples_va, triples_te = load_triple()
e2idx, p2idx, params = build_vocab(triples_all)

model = tf.estimator.Estimator(model_fn,
                               params = params)

for n_epoch in range(Config.n_epochs):
    model.train(train_input_fn(triples_tr,
                               e2idx,
                               p2idx,
                               random_state,
                               params))
    evaluate_rank(model,
                  triples_va,
                  triples_te,
                  triples_all,
                  e2idx,
                  p2idx,
                  params['e_vocab_size'],
                  batch_size = Config.batch_size*10)

Files Already Downloaded
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmpk4th1w39', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x11b66a240>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:
[<tf.Variable 'e_embed:0' shape=(40943, 150) dtype=float32_ref>,
 <tf.Variable 'p_embed:0' shape=(18, 150) dtype=float32_ref>]
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorfl

INFO:tensorflow:global_step/sec: 26.7079
INFO:tensorflow:loss = 0.69138396, step = 4217 (3.744 sec)
INFO:tensorflow:global_step/sec: 25.8455
INFO:tensorflow:loss = 0.68983907, step = 4317 (3.871 sec)
INFO:tensorflow:global_step/sec: 24.9607
INFO:tensorflow:loss = 0.6971928, step = 4417 (4.005 sec)
INFO:tensorflow:global_step/sec: 26.8089
INFO:tensorflow:loss = 0.39985102, step = 4517 (3.730 sec)
INFO:tensorflow:global_step/sec: 22.4693
INFO:tensorflow:loss = 0.48135972, step = 4617 (4.452 sec)
INFO:tensorflow:global_step/sec: 25.397
INFO:tensorflow:loss = 0.3950963, step = 4717 (3.937 sec)
INFO:tensorflow:global_step/sec: 28.7028
INFO:tensorflow:loss = 0.38092944, step = 4817 (3.484 sec)
INFO:tensorflow:global_step/sec: 31.674
INFO:tensorflow:loss = 0.29065722, step = 4917 (3.157 sec)
INFO:tensorflow:global_step/sec: 31.8215
INFO:tensorflow:loss = 0.24799505, step = 5017 (3.142 sec)
INFO:tensorflow:global_step/sec: 32.1538
INFO:tensorflow:loss = 0.22924696, step = 5117 (3.111 sec)
INFO

INFO:tensorflow:loss = 0.7430958, step = 9733 (3.799 sec)
INFO:tensorflow:global_step/sec: 25.7924
INFO:tensorflow:loss = 0.6391849, step = 9833 (3.877 sec)
INFO:tensorflow:global_step/sec: 25.1487
INFO:tensorflow:loss = 0.71445036, step = 9933 (3.977 sec)
INFO:tensorflow:Saving checkpoints for 9948 into /var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmpk4th1w39/model.ckpt.
INFO:tensorflow:Loss for final step: 0.55153847.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmpk4th1w39/model.ckpt-9948
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmpk4th1w39/model.ckpt-9948
INFO:tensorflow:Running local_i

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:
[<tf.Variable 'e_embed:0' shape=(40943, 150) dtype=float32_ref>,
 <tf.Variable 'p_embed:0' shape=(18, 150) dtype=float32_ref>]
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmpk4th1w39/model.ckpt-13264
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 13265 into /var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmpk4th1w39/model.ckpt.
INFO:tensorflow:loss = 0.04488693, step = 13265
INFO:tensorflow:global_step/sec: 31.7904
INFO:tensorflow:loss = 0.51365817, step = 13365 (3.147 sec)
INFO:tensorflow:global_step/sec: 31.7211
INFO:tensorflow:loss = 0.60783625, step = 13465 (3.153 sec)
INFO:tensorflow:global_step/sec: 31.712
INFO:tensorflow:loss = 0.586699, step = 13565 (3.153 sec)
INFO:tensorflow:global_step/

INFO:tensorflow:loss = 0.1490865, step = 18081 (2.745 sec)
INFO:tensorflow:global_step/sec: 36.8554
INFO:tensorflow:loss = 0.11415184, step = 18181 (2.713 sec)
INFO:tensorflow:global_step/sec: 36.4689
INFO:tensorflow:loss = 0.07971771, step = 18281 (2.742 sec)
INFO:tensorflow:global_step/sec: 36.9034
INFO:tensorflow:loss = 0.05309146, step = 18381 (2.710 sec)
INFO:tensorflow:global_step/sec: 37.0776
INFO:tensorflow:loss = 0.038600124, step = 18481 (2.697 sec)
INFO:tensorflow:global_step/sec: 37.2023
INFO:tensorflow:loss = 0.031075522, step = 18581 (2.688 sec)
INFO:tensorflow:global_step/sec: 37.052
INFO:tensorflow:loss = 0.022928085, step = 18681 (2.699 sec)
INFO:tensorflow:global_step/sec: 37.1689
INFO:tensorflow:loss = 0.024230544, step = 18781 (2.690 sec)
INFO:tensorflow:global_step/sec: 36.7703
INFO:tensorflow:loss = 0.01898127, step = 18881 (2.720 sec)
INFO:tensorflow:global_step/sec: 36.9673
INFO:tensorflow:loss = 0.017716182, step = 18981 (2.705 sec)
INFO:tensorflow:global_step/

INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmpk4th1w39/model.ckpt-23212
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmpk4th1w39/model.ckpt-23212
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
[valid] Raw Mean Rank: 952.4538
[valid] Raw Hits@1: 13.200000000000001
[valid] Raw Hits@3: 41.05
[valid] Raw Hits@5: 57.49999999999999
[valid] Raw Hits@10: 73.85000000000001
[valid] Filtered Mean Rank: 940.9745
[valid] Filtered Hits@1: 19.06
[valid] Filtered Hits@3: 53.959999999999994
[valid] Filtered Hits@5: 71.25
[valid] Filtered Hits@10: 85.86
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was

INFO:tensorflow:Restoring parameters from /var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmpk4th1w39/model.ckpt-26528
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 26529 into /var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmpk4th1w39/model.ckpt.
INFO:tensorflow:loss = 0.03775738, step = 26529
INFO:tensorflow:global_step/sec: 28.1057
INFO:tensorflow:loss = 0.018823368, step = 26629 (3.559 sec)
INFO:tensorflow:global_step/sec: 26.4512
INFO:tensorflow:loss = 0.0105059445, step = 26729 (3.781 sec)
INFO:tensorflow:global_step/sec: 26.7515
INFO:tensorflow:loss = 0.0068263886, step = 26829 (3.737 sec)
INFO:tensorflow:global_step/sec: 27.0036
INFO:tensorflow:loss = 0.0052618394, step = 26929 (3.703 sec)
INFO:tensorflow:global_step/sec: 27.3111
INFO:tensorflow:loss = 0.0036338367, step = 27029 (3.662 sec)
INFO:tensorflow:global_step/sec: 27.0787
INFO:tensorflow:loss = 0.010116895, step = 27129 (3.692 sec)
INFO:tenso

INFO:tensorflow:loss = 0.0042908806, step = 31645 (2.977 sec)
INFO:tensorflow:global_step/sec: 34.5902
INFO:tensorflow:loss = 0.002525309, step = 31745 (2.891 sec)
INFO:tensorflow:global_step/sec: 35.6395
INFO:tensorflow:loss = 0.0025300249, step = 31845 (2.806 sec)
INFO:tensorflow:global_step/sec: 29.2422
INFO:tensorflow:loss = 0.0016556489, step = 31945 (3.420 sec)
INFO:tensorflow:global_step/sec: 29.4093
INFO:tensorflow:loss = 0.001347293, step = 32045 (3.400 sec)
INFO:tensorflow:global_step/sec: 33.6106
INFO:tensorflow:loss = 0.03216719, step = 32145 (2.975 sec)
INFO:tensorflow:global_step/sec: 35.1891
INFO:tensorflow:loss = 0.0010582064, step = 32245 (2.842 sec)
INFO:tensorflow:global_step/sec: 29.7018
INFO:tensorflow:loss = 0.006578085, step = 32345 (3.367 sec)
INFO:tensorflow:global_step/sec: 30.0599
INFO:tensorflow:loss = 0.005026155, step = 32445 (3.327 sec)
INFO:tensorflow:global_step/sec: 32.5081
INFO:tensorflow:loss = 0.006975163, step = 32545 (3.076 sec)
INFO:tensorflow:gl