In [1]:
import logging
import os
import sys
import time

import numpy as np
import matplotlib.pyplot as plt
import pykeen
from pykeen.kge_models import TransE

In [2]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [3]:
# Root logger: configure default handling at INFO level.
logging.basicConfig(level=logging.INFO)

# PyKEEN's own logger is set to INFO as well, so the pipeline's
# INFO records (training / evaluation progress) are emitted.
pykeen_logger = logging.getLogger('pykeen')
pykeen_logger.setLevel(logging.INFO)

In [4]:
# Record the Python interpreter version this notebook was run with.
print(sys.version)

3.7.1 (default, Nov  6 2018, 18:45:35) 
[Clang 10.0.0 (clang-1000.11.45.5)]


In [5]:
# Record the local date and time at which this notebook was executed.
print(time.asctime())

Fri Apr  5 14:12:18 2019


In [6]:
# Record the installed PyKEEN version used for this run.
print(pykeen.get_version())

0.0.23


Check which hyper-parameters are required by TransE:

In [7]:
# hyper_params is read from the TransE class itself, so no model instance is needed.
TransE.hyper_params

['embedding_dim',
 'margin_loss',
 'learning_rate',
 'scoring_function',
 'normalization_of_entities']

Define output directory:

In [8]:
# Passed to pykeen.run() as output_directory and reused later as model_directory
# for the prediction pipeline.
output_directory = '../data/trained_model'

Configure training and evaluation of TransE (a test set is provided):
* Define the path to our test set: **test_set_path**
* Define whether you want to compute the metrics (mean rank and hits@k) in raw or in a filtered setting: **filter_negative_triples**

In [9]:
# Configuration mapping handed to pykeen.run(). Key order is kept as-is.
config = {
    # --- Data ---
    'training_set_path': '../../data/rdf.nt',
    'test_set_path': '../../data/rdf.nt',  # Just for illustration, we use the training set also as test set

    # --- Run setup ---
    'execution_mode': 'Training_mode',
    'random_seed': 2,
    'kg_embedding_model_name': 'TransE',

    # --- TransE hyper-parameters (the names listed in TransE.hyper_params) ---
    'embedding_dim': 100,
    'scoring_function': 1,  # corresponds to L1
    'normalization_of_entities': 2,  # corresponds to L2
    'margin_loss': 3,
    'learning_rate': 0.1,

    # --- Training loop, evaluation setting and device ---
    'num_epochs': 100,
    'batch_size': 32,
    'filter_negative_triples': True,  # filtered (True) vs. raw (False) metrics
    'preferred_device': 'cpu',
}

Train and evaluate TransE:

In [10]:
# A single call drives the whole pipeline: the log shows embedding training
# followed by evaluation. The returned object is inspected in the next cells.
results = pykeen.run(config=config, output_directory=output_directory)

INFO:pykeen.utilities.pipeline:-------------Train KG Embeddings-------------
Training epoch: 100%|██████████| 100/100 [00:00<00:00, 149.25it/s]
INFO:pykeen.utilities.pipeline:-------------Start Evaluation-------------
INFO:pykeen.utilities.evaluation_utils.metrics_computations:Evaluation took 0.26s seconds


Check result entries:

In [11]:
# results.results is an ordered mapping (the output is an odict_keys view);
# each key names one artefact returned by the pipeline.
results.results.keys()

odict_keys(['trained_model', 'losses', 'entity_to_embedding', 'relation_to_embedding', 'eval_summary', 'entity_to_id', 'relation_to_id', 'final_configuration'])

Get evaluation results:
 * Mean rank
 * Hits@k for $k \in \{1, 3, 5, 10\}$

In [12]:
# Caveat: the test set configured above is the training set itself, so these
# scores are for illustration only and are not a held-out estimate.
results.results['eval_summary']

{'mean_rank': 0.0660377358490566,
 'hits@k': {1: 0.9339622641509434, 3: 1.0, 5: 1.0, 10: 1.0}}

Prepare inference workflow

Set path to model directory:

In [13]:
# Reuse the output directory that was passed to pykeen.run() above.
model_directory = output_directory

Set path to data directory:
* Should contain the candidate entities as *entities.txt*
* Should contain the candidate relations as *relations.txt*

In [14]:
# Passed to start_predictions_pipeline() as data_directory.
# NOTE(review): the training data above is read from '../../data', whereas this
# points to '../data' — confirm that both relative paths are intended.
data_directory = '../data'

In [15]:
from pykeen.predict import start_predictions_pipeline

In [None]:
# Start the prediction pipeline with the model directory and the data directory
# (the latter holds the candidate entities and relations).
start_predictions_pipeline(
    model_directory=model_directory,
    data_directory=data_directory,
)