In [12]:
import logging
import os
import sys
import time

import numpy as np
import matplotlib.pyplot as plt
import pykeen
from pykeen.kge_models import ConvE
import os
# Expose only GPU 0 to CUDA-aware libraries running in this kernel.
os.environ["CUDA_VISIBLE_DEVICES"]='0'

In [13]:
%matplotlib inline

In [14]:
# Configure root logging at INFO and set the 'pykeen' logger to INFO too.
logging.basicConfig(level=logging.INFO)
pykeen_logger = logging.getLogger('pykeen')
pykeen_logger.setLevel(logging.INFO)

# Record the interpreter version used for this run.
print(sys.version)

In [15]:
# Timestamp this run so the recorded outputs can be dated.
print(time.asctime())

Fri May 24 10:52:23 2019


In [16]:
# Record the installed PyKEEN version alongside the results.
print(pykeen.get_version())

0.0.26-dev


Check which hyper-parameters are required by ConvE:

In [17]:
ConvE.hyper_params

['embedding_dim',
 'ConvE_input_channels',
 'ConvE_output_channels',
 'ConvE_height',
 'ConvE_width',
 'ConvE_kernel_height',
 'ConvE_kernel_width',
 'conv_e_input_dropout',
 'conv_e_feature_map_dropout',
 'conv_e_output_dropout',
 'margin_loss',
 'learning_rate']

Define output directory:

In [18]:
# Results folder: <home>/Desktop/pykeen_test
home_dir = os.path.expanduser('~')
output_directory = os.path.join(home_dir, 'Desktop', 'pykeen_test')

Define hyper-parameters:

Note: ConvE_height * ConvE_width == embedding_dim

Note: ConvE_kernel_height <= ConvE_height

Note: ConvE_kernel_width <= ConvE_width

Train ConvE:

In [19]:
# For the new model
# Constraint: ConvE_height * ConvE_width must equal embedding_dim (10 * 20 == 200),
# and the kernel must not exceed the reshaped input in either dimension.
# 'learning_rate' appears in ConvE.hyper_params but is passed to fit() instead of here.
config = {
    # 'random_seed': 22,  # left disabled, as in the original cell
    'embedding_dim': 200,
    'ConvE_input_channels': 1,
    'ConvE_output_channels': 32,
    'ConvE_height': 10,
    'ConvE_width': 20,
    'ConvE_kernel_height': 3,
    'ConvE_kernel_width': 3,
    'conv_e_input_dropout': 0.2,
    'conv_e_feature_map_dropout': 0.2,
    'conv_e_output_dropout': 0.3,
    'margin_loss': 1,
    'preferred_device': 'gpu',
}

In [20]:
# Instantiate ConvE, passing every config entry as a keyword argument.
model = ConvE(**config)

In [26]:
# Alternative single-file input, kept for reference:
# train_triples = model.load_triples_from_path('../../tests/resources/data/rdf.nt')
# Load the train/valid/test splits in one call; the result unpacks in the same order.
fb15k_paths = ['fb15k_train.tsv', 'fb15k_valid.tsv', 'fb15k_test.tsv']
train_triples, valid_triples, test_triples = model.load_triples_from_path(fb15k_paths)

In [22]:
from pykeen.kge_models.base import load_data

In [23]:
# Read the triples from the FB15k training TSV (path is relative to the working directory).
triples = load_data('fb15k_train.tsv')

In [24]:
# Keep only the first 10,000 triples.
triples_short = triples[:10000]

In [25]:
# NOTE(review): train_triples and test_triples are bound to the same object here,
# so any evaluation on test_triples is an evaluation on the training data.
# This also rebinds both names from the earlier load_triples_from_path cell — confirm
# which of the two loading paths is intended.
train_triples = test_triples = model.load_triples(triples_short)

In [159]:
# Put the model in training mode; the call returns the model, so its layer
# summary is echoed as the cell output.
model.train()

ConvE(
  (criterion): MarginRankingLoss()
  (inp_drop): Dropout(p=0.2)
  (hidden_drop): Dropout(p=0.3)
  (feature_map_drop): Dropout2d(p=0.2)
  (loss): BCEWithLogitsLoss()
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (bn0): BatchNorm2d(1, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn2): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc): Linear(in_features=10368, out_features=200, bias=True)
)

In [None]:
# Fit on the training triples; `losses` feeds the loss-per-epoch plot.
losses = model.fit(
    train_triples,
    learning_rate=0.003,
    num_epochs=300,
    batch_size=128,
)

INFO:pykeen.kge_models.base:****Run Model On CUDA****
Training epoch:   1%|▏         | 4/300 [01:53<2:19:29, 28.28s/it]

Access trained model:

In [None]:
model

Visualize loss values:

In [11]:
# One x position per recorded loss value, drawn through the explicit Axes API.
epochs = np.arange(len(losses))
loss_fig, loss_ax = plt.subplots()
loss_ax.plot(epochs, losses)
loss_ax.set_title(r'Loss Per Epoch')
loss_ax.set_xlabel('epoch')
loss_ax.set_ylabel('loss')
plt.show()

NameError: name 'losses' is not defined

Switch the model to evaluation mode

In [None]:
# eval() switches the model to evaluation mode; it does not move it to another device.
model.eval()

Load test triples

In [46]:
# NOTE(review): this is the call that raised the TypeError recorded in the output.
# Presumably rdf.nt contains entities/relations that have no id in the model's
# mapping, so a lookup yields None — confirm, and consider the FB15k test file
# in the commented-out line instead.
test_triples = model.map_triples_from_path('../../tests/resources/data/rdf.nt')
# test_triples = model.map_triples_from_path('fb15k_test.tsv')

TypeError: int() argument must be a string, a bytes-like object or a number, not 'NoneType'

Calculate results

In [133]:
from pykeen.utilities.evaluation_utils.metrics_computations import compute_metric_results

In [134]:
# Evaluate on the first 100 mapped test triples, with filter_neg_triples disabled.
results = compute_metric_results(
    kg_embedding_model=model,
    mapped_train_triples=train_triples,
    mapped_test_triples=test_triples[:100],
    device=model.device,
    filter_neg_triples=False,
)

⚽️ corrupting triples: 100%|██████████| 100/100 [00:05<00:00, 19.38it/s]
INFO:pykeen.utilities.evaluation_utils.metrics_computations:Evaluation took 5.27 seconds


In [135]:
# Show the MetricResults (mean_rank and hits_at_k).
results

MetricResults(mean_rank=1.0, hits_at_k={1: 1.0, 3: 1.0, 5: 1.0, 10: 1.0})

## Make subject and object predictions

In [136]:
# Relation column (index 1) of the subset, sliced as 1:2 so it stays 2-D.
tes = triples_short[:, 1:2]

In [111]:
# Boolean mask of shape (n, 1): True for rows whose relation equals this string.
indexing = triples_short[:, 1:2] == '/award/award_category/nominees./award/award_nomination/nominated_for'

In [123]:
# flatten() turns the (n, 1) mask into a 1-D row selector.
triples_short[indexing.flatten()]

array([['/m/02qyp19',
        '/award/award_category/nominees./award/award_nomination/nominated_for',
        '/m/02d413'],
       ['/m/04ljl_l',
        '/award/award_category/nominees./award/award_nomination/nominated_for',
        '/m/03qcfvw'],
       ['/m/02r0csl',
        '/award/award_category/nominees./award/award_nomination/nominated_for',
        '/m/0m313'],
       ['/m/05ztjjw',
        '/award/award_category/nominees./award/award_nomination/nominated_for',
        '/m/01gc7'],
       ['/m/05f4m9q',
        '/award/award_category/nominees./award/award_nomination/nominated_for',
        '/m/08lr6s'],
       ['/m/03hkv_r',
        '/award/award_category/nominees./award/award_nomination/nominated_for',
        '/m/0ds3t5x'],
       ['/m/05zkcn5',
        '/award/award_category/nominees./award/award_nomination/nominated_for',
        '/m/03mh94'],
       ['/m/0gq_v',
        '/award/award_category/nominees./award/award_nomination/nominated_for',
        '/m/0ds11z'],
       ['/

In [137]:
# Distinct values in `tes` and how often each one occurs.
uniq, count = np.unique(tes, return_counts=True)
# Index into `uniq` of the largest count: the last entry of the ascending argsort.
most_count = np.argsort(count)[-1]

In [138]:
# The value of `tes` with the highest occurrence count.
uniq[most_count]

'/award/award_nominee/award_nominations./award/award_nomination/award_nominee'

In [139]:
# Query pair taken from the subset: it appears in the filtered rows shown earlier
# with object '/m/03qcfvw'.
subject = '/m/04ljl_l'
relation = '/award/award_category/nominees./award/award_nomination/nominated_for'

In [154]:
# Look for one specific (subject, relation, object) triple in triples_short.
# Each mask keeps shape (n, 1) because the column is sliced rather than indexed.
wanted_subject, wanted_relation, wanted_object = (
    '/m/04ljl_l',
    '/award/award_category/nominees./award/award_nomination/nominated_for',
    '/m/03xb2w',
)
con1 = triples_short[:, 0:1] == wanted_subject
con2 = triples_short[:, 1:2] == wanted_relation
con3 = triples_short[:, 2:3] == wanted_object
# Row-wise AND: True only where all three columns match.
con_total = con1 & con2 & con3

In [155]:
# Rows matching all three conditions; an empty result means the triple is not in triples_short.
triples_short[con_total.flatten()]

array([], shape=(0, 3), dtype='<U146')

In [149]:
# Show the first 10 rows of the object predictions for (subject, relation).
model.predict_objects(subject,relation)[:10]

array([['/m/075wx7_', '0.9367189'],
       ['/m/01msrb', '0.9047188'],
       ['/m/03qcfvw', '0.8560706'],
       ['/m/03xb2w', '0.041995708'],
       ['/m/017gl1', '0.04062991'],
       ['/m/0b_fw', '0.03682607'],
       ['/m/0dsvzh', '0.029924264'],
       ['/m/03lrht', '0.02428267'],
       ['/m/05jzt3', '0.019856513'],
       ['/m/0jqp3', '0.01937395']], dtype='<U32')

In [21]:
# NOTE(review): these are DBpedia Commons IRIs, not FB15k '/m/...' ids. The outputs
# recorded for the following cells presumably come from a model trained on the
# rdf.nt file referenced in the commented-out loading line — confirm before re-running.
subject = 'http://commons.dbpedia.org/resource/File:Paddestoel_003.jpg'
relation = 'http://commons.dbpedia.org/property/description'

In [22]:
# Score candidate objects for the (subject, relation) pair; the full array is displayed.
model.predict_objects(subject,relation)

array([['Paddestoel . These are used in the Netherlands to show directions for cyclists.',
        '0.99830544'],
       ['2004-08-20', '0.00606696'],
       ['Hunebed near Drouwen, the Netherlands. August 2004.',
        '0.002805463'],
       ['Hunebed near Borger, the largest hunebed in the Netherlands, August 2004.',
        '0.0026126327'],
       ['Hunebeds  near Drouwen, the Netherlands.', '0.0026097216'],
       ['Hunebed D24 near Bronneger, August 2004.', '0.002481958'],
       ['Events', '0.002360752'],
       ['Entrance to the hunebed of Borger. August 2004.',
        '0.0014043334'],
       ['Kane quantum computer', '0.0013159805'],
       ['The hunebed of Borger, the largest of the Netherlands. August 2004.',
        '0.0012376056'],
       ['Hunebed D25 near Bronneger, August 2004.', '0.0012304807'],
       ['1801', '0.0008116153'],
       ["'De Slegte' bookshop in Groningen. De Slegte is the largest chain of second-hand and remainder books in the Netherlands. This is the

In [23]:
# Reverse query: take the top-scoring description from the object prediction
# and use it as the object.
obj = 'Paddestoel . These are used in the Netherlands to show directions for cyclists.'
relation = 'http://commons.dbpedia.org/property/description'

In [24]:
# Score candidate subjects for the (obj, relation) pair; the full array is displayed.
model.predict_subjects(obj, relation)

array([['http://commons.dbpedia.org/resource/File:Paddestoel_002.jpg',
        '0.99865675'],
       ['http://commons.dbpedia.org/resource/File:Paddestoel_003.jpg',
        '0.99830544'],
       ['Entrance to the hunebed of Borger. August 2004.', '0.10651482'],
       ['Diagram drawn by Tim Starling', '0.091593064'],
       ['The hunebed of Borger, the largest of the Netherlands. August 2004.',
        '0.09136357'],
       ['100.0', '0.09073976'],
       ['http://commons.dbpedia.org/resource/File:Bustaxi.jpg',
        '0.041434254'],
       ['Own picture from Andre Engels', '0.022528224'],
       ['Lokoj', '0.017725676'],
       ["'De Slegte' bookshop in Groningen. De Slegte is the largest chain of second-hand and remainder books in the Netherlands. This is the back entrance .",
        '0.013548709'],
       ['http://commons.dbpedia.org/resource/File:Lijn51.jpg',
        '0.011775235'],
       ['Kane quantum computer', '0.010166289'],
       ['http://commons.dbpedia.org/resource/Cate