In [None]:
%load_ext autoreload
%autoreload 2

# Project helper module; notebook() is invoked once for its setup side
# effects. What it configures is defined in ryn.common.helper and is not
# visible from this notebook.
from ryn.common import helper
helper.notebook()

# Restrict CUDA to the device with index 2 for this process.
# NOTE(review): this only takes effect if it is set before CUDA is
# initialized - confirm that nothing imported above already touches CUDA.
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '2'

In [None]:
# https://github.com/pykeen/pykeen

import ryn
from ryn.embers import keen
from ryn.graphs import split

from pykeen import pipeline

import copy
from datetime import datetime

# --- run configuration -------------------------------------------------------
model = 'DistMult'
embedding_dim = 256
epochs = 5

path = ryn.ENV.SPLIT_DIR / 'oke.fb15k237_30061990_50/'

# --- data --------------------------------------------------------------------
# Load the ryn split dataset and convert it into the pykeen triple factories
# (train / valid / test) consumed by the pipeline below.
dataset = split.Dataset.load(path)
triples_factory = keen.TripleFactories.create(dataset)

# --- pipeline arguments ------------------------------------------------------
# Reference: https://pykeen.readthedocs.io/en/latest/reference/pipeline.html
kwargs = {
    'model': model,
    'model_kwargs': {'embedding_dim': embedding_dim},
    'optimizer': 'Adagrad',
    'optimizer_kwargs': {'lr': 0.01},
    'training_kwargs': {'num_epochs': epochs},
    'stopper': 'early',
    'stopper_kwargs': {'frequency': 5, 'patience': 50, 'delta': 0.002},
    # important: the seed is taken from the dataset configuration
    'random_seed': dataset.cfg.seed,
}

# Custom triple factories are passed instead of a named pykeen dataset.
factories = {
    'training_triples_factory': triples_factory.train,
    'validation_triples_factory': triples_factory.valid,
    'testing_triples_factory': triples_factory.test,
}

# pykeen does not save the pipeline parameters itself, so a deep copy of them
# is stored in the result metadata next to the dataset description.
run_metadata = {
    'metadata': {
        'dataset_name': dataset.path.name,
        'dataset_path': str(dataset.path),
        'graph_name': dataset.g.name,
    },
    'pipeline': copy.deepcopy(kwargs),
}

# --- training ----------------------------------------------------------------
result = pipeline.pipeline(**factories, metadata=run_metadata, **kwargs)

# --- persist -----------------------------------------------------------------
# Output directory name: <model>-<embedding_dim>-<timestamp>, placed below a
# directory named after the dataset.
timestamp = datetime.now().strftime(keen.DATEFMT)
fname = f'{model}-{embedding_dim}-{timestamp}'

out_dir = ryn.ENV.EMBER_DIR / dataset.path.name / fname
result.save_to_directory(str(out_dir))