In [2]:
# Uncomment to install the dependencies into the kernel's environment if they are missing:
# %pip install pykeen
# %pip install torch

In [3]:
from pykeen.models import ERModel, TransD, DistMult, TransH, TransE, RotatE, ComplEx, TuckER
from pykeen.nn import Embedding
from pykeen.nn.modules import Interaction, NormBasedInteraction
from torch import FloatTensor
from pykeen.pipeline import pipeline
from class_resolver import Hint, HintOrType, OptionalKwargs
from torch.nn import functional
from pykeen.nn.init import xavier_uniform_, xavier_uniform_norm_, xavier_normal_norm_
from pykeen.typing import Constrainer, Initializer
from pykeen.regularizers import Regularizer, LpRegularizer
from typing import Union, Any, ClassVar, Mapping
from pykeen.utils import negative_norm_of_sum, tensor_product
from pykeen.constants import DEFAULT_EMBEDDING_HPO_EMBEDDING_DIM_RANGE
from pykeen.evaluation import RankBasedEvaluator
from pykeen.datasets import Nations
from torch.optim import Adam, SGD
from pykeen.predict import predict_triples

  from .autonotebook import tqdm as notebook_tqdm


## TransE

In [2]:
# Train and evaluate TransE on the Nations dataset. Only the epoch budget and
# the random seed are set explicitly; every other pipeline option is left at
# its default.
result_TransE = pipeline(
    model=TransE,
    dataset='nations',
    training_kwargs=dict(num_epochs=100),
    random_seed=1_603_073_093,
)

No cuda devices were available. The model runs on CPU
Training epochs on cpu: 100%|██████████| 100/100 [00:20<00:00,  4.90epoch/s, loss=0.732, prev_loss=0.746]
Evaluating on cpu: 100%|██████████| 201/201 [00:00<00:00, 1.61ktriple/s]
INFO:pykeen.evaluation.evaluator:Evaluation took 0.20s seconds


In [None]:
# Flatten the evaluation metrics once and reuse the dict for every lookup,
# instead of rebuilding it for each printed line.
metrics_TransE = result_TransE.metric_results.to_flat_dict()

# The label printed as "MRR" is read from the inverse-harmonic-mean-rank entry.
print(f"MRR: {metrics_TransE['both.realistic.inverse_harmonic_mean_rank']}")
for k in (1, 3, 5, 10):
    hits_key = f"both.realistic.hits_at_{k}"
    print(f"Hits@{k} : {metrics_TransE[hits_key]}")

MRR: 0.36063244938850403
Hits@1 : 0.014925373134328358
Hits@3 : 0.6467661691542289
Hits@5 : 0.8109452736318408
Hits@10 : 0.9776119402985075


## TransH

In [3]:
# Train and evaluate TransH on the Nations dataset. Only the epoch budget and
# the random seed are set explicitly; every other pipeline option is left at
# its default.
result_TransH = pipeline(
    model=TransH,
    dataset='nations',
    training_kwargs=dict(num_epochs=100),
    random_seed=1_603_073_093,
)

INFO:pykeen.datasets.utils:Loading cached preprocessed dataset from file:///C:/Users/%D0%9E%D0%BA%D1%81%D0%B0%D0%BD%D0%B0/.data/pykeen/datasets/nations/cache/47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
INFO:pykeen.triples.triples_factory:Loading from file:///C:/Users/%D0%9E%D0%BA%D1%81%D0%B0%D0%BD%D0%B0/.data/pykeen/datasets/nations/cache/47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM/training
INFO:pykeen.triples.triples_factory:Loading from file:///C:/Users/%D0%9E%D0%BA%D1%81%D0%B0%D0%BD%D0%B0/.data/pykeen/datasets/nations/cache/47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM/testing
INFO:pykeen.triples.triples_factory:Loading from file:///C:/Users/%D0%9E%D0%BA%D1%81%D0%B0%D0%BD%D0%B0/.data/pykeen/datasets/nations/cache/47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM/validation
INFO:pykeen.pipeline.api:Using device: None
Training epochs on cpu: 100%|██████████| 100/100 [00:24<00:00,  4.02epoch/s, loss=0.837, prev_loss=0.842]
Evaluating on cpu: 100%|██████████| 201/201 [00:00<00:00, 1.53ktriple/s]
INFO:pykeen.evaluation.evaluator:Evaluatio

In [None]:
# Flatten the evaluation metrics once and reuse the dict for every lookup,
# instead of rebuilding it for each printed line.
metrics_TransH = result_TransH.metric_results.to_flat_dict()

# The label printed as "MRR" is read from the inverse-harmonic-mean-rank entry.
print(f"MRR: {metrics_TransH['both.realistic.inverse_harmonic_mean_rank']}")
for k in (1, 3, 5, 10):
    hits_key = f"both.realistic.hits_at_{k}"
    print(f"Hits@{k} : {metrics_TransH[hits_key]}")

MRR: 0.43468767404556274
Hits@1 : 0.14925373134328357
Hits@3 : 0.6368159203980099
Hits@5 : 0.845771144278607
Hits@10 : 0.9701492537313433


## TransD

In [4]:
# Train and evaluate TransD on the Nations dataset. Only the epoch budget and
# the random seed are set explicitly; every other pipeline option is left at
# its default.
result_TransD = pipeline(
    model=TransD,
    dataset='nations',
    training_kwargs=dict(num_epochs=100),
    random_seed=1_603_073_093,
)

INFO:pykeen.datasets.utils:Loading cached preprocessed dataset from file:///C:/Users/%D0%9E%D0%BA%D1%81%D0%B0%D0%BD%D0%B0/.data/pykeen/datasets/nations/cache/47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
INFO:pykeen.triples.triples_factory:Loading from file:///C:/Users/%D0%9E%D0%BA%D1%81%D0%B0%D0%BD%D0%B0/.data/pykeen/datasets/nations/cache/47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM/training
INFO:pykeen.triples.triples_factory:Loading from file:///C:/Users/%D0%9E%D0%BA%D1%81%D0%B0%D0%BD%D0%B0/.data/pykeen/datasets/nations/cache/47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM/testing
INFO:pykeen.triples.triples_factory:Loading from file:///C:/Users/%D0%9E%D0%BA%D1%81%D0%B0%D0%BD%D0%B0/.data/pykeen/datasets/nations/cache/47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM/validation
INFO:pykeen.pipeline.api:Using device: None
Training epochs on cpu: 100%|██████████| 100/100 [00:27<00:00,  3.63epoch/s, loss=0.771, prev_loss=0.789]
Evaluating on cpu: 100%|██████████| 201/201 [00:00<00:00, 1.97ktriple/s]
INFO:pykeen.evaluation.evaluator:Evaluatio

In [None]:
# Flatten the evaluation metrics once and reuse the dict for every lookup,
# instead of rebuilding it for each printed line.
metrics_TransD = result_TransD.metric_results.to_flat_dict()

# The label printed as "MRR" is read from the inverse-harmonic-mean-rank entry.
print(f"MRR: {metrics_TransD['both.realistic.inverse_harmonic_mean_rank']}")
for k in (1, 3, 5, 10):
    hits_key = f"both.realistic.hits_at_{k}"
    print(f"Hits@{k} : {metrics_TransD[hits_key]}")

MRR: 0.36391735076904297
Hits@1 : 0.004975124378109453
Hits@3 : 0.6567164179104478
Hits@5 : 0.845771144278607
Hits@10 : 0.9776119402985075


## RotatE

In [5]:
# Train and evaluate RotatE on the Nations dataset. Only the epoch budget and
# the random seed are set explicitly; every other pipeline option is left at
# its default.
result_RotatE = pipeline(
    model=RotatE,
    dataset='nations',
    training_kwargs=dict(num_epochs=100),
    random_seed=1_603_073_093,
)

INFO:pykeen.datasets.utils:Loading cached preprocessed dataset from file:///C:/Users/%D0%9E%D0%BA%D1%81%D0%B0%D0%BD%D0%B0/.data/pykeen/datasets/nations/cache/47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
INFO:pykeen.triples.triples_factory:Loading from file:///C:/Users/%D0%9E%D0%BA%D1%81%D0%B0%D0%BD%D0%B0/.data/pykeen/datasets/nations/cache/47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM/training
INFO:pykeen.triples.triples_factory:Loading from file:///C:/Users/%D0%9E%D0%BA%D1%81%D0%B0%D0%BD%D0%B0/.data/pykeen/datasets/nations/cache/47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM/testing
INFO:pykeen.triples.triples_factory:Loading from file:///C:/Users/%D0%9E%D0%BA%D1%81%D0%B0%D0%BD%D0%B0/.data/pykeen/datasets/nations/cache/47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM/validation
INFO:pykeen.pipeline.api:Using device: None
Training epochs on cpu: 100%|██████████| 100/100 [00:26<00:00,  3.81epoch/s, loss=0.753, prev_loss=0.731]
Evaluating on cpu: 100%|██████████| 201/201 [00:00<00:00, 1.78ktriple/s]
INFO:pykeen.evaluation.evaluator:Evaluatio

In [None]:
# Flatten the evaluation metrics once and reuse the dict for every lookup,
# instead of rebuilding it for each printed line.
metrics_RotatE = result_RotatE.metric_results.to_flat_dict()

# The label printed as "MRR" is read from the inverse-harmonic-mean-rank entry.
print(f"MRR: {metrics_RotatE['both.realistic.inverse_harmonic_mean_rank']}")
for k in (1, 3, 5, 10):
    hits_key = f"both.realistic.hits_at_{k}"
    print(f"Hits@{k} : {metrics_RotatE[hits_key]}")

MRR: 0.5127915143966675
Hits@1 : 0.29850746268656714
Hits@3 : 0.6442786069651741
Hits@5 : 0.7910447761194029
Hits@10 : 0.9601990049751243


## ComplEx

In [6]:
# Train and evaluate ComplEx on the Nations dataset. Only the epoch budget and
# the random seed are set explicitly; every other pipeline option is left at
# its default.
result_ComplEx = pipeline(
    model=ComplEx,
    dataset='nations',
    training_kwargs=dict(num_epochs=100),
    random_seed=1_603_073_093,
)

INFO:pykeen.datasets.utils:Loading cached preprocessed dataset from file:///C:/Users/%D0%9E%D0%BA%D1%81%D0%B0%D0%BD%D0%B0/.data/pykeen/datasets/nations/cache/47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
INFO:pykeen.triples.triples_factory:Loading from file:///C:/Users/%D0%9E%D0%BA%D1%81%D0%B0%D0%BD%D0%B0/.data/pykeen/datasets/nations/cache/47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM/training
INFO:pykeen.triples.triples_factory:Loading from file:///C:/Users/%D0%9E%D0%BA%D1%81%D0%B0%D0%BD%D0%B0/.data/pykeen/datasets/nations/cache/47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM/testing
INFO:pykeen.triples.triples_factory:Loading from file:///C:/Users/%D0%9E%D0%BA%D1%81%D0%B0%D0%BD%D0%B0/.data/pykeen/datasets/nations/cache/47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM/validation
INFO:pykeen.pipeline.api:Using device: None
Training epochs on cpu: 100%|██████████| 100/100 [00:35<00:00,  2.85epoch/s, loss=6.33, prev_loss=6.3]
Evaluating on cpu: 100%|██████████| 201/201 [00:00<00:00, 1.44ktriple/s]
INFO:pykeen.evaluation.evaluator:Evaluation t

In [None]:
# Flatten the evaluation metrics once and reuse the dict for every lookup,
# instead of rebuilding it for each printed line.
metrics_ComplEx = result_ComplEx.metric_results.to_flat_dict()

# The label printed as "MRR" is read from the inverse-harmonic-mean-rank entry.
print(f"MRR: {metrics_ComplEx['both.realistic.inverse_harmonic_mean_rank']}")
for k in (1, 3, 5, 10):
    hits_key = f"both.realistic.hits_at_{k}"
    print(f"Hits@{k} : {metrics_ComplEx[hits_key]}")

MRR: 0.41185036301612854
Hits@1 : 0.20149253731343283
Hits@3 : 0.48507462686567165
Hits@5 : 0.7139303482587065
Hits@10 : 0.9427860696517413


## DistMult

In [7]:
# Train and evaluate DistMult on the Nations dataset. Only the epoch budget
# and the random seed are set explicitly; every other pipeline option is left
# at its default.
result_DistMult = pipeline(
    model=DistMult,
    dataset='nations',
    training_kwargs=dict(num_epochs=100),
    random_seed=1_603_073_093,
)

INFO:pykeen.datasets.utils:Loading cached preprocessed dataset from file:///C:/Users/%D0%9E%D0%BA%D1%81%D0%B0%D0%BD%D0%B0/.data/pykeen/datasets/nations/cache/47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
INFO:pykeen.triples.triples_factory:Loading from file:///C:/Users/%D0%9E%D0%BA%D1%81%D0%B0%D0%BD%D0%B0/.data/pykeen/datasets/nations/cache/47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM/training
INFO:pykeen.triples.triples_factory:Loading from file:///C:/Users/%D0%9E%D0%BA%D1%81%D0%B0%D0%BD%D0%B0/.data/pykeen/datasets/nations/cache/47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM/testing
INFO:pykeen.triples.triples_factory:Loading from file:///C:/Users/%D0%9E%D0%BA%D1%81%D0%B0%D0%BD%D0%B0/.data/pykeen/datasets/nations/cache/47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM/validation
INFO:pykeen.pipeline.api:Using device: None
Training epochs on cpu: 100%|██████████| 100/100 [00:22<00:00,  4.46epoch/s, loss=0.96, prev_loss=0.961]
Evaluating on cpu: 100%|██████████| 201/201 [00:00<00:00, 1.99ktriple/s]
INFO:pykeen.evaluation.evaluator:Evaluation

In [None]:
# Flatten the evaluation metrics once and reuse the dict for every lookup,
# instead of rebuilding it for each printed line.
metrics_DistMult = result_DistMult.metric_results.to_flat_dict()

# The label printed as "MRR" is read from the inverse-harmonic-mean-rank entry.
print(f"MRR: {metrics_DistMult['both.realistic.inverse_harmonic_mean_rank']}")
for k in (1, 3, 5, 10):
    hits_key = f"both.realistic.hits_at_{k}"
    print(f"Hits@{k} : {metrics_DistMult[hits_key]}")

MRR: 0.6158139705657959
Hits@1 : 0.4651741293532338
Hits@3 : 0.6940298507462687
Hits@5 : 0.8208955223880597
Hits@10 : 0.9800995024875622


## TuckER

In [None]:
# Train and evaluate TuckER on the Nations dataset. Only the epoch budget and
# the random seed are set explicitly; every other pipeline option is left at
# its default.
result_TuckER = pipeline(
    model=TuckER,
    dataset='nations',
    training_kwargs=dict(num_epochs=100),
    random_seed=1_603_073_093,
)

In [6]:
# Flatten the evaluation metrics once and reuse the dict for every lookup,
# instead of rebuilding it for each printed line.
metrics_TuckER = result_TuckER.metric_results.to_flat_dict()

# The label printed as "MRR" is read from the inverse-harmonic-mean-rank entry.
print(f"MRR: {metrics_TuckER['both.realistic.inverse_harmonic_mean_rank']}")
for k in (1, 3, 5, 10):
    hits_key = f"both.realistic.hits_at_{k}"
    print(f"Hits@{k} : {metrics_TuckER[hits_key]}")

MRR: 0.4602665603160858
Hits@1 : 0.26119402985074625
Hits@3 : 0.5298507462686567
Hits@5 : 0.7164179104477612
Hits@10 : 0.9552238805970149


## KGCModel

In [7]:
def kgcmodel_interaction(
    h: FloatTensor,
    r: FloatTensor,
    t: FloatTensor,
    p: Union[int, str] = 2,
    power_norm: bool = False,
) -> FloatTensor:
    """Score triples as the product of a multiplicative and a translational term.

    The returned value is::

        tensor_product(h, r, t).sum(dim=-1) * negative_norm_of_sum(h, r, -t)

    i.e. the three-way product of ``h``, ``r`` and ``t`` reduced over the last
    dimension, multiplied by the (negated) norm of ``h + r - t``.

    :param h: head representations (presumably embedding dimension last,
        since the reduction runs over ``dim=-1`` -- confirm against callers)
    :param r: relation representations, same layout assumption as ``h``
    :param t: tail representations, same layout assumption as ``h``
    :param p: which norm to use; forwarded unchanged to ``negative_norm_of_sum``
    :param power_norm: forwarded unchanged to ``negative_norm_of_sum``
    :return: the element-wise product of the two partial scores
    """
    # Multiplicative part: three-way product summed over the last dimension.
    multiplicative_score = tensor_product(h, r, t).sum(dim=-1)
    # Translational part: the tail is negated so the helper sees h + r - t.
    translational_score = negative_norm_of_sum(h, r, -t, p=p, power_norm=power_norm)
    return multiplicative_score * translational_score

class KGCModelInteraction(NormBasedInteraction[FloatTensor, FloatTensor, FloatTensor]):
    """Norm-based interaction module whose scoring function is :func:`kgcmodel_interaction`."""

    # The functional form evaluated by this interaction.
    func = kgcmodel_interaction


class KGCModel(ERModel):
    """Entity-relation model that scores triples with :class:`KGCModelInteraction`.

    Entities and relations each get a single :class:`Embedding` of size
    ``embedding_dim``. The regularizer is attached to the relation
    representations only.
    """

    # Default hyper-parameter optimization search space for this model.
    hpo_default: ClassVar[Mapping[str, Any]] = {
        "embedding_dim": DEFAULT_EMBEDDING_HPO_EMBEDDING_DIM_RANGE,
        "scoring_fct_norm": {"type": int, "low": 1, "high": 2},
    }

    def __init__(
        self,
        *,
        embedding_dim: int = 50,
        scoring_fct_norm: int = 1,
        entity_initializer: Hint[Initializer] = xavier_uniform_,
        entity_constrainer: Hint[Constrainer] = functional.normalize,
        relation_initializer: Hint[Initializer] = xavier_uniform_norm_,
        relation_constrainer: Hint[Constrainer] = None,
        regularizer: HintOrType[Regularizer] = LpRegularizer,
        regularizer_kwargs: OptionalKwargs = None,
        **kwargs,
    ) -> None:
        """Initialize the model.

        :param embedding_dim: dimension shared by entity and relation embeddings
        :param scoring_fct_norm: passed to the interaction as its ``p`` argument
        :param entity_initializer: initializer for the entity embeddings
        :param entity_constrainer: constrainer for the entity embeddings
        :param relation_initializer: initializer for the relation embeddings
        :param relation_constrainer: constrainer for the relation embeddings
        :param regularizer: regularizer applied to the relation embeddings
        :param regularizer_kwargs: keyword arguments for the regularizer; when
            omitted together with the default ``LpRegularizer``, DistMult's
            default regularizer settings are used
        :param kwargs: remaining keyword arguments forwarded to :class:`ERModel`
        """
        # Fall back to DistMult's regularizer settings only when the caller
        # kept the default regularizer and supplied no settings of their own.
        uses_default_regularizer = regularizer is LpRegularizer
        if uses_default_regularizer and regularizer_kwargs is None:
            regularizer_kwargs = DistMult.regularizer_default_kwargs

        entity_kwargs = {
            "embedding_dim": embedding_dim,
            "initializer": entity_initializer,
            "constrainer": entity_constrainer,
        }
        relation_kwargs = {
            "embedding_dim": embedding_dim,
            "initializer": relation_initializer,
            "constrainer": relation_constrainer,
            "regularizer": regularizer,
            "regularizer_kwargs": regularizer_kwargs,
        }

        super().__init__(
            interaction=KGCModelInteraction,
            interaction_kwargs={"p": scoring_fct_norm},
            entity_representations=Embedding,
            entity_representations_kwargs=entity_kwargs,
            relation_representations=Embedding,
            relation_representations_kwargs=relation_kwargs,
            **kwargs,
        )

In [8]:
# Train and evaluate the custom KGCModel on the Nations dataset using the
# sLCWA training loop, the Adam optimizer, a margin ranking loss with margin 1,
# and rank-based evaluation.
result_KGCModel = pipeline(
    model=KGCModel,
    model_kwargs=dict(embedding_dim=50),
    dataset='Nations',
    optimizer='Adam',
    loss='marginranking',
    loss_kwargs={'margin': 1},
    training_kwargs=dict(num_epochs=100),
    training_loop='sLCWA',
    evaluator=RankBasedEvaluator,
    random_seed=1_603_073_093,
)

INFO:pykeen.datasets.utils:Loading cached preprocessed dataset from file:///C:/Users/%D0%9E%D0%BA%D1%81%D0%B0%D0%BD%D0%B0/.data/pykeen/datasets/nations/cache/47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
INFO:pykeen.triples.triples_factory:Loading from file:///C:/Users/%D0%9E%D0%BA%D1%81%D0%B0%D0%BD%D0%B0/.data/pykeen/datasets/nations/cache/47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM/training


INFO:pykeen.triples.triples_factory:Loading from file:///C:/Users/%D0%9E%D0%BA%D1%81%D0%B0%D0%BD%D0%B0/.data/pykeen/datasets/nations/cache/47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM/testing
INFO:pykeen.triples.triples_factory:Loading from file:///C:/Users/%D0%9E%D0%BA%D1%81%D0%B0%D0%BD%D0%B0/.data/pykeen/datasets/nations/cache/47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM/validation
INFO:pykeen.pipeline.api:Using device: None
Training epochs on cpu: 100%|██████████| 100/100 [00:20<00:00,  4.89epoch/s, loss=0.623, prev_loss=0.627]
Evaluating on cpu: 100%|██████████| 201/201 [00:00<00:00, 1.81ktriple/s]
INFO:pykeen.evaluation.evaluator:Evaluation took 0.15s seconds


In [9]:
# Flatten the evaluation metrics once and reuse the dict for every lookup,
# instead of rebuilding it for each printed line.
metrics_KGCModel = result_KGCModel.metric_results.to_flat_dict()

# The label printed as "MRR" is read from the inverse-harmonic-mean-rank entry.
print(f"MRR: {metrics_KGCModel['both.realistic.inverse_harmonic_mean_rank']}")
for k in (1, 3, 5, 10):
    hits_key = f"both.realistic.hits_at_{k}"
    print(f"Hits@{k} : {metrics_KGCModel[hits_key]}")

MRR: 0.6257765293121338
Hits@1 : 0.4651741293532338
Hits@3 : 0.7189054726368159
Hits@5 : 0.8532338308457711
Hits@10 : 0.972636815920398
