In [1]:
import os
import sys
import time

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import pykeen
import torch
from pykeen.pipeline import pipeline

In [2]:
# Benchmark and training objective shared by every pipeline run below.
dataset = 'CoDExSmall'
loss = 'MarginRankingLoss'

# Training budget and embedding size passed to each model.
num_epochs, embedding_dim = 10, 50

# NOTE(review): not referenced by any cell visible in this notebook.
lbda = 0.9

In [3]:
from pykeen.models import StructuredEmbedding

def find(t, v):
    """Locate positions where elements of ``t`` equal elements of ``v``.

    A trailing axis is appended to ``t`` so the comparison broadcasts against
    ``v``; the indices of all matching positions are returned, one per row.
    For 1-D ``t`` and ``v`` each row is ``(i, j)`` with ``t[i] == v[j]``.
    """
    matches = t.unsqueeze(-1) == v
    return matches.nonzero()

class ModifiedSE(StructuredEmbedding):
    """Structured Embedding with an additional batch-level quadratic penalty.

    The per-triple score is the usual SE score
    ``-||R_h h - R_t t||_p`` minus the scalar ``x^T (B B^T) x``, where ``x``
    stacks the embeddings of all unique entities in the batch and ``B`` is a
    block matrix assembled from the left/right relation matrices of the batch
    triples (presumably a sheaf-Laplacian style term; the notebook labels this
    model 'Sheaf SE').
    """

    def score_hrt(self, hrt_batch: torch.LongTensor) -> torch.FloatTensor:
        """Score a batch of (head, relation, tail) index triples.

        :param hrt_batch: integer tensor whose columns 0, 1, 2 hold the head,
            relation and tail indices of each triple.
        :return: tensor of shape ``(batch, 1)``; the SE score of each triple
            minus the same batch-wide scalar ``x^T (B B^T) x``.
        """
        dim = self.embedding_dim
        num_triples = hrt_batch.shape[0]

        # Unique entities of the batch together with, for every head and tail,
        # its position inside `nunq`. This replaces a per-triple
        # `(nunq == id).nonzero()` search (and the device sync it forced).
        nunq, positions = torch.unique(
            torch.cat((hrt_batch[:, 0], hrt_batch[:, 2]), dim=0),
            sorted=True,
            return_inverse=True,
        )
        positions = positions.tolist()  # single host transfer for the whole batch
        head_pos, tail_pos = positions[:num_triples], positions[num_triples:]

        # Get embeddings
        x = self.entity_embeddings(indices=nunq).view(-1, 1)
        h = self.entity_embeddings(indices=hrt_batch[:, 0]).view(-1, dim, 1)
        t = self.entity_embeddings(indices=hrt_batch[:, 2]).view(-1, dim, 1)
        rel_h = self.left_relation_embeddings(indices=hrt_batch[:, 1]).view(-1, dim, dim)
        rel_t = self.right_relation_embeddings(indices=hrt_batch[:, 1]).view(-1, dim, dim)

        # B has one (dim x dim) block per (unique entity, triple) pair: rel_h[i]
        # in the head's row block and -rel_t[i] in the tail's row block of
        # column block i; every other block stays zero.
        B = torch.zeros(
            (nunq.shape[0] * dim, num_triples * dim),
            dtype=rel_h.dtype,
            device=rel_h.device,
        )
        for i, (hix, tix) in enumerate(zip(head_pos, tail_pos)):
            cols = slice(i * dim, (i + 1) * dim)
            B[hix * dim:(hix + 1) * dim, cols] = rel_h[i]
            # Subtract instead of assign: when head == tail both blocks coincide
            # and the result must be rel_h[i] - rel_t[i], not just -rel_t[i].
            B[tix * dim:(tix + 1) * dim, cols] -= rel_t[i]

        L = B @ B.T

        # Project entities
        proj_h = rel_h @ h
        proj_t = rel_t @ t

        # Quadratic form x^T L x, a (1, 1) tensor shared by every triple.
        # NOTE(review): B.T @ x evaluates rel_h[i].T @ h_i - rel_t[i].T @ t_i
        # per triple, i.e. with transposed relation maps compared to the SE
        # term below - confirm this is intended.
        Lv = L @ x
        xLv = x.T @ Lv

        scores = -torch.norm(proj_h - proj_t, dim=1, p=self.scoring_fct_norm)
#         scores = -torch.norm(Lv, dim=0, p=2)
        return scores - xLv

In [6]:
# Train and evaluate the custom ModifiedSE model with the shared configuration.
result2 = pipeline(
    dataset=dataset,
    model=ModifiedSE,
    model_kwargs={'embedding_dim': embedding_dim},
    loss=loss,
    training_kwargs={'num_epochs': num_epochs, 'batch_size': 500},
    random_seed=1235,
    device='gpu',
#     regularizer='LpRegularizer'
)

# Keep a handle on the trained model and show its repr as the cell output.
model2 = result2.model
model2

INFO:pykeen.training.training_loop:Starting sub_batch_size search for training now...
INFO:pykeen.training.training_loop:Concluded search with sub_batch_size 125.


HBox(children=(HTML(value='Training epochs on cuda'), FloatProgress(value=0.0, max=10.0), HTML(value='')))

HBox(children=(HTML(value='Training batches on cuda'), FloatProgress(value=0.0, max=66.0), HTML(value='')))




KeyboardInterrupt: 

In [None]:
# Draw the loss curve recorded by the ModifiedSE pipeline run (result2)
# and render the figure.
result2.plot_losses()
plt.show()

In [None]:
# Baseline models trained on the same dataset, seed, loss, epoch budget and
# embedding size as the ModifiedSE run, so their results can be compared to it.
comp_models = ['StructuredEmbedding', 'TransE', 'RotatE', 'HolE']
comp_results = []
for comp_model in comp_models:
    print('Running {}'.format(comp_model))
    result = pipeline(
        dataset=dataset,
        model=comp_model,
        random_seed=1235,
        device='gpu',
        # batch_size=500 mirrors the ModifiedSE run; without it the baselines
        # train with the pipeline's default batch size and the loss curves /
        # metric differences are confounded by the differing batch size.
        training_kwargs=dict(num_epochs=num_epochs, batch_size=500),
        model_kwargs=dict(embedding_dim=embedding_dim),
        loss=loss,
    )
    comp_results.append(result)

In [None]:
# Overlay the recorded training losses of the custom model and every baseline.
plt.plot(np.arange(len(result2.losses)), result2.losses, label='Sheaf SE')
for comp_model, comp_result in zip(comp_models, comp_results):
    plt.plot(np.arange(len(comp_result.losses)), comp_result.losses, label=comp_model)

# Take the y-axis label from result2 rather than from `result`, which only
# exists as a loop variable leaked from the baseline-training cell.
plt.ylabel(str(result2.model.loss).replace('()', ''))
plt.xlabel('epoch')
plt.legend()
plt.show()

In [None]:
# Evaluation metrics of the ModifiedSE run in tabular form; a later cell adds
# a 'diff' column to this frame.
res_df = result2.metric_results.to_df()

In [None]:
# Position in comp_models / comp_results of the baseline to compare against
# (1 selects 'TransE' given the order of comp_models).
compto = 1

In [None]:
# Difference between the ModifiedSE metrics and those of the selected baseline
# (positive means ModifiedSE has the larger value).
res_df['diff'] = res_df['Value'].sub(
    comp_results[compto].metric_results.to_df()['Value']
)
res_df

In [None]:
# comp_results[0].model.score_all_triples()

In [None]:
# model2.score_all_triples()