In [None]:
import numpy as np
import pandas as pd
import ampligraph

In [None]:
import requests
from ampligraph.datasets import load_from_csv

# Load the knowledge graph from a comma-separated file that sits next to
# this notebook.
# When running locally the file may live under ../data instead:
# X = load_from_csv('.', '../data/knowledge-graph.csv', sep=',')
X = load_from_csv(directory_path='.', file_name='knowledge-graph.csv', sep=',')

# Preview the first five rows of the loaded array.
X[:5]

In [None]:
from ampligraph.evaluation import train_test_split_no_unseen

# Split configuration -- tune these instead of editing the calls below.
TEST_FRACTION = 20 / 100  # share of all rows in X held out as the test set
NUM_VALID = 100           # number of rows held out as the validation set
SPLIT_SEED = 0            # same seed for both splits (0 is what the first
                          # split already used; presumably also the library
                          # default -- TODO confirm against installed version)

num_test = int(len(X) * TEST_FRACTION)

# Step 1: carve the test set out of the full data. What is left over is the
# pool from which both the training and the validation sets are drawn.
train_valid_pool, test_set = train_test_split_no_unseen(
    X, test_size=num_test, seed=SPLIT_SEED, allow_duplication=False)

# Step 2: carve the validation set out of that pool; the remainder is the
# training set. The seed and duplication policy are passed explicitly so both
# splits are configured the same way.
train_set, valid_set = train_test_split_no_unseen(
    train_valid_pool, test_size=NUM_VALID, seed=SPLIT_SEED,
    allow_duplication=False)

# Downstream cells read the three splits from this dict.
data = {'train': train_set, 'valid': valid_set, 'test': test_set}

print('Train set size: ', data['train'].shape)
print('Test set size: ', data['test'].shape)
print('Valid set size: ', data['valid'].shape)

In [None]:
# Hyperparameter search for ComplEx.
# NOTE(review): because `max_combinations` is set and `lr` is a callable,
# this presumably samples configurations rather than sweeping the full grid
# -- confirm against the AmpliGraph docs for the installed version.

from ampligraph.latent_features import ComplEx
from ampligraph.latent_features import DistMult
from ampligraph.evaluation import select_best_model_ranking

model_class = ComplEx

# Search space: list values are candidate choices, scalars are fixed, and a
# callable is invoked to draw a value.
# NOTE(review): the sampled configurations may depend on the key order of
# this dict, so do not reorder entries casually.
param_grid = {
    "batches_count": [50],
    "seed": 0,
    "epochs": [10],
    "k": [100, 200],
    "eta": [5, 10, 15],
    "loss": ["pairwise", "nll"],
    "loss_params": {
        "margin": [2]
    },
    "embedding_model_params": {
    },
    "regularizer": ["LP", None],
    "regularizer_params": {
        "p": [1, 3],
        "lambda": [1e-4, 1e-5]
    },
    "optimizer": ["adagrad", "adam"],
    "optimizer_params": {
        "lr": lambda: np.random.uniform(0.0001, 0.01)
    },
    "verbose": False
}

# Bind the outcome to a name: the search is expensive, and an unnamed result
# is only reachable through Out[n], which is lost on kernel restart and is
# fragile to cell re-ordering.
complex_search_result = select_best_model_ranking(
    model_class, data['train'], data['valid'], data['test'],
    param_grid,
    max_combinations=10,
    use_filter=True,
    verbose=True,
    early_stopping=True)

# Still show the result as the cell output, exactly as before.
complex_search_result

In [None]:
from ampligraph.latent_features import DistMult
from ampligraph.evaluation import select_best_model_ranking

# Hyperparameter search for DistMult.
model_class = DistMult

# Search space: list values are candidate choices, scalars are fixed, and a
# callable is invoked to draw a value.
# NOTE(review): the sampled configurations may depend on the key order of
# this dict, so do not reorder entries casually.
param_grid = {
    "batches_count": [100],
    "seed": 0,
    "epochs": [100],
    "k": [100, 200],
    "eta": [5, 10, 15],
    "loss": ["pairwise", "nll"],
    "loss_params": {
        "margin": [5, 10]
    },
    "embedding_model_params": {
    },
    "regularizer": ["LP", None],
    "regularizer_params": {
        "p": [1, 3],
        "lambda": [1e-4, 1e-5]
    },
    "optimizer": ["adagrad", "adam"],
    "optimizer_params": {
        "lr": lambda: np.random.uniform(0.0001, 0.01)
    },
    "verbose": False
}

# Bind the outcome to a name: the search is expensive, and an unnamed result
# is only reachable through Out[n], which is lost on kernel restart and is
# fragile to cell re-ordering.
distmult_search_result = select_best_model_ranking(
    model_class, data['train'], data['valid'], data['test'],
    param_grid,
    max_combinations=10,
    use_filter=True,
    verbose=True,
    early_stopping=True)

# Still show the result as the cell output, exactly as before.
distmult_search_result

In [None]:
from ampligraph.latent_features import TransE
from ampligraph.evaluation import select_best_model_ranking


def _sample_learning_rate():
    """Draw one learning rate uniformly from the range 0.0001 to 0.01."""
    return np.random.uniform(0.0001, 0.01)


# Hyperparameter search for TransE.
model_class = TransE

# Search space; the entries are kept in their original order on purpose.
param_grid = {
    "batches_count": [100],
    "seed": 0,
    "epochs": [100],
    "k": [100, 200],
    "eta": [5, 10, 15],
    "loss": ["pairwise", "nll"],
    "loss_params": {"margin": [5, 10]},
    "embedding_model_params": {},
    "regularizer": ["LP", None],
    "regularizer_params": {"p": [1, 3], "lambda": [1e-4, 1e-5]},
    "optimizer": ["adagrad", "adam"],
    "optimizer_params": {"lr": _sample_learning_rate},
    "verbose": False,
}

# Options controlling the search itself (as opposed to the model).
search_options = dict(
    max_combinations=10,
    use_filter=True,
    verbose=True,
    early_stopping=True,
)

# The outcome is kept in `result2`, which the following cell displays.
result2 = select_best_model_ranking(
    model_class,
    data['train'],
    data['valid'],
    data['test'],
    param_grid,
    **search_options
)

In [None]:
# Display the outcome of the TransE search that the previous cell stored in
# `result2`.
result2