# Learn KG embeddings


In [None]:
from pykeen.triples import TriplesFactory
from pykeen.pipeline import pipeline
import torch
from pykeen.hpo import hpo_pipeline
import pandas as pd
import gc

## 0 | Setup

Install the pykeen library with `pip install pykeen` (you may need to restart Jupyter afterwards).

Install GPU drivers:

`sudo apt install ubuntu-drivers-common`

`ubuntu-drivers devices` 

`sudo apt install nvidia-driver-XXX`

(take the 'recommended' driver from the previous step, or if there's an out-of-date driver warning, try a more recent one)
                                    
Then restart if running on a virtual machine

In [None]:
# Sanity-check the GPU setup: report whether CUDA is usable and how many
# devices PyTorch can see.
cuda_available = torch.cuda.is_available()
print(cuda_available)
print(torch.cuda.device_count())

# The per-device queries raise when no CUDA device is usable, so only run
# them once availability has been confirmed.
if cuda_available:
    print(torch.cuda.current_device())
    print(torch.cuda.get_device_name(0))

In [None]:
# Build a triples factory from the transitive-triples file, without adding
# inverse triples.
transitive = TriplesFactory.from_path(
    'out/triples_transitive.txt',
    create_inverse_triples=False,
)

In [None]:
# Load the main triples file, without adding inverse triples.
tf = TriplesFactory.from_path(
    'out/triples_case1.txt',
    create_inverse_triples=False,
)

# Hold out 5% of the triples for testing. The random state is fixed so the
# train/test partition is reproducible across runs (the training pipeline is
# likewise seeded with 42).
training, testing = tf.split(ratios=[0.95, 0.05], random_state=42)

# Report the number of triples in each partition.
print(len(training.mapped_triples))
print(len(testing.mapped_triples))

## 1 | Train model

In [None]:

# Train a PyKEEN model on the train/test split and save the result under
# out/<model_name>.
#
# NOTE(review): MODELNAME is a placeholder that is not defined in the visible
# notebook; it must be set (presumably to a PyKEEN model name string, since it
# is concatenated into the output path) before this cell can run.
model_name = MODELNAME

# Single batch size shared by training and evaluation.
b = 4096

# Drop unreferenced Python objects and release cached CUDA memory before
# starting a new run.
gc.collect()
torch.cuda.empty_cache()

result = pipeline(
    training=training,
    testing=testing,
    model=model_name,
    training_loop='sLCWA',
    negative_sampler='basic',
    evaluator='RankBasedEvaluator',
    epochs=200,
    optimizer='Adam',
    # Keyword-argument collections must be dicts (the original used list
    # brackets here, which is a syntax error).
    training_kwargs={
        'batch_size': b,
    },
    # NOTE(review): assumes the selected model's loss accepts a `margin`
    # argument -- confirm for the model in use.
    loss_kwargs={
        'margin': 8,
    },
    optimizer_kwargs={
        'lr': 1e-3,
    },
    # First CUDA device, given as a device string.
    device='cuda:0',
    random_seed=42,
    evaluation_kwargs={
        # Also treat the training and transitive triples as known when
        # filtering during evaluation.
        'additional_filter_triples': [
            training.mapped_triples,
            transitive.mapped_triples,
        ],
        'batch_size': b,
    },
    evaluator_kwargs={
        'batch_size': b,
    },
    negative_sampler_kwargs={
        'num_negs_per_pos': 60,
    },
    dimensions=512,
)

result.save_to_directory(f'out/{model_name}')



## 2 | Plot results

In [None]:
# Plot the training losses recorded by the pipeline run.
result.plot_losses()

# Evaluation metrics to print for the trained model.
metrics = [
    'adjusted_mean_rank_index',
    'adjusted_mean_rank',
    'mean_rank',
    'mean_reciprocal_rank',
    'inverse_geometric_mean_rank',
    'hits@1',
    'hits@10',
]

# Look each metric up on the pipeline result (`result`, not `results`).
for m in metrics:
    print(m, result.get_metric(m))