# **Imports**

In [1]:
import os
import sys
import tqdm
from pathlib import Path
from tqdm.notebook import trange, tqdm
from warnings import filterwarnings

# Silence every warning category for the rest of the session so library
# deprecation chatter does not clutter the cell outputs.
filterwarnings(action="ignore")

# PATH is the kernel's working directory; PROJECT is its immediate parent,
# which the later cells use as the root for the data folder.
PATH = os.getcwd()
PROJECT = str(Path(PATH).parents[0])

In [5]:
import numpy as np
import pandas as pd
import ampligraph

# Display the installed AmpliGraph version so the environment behind the
# recorded outputs is on record.
ampligraph.__version__

'1.4.0'

In [3]:
import requests
from ampligraph.datasets import load_from_csv

# Source of the Game of Thrones triples and the local path they are cached at.
url = 'https://ampligraph.s3-eu-west-1.amazonaws.com/datasets/GoT.csv'
file = f'{PROJECT}/data/GoT.csv'

# Create <PROJECT>/data when it is missing so the write below cannot fail on a
# fresh checkout.
os.makedirs(os.path.dirname(file), exist_ok=True)

# Fail loudly on an HTTP error instead of silently saving an error page as the
# dataset, and bound the wait so a stalled connection cannot hang the notebook.
response = requests.get(url, timeout=60)
response.raise_for_status()

# The context manager guarantees the file is flushed and closed before it is
# read back by load_from_csv.
with open(file, 'wb') as csv_file:
    csv_file.write(response.content)

# Pass directory and file name separately rather than relying on an absolute
# file name overriding a dummy '.' directory.
X = load_from_csv(os.path.dirname(file), os.path.basename(file), sep=',')

# Preview the first five (subject, predicate, object) rows.
X[:5, ]

array([['Smithyton', 'SEAT_OF', 'House Shermer of Smithyton'],
       ['House Mormont of Bear Island', 'LED_BY', 'Maege Mormont'],
       ['Margaery Tyrell', 'SPOUSE', 'Joffrey Baratheon'],
       ['Maron Nymeros Martell', 'ALLIED_WITH',
        'House Nymeros Martell of Sunspear'],
       ['House Gargalen of Salt Shore', 'IN_REGION', 'Dorne']],
      dtype=object)

In [6]:
# Vocabulary of the graph: every distinct label found in the subject/object
# columns (0 and 2), and every distinct label in the predicate column (1).
# np.unique flattens its input, so selecting both columns at once yields the
# same sorted set as concatenating them first.
entities = np.unique(X[:, [0, 2]])
relations = np.unique(X[:, 1])
print(relations, entities)

['ALLIED_WITH' 'BRANCH_OF' 'FOUNDED_BY' 'HEIR_TO' 'IN_REGION' 'LED_BY'
 'PARENT_OF' 'SEAT_OF' 'SPOUSE' 'SWORN_TO'] ['Abelar Hightower' 'Acorn Hall' 'Addam Frey' ... 'the Antlers' 'the Paps'
 'unnamed tower']


In [7]:
from ampligraph.evaluation import train_test_split_no_unseen 

# Hold out 100 triples for evaluation; the remainder becomes the training set.
# NOTE(review): going by the function's name, the held-out triples should only
# mention entities/relations that also occur in the training set — confirm
# against the AmpliGraph docs.
X_train, X_test = train_test_split_no_unseen(X, test_size=100) 
# Report the shape of each split as a quick sanity check.
print('Train set size: ', X_train.shape)
print('Test set size: ', X_test.shape)

Train set size:  (3075, 3)
Test set size:  (100, 3)


In [8]:
from ampligraph.latent_features import ComplEx

In [9]:
# Configure (but do not yet train) a ComplEx knowledge-graph embedding model.
# NOTE(review): the per-argument notes below follow AmpliGraph's documented
# parameter meanings — confirm against the installed version's docs.
model = ComplEx(batches_count=100,  # training set is processed in 100 batches per epoch
                seed=0,  # fixed seed so repeated runs are comparable
                epochs=200,  # number of training epochs
                k=150,  # embedding size
                eta=5,  # negatives generated per positive triple
                optimizer='adam', 
                optimizer_params={'lr':1e-3},  # optimizer learning rate
                loss='multiclass_nll', 
                regularizer='LP', 
                regularizer_params={'p':3, 'lambda':1e-5},  # p=3 norm penalty weighted by lambda
                verbose=True)  # show progress output while training

In [10]:
import tensorflow as tf

# Every known triple (train and test alike). The evaluation cell passes this as
# `filter_triples`.
positives_filter = X

# Reduce TensorFlow log output to errors only. `tf.logging` exists only in
# TensorFlow 1.x and raises AttributeError on 2.x; the `tf.compat.v1` alias is
# available in late 1.x releases as well as 2.x.
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

# Train on the training split only, for the full configured number of epochs
# (early stopping disabled).
model.fit(X_train, early_stopping=False)

Average ComplEx Loss:   0.018132: 100%|██████████| 200/200 [01:30<00:00,  2.21epoch/s]


In [11]:
from ampligraph.latent_features import save_model, restore_model
# Persist the trained model next to the notebook. The path is relative to the
# current working directory, unlike the dataset, which is stored under PROJECT.
# NOTE(review): restore_model is imported here but not used in the visible cells.
save_model(model, './best_model.pkl')

In [12]:
from ampligraph.evaluation import evaluate_performance
from ampligraph.evaluation import mr_score, mrr_score, hits_at_n_score

# Rank every held-out triple against its corruptions.
ranks = evaluate_performance(
    X_test,
    model=model,
    filter_triples=positives_filter,  # known triples to exclude from the corruptions
    use_default_protocol=True,        # corrupt subj and obj separately while evaluating
    verbose=True,
)

# Compute all summary metrics first, then report them in the usual order.
mrr = mrr_score(ranks)
hits_10 = hits_at_n_score(ranks, n=10)
hits_3 = hits_at_n_score(ranks, n=3)
hits_1 = hits_at_n_score(ranks, n=1)

print("MRR: %.2f" % (mrr))
print("Hits@10: %.2f" % (hits_10))
print("Hits@3: %.2f" % (hits_3))
print("Hits@1: %.2f" % (hits_1))



100%|██████████| 100/100 [00:00<00:00, 158.48it/s]


MRR: 0.44
Hits@10: 0.57
Hits@3: 0.46
Hits@1: 0.36


In [14]:
from scipy.special import expit

# Hand-written candidate statements to be ranked and scored by the trained model.
X_unseen = np.array([
    ['Jorah Mormont', 'SPOUSE', 'Daenerys Targaryen'],
    ['Tyrion Lannister', 'SPOUSE', 'Missandei'],
    ["King's Landing", 'SEAT_OF', 'House Lannister of Casterly Rock'],
    ['Sansa Stark', 'SPOUSE', 'Petyr Baelish'],
    ['Daenerys Targaryen', 'SPOUSE', 'Jon Snow'],
    ['Daenerys Targaryen', 'SPOUSE', 'Craster'],
    ['House Stark of Winterfell', 'IN_REGION', 'The North'],
    ['House Stark of Winterfell', 'IN_REGION', 'Dorne'],
    ['House Tyrell of Highgarden', 'IN_REGION', 'Beyond the Wall'],
    ['Brandon Stark', 'ALLIED_WITH', 'House Stark of Winterfell'],
    ['Brandon Stark', 'ALLIED_WITH', 'House Lannister of Casterly Rock'],
    ['Rhaegar Targaryen', 'PARENT_OF', 'Jon Snow'],
    ['House Hutcheson', 'SWORN_TO', 'House Tyrell of Highgarden'],
    ['Daenerys Targaryen', 'ALLIED_WITH', 'House Stark of Winterfell'],
    ['Daenerys Targaryen', 'ALLIED_WITH', 'House Lannister of Casterly Rock'],
    ['Jaime Lannister', 'PARENT_OF', 'Myrcella Baratheon'],
    ['Robert I Baratheon', 'PARENT_OF', 'Myrcella Baratheon'],
    ['Cersei Lannister', 'PARENT_OF', 'Myrcella Baratheon'],
    ['Cersei Lannister', 'PARENT_OF', 'Brandon Stark'],
    ["Tywin Lannister", 'PARENT_OF', 'Jaime Lannister'],
    ["Missandei", 'SPOUSE', 'Grey Worm'],
    ["Brienne of Tarth", 'SPOUSE', 'Jaime Lannister'],
])

# Filter set: all known triples plus the candidates themselves, de-duplicated
# by passing the rows through a set of tuples.
unseen_filter = np.array(
    list(set(map(tuple, np.vstack((positives_filter, X_unseen)))))
)

# With the default protocol switched off and corrupt_side='s+o', each candidate
# gets one rank (see the scalar 'rank' column in the table below), unlike the
# default protocol, which evaluates subject and object corruptions separately.
ranks_unseen = evaluate_performance(
    X_unseen,
    model=model,
    filter_triples=unseen_filter,
    corrupt_side='s+o',
    use_default_protocol=False,
    verbose=True,
)

# Raw model scores, then squashed into (0, 1) with the logistic sigmoid.
# NOTE(review): these are not calibrated probabilities unless the model was
# calibrated elsewhere — read them as a rough confidence indicator.
scores = model.predict(X_unseen)
probs = expit(scores)

# One row per candidate statement, best (lowest) rank first.
pd.DataFrame(
    list(zip(map(' '.join, X_unseen),
             ranks_unseen,
             np.squeeze(scores),
             np.squeeze(probs))),
    columns=['statement', 'rank', 'score', 'prob'],
).sort_values("rank")

100%|██████████| 22/22 [00:00<00:00, 105.79it/s]


Unnamed: 0,statement,rank,score,prob
9,Brandon Stark ALLIED_WITH House Stark of Winte...,1,6.629964,0.998681
12,House Hutcheson SWORN_TO House Tyrell of Highg...,4,9.970954,0.999953
6,House Stark of Winterfell IN_REGION The North,12,2.354563,0.913296
16,Robert I Baratheon PARENT_OF Myrcella Baratheon,18,1.951342,0.875593
13,Daenerys Targaryen ALLIED_WITH House Stark of ...,63,2.374156,0.914835
20,Missandei SPOUSE Grey Worm,65,3.164443,0.959474
19,Tywin Lannister PARENT_OF Jaime Lannister,75,1.526644,0.821515
3,Sansa Stark SPOUSE Petyr Baelish,109,1.889828,0.868736
14,Daenerys Targaryen ALLIED_WITH House Lannister...,228,1.385445,0.799864
2,King's Landing SEAT_OF House Lannister of Cast...,723,0.590551,0.643492
