In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import ampligraph

#  https://docs.ampligraph.org/en/1.3.1/tutorials/AmpliGraphBasicsTutorial.html
#  https://docs.ampligraph.org/en/1.3.1/examples.html

In [2]:
%store -r triples_arr
%store -r unseen_triples

In [3]:
X = triples_arr

#  Sanity check 
print(len(X))

29759


In [4]:
#  Sanity check 
print(len(unseen_triples))

1063


In [5]:
# Train, test, valid split 

from ampligraph.evaluation import train_test_split_no_unseen 
X_train_valid, X_test = train_test_split_no_unseen(X, test_size=(2400))
X_train, X_valid = train_test_split_no_unseen(X_train_valid, test_size=2400)

print('Train set size: ', X_train.shape)
print('Test set size: ', X_test.shape)
print('Valid set size: ', X_valid.shape)

Train set size:  (24959, 3)
Test set size:  (2400, 3)
Valid set size:  (2400, 3)


#  Train model

In [6]:
#  Define ComplEx model 

from ampligraph.latent_features import ComplEx

model = ComplEx(batches_count=100, 
                seed=555, 
                epochs=100, 
                k=200, 
                eta=15,
                loss='multiclass_nll', 
                embedding_model_params = {'negative_corruption_entities': 'all'},
                regularizer='LP', 
                regularizer_params={'p':1, 'lambda':1e-5}, 
                initializer= 'xavier', 
                initializer_params= {'uniform': False},
                optimizer= 'adam',
                optimizer_params = {'lr': 0.0005}, 
                verbose=True)


In [7]:
#  Fit model on the training data 

import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)

model.fit(X_train, early_stopping = False)

Average Loss:   0.104632: 100%|███████████████████████████████████████████████████| 100/100 [19:00<00:00, 11.40s/epoch]


In [8]:
#  save model 

from ampligraph.latent_features import save_model
save_model(model, './best_model.pkl')

In [9]:
from ampligraph.latent_features import restore_model
model = restore_model('./best_model.pkl')

In [10]:
#  Sanity check 

if model.is_fitted:
    print('The model is fit!')
else:
    print('The model is not fit! Did you skip a step?')

The model is fit!


# Evaluate Model

In [11]:
#  Evaluate performance on the test set 

positives_filter = np.concatenate((X_train, X_test, X_valid))

from ampligraph.evaluation import evaluate_performance
ranks = evaluate_performance(X_test, 
                             model=model, 
                             filter_triples=positives_filter,   # Corruption strategy filter defined above 
                             use_default_protocol=True, # corrupt subj and obj separately while evaluating
                             verbose=True)



100%|██████████████████████████████████████████████████████████████████████████████| 2400/2400 [03:44<00:00, 10.68it/s]


In [12]:
#  Evaluation scores

from ampligraph.evaluation import mr_score, mrr_score, hits_at_n_score

mr = mr_score(ranks)
print("MR: %.2f" % (mr))

mrr = mrr_score(ranks)
print("MRR: %.2f" % (mrr))

hits_10 = hits_at_n_score(ranks, n=10)
print("Hits@10: %.2f" % (hits_10))
hits_3 = hits_at_n_score(ranks, n=3)
print("Hits@3: %.2f" % (hits_3))
hits_1 = hits_at_n_score(ranks, n=1)
print("Hits@1: %.2f" % (hits_1))

MR: 332.93
MRR: 0.67
Hits@10: 0.74
Hits@3: 0.68
Hits@1: 0.63


# Predicting new links

In [13]:
#  unseen triples from pandas dataframe to numpy array 
X_unseen = np.array(unseen_triples)

In [14]:
unseen_filter = np.array(list({tuple(i) for i in np.vstack((positives_filter, X_unseen))}))

In [15]:
print(len(X_unseen))
print(len(unseen_filter)) 

1063
18891


In [16]:
#  calculate ranks 

ranks_unseen = evaluate_performance(
    X_unseen, 
    model=model, 
    filter_triples=unseen_filter,   # Corruption strategy filter defined above 
    corrupt_side = 's+o',
    use_default_protocol=False, # corrupt subj and obj separately while evaluating
    verbose=True
)

100%|██████████████████████████████████████████████████████████████████████████████| 1063/1063 [01:36<00:00, 10.98it/s]


In [17]:
#  calculate scores

scores = model.predict(X_unseen)

In [18]:
#  Calibrate model on the valid set 
#  https://docs.ampligraph.org/_/downloads/en/1.3.1/pdf/  See page 27 for explanation  

model.calibrate(X_valid, positive_base_rate=0.5)

probas_pos_neg = model.predict_proba(X_unseen) 

Calibration Loss:   0.290666: 100%|█████████████████████████████████████████████████| 50/50 [00:05<00:00,  8.66epoch/s]


In [19]:
#  Create DataFrame with ranks, scores and probabilities

rankings = pd.DataFrame(list(zip([' '.join(x) for x in X_unseen], 
                      ranks_unseen, 
                      np.squeeze(scores),
                      np.squeeze(probas_pos_neg))), 
             columns=['statement', 'rank', 'score', 'probas_pos_neg']).sort_values('probas_pos_neg', ascending=False)


In [20]:
#  inspect the scores 

pd.set_option('display.max_colwidth', 300)
pd.set_option('max_rows', 350)
rankings = rankings.reset_index(drop=True)
rankings

Unnamed: 0,statement,rank,score,probas_pos_neg
0,https://data.cooperationdatabank.org/vocab/prop/anonymityManipul_H1 https://data.cooperationdatabank.org/vocab/prop/hasPositiveEffectOn https://data.cooperationdatabank.org/id/dependentvariable/cooperation,1,7.823242,0.989597
1,https://data.cooperationdatabank.org/vocab/prop/sVOType_H1 https://data.cooperationdatabank.org/vocab/prop/hasPositiveEffectOn https://data.cooperationdatabank.org/id/dependentvariable/cooperation,1,7.789260,0.989320
2,https://data.cooperationdatabank.org/vocab/prop/emotion_H11 https://data.cooperationdatabank.org/vocab/prop/hasPositiveEffectOn https://data.cooperationdatabank.org/id/dependentvariable/withdrawals,1,7.646037,0.988068
3,https://data.cooperationdatabank.org/vocab/prop/positionInGame_H2 https://data.cooperationdatabank.org/vocab/prop/hasPositiveEffectOn https://data.cooperationdatabank.org/id/dependentvariable/withdrawals,1,7.612097,0.987750
4,https://data.cooperationdatabank.org/vocab/prop/iteratedStrategy_H6 https://data.cooperationdatabank.org/vocab/prop/hasPositiveEffectOn https://data.cooperationdatabank.org/id/dependentvariable/cooperation,1,7.601054,0.987645
...,...,...,...,...
1058,https://data.cooperationdatabank.org/vocab/prop/punishmentAgent_H3 https://data.cooperationdatabank.org/vocab/prop/hasNegativeEffectOn https://data.cooperationdatabank.org/id/dependentvariable/cooperation,1510,0.247662,0.201819
1059,https://data.cooperationdatabank.org/vocab/prop/punishmentAgent_H3 https://data.cooperationdatabank.org/vocab/prop/hasNegativeEffectOn https://data.cooperationdatabank.org/id/dependentvariable/withdrawals,2339,0.140320,0.188622
1060,https://data.cooperationdatabank.org/vocab/prop/partnerEmotion_H3 https://data.cooperationdatabank.org/vocab/prop/hasPositiveEffectOn https://data.cooperationdatabank.org/id/dependentvariable/cooperation,9372,-0.237753,0.147424
1061,https://data.cooperationdatabank.org/vocab/prop/communicationReal_H1 https://data.cooperationdatabank.org/vocab/prop/hasPositiveEffectOn https://data.cooperationdatabank.org/id/dependentvariable/withdrawals,10239,-0.260704,0.145181


In [21]:
#  store the rankings 

%store rankings

Stored 'rankings' (DataFrame)
