### Defining Score Calculation Func.

In [1]:
import torch

def get_score(model, H, T):
    from graphvite.application.network import LinkPredictor
    model = LinkPredictor('LINE', model.solver.vertex_embeddings, model.solver.context_embeddings)
    model = model.cuda()
    tensorH = torch.as_tensor(H).cuda()
    tensorT = torch.as_tensor(T).cuda()
    return model(tensorH, tensorT)

In [2]:
def get_prob(model, h, t):
    
    import torch
    
    context_emb = torch.as_tensor(model.solver.context_embeddings).cuda()
    vertex_emb = torch.as_tensor(model.solver.vertex_embeddings).cuda()
    context_embt = context_emb.t()
    xxx = vertex_emb.mm(context_embt)
    w = vertex_emb[h].view(1, len(vertex_emb[h]))
    w_vec = w.mm(context_embt)
    min_val = torch.min(w_vec)
    max_val = torch.max(w_vec)
    w_vec = (w_vec-min_val)/(max_val-min_val)
    return w_vec[0][t].item()      

### Loading Trained Models

In [14]:
# Loading deepwalk and LINE embeddings.
import pickle

import os
os.chdir(r'/home/qxl/')

dim = 32
length = 40
method = 'line'
directed = False
emb = f'embedding/dim{dim}-len{length}/{"directed_" if directed else ""}{method}_full.pkl'
model = pickle.load(open(emb, 'rb'))

### Calculating Conditional Prob.

In [15]:
# Loading evaluating set.
H, T, Y = [], [], []
with open('data/test2.txt', 'r') as f:
    mapping = model.graph.name2id
    for line in f:
        h, t, y = line.split()
        H.append(mapping[h])
        T.append(mapping[t])
        Y.append(int(y))

In [16]:
# Geting inner products of corresponding entries.
score = get_score(model, H, T)
score = score.tolist()

In [None]:
# Computing conditional probabilities.
import math
sigmoid_vec = []
for i in range(len(score)):
    sigmoid_vec.append(1/(1 + math.exp(-score[i])))

### Setting Threshold and Obtaining Predictions

In [18]:
# Setting threshold and making predictions.
threshold = 0.1
prediction = []
for i in range(len(score)):
    output = 0
    if sigmoid_vec[i] > threshold:
        output = 1
    prediction.append(output)

### Model Evaluation

In [19]:
from sklearn.metrics import classification_report
report = classification_report(Y, prediction, zero_division=0, digits=4)

from sklearn.metrics import roc_curve, auc, roc_auc_score
false_positive_rate, true_positive_rate, thresholds = roc_curve(Y, prediction)
auc = auc(false_positive_rate, true_positive_rate)

report += ('auc = ' + str(auc))
print(report)

              precision    recall  f1-score   support

           0     0.9856    0.8525    0.9142   3677767
           1     0.8834    0.9890    0.9332   4155066

    accuracy                         0.9249   7832833
   macro avg     0.9345    0.9207    0.9237   7832833
weighted avg     0.9314    0.9249    0.9243   7832833
auc = 0.9207284009402161


### Saving Evaluation Results into txt

In [20]:
results_path = 'eval/results/' + method + '_sigmoid_dim32_len40.txt'
with open(results_path, 'w') as results_f:
    results_f.write(report)
results_f.close()

### Generating Sigmoid Prob. for Kaggle Test Set

In [47]:
H, T = [], []
mapping = model.graph.name2id
with open('data/test-public.txt', 'r') as f:
    next(f)
    for line in f:
        _, h, t = line.split()
        H.append(mapping[h])
        T.append(mapping[t])

In [49]:
score = get_score(model, H, T)
score = score.tolist()

import math
sigmoid_vec = []
for i in range(len(score)):
    sigmoid_vec.append(1/(1 + math.exp(-score[i])))

In [50]:
# Saving the output
out = f'output/sigmoid_deepwalk.csv'
with open(out, 'w') as o:
    o.write('Id,Predicted\n')
    for i, s in enumerate(sigmoid_vec):
        o.write(f'{i+1},{s}\n')
print('Write output to', out)

Write output to output/sigmoid_deepwalk.csv
