In [1]:
import sys
sys.path.insert(0, '../')

In [2]:
import importlib
import denoise.algorithms.dsd.denoise as denoise
import denoise.algorithms.dsd.computations as computations
import denoise.graph.io as graph_io
from   denoise.graph import operations
from   denoise import scoring
from   denoise import predict
import scipy.spatial.distance as spatial

In [3]:
# Keyword settings forwarded as `params` to denoise.glide_predict_links in a later cell.
# NOTE(review): the meaning of loc/alpha/beta/delta is defined by that function, not here.
params = dict(loc="l3", alpha=2.0, beta=100, delta=0.01)

# Weight written into the adjacency matrix for every predicted edge that gets added.
new_weight = 0.05

In [5]:
# Parse the test graph, then derive the dense matrix and the embedding from its edge list.
edgelist, node_list, node_map = graph_io.parse_graph_file(
    "../data/test_graphs/All_Graphs/Graph1.txt"
)
A = operations.densify(edgelist)
X = computations.compute_embedding(edgelist)

# Protein labels: only the second value returned by the GO label parser is used.
_, proteins_to_go = graph_io.parse_go_label_file("../data/dream_files/human.golabels")
n, _ = A.shape

# Map row index -> GO labels, keeping only nodes that have an entry in the label file.
labels = {
    i: proteins_to_go[node_list[i]]
    for i in range(n)
    if node_list[i] in proteins_to_go
}
labels

{1: ['GO:0032991', 'GO:1902494', 'GO:1990234'],
 3: ['GO:0003677'],
 4: ['GO:0003677'],
 5: ['GO:0000785', 'GO:0000790'],
 6: ['GO:0060089',
  'GO:0038023',
  'GO:0004672',
  'GO:0140096',
  'GO:0016772',
  'GO:0016740',
  'GO:0004888',
  'GO:0016301',
  'GO:0016773'],
 7: ['GO:0048522',
  'GO:0048518',
  'GO:0023051',
  'GO:0010646',
  'GO:0048583',
  'GO:0009966',
  'GO:0031399',
  'GO:0001934',
  'GO:0032270',
  'GO:0051247',
  'GO:0032268',
  'GO:0009893',
  'GO:0051174',
  'GO:0010562',
  'GO:0042327',
  'GO:0042325',
  'GO:0019220',
  'GO:0051173',
  'GO:0045937',
  'GO:0001932',
  'GO:0051246',
  'GO:0010604',
  'GO:0031325',
  'GO:0031401',
  'GO:0048584',
  'GO:0023056',
  'GO:0050790',
  'GO:0065009',
  'GO:0045859',
  'GO:0071900',
  'GO:0051338',
  'GO:0043549'],
 8: ['GO:0019219',
  'GO:0009889',
  'GO:2001141',
  'GO:2000112',
  'GO:0051252',
  'GO:0006357',
  'GO:0031326',
  'GO:0006355',
  'GO:0010556',
  'GO:1903506',
  'GO:0010468',
  'GO:0048523',
  'GO:2000113',
  '

In [5]:
dsd_ranked_edgelist = denoise.predict_links(X)
A_added_edges_dsd = A.copy()
num_added, counter = 0, 0

# Adding 10% edges: the quota is loop-invariant, so compute it once.
target_new_edges = len(edgelist) * 0.10

# Walk the candidate list in order and fill in entries that are currently zero.
# The second condition stops cleanly when the candidates run out before the quota
# is met; without it the subscript below raises IndexError.
# NOTE(review): only A[u, v] is written. If A is meant to be a symmetric adjacency
# matrix, A[v, u] may need the same weight -- confirm against operations.densify.
while num_added < target_new_edges and counter < len(dsd_ranked_edgelist):
    (u, v), distance = dsd_ranked_edgelist[counter]
    if A_added_edges_dsd[u, v] == 0:
        A_added_edges_dsd[u, v] = new_weight
        num_added += 1
    counter += 1

In [6]:
glide_ranked_edgelist = denoise.glide_predict_links(edgelist, X, params = params)
A_added_edges_glide = A.copy()
num_added, counter = 0, 0

# Same 10% quota as the DSD cell; it is loop-invariant, so compute it once.
target_new_edges = len(edgelist) * 0.10

# Walk the candidate list in order and fill in entries that are currently zero.
# The second condition stops cleanly when the candidates run out before the quota
# is met; without it the subscript below raises IndexError.
# NOTE(review): only A[u, v] is written. If A is meant to be a symmetric adjacency
# matrix, A[v, u] may need the same weight -- confirm against operations.densify.
while num_added < target_new_edges and counter < len(glide_ranked_edgelist):
    u, v, score = glide_ranked_edgelist[counter]
    if A_added_edges_glide[u, v] == 0:
        A_added_edges_glide[u, v] = new_weight
        num_added += 1
    counter += 1

In [7]:
def create_predictor(similarity_matrix):
    """
        Build a predictor closure that runs predict.wmv on a fixed matrix.
        NOTE: a later cell in this notebook defines another `create_predictor`
        (the KNN variant); once that cell runs, this name is rebound.
        @param similarity_matrix : Passed unchanged as the first argument of predict.wmv.
        @return                  : A function predictor(training_labels) that returns
                                   whatever predict.wmv returns.
    """
    def predictor(training_labels):
        def tlabels_f(i):
            # Nodes missing from the training labels are reported as having no labels.
            if i in training_labels:
                return training_labels[i]
            return []
        return predict.wmv(similarity_matrix, tlabels_f)
    return predictor

# 5-fold cross-validation with the predictor built above, once per adjacency matrix:
# the original network, then the DSD-augmented one, then the GLIDE-augmented one.
A_scores, A_dsd_scores, A_glide_scores = [
    scoring.kfoldcv(5, labels, create_predictor(matrix))
    for matrix in (A, A_added_edges_dsd, A_added_edges_glide)
]

print(f"The scores for running 5-fold cv using WMV on the original network are: {A_scores}")
print(f"The scores for running 5-fold cv using WMV on the original network with edges added by DSD: {A_dsd_scores}")
print(f"The scores for running 5-fold cv using WMV on the original network with edges added by GLIDE: {A_glide_scores}")

The scores for running 5-fold cv using WMV on the original network are: [0.047619047619047616, 0.15873015873015872, 0.12698412698412698, 0.14285714285714285, 0.125]
The scores for running 5-fold cv using WMV on the original network with edges added by DSD: [0.047619047619047616, 0.15873015873015872, 0.12698412698412698, 0.14285714285714285, 0.125]
The scores for running 5-fold cv using WMV on the original network with edges added by GLIDE: [0.047619047619047616, 0.15873015873015872, 0.12698412698412698, 0.14285714285714285, 0.125]


In [8]:
# For each augmented matrix: build its degree matrix, then pass both to
# compute_X_normalized. The results feed the KNN predictors in a later cell.

# DSD-augmented network
D_added_edges_dsd = computations.compute_degree_mat(A_added_edges_dsd)
X_added_edges_dsd = computations.compute_X_normalized(
    A_added_edges_dsd,
    D_added_edges_dsd,
)

# GLIDE-augmented network
D_added_edges_glide = computations.compute_degree_mat(A_added_edges_glide)
X_added_edges_glide = computations.compute_X_normalized(
    A_added_edges_glide,
    D_added_edges_glide,
)

In [9]:
def create_predictor(X):
    """
        Build a predictor closure that runs predict.knn (k = 10) on the pairwise
        distances between the rows of X.
        NOTE: this rebinds `create_predictor`, which an earlier cell in this notebook
        defined as the WMV variant; after this cell runs the WMV version is shadowed.
        @param X : Observations, one per row, as accepted by scipy's pdist.
        @return  : A function predictor(training_labels) that returns whatever
                   predict.knn returns.
    """
    # Condensed pairwise distances expanded to a square matrix, computed once per predictor.
    pairwise = spatial.squareform(spatial.pdist(X))

    def predictor(training_labels):
        def tlabels_f(i):
            # Nodes missing from the training labels are reported as having no labels.
            if i in training_labels:
                return training_labels[i]
            return []
        return predict.knn(pairwise, tlabels_f, 10)

    return predictor

def create_svm_predictor(E):
    """
        Build a predictor closure that runs predict.svm on E using one label per node.
        Only the first label of each training node is used. Nodes whose label list is
        empty are treated as unlabeled instead of raising IndexError.
        @param E : Passed unchanged as the first argument of predict.svm.
        @return  : A function predictor(training_labels) that returns whatever
                   predict.svm returns.
    """
    def create_fb_dict(t_labels):
        """
            Build the forward and backward label maps.
            @param t_labels : Dict of node -> single label.
            @return         : (label -> integer id, integer id -> label); ids are
                              assigned in the order labels are first seen.
        """
        i_dict = {}
        l_dict = {}
        for i in t_labels:
            ll = t_labels[i]
            if ll not in i_dict:
                # The next free id is the number of labels seen so far.
                idx = len(i_dict)
                i_dict[ll] = idx
                l_dict[idx] = ll
        return i_dict, l_dict

    def predictor(training_labels):
        # Keep the first label of each node; skip nodes with an empty label list,
        # which would otherwise fail on the [0] subscript.
        t_labels = {
            i: training_labels[i][0]
            for i in training_labels
            if len(training_labels[i]) > 0
        }
        i_dict, l_dict = create_fb_dict(t_labels)

        def labels_f(x):
            # Integer class id for a training node, None for any other node.
            return i_dict[t_labels[x]] if x in t_labels else None

        def i_labels_f(y):
            # Inverse mapping: integer class id back to the original label.
            return l_dict[y]

        return predict.svm(E, labels_f, i_labels_f)
    return predictor
            
# 5-fold cross-validation with the KNN predictor, once per embedding:
# the original network, then the DSD-augmented one, then the GLIDE-augmented one.
X_scores, X_dsd_scores, X_glide_scores = [
    scoring.kfoldcv(5, labels, create_predictor(embedding))
    for embedding in (X, X_added_edges_dsd, X_added_edges_glide)
]


In [10]:
import numpy as np

# Mean cross-validation score per method, printed as a percentage.
print(
    "Majority Vote: ",
    f"WMV on raw network: {np.mean(A_scores) * 100}%",
    f"WMV on DSD added edges network: {np.mean(A_dsd_scores) * 100}%",
    f"WMV on GLIDE added edges network: {np.mean(A_glide_scores) * 100}%",
    sep="\n",
)

print(
    "\n10-Nearest Neighbors using DSD embedding:",
    f"KNN on raw network: {np.mean(X_scores) * 100}%",
    f"KNN on DSD added edges network: {np.mean(X_dsd_scores) * 100}%",
    f"KNN on GLIDE added edges network: {np.mean(X_glide_scores) * 100}%",
    sep="\n",
)

Majority Vote: 
WMV on raw network: 12.023809523809522%
WMV on DSD added edges network: 12.023809523809522%
WMV on GLIDE added edges network: 12.023809523809522%

10-Nearest Neighbors using DSD embedding:
KNN on raw network: 6.964285714285714%
KNN on DSD added edges network: 8.234126984126984%
KNN on GLIDE added edges network: 7.916666666666666%


In [None]:
# A possible function to create a new graph with updated edges
def create_new_graph_updated_weights(original_graph, new_edges):
    """
        Placeholder, not implemented yet: currently ignores its arguments and returns None.
        The intent is to take an original graph and insert new edges into it, giving
        the new edges weights that carry a meaningful value.
        @param original_graph : A list of the form [(p, q, wt), ...], where wt is the weight
                                of the edge (p, q).
        @param new_edges      : A list of the form [(p, q, sc), ...], where sc is the score
                                given by the link prediction algorithm.
        @return               : None (no graph is built yet).
    """
    return None
