In [1]:
import sys
import imp
sys.path.insert(0, '../src')

In [4]:
from denoise.graph import io as graphio
imp.reload(graphio)

_, proteins_to_go = graphio.parse_go_label_file("../data/dream_files/human.golabels")
edgelist, node_list, node_map = graphio.parse_graph_file("../data/test_graphs/All_Graphs/Graph1.txt")

In [5]:
from denoise.graph import operations
from denoise.algorithms.dsd import computations
imp.reload(operations)
imp.reload(computations)

A = operations.densify(edgelist)
D = computations.compute_degree_mat(A)
X = computations.compute_X_normalized(A, D)

In [6]:
import denoise.algorithms.dsd.denoise as denoise
imp.reload(denoise)

all_edges = denoise.predict_links(X) 

A_added_edges = A.copy()
num_added, counter = 0, 0
while num_added < len(edgelist) * 0.10:
    (u, v), distance = all_edges[counter]
    if A_added_edges[u, v] == 0:
        A_added_edges[u, v] = 1
        num_added += 1
    counter += 1

In [None]:
imp.reload(denoise)

glide_edges = denoise.glide_predict_links(edgelist, X, params = {"alpha" : 1, "beta" : 1000, "delta" : 0.001, "loc" : "l3"})

A_added_edges_glide = A.copy()
num_added, counter = 0, 0
while num_added < len(edgelist) * 0.10:
    (u, v), distance = all_edges[counter]
    if A_added_edges_glide[u, v] == 0:
        A_added_edges_glide[u, v] = 1
        num_added += 1
    counter += 1

In [None]:
D_added_edges = computations.compute_degree_mat(A_added_edges)
X_added_edges = computations.compute_X_normalized(A_added_edges, D_added_edges)

D_glide = computations.compute_degree_mat(A_added_edges_glide)
X_glide = computations.compute_X_normalized(A_added_edges_glide, D_glide)

In [60]:
from denoise import scoring
from denoise import predict
imp.reload(predict)
imp.reload(scoring)

n, _ = A.shape
labels = {i: proteins_to_go[node_list[i]] for i in range(n)
          if node_list[i] in proteins_to_go}

def create_predictor(similarity_matrix):
    def predictor(training_labels):
        tlabels_f = lambda i: (training_labels[i] if i in training_labels else [])
        return predict.wmv(similarity_matrix, tlabels_f)
    return predictor

A_scores = scoring.kfoldcv(5, labels, create_predictor(A))
A_added_edges_scores = scoring.kfoldcv(5, labels, create_predictor(A_added_edges))
A_glide_scores = scoring.kfoldcv(5, labels, create_predictor(A_added_edges_glide))

print(f"The scores for running 5-fold cv using WMV on the original network are: {A_scores}")
print(f"The scores for running 5-fold cv using WMV on the original network with edges added by DSD: {A_added_edges_scores}")
print(f"The scores for running 5-fold cv using WMV on the original network with edges added by GLIDE: {A_glide_scores}")

The scores for running 5-fold cv using WMV on the original network are: [0.13961407491486946, 0.1373439273552781, 0.11350737797956867, 0.1021566401816118, 0.09761634506242906]
The scores for running 5-fold cv using WMV on the original network with edges added by DSD: [0.13961407491486946, 0.1373439273552781, 0.11350737797956867, 0.1021566401816118, 0.09761634506242906]
The scores for running 5-fold cv using WMV on the original network with edges added by GLIDE: [0.13961407491486946, 0.1373439273552781, 0.11350737797956867, 0.1021566401816118, 0.09761634506242906]


In [61]:
from denoise import scoring
from denoise import predict
import scipy.spatial.distance as spatial
imp.reload(predict)
imp.reload(scoring)

"""
def create_predictor(X):
    distances = spatial.squareform(spatial.pdist(X))
    def predictor(training_labels):
        tlabels_f = lambda i: (training_labels[i] if i in training_labels else [])
        return predict.knn(distances, tlabels_f, 10)
    return predictor* 100

"""

def create_predictor(E):
    def predictor(training_labels):
        return predict.perform_binary_svc(E, training_labels)
    return predictor * 100
X_scores             = scoring.kfoldcv(5, labels, create_predictor(X))
X_added_edges_scores = scoring.kfoldcv(5, labels, create_predictor(X_added_edges))
X_glide_scores       = scoring.kfoldcv(5, labels, create_predictor(X_glide))

In [66]:
import numpy as np

print("Majority Vote: ")
print(f"WMV on raw network: {np.mean(A_scores) * 100}%")
print(f"WMV on DSD added edges network: {np.mean(A_added_edges_scores) * 100}%")
print(f"WMV on GLIDE added edges network: {np.mean(A_glide_scores) * 100}%")

print("\n10-Nearest Neighbors using DSD embedding:")
print(f"KNN on raw network: {np.mean(X_scores) * 100}%")
print(f"KNN on DSD added edges network: {np.mean(X_added_edges_scores) * 100}%")
print(f"KNN on GLIDE added edges network: {np.mean(X_glide_scores) * 100}%")

Majority Vote: 
WMV on raw network: 11.804767309875142%
WMV on DSD added edges network: 11.804767309875142%
WMV on GLIDE added edges network: 11.804767309875142%

10-Nearest Neighbors using DSD embedding:
KNN on raw network: 12.553916004540294%
KNN on DSD added edges network: 12.57661748013621%
KNN on GLIDE added edges network: 12.57661748013621%
