# Compute diffusion scores in Python

In [1]:
import networkx as nx
import numpy as np
import logging
import scipy as sp
from math import pi, sqrt
import sys
import os

# Lower the root logger threshold to DEBUG and emit one test record so the
# cell output shows that logging is active.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
logging.debug("test")

# NOTE: '__file__' is a quoted string literal here, not the module attribute,
# so realpath() resolves it against the current working directory; dir_path is
# therefore the working directory at the time this cell runs.
dir_path = os.path.dirname(os.path.realpath('__file__'))

DEBUG:root:test


#### Import kernel functions from diffuPy

The kernel functions are imported from the package. Nevertheless, the functions' implementations are also in this notebook (with a final `_imp` in the function name).

In [18]:
from diffupy.kernels import commute_time_kernel, p_step_kernel, inverse_cosine_kernel, diffusion_kernel, regularised_laplacian_kernel

from diffupy.matrix import Matrix, LaplacianMatrix

from diffupy.utils import get_label_list_graph

from diffupy.diffuse_raw import diffuse_raw

from diffupy.diffuse import diffuse

from diffupy.validate_inputs import _validate_scores, _validate_graph, _validate_K


### Import example graph

In [19]:
# Load the example graph; label='id' keys the nodes by their GML `id`
# attribute instead of the default `label` attribute.
G = nx.read_gml(os.path.join(dir_path, '04_unit_testing', '_graph.gml'), label='id')

In [20]:
# Peek at the first (node, attributes) pair to check how the nodes were keyed.
list(G.nodes(data=True))[0]

(0, {'name': 'V1'})

### General functions

#### Labels mapping

In [21]:
def csv_labeled_matrix_to_matrix(path):
    """Load a labelled CSV file into a ``Matrix``.

    The first row of the file supplies the column labels and the first column
    supplies the row labels; every remaining cell is converted to ``float``.

    :param path: path of the CSV file to read.
    :return: ``Matrix`` built from the numeric cells, named after the file
        (base name without its extension), with the row and column labels
        taken from the file.
    :raises ValueError: if a data cell cannot be converted to ``float``.
    """
    # dtype=None lets genfromtxt keep the header cells as text instead of NaN.
    table = np.genfromtxt(path, dtype=None, delimiter=',')

    # Drop the header row and the label column, then convert in one step.
    values = table[1:, 1:].astype(float)

    # The name must be a plain string: str.split('.csv') returned a list
    # such as ['name', ''], which was passed on as the matrix name.
    name = os.path.splitext(os.path.basename(path))[0]

    return Matrix(values, name, table[1:, 0], table[0, 1:])

In [22]:
def run_score_method_test(method, G, input_scores, test_output_scores):
    """Diffuse ``input_scores`` with ``method`` and compare to the reference.

    :param method: name of the scoring method passed to ``diffuse``
        (e.g. ``'raw'``, ``'z'``, ``'ml'``, ``'gm'``).
    :param G: graph handed to ``diffuse`` as ``graph``.
    :param input_scores: input scores object accepted by ``diffuse``.
    :param test_output_scores: reference result; its ``.mat`` attribute is
        compared against the ``.mat`` of the computed result.
    :raises AssertionError: if the computed and reference matrices differ in
        shape or are not element-wise close.
    """
    computed = diffuse(input_scores, method, graph=G).mat
    expected = test_output_scores.mat

    logging.info(' %s  \n %s\n', 'Computed matrix', computed)
    logging.info(' %s  \n %s\n', 'Test matrix', expected)

    # Compare up to floating-point rounding. The tolerances are the defaults
    # of np.allclose; assert_allclose additionally reports the mismatching
    # elements, rejects shape mismatches instead of broadcasting them, and
    # is not stripped when Python runs with -O.
    np.testing.assert_allclose(
        computed,
        expected,
        rtol=1e-05,
        atol=1e-08,
        equal_nan=False,
        err_msg='Scores computed with method %r differ from the reference' % (method,),
    )
    logging.info(' Test %s passed', method)

#### Helpers

In [23]:
# In which format is the input? Tell apart vector, matrix or list of matrices
def which_format(x):
    """Classify the container format of an input scores object.

    :param x: candidate scores object.
    :return: ``"matrix"`` when ``x`` is a non-empty list or ``numpy.ndarray``
        whose first element is itself a list or ``numpy.ndarray``;
        ``"list"`` for any other list (including an empty one).
    :raises ValueError: if ``x`` is neither of the above; the message names
        the class of ``x``.
    """
    # TODO: the "vector" case is not implemented yet; original R condition:
    # if is.numeric(x) and is.null(dim(x)): return "vector"
    sequence_types = (list, np.ndarray)

    # Check the container type and emptiness before indexing, so that empty
    # or non-indexable inputs fall through to the explicit error below.
    if isinstance(x, sequence_types) and len(x) > 0 and isinstance(x[0], sequence_types):
        return "matrix"
    if isinstance(x, list):
        return "list"

    raise ValueError('Non-recognised input scores format, object of class:  %s' % x.__class__.__name__)

### Raw scores test

In [24]:
# Input scores used by the raw scores test below, read with Matrix.from_csv.
input_scores = Matrix.from_csv(os.path.join(dir_path, 'scores_test', 'input_methods_scores.csv'))


In [25]:
# Reference raw scores that the computed result is compared against.
output_scores = Matrix.from_csv(os.path.join(dir_path, 'scores_test', 'output_raw_scores.csv'))
# Prints the whole matrix (name and values) for visual inspection.
print(output_scores)


matrix output_raw_scores 
  [[0.04214556 0.42357706]
 [0.02803012 0.52029579]
 [0.07392846 0.4492091 ]
 [0.02882746 0.43188125]
 [0.02498394 0.4275005 ]
 [0.02694918 0.42234912]
 [0.04164558 0.49299045]
 [0.02889637 0.48562453]
 [0.03051561 0.45158231]
 [0.02342762 0.49923227]
 [0.02099228 0.46111093]
 [0.02743536 0.46415073]
 [0.03170256 0.52545556]
 [0.02845791 0.58202662]
 [0.02020737 0.33386854]
 [0.02814164 0.55441211]
 [0.02503623 0.44053046]
 [0.01942714 0.38636217]
 [0.02439709 0.50951685]
 [0.13784437 0.47691742]
 [0.02120247 0.53093393]
 [0.05397049 0.44490905]
 [0.03590104 0.61562383]
 [0.0431597  0.56393209]
 [0.01995905 0.34566662]
 [0.02123165 0.51726928]
 [0.01967457 0.55317294]
 [0.05469101 0.5860439 ]
 [0.02302591 0.44694767]
 [0.21329129 0.43418512]
 [0.04744166 0.60237241]
 [0.02109237 0.39848373]
 [0.01860675 0.36808301]
 [0.03242628 0.32585987]
 [0.01849678 0.39954062]
 [0.0276085  0.61560809]
 [0.01818489 0.35749063]
 [0.03632278 0.41405989]
 [0.02119981 0.433224

In [26]:
# Diffuse the input scores with the 'raw' method and assert that the result
# matches the reference matrix loaded above.
run_score_method_test('raw', G, input_scores, output_scores)


INFO:root:Scores validated.
INFO:root:Kernel not supplied. Computing regularised Laplacian kernel ...
INFO:root:Done
INFO:root:Kernel validated scores.
INFO:root:Scores matched.
INFO:root:Matrix product for raw scores preformed.
INFO:root: Computed matrix  
 [[0.04214556 0.42357706]
 [0.02803012 0.52029579]
 [0.07392846 0.4492091 ]
 [0.02882746 0.43188125]
 [0.02498394 0.4275005 ]
 [0.02694918 0.42234912]
 [0.04164558 0.49299045]
 [0.02889637 0.48562453]
 [0.03051561 0.45158231]
 [0.02342762 0.49923227]
 [0.02099228 0.46111093]
 [0.02743536 0.46415073]
 [0.03170256 0.52545556]
 [0.02845791 0.58202662]
 [0.02020737 0.33386854]
 [0.02814164 0.55441211]
 [0.02503623 0.44053046]
 [0.01942714 0.38636217]
 [0.02439709 0.50951685]
 [0.13784437 0.47691742]
 [0.02120247 0.53093393]
 [0.05397049 0.44490905]
 [0.03590104 0.61562383]
 [0.0431597  0.56393209]
 [0.01995905 0.34566662]
 [0.02123165 0.51726928]
 [0.01967457 0.55317294]
 [0.05469101 0.5860439 ]
 [0.02302591 0.44694767]
 [0.21329129 0.4

### z-scores test

In [27]:
# Diffuse the input scores with the 'z' method and compare against the stored
# reference. Paths are built with os.path.join, like the other cells, so they
# do not depend on the platform's path separator.
run_score_method_test(
    'z',
    G,
    Matrix.from_csv(os.path.join(dir_path, 'scores_test', 'input_methods_scores.csv')),
    Matrix.from_csv(os.path.join(dir_path, 'scores_test', 'output_z_scores.csv')),
)


INFO:root:Scores validated.
INFO:root:Kernel not supplied. Computing regularised Laplacian kernel ...
INFO:root:Done
INFO:root:Kernel validated scores.
INFO:root:Scores matched.
INFO:root:Matrix product for raw scores preformed.
INFO:root:Normalization z-scores.
INFO:root:Rowmeans and rowmeans2 computatated.
INFO:root: Computed matrix  
 [[ 5.72900503e-01 -9.08743827e-01]
 [-1.13346069e-01  7.91682217e-01]
 [ 4.08043369e+00 -9.76577930e-01]
 [-9.23887165e-02 -1.29458178e+00]
 [-4.09220982e-01 -1.46242738e+00]
 [-2.51410271e-01 -1.62217088e+00]
 [ 1.11370470e+00  4.24186904e-01]
 [-5.88803789e-02  1.02461207e-01]
 [ 3.71221934e-02 -6.98592924e-01]
 [-2.57795602e-01  2.57577470e-01]
 [-4.84654538e-01 -3.47018132e-01]
 [-1.25700004e-01 -2.65242390e-01]
 [ 1.33054511e-01  1.21294039e+00]
 [-7.37208835e-02  1.66540069e+00]
 [-2.17625149e-01 -1.10886277e+00]
 [-6.20089510e-02  8.47796885e-01]
 [-2.22217600e-01 -6.03327173e-01]
 [-3.76246194e-01 -1.13777156e+00]
 [-3.13109721e-01  5.63218455e

### ml scores test

In [28]:
# Diffuse the input scores with the 'ml' method and compare against the stored
# reference. Paths are built with os.path.join, like the other cells, so they
# do not depend on the platform's path separator.
run_score_method_test(
    'ml',
    G,
    Matrix.from_csv(os.path.join(dir_path, 'scores_test', 'input_methods_scores.csv')),
    Matrix.from_csv(os.path.join(dir_path, 'scores_test', 'output_ml_scores.csv')),
)


INFO:root:Scores validated.
INFO:root:Kernel not supplied. Computing regularised Laplacian kernel ...
INFO:root:Done
INFO:root:Kernel validated scores.
INFO:root:Scores matched.
INFO:root:Matrix product for raw scores preformed.
INFO:root: Computed matrix  
 [[-9.15708872e-01 -1.52845889e-01]
 [-9.43939757e-01  4.05915793e-02]
 [-8.52143072e-01 -1.01581802e-01]
 [-9.42345082e-01 -1.36237507e-01]
 [-9.50032125e-01 -1.44998992e-01]
 [-9.46101639e-01 -1.55301757e-01]
 [-9.16708841e-01 -1.40191058e-02]
 [-9.42207251e-01 -2.87509365e-02]
 [-9.38968777e-01 -9.68353813e-02]
 [-9.53144753e-01 -1.53545593e-03]
 [-9.58015441e-01 -7.77781409e-02]
 [-9.45129274e-01 -7.16985446e-02]
 [-9.36594888e-01  5.09111192e-02]
 [-9.43084178e-01  1.64053242e-01]
 [-9.59585254e-01 -3.32262922e-01]
 [-9.43716725e-01  1.08824218e-01]
 [-9.49927545e-01 -1.18939083e-01]
 [-9.61145726e-01 -2.27275666e-01]
 [-9.51205825e-01  1.90336917e-02]
 [-7.24311255e-01 -4.61651545e-02]
 [-9.57595060e-01  6.18678558e-02]
 [-8.9

### gm scores test: input unlabeled p-value score prediction

In [30]:
# Diffuse the unlabeled input scores with the 'gm' method and compare against
# the stored reference. Paths are built with os.path.join, like the other
# cells, so they do not depend on the platform's path separator.
# NOTE(review): the recorded run of this cell ended with KeyError: 'V100';
# presumably the input file carries a label that is missing from the graph or
# kernel - verify before relying on this test.
run_score_method_test(
    'gm',
    G,
    Matrix.from_csv(os.path.join(dir_path, 'scores_test', 'input_unlabeled_scores.csv')),
    Matrix.from_csv(os.path.join(dir_path, 'scores_test', 'output_gm_scores.csv')),
)


KeyError: 'V100'