In [1]:
import networkx as nx
import pandas as pd
import numpy as np
import time
import os

In [2]:
network_file_path = "./data/network/CancerSubnetwork.txt"
matrix_path = "./data/raw/miss/kirp_sm_miss251_csn.csv"

In [3]:
matrix= pd.read_csv(matrix_path, index_col=0)
network = nx.read_adjlist(network_file_path, create_using=nx.Graph)

In [4]:
# matrix

In [5]:
save_file_path = "./data/netprop/miss/"
isExist = os.path.exists(save_file_path)
if not isExist:
    os.makedirs(save_file_path)
save_file_path += "kirp_sm_miss251_csn_propagated.csv"

In [6]:
def normalize_network(network, symmetric_norm=False):
    adj_mat = nx.adjacency_matrix(network)
    adj_array = np.array(adj_mat.todense())
    if symmetric_norm:
        D = np.diag(1/np.sqrt(sum(adj_array)))
        adj_array_norm = np.dot(np.dot(D, adj_array), D)
    else:
        degree = sum(adj_array)
        adj_array_norm = (adj_array*1.0/degree).T
    return adj_array_norm

In [7]:
def fast_random_walk(alpha, binary_mat, subgraph_norm, prop_data_prev):
    term1 = (1-alpha)*binary_mat
    # print(subgraph_norm)
    term2 = np.identity(binary_mat.shape[1])-alpha*subgraph_norm
    term2_inv = np.linalg.inv(term2)
    subgraph_prop = np.dot(term1, term2_inv)
    prop_data_add = np.concatenate((prop_data_prev, subgraph_prop), axis=1)
    return prop_data_add

In [8]:
def network_propagation(network, binary_matrix, alpha=0.7, symmetric_norm=False, verbose=True):

    # Begin network propagation
    starttime = time.time()
    if verbose:
        print('Performing network propagation with alpha:', alpha)
    # Separate network into connected components and calculate propagation values of each sub-sample on each connected component
    subgraphs = list(network.subgraph(c) for c in nx.connected_components(network))
    # Initialize propagation results by propagating first subgraph
    subgraph = subgraphs[0]
    subgraph_nodes = list(subgraph.nodes)
    prop_data_node_order = list(subgraph_nodes)
    binary_matrix_filt = np.array(binary_matrix.T.loc[subgraph_nodes].fillna(0).T)
    subgraph_norm = normalize_network(subgraph, symmetric_norm=symmetric_norm)
    prop_data_empty = np.zeros((binary_matrix_filt.shape[0], 1))
    prop_data = fast_random_walk(alpha, binary_matrix_filt, subgraph_norm, prop_data_empty)
    # Get propagated results for remaining subgraphs
    for subgraph in subgraphs[1:]:
        subgraph_nodes = list(subgraph.nodes)
        prop_data_node_order = prop_data_node_order + subgraph_nodes
        binary_matrix_filt = np.array(binary_matrix.T.loc[subgraph_nodes].fillna(0).T)
        subgraph_norm = normalize_network(subgraph, symmetric_norm=symmetric_norm)
        prop_data = fast_random_walk(alpha, binary_matrix_filt, subgraph_norm, prop_data)
    # Return propagated result as dataframe
    prop_data_df = pd.DataFrame(data=prop_data[:, 1:], index=binary_matrix.index, columns=prop_data_node_order)
    if verbose:
        print('Network Propagation Complete:', time.time()-starttime, 'seconds')
    return prop_data_df

In [9]:
propagated_matrix = network_propagation(network, matrix)

Performing network propagation with alpha: 0.7


  adj_mat = nx.adjacency_matrix(network)


Network Propagation Complete: 2.672335624694824 seconds


In [10]:
propagated_matrix.to_csv(save_file_path)