In [1]:
import sys
import os
sys.path.append(os.path.dirname(os.path.abspath(os.path.dirname('__file__'))))
import jReversion as jR
from LDOI import BooleanDOI_processing as BDOIp
from LDOI import qm
import pandas as pd
import numpy as np
import networkx as nx
import concurrent.futures
import par_helper as ph
import random
import robustness as rb
import pickle

In [20]:
# Names of the network models processed by this notebook.
# Entries that are commented out are skipped in this run.
networkModel = [
    'bortezomib',
    # 'igvh',
    'apoptosis',
    # 'aurora',
    # 'bt474_long',
    'bt474_short',
    # 'cd4t',
    'colitis',
    'death',
    # 'egfr',
    # 'erbb',
    # 'fa_brca',
    # 'fa_check',
    # 'hcc1954_long',
    'hcc1954_short',
    'hgf',
    'mammalian',
    # 'mammalian_2006',
    'mapk',
    'oxidative',
    # 'pro_inflammatory',
    'fibroblasts',
    # 'skbr3_long',
    'skbr3_short',
    'tlgl_2008',
    'tlgl_2011',
    # 'tlgl_2011_reduced',
    # 'prostate',
    'migration',
]

In [21]:
def canalizing_chain(dir_path, model_name, net_idx, prefix='n', suffix='n', sampling_num=10,
                     exnet_dir='/data2/jijoo/theor/exnet/'):
    """Summarize the weakly connected components of a network's expanded graph.

    The pickled graph ``<model_name>_<net_idx>.pck`` is loaded from ``dir_path``.
    Its expanded network is read from the GML cache file
    ``<model_name>_<net_idx>_ex.gml`` in ``exnet_dir``; if that read fails, the
    expanded network is built with ``BDOIp.Get_expanded_network`` and written to
    the cache. Every expanded-network node whose name contains ``'_'`` is then
    removed (presumably these are the composite nodes -- confirm against
    ``BDOIp``), and the weakly connected components of the remaining graph are
    measured.

    Args:
        dir_path: Directory containing the pickled network files.
        model_name: Model name used as the file-name stem.
        net_idx: Index of the network; appended to the file-name stem.
        prefix: Passed through to ``BDOIp.Get_expanded_network``.
        suffix: Passed through to ``BDOIp.Get_expanded_network``.
        sampling_num: Unused; kept so existing callers keep working.
        exnet_dir: Directory used to cache expanded networks as GML files.
            Defaults to the location that was previously hard-coded.

    Returns:
        dict with the keys ``'num_wccs'``, ``'norm_num_wccs'``,
        ``'len_largest_wcc'``, ``'norm_len_largest_wcc'`` and ``'avg_len_wcc'``.

    Raises:
        ValueError: If the original graph is empty or no nodes are left after
            the ``'_'``-named nodes are removed (the normalized metrics would
            otherwise divide by zero).
    """
    network_tag = model_name + '_' + str(net_idx)

    # SECURITY: pickle.load can execute arbitrary code; only point this at
    # network files you generated yourself or otherwise trust.
    with open(os.path.join(dir_path, network_tag + '.pck'), 'rb') as f:
        g = pickle.load(f)

    exnet_path = os.path.join(exnet_dir, network_tag + '_ex.gml')
    try:
        GExpanded = nx.read_gml(exnet_path)
    except Exception:
        # Cache miss or unreadable cache file: rebuild and store the result.
        # `Exception` (not a bare except) so Ctrl-C / SystemExit still propagate.
        GExpanded = BDOIp.Get_expanded_network(g, prefix=prefix, suffix=suffix)
        nx.write_gml(GExpanded, exnet_path)

    # Work on a copy so the expanded network itself is left untouched.
    GEx_nonComposite = GExpanded.copy()
    GEx_nonComposite.remove_nodes_from(
        [node for node in GEx_nonComposite.nodes if '_' in node]
    )

    if len(g) == 0 or len(GEx_nonComposite) == 0:
        raise ValueError(
            'canalizing_chain: network ' + network_tag
            + ' has no nodes to analyse after removing composite nodes'
        )

    # Component sizes, largest first.
    wccs_size = sorted(
        (len(component) for component in nx.weakly_connected_components(GEx_nonComposite)),
        reverse=True,
    )

    return {
        'num_wccs': len(wccs_size),
        'norm_num_wccs': len(wccs_size) / float(len(GEx_nonComposite)),
        'len_largest_wcc': wccs_size[0],
        # NOTE(review): the numerator counts expanded-network nodes while the
        # denominator is the size of the original graph, so this ratio is not
        # bounded by 1 -- confirm that this normalization is intended.
        'norm_len_largest_wcc': wccs_size[0] / float(len(g)),
        'avg_len_wcc': np.average(wccs_size),
    }

In [22]:
# Empty results table with a fixed column order; rows are added by the
# collection loop that follows.
result_pd = pd.DataFrame(
    columns=[
        'model',
        'network_idx',
        'num_wccs',
        'norm_num_wccs',
        'len_largest_wcc',
        'norm_len_largest_wcc',
        'avg_len_wcc',
    ]
)

In [None]:
# For every model, analyse networks 0..99 from ../networks/configuration_models/
# in parallel worker processes and add one row per network to `result_pd`.
for Model in networkModel:

    with concurrent.futures.ProcessPoolExecutor(max_workers=10) as executor:
        # Map each future back to the network index it was submitted for.
        futures = {executor.submit(canalizing_chain, dir_path='../networks/configuration_models/', model_name=Model, net_idx=network_idx, prefix='n', suffix='n', sampling_num=10): network_idx for network_idx in range(100)}

    # Leaving the `with` block waits for all workers, so every future is done
    # here; future.result() re-raises any exception raised in a worker.
    model_rows = []
    for future in concurrent.futures.as_completed(futures):
        row = future.result()
        row['network_idx'] = futures[future]
        row['model'] = Model
        model_rows.append(row)

    # DataFrame.append was removed in pandas 2.0 and copied the whole frame on
    # every call; build this model's rows once and concatenate a single time.
    # Columns are aligned by name, so the column order of `result_pd` is kept.
    result_pd = pd.concat([result_pd, pd.DataFrame(model_rows)], ignore_index=True)

    print(Model + ': DONE')
# configuration_models

bortezomib: DONE
apoptosis: DONE
bt474_short: DONE
colitis: DONE
death: DONE
hcc1954_short: DONE
hgf: DONE
mammalian: DONE
mapk: DONE
oxidative: DONE


In [18]:
# Show the collected results table as this cell's output.
# NOTE(review): the output recorded below has no 'avg_len_wcc' column and an
# execution count lower than the cells above, so it appears to come from an
# earlier run -- re-run the notebook top to bottom before relying on it.
result_pd

Unnamed: 0,model,network_idx,num_wccs,norm_num_wccs,len_largest_wcc,norm_len_largest_wcc
0,bortezomib,99,27,0.198529,39,0.582090
1,bortezomib,74,25,0.183824,93,1.388060
2,bortezomib,33,32,0.235294,88,1.313433
3,bortezomib,5,29,0.213235,99,1.477612
4,bortezomib,94,26,0.191176,103,1.537313
...,...,...,...,...,...,...
995,oxidative,52,6,0.150000,34,1.789474
996,oxidative,15,6,0.150000,34,1.789474
997,oxidative,27,7,0.175000,32,1.684211
998,oxidative,28,7,0.175000,17,0.894737


In [19]:
# Persist the collected results table as CSV (the row index is written too).
result_pd.to_csv(path_or_buf='../data/cc_canalizing_chain_211126.csv')