In [1]:
%matplotlib inline


# Parallel Betweenness


Example of parallel implementation of betweenness centrality using the
multiprocessing module from Python Standard Library.

The function `betweenness_centrality_source` accepts a group of source nodes
and computes the contribution of those nodes to the betweenness centrality of
the whole network. Here we divide the nodes of the network into chunks, compute
each chunk's contribution in a separate worker process, and sum the partial
results to obtain the betweenness centrality of the whole network.

This doesn't work in Python 2.7.13. It does work in Python 3.3, 3.4, 3.5, and 3.6.

It may be related to this:
https://stackoverflow.com/questions/1816958/cant-pickle-type-instancemethod-when-using-multiprocessing-pool-map


In [1]:
import pickle

In [2]:
from multiprocessing import Pool
import time
import itertools

import matplotlib.pyplot as plt
import networkx as nx


def chunks(l, n):
    """Yield consecutive tuples of at most `n` items drawn from iterable `l`.

    Note that `n` is the chunk *size*, not the number of chunks. The final
    tuple may be shorter than `n`. Iteration stops as soon as an empty tuple
    would be produced, so an empty input (or `n == 0`) yields nothing.
    """
    remaining = iter(l)
    # Two-argument iter() keeps calling the lambda until it returns the
    # sentinel `()`, i.e. until islice has nothing left to take.
    for piece in iter(lambda: tuple(itertools.islice(remaining, n)), ()):
        yield piece


def _betmap(G_normalized_weight_sources_tuple):
    """Single-argument adapter around `nx.betweenness_centrality_source`.

    `Pool.map` hands each work item to the mapped function as one argument,
    so the caller bundles the positional arguments into a plain tuple
    (graph, normalized, weight, sources) and this function unpacks it.

    Returns whatever `nx.betweenness_centrality_source` returns for that
    argument tuple.
    """
    partial_centrality = nx.betweenness_centrality_source(
        *G_normalized_weight_sources_tuple
    )
    return partial_centrality


def betweenness_centrality_parallel(G, processes=None):
    """Compute betweenness centrality of `G` using a pool of worker processes.

    The nodes of `G` are split into chunks, each chunk is sent to a worker
    that computes the contribution of those source nodes via `_betmap`, and
    the per-chunk results are summed node by node.

    Parameters
    ----------
    G : graph
        Graph passed through to `nx.betweenness_centrality_source`.
    processes : int or None
        Forwarded to `multiprocessing.Pool`; `None` lets the pool choose.

    Returns
    -------
    dict
        Mapping of node -> summed betweenness contribution. Empty for an
        empty graph.
    """
    # Nothing to distribute; also avoids indexing an empty result list below.
    if G.order() == 0:
        return {}

    # The context manager shuts the workers down when the block exits, so
    # repeated calls (e.g. once per tissue) do not accumulate processes.
    with Pool(processes=processes) as p:
        # `_pool` is a private attribute of Pool; its length is used here as
        # the worker count, aiming for about four chunks per worker.
        node_divisor = len(p._pool) * 4
        # Graphs with fewer nodes than `node_divisor` would otherwise get a
        # chunk size of 0, which produces no chunks at all.
        chunk_size = max(1, G.order() // node_divisor)
        node_chunks = list(chunks(G.nodes(), chunk_size))
        num_chunks = len(node_chunks)
        # Each work item is (G, normalized=True, weight=None, sources).
        bt_sc = p.map(_betmap,
                      zip([G] * num_chunks,
                          [True] * num_chunks,
                          [None] * num_chunks,
                          node_chunks))

    # Reduce the partial solutions: accumulate into the first result dict.
    bt_c = bt_sc[0]
    for bt in bt_sc[1:]:
        for n in bt:
            bt_c[n] += bt[n]
    return bt_c


# if __name__ == "__main__":
#     G_ba = nx.barabasi_albert_graph(1000, 3)
#     for G in [G_ba]:
#         print("")
#         print("Computing betweenness centrality for:")
#         print(nx.info(G))
#         print("\tParallel version")
#         start = time.time()
#         bt = betweenness_centrality_parallel(G)
#         print("\t\tTime: %.4F" % (time.time() - start))
#         print("\t\tBetweenness centrality for node 0: %.5f" % (bt[0]))
#         print("\tNon-Parallel version")
#         start = time.time()
#         bt = nx.betweenness_centrality(G)
#         print("\t\tTime: %.4F seconds" % (time.time() - start))
#         print("\t\tBetweenness centrality for node 0: %.5f" % (bt[0]))
#     print("")

#     nx.draw(G_ba)
#     plt.show()

In [4]:
# Names of the 30 tissues, one per line.
tissue_list = [
    'Blood',
    'Bladder',
    'Stomach',
    'Blood Vessel',
    'Brain',
    'Adipose Tissue',
    'Pituitary',
    'Heart',
    'Colon',
    'Thyroid',
    'Ovary',
    'Esophagus',
    'Kidney',
    'Prostate',
    'Salivary Gland',
    'Lung',
    'Muscle',
    'Pancreas',
    'Skin',
    'Liver',
    'Adrenal Gland',
    'Small Intestine',
    'Cervix Uteri',
    'Breast',
    'Vagina',
    'Spleen',
    'Fallopian Tube',
    'Uterus',
    'Testis',
    'Nerve',
]

In [5]:
import datetime
import pandas as pd
# Load the combined centrality table written by previous runs of this cell.
# To start from scratch instead, replace the load below with:
#   all_tissue_centrality = pd.DataFrame(columns=['Gene', 'Centrality_coeff', 'Tissue'])
centrality_path = './data_generated/network_generated/all_tissue_centrality.p'
# NOTE: pickle.load can execute arbitrary code; only load files generated
# by this project.
with open(centrality_path, "rb") as fh:
    all_tissue_centrality = pickle.load(fh)

# Tissues processed by this cell (this overrides any earlier `tissue_list`).
tissue_list = ['Vagina',
 'Spleen',
 'Fallopian Tube',
 'Uterus',
 'Testis',
 'Nerve']

# Drop any rows already stored for these tissues so that re-running the cell
# replaces their results instead of appending duplicates.
all_tissue_centrality = all_tissue_centrality[
    ~all_tissue_centrality['Tissue'].isin(tissue_list)]

# Collect the per-tissue frames and concatenate once at the end;
# DataFrame.append was removed in pandas 2.0 and copies the frame every call.
tissue_frames = [all_tissue_centrality]
for tissue in tissue_list:
    print(tissue)
    print(datetime.datetime.now())
    name = './data_generated/network_generated/'+ tissue + '_network.p'
    with open(name, "rb") as fh:
        tissue_nw = pickle.load(fh)
    # 'PPI_val' is attached as an edge attribute; betweenness_centrality_parallel
    # passes weight=None, so the centrality itself is unweighted.
    Gm = nx.from_pandas_edgelist(tissue_nw, 'Gene1', 'Gene2', 'PPI_val')
    k = betweenness_centrality_parallel(Gm)
    print('k done')
    print(datetime.datetime.now())

    # One row per gene: node label -> 'Gene', centrality -> 'Centrality_coeff'.
    tissue_df = pd.DataFrame.from_dict(k,orient='index',
                       columns =['Centrality_coeff'] ).reset_index().rename(columns = {'index':'Gene'})
    tissue_df['Tissue'] = tissue
    tissue_frames.append(tissue_df)

all_tissue_centrality = pd.concat(tissue_frames, ignore_index=True)
with open(centrality_path, "wb") as fh:
    pickle.dump(all_tissue_centrality, fh)

Vagina
2020-04-14 01:04:54.781402
k done
2020-04-14 01:09:51.991503
Spleen
2020-04-14 01:09:52.226230
k done
2020-04-14 01:15:16.546903
Fallopian Tube
2020-04-14 01:15:16.720584
k done
2020-04-14 01:17:54.444607
Uterus
2020-04-14 01:17:57.115671
k done
2020-04-14 01:23:00.782334
Testis
2020-04-14 01:23:01.351338
k done
2020-04-14 01:27:47.365732
Nerve
2020-04-14 01:27:47.637233
k done
2020-04-14 01:33:15.010282


In [6]:
# Number of distinct tissues present in the combined centrality table.
len(set(all_tissue_centrality['Tissue']))

30

In [11]:
# Rows of `tissue_nw` (the last tissue network loaded) with Ratio_val below 0.25.
tissue_nw.loc[tissue_nw['Ratio_val'] < 0.25]

Unnamed: 0,Gene1,Gene2,PPI_val,Ratio_val,Tissue
