In [46]:
# Widen the notebook container to 80% of the browser window.
# display/HTML are taken from IPython.display, the public home of these
# helpers; importing display from IPython.core.display is deprecated.
from IPython.display import HTML, display
display(HTML("<style>.container { width:80% !important; }</style>"))

In [47]:
import os

import community
import markov_clustering as mc
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
from scipy.stats import hypergeom

In [None]:
def read_graph(file, base_dir="../interactomes/"):
    """Build an undirected graph from a tab-separated edge list.

    The file ``<base_dir>/<file>.tsv`` is read as text. Its first line is
    treated as a header and skipped; on every following non-blank line the
    first two tab-separated fields become the endpoints of one edge and any
    further fields are ignored.

    Parameters
    ----------
    file : str
        File name without the ``.tsv`` extension.
    base_dir : str, optional
        Directory containing the file. Defaults to ``"../interactomes/"``.

    Returns
    -------
    networkx.Graph
        Graph holding one edge per data row.

    Raises
    ------
    IndexError
        If a non-blank data row has fewer than two tab-separated fields.
    """
    G = nx.Graph()

    with open(os.path.join(base_dir, file + ".tsv"), "r") as f:
        next(f, None)  # skip the header line (no-op on an empty file)
        for row in f:
            # Strip only the line terminator: slicing off the last character
            # would truncate the final field of a file that does not end
            # with a newline.
            row = row.rstrip("\n")
            if not row:
                continue  # a blank line carries no edge
            edge = row.split("\t")
            G.add_edge(edge[0], edge[1])

    return G


def find_best_inflation(mat):
    """Pick the MCL inflation value whose clustering scores the highest modularity.

    Markov clustering is run on ``mat`` once for every inflation value from
    1.5 to 2.5 in steps of 0.1, and each resulting clustering is scored with
    ``mc.modularity``.

    Returns
    -------
    float
        The inflation value with the largest modularity. Candidates are
        compared as ``(modularity, inflation)`` tuples, so equal modularities
        are resolved in favour of the larger inflation.
    """

    def _modularity_for(candidate):
        # Cluster with this inflation and score the resulting partition.
        flow = mc.run_mcl(mat, inflation=candidate)
        groups = mc.get_clusters(flow)
        return mc.modularity(matrix=np.asmatrix(flow), clusters=groups)

    candidates = (tenths / 10 for tenths in range(15, 26))
    scored = [(_modularity_for(value), value) for value in candidates]

    _, best_inflation = max(scored)
    return best_inflation

def get_labels(G_lcc, clusters):
    """Replace node positions in ``clusters`` with the corresponding node names.

    Parameters
    ----------
    G_lcc : graph
        Object whose ``nodes()`` yields the node names; position ``k`` in
        that iteration order is the number that stands for node ``k``.
    clusters : list
        List of iterables of node positions. It is relabelled in place.

    Returns
    -------
    list
        The same ``clusters`` object, with every entry replaced by a list of
        node names. A value with no matching position is kept unchanged.
    """
    # Position -> node name, built in a single pass. Materialising the node
    # list once per position would make this quadratic in the graph size.
    lbls = dict(enumerate(G_lcc.nodes()))

    # Relabel in place so the caller's list object is the one handed back.
    for i, members in enumerate(clusters):
        clusters[i] = [lbls.get(item, item) for item in members]

    return clusters


def MCL(G_lcc):
    """Cluster a graph with Markov clustering and return clusters of node names.

    The adjacency matrix of ``G_lcc`` is clustered with the inflation value
    chosen by ``find_best_inflation``; the resulting clusters of node
    positions are then translated to node names with ``get_labels``.

    Parameters
    ----------
    G_lcc : networkx.Graph
        Graph to cluster.

    Returns
    -------
    list
        One list of node names per cluster.
    """
    # nx.to_numpy_matrix was removed in networkx 3.0. Prefer the ndarray
    # converter and fall back to the matrix one only on installations that
    # predate to_numpy_array.
    to_dense = getattr(nx, "to_numpy_array", None) or nx.to_numpy_matrix
    mat = to_dense(G_lcc)

    # find best inflation
    max_infl = find_best_inflation(mat)

    # cluster once more with the selected inflation to obtain the final result
    result = mc.run_mcl(mat, inflation=max_infl)
    clusters = mc.get_clusters(result)

    # return name of genes
    return get_labels(G_lcc, clusters)


def louvain(G_lcc, random_state=None):
    """Partition a graph with the Louvain method and return its communities.

    Parameters
    ----------
    G_lcc : networkx.Graph
        Graph to partition.
    random_state : optional
        Seed forwarded to ``community.best_partition`` to make the partition
        reproducible. When left at ``None`` the call is made without the
        argument, exactly as ``community.best_partition(G_lcc)``.

    Returns
    -------
    list
        One list of nodes per community, ordered by ascending community id;
        nodes inside a community keep the order of the partition mapping.
    """
    # Forward the seed only when one is given, so the default call does not
    # depend on the installed python-louvain accepting the keyword.
    kwargs = {} if random_state is None else {"random_state": random_state}
    partition = community.best_partition(G_lcc, **kwargs)

    # Group the nodes by community id in a single pass over the mapping
    # instead of rescanning every node once per community.
    members = {}
    for node, com in partition.items():
        members.setdefault(com, []).append(node)

    return [members[com] for com in sorted(members)]


def hypergeom_test(mod, genes, G_lcc):
    """Test a module for over-representation of seed genes.

    The module is treated as a sample of ``N`` nodes drawn without
    replacement from the ``M`` nodes of ``G_lcc``, ``n`` of which are seed
    genes. The p-value is the probability of seeing at least the observed
    number ``x`` of seed genes in such a sample.

    Parameters
    ----------
    mod : iterable
        Nodes of the module under test.
    genes : iterable
        Seed genes.
    G_lcc : graph
        Object whose ``nodes()`` yields the population of nodes.

    Returns
    -------
    tuple
        ``(pval, x, M, n, N)`` with the p-value, the number of seed genes in
        the module, the population size, the number of seed genes present in
        the population and the module size.
    """
    population = set(G_lcc.nodes())
    # Only seed genes that are part of the network can ever be drawn, so
    # only those count as successes in the population.
    seeds = set(genes) & population

    M = len(population)
    n = len(seeds)
    N = len(mod)
    x = len(seeds.intersection(mod))

    # Enrichment is the upper tail P(X >= x). sf(k) is P(X > k), hence
    # x - 1. The lower tail cdf(x) = P(X <= x) would instead make modules
    # with no seed genes look significant.
    pval = hypergeom.sf(x - 1, M, n, N)

    return (pval, x, M, n, N)


def fill_df(df, clusters, G_lcc, genes, algo):
    """Append one enrichment-test row per cluster to a results table.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the rows collected so far (may have no rows).
    clusters : list
        Clusters to test, each an iterable of node names.
    G_lcc : graph
        Graph the clusters were computed on; passed to ``hypergeom_test``.
    genes : iterable
        Seed genes.
    algo : str
        Label of the clustering algorithm, stored in ``cl_algo``.

    Returns
    -------
    pandas.DataFrame
        A new table made of the rows of ``df`` followed by one row per
        cluster with the columns ``cl_algo``, ``mod_id`` (row number in the
        table), ``n_sg`` (seed genes in the module), ``n_g`` (module size),
        ``sg_id`` (seed genes found in the module), ``g_id`` (module members)
        and ``p_value``. ``df`` itself is not modified; when ``clusters`` is
        empty, ``df`` is returned as is.
    """
    r = df.shape[0]
    seed_set = set(genes)

    records = []
    for idx, c in enumerate(clusters):
        # Run the test once per cluster and reuse every part of its result.
        pval, x, _, _, N = hypergeom_test(c, genes, G_lcc)
        records.append({
            'cl_algo': algo,
            'mod_id': r + idx,
            'n_sg': x,
            'n_g': N,
            'sg_id': list(seed_set.intersection(c)),
            'g_id': list(set(c)),
            'p_value': pval,
        })

    if not records:
        return df

    # Build all new rows at once: growing the frame cell by cell reallocates
    # it on every new row and cannot reliably store list values.
    new_rows = pd.DataFrame(records, index=range(r, r + len(records)))

    if r == 0:
        # Nothing to concatenate with; keep df's column order and append any
        # column that df does not have yet.
        columns = list(df.columns) + [col for col in new_rows.columns if col not in df.columns]
        return new_rows.reindex(columns=columns)

    return pd.concat([df, new_rows])

In [None]:
# Initialize the gene list: one seed gene per line, blank lines are skipped
with open("seed_genes.txt", "r") as f:
    genes = [gene.rstrip() for gene in f if gene.strip()]


### read data
# The helpers are the functions defined in this notebook, so they are called
# directly (no module alias is imported anywhere in the notebook).

ii = read_graph("ii")
ui = read_graph("ui")

# get LCC
# nx.connected_component_subgraphs was removed in networkx 2.4; take the
# largest node set from nx.connected_components and copy its subgraph.
G_ii = ii.subgraph(max(nx.connected_components(ii), key=len)).copy()
G_ui = ui.subgraph(max(nx.connected_components(ui), key=len)).copy()


### LOUVAIN

cl_ii_lou = louvain(G_ii)
cl_ui_lou = louvain(G_ui)


### MCL

cl_ii_mcl = MCL(G_ii)
cl_ui_mcl = MCL(G_ui)


# create table (one per network, filled below)
col_lst = ['cl_algo', 'mod_id', 'n_sg', 'n_g', 'sg_id', 'g_id', 'p_value']
df_ui = pd.DataFrame(data=None, columns=col_lst)
df_ii = pd.DataFrame(data=None, columns=col_lst)


### hypergeometric test - ui, MCL

df_ui = fill_df(df_ui, cl_ui_mcl, G_ui, genes, 'MCL')


### hypergeometric test - ii, MCL

df_ii = fill_df(df_ii, cl_ii_mcl, G_ii, genes, 'MCL')


### hypergeometric test - ui, Louvain

df_ui = fill_df(df_ui, cl_ui_lou, G_ui, genes, 'Louvain')

### hypergeometric test - ii, Louvain

df_ii = fill_df(df_ii, cl_ii_lou, G_ii, genes, 'Louvain')


#save

# make sure the output directory exists before writing into it
os.makedirs('results', exist_ok=True)
df_ui.to_csv('results/df_ui.csv')
df_ii.to_csv('results/df_ii.csv')


# get putative disease modules
#df_ui[(df_ui.n_g >= 10) & (df_ui.p_value < 0.05)]

In [None]:
# Draw the largest component of the ii network, one grey level per Louvain
# community. The communities come from cl_ii_lou (computed by the pipeline
# cell), so the figure shows the same partition as the results table.
size = float(len(cl_ii_lou))
pos = nx.spring_layout(G_ii)

fig, ax = plt.subplots()
for count, list_nodes in enumerate(cl_ii_lou, start=1):
    # A string holding a number between 0 and 1 is read by matplotlib as a
    # grey level, which gives every community its own shade.
    nx.draw_networkx_nodes(G_ii, pos, nodelist=list_nodes, node_size=20,
                           node_color=str(count / size), ax=ax)

nx.draw_networkx_edges(G_ii, pos, alpha=0.5, ax=ax)
ax.set_title("ii network (LCC) - Louvain communities")
plt.show()

# FIXME(review): the statements below were left indented at the top level of
# this cell, which made the whole cell fail with an IndentationError. They
# use G, d, filename and get_coordinates(), none of which is defined in the
# visible cells, so they are kept as a comment until the function they were
# taken from is restored.
#     plt.figure(num=None, figsize=(15,15), dpi=50)
#     nx.draw(G, nodelist= list(d.keys()), node_size=[v*100  for v in d.values()], with_labels = True,
#             pos = get_coordinates(),  font_size=18, node_color=list(nx.get_node_attributes(G,'color').values()))
#     plt.savefig('results/' + filename + '_degree' + '.png')

In [None]:
# Draw the largest component of the ii network, one grey level per Louvain
# community, using the communities in cl_ii_lou from the pipeline cell.
# NOTE(review): this cell plots the same network as the preceding cell;
# presumably it was meant for the ui network (G_ui / cl_ui_lou) - confirm.
size = float(len(cl_ii_lou))
pos = nx.spring_layout(G_ii)

fig, ax = plt.subplots()
for count, list_nodes in enumerate(cl_ii_lou, start=1):
    # A string holding a number between 0 and 1 is read by matplotlib as a
    # grey level, which gives every community its own shade.
    nx.draw_networkx_nodes(G_ii, pos, nodelist=list_nodes, node_size=20,
                           node_color=str(count / size), ax=ax)

nx.draw_networkx_edges(G_ii, pos, alpha=0.5, ax=ax)
ax.set_title("ii network (LCC) - Louvain communities")
plt.show()

# FIXME(review): the statements below were left indented at the top level of
# this cell, which made the whole cell fail with an IndentationError. They
# use G, d, filename and get_coordinates(), none of which is defined in the
# visible cells, so they are kept as a comment until the function they were
# taken from is restored.
#     plt.figure(num=None, figsize=(15,15), dpi=50)
#     nx.draw(G, nodelist= list(d.keys()), node_size=[v*100  for v in d.values()], with_labels = True,
#             pos = get_coordinates(),  font_size=18, node_color=list(nx.get_node_attributes(G,'color').values()))
#     plt.savefig('results/' + filename + '_degree' + '.png')

In [None]:
# ui_mat and clusters were not defined in any visible cell; rebuild them from
# the pipeline results.
# NOTE(review): presumably ui_mat is the adjacency matrix of G_ui and
# clusters are the MCL clusters expressed as node positions - confirm.
# cl_ui_mcl holds node names (MCL() relabels the positions), so map the names
# back to their position in the node order of G_ui.
node_index = {name: k for k, name in enumerate(G_ui.nodes())}
clusters = [tuple(node_index[name] for name in cluster) for cluster in cl_ui_mcl]

# nx.to_numpy_matrix was removed in networkx 3.0; fall back to it only on
# installations that predate to_numpy_array.
ui_mat = (getattr(nx, "to_numpy_array", None) or nx.to_numpy_matrix)(G_ui)

mc.draw_graph(ui_mat, clusters, node_size=50, with_labels=False, edge_color="silver")

In [None]:
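# References
#
# markov_clustering package (imported in this notebook as `mc`):
#   https://github.com/GuyAllard/markov_clustering
#
# Blog post on understanding and implementing the hypergeometric test in Python:
#   https://blog.alexlenail.me/understanding-and-implementing-the-hypergeometric-test-in-python-a7db688a7458
#
# Biostars thread:
#   https://www.biostars.org/p/66729/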