In [1]:
%matplotlib inline
import sys 
import os
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.cm as cmx
import scipy.stats as st
import pandas as pd
import itertools
import random
import seaborn as sns
import time

In [2]:
# --- Input tables -----------------------------------------------------------------------------
# NOTE(review): the disease table is read from an absolute, user-specific path while every other
# input is read relative to the working directory.
diseases = pd.read_csv('/Users/ardahalu/Research/Channing/Manlio/manlio_diseases_icd9.txt', sep=' ', index_col=False, dtype=str)
# Edge lists; per the file names the columns are (layer, node, node, weight) and (node, node, weight).
multiplex_edges = np.genfromtxt('multiplex_disease_layer-node-node-weight.edges', dtype=int)
aggregate_edges = np.genfromtxt('multiplex_disease_aggregate_node-node-weight.edges', dtype=int)
disease_genes = np.genfromtxt('manlio_genes_nos.txt', delimiter=';', dtype=str)

# Split the multiplex edge list by the layer id stored in column 0, keeping only the node pair.
layer1_edges = [[multiplex_edges[i, 1], multiplex_edges[i, 2]] for i in range(len(multiplex_edges)) if multiplex_edges[i, 0]==1]
layer2_edges = [[multiplex_edges[i, 1], multiplex_edges[i, 2]] for i in range(len(multiplex_edges)) if multiplex_edges[i, 0]==2]

# Edges present in both layers. Pairs are compared as ordered tuples, so (u, v) in one layer and
# (v, u) in the other are NOT counted as overlapping.
overlap_edges = np.array([x for x in set(tuple(x) for x in np.array(layer1_edges)) & \
                          set(tuple(x) for x in np.array(layer2_edges))])

# One undirected graph per layer, one for the aggregate network and one for the overlap.
G1 = nx.Graph()
G1.add_edges_from(layer1_edges)
G2 = nx.Graph()
G2.add_edges_from(layer2_edges)
Gagg = nx.Graph()
Gagg.add_edges_from(aggregate_edges[:, 0:2])
# Node coordinates for the aggregate graph computed through networkx's graphviz bridge.
pos = nx.nx_agraph.graphviz_layout(Gagg)
Goverlap = nx.Graph()
Goverlap.add_edges_from(overlap_edges)

#get clusters from Manlio's data
# Each .clu table is indexed as (layer, node, cluster): column 2 is the cluster id, column 1 the node.
# The hard-coded ranges below enumerate cluster ids 1..106, 1..105 and 1..128 respectively.
#genotype
clusters1 = np.genfromtxt('multiplex_disease_layer1_layer-node-cluster.clu', dtype=int)
cluster1list = [clusters1[clusters1[:, 2]==i, 1] for i in range(1, 107)]
#phenotype
clusters2 = np.genfromtxt('multiplex_disease_layer2_layer-node-cluster.clu', dtype=int)
cluster2list = [clusters2[clusters2[:, 2]==i, 1] for i in range(1, 106)]
#multiplex with r = 0.45
multiClusters045 = np.genfromtxt('multiplex_disease_multimap_a0.45_layer-node-cluster.clu', dtype=int)
# Nodes of each multiplex cluster, pooled over both layers (a node may therefore appear twice).
multiCluster045list = [multiClusters045[multiClusters045[:, 2]==i, 1] for i in range(1, 129)]
# The same clusters restricted to layer 1 and to layer 2 (column 0 holds the layer id).
multi045_1 = multiClusters045[multiClusters045[:, 0]==1]
multi045_1 = [multi045_1[multi045_1[:, 2]==i, 1] for i in range(1, 129)]
multi045_2 = multiClusters045[multiClusters045[:, 0]==2]
multi045_2 = [multi045_2[multi045_2[:, 2]==i, 1] for i in range(1, 129)]
# "Backbone" of cluster n: the nodes assigned to cluster n in both layers.
backbone_045 = [list(set(multi045_1[n]) & set(multi045_2[n])) for n in range(len(multi045_1))]

In [3]:
# De-duplicate the node ids of each of the 128 multiplex communities (the pooled list can contain
# the same node once per layer) and keep them sorted.
_unique_clusters = [sorted(set(multiCluster045list[i])) for i in range(128)]

# Total number of distinct (community, disease) memberships.
# Written as a function call so the cell runs on both Python 2 and Python 3.
print(sum(len(members) for members in _unique_clusters))

# Communities ordered from largest to smallest (stable sort, so ties keep their original order).
# The communities have different sizes, so the array is built explicitly as a 1-D object array:
# recent NumPy versions refuse to create ragged arrays implicitly via np.array(list_of_lists).
_ordered_clusters = sorted(_unique_clusters, key=len, reverse=True)
clusters_sorted = np.empty(len(_ordered_clusters), dtype=object)
for _k, _members in enumerate(_ordered_clusters):
    clusters_sorted[_k] = _members

# Flat list of disease ids in community order; this is the row/column order used for the
# community-sorted similarity matrices.
comorbidity_arr_clu = [dis for c in clusters_sorted for dis in c]

994


In [4]:
# For every community (largest first) look up the 'Name' of each member disease. The 'ID' column
# was read as text, so the numeric node id is formatted as a string before comparing; the first
# matching row wins.
clu_disease_names = [
    [diseases.loc[diseases['ID'] == '%s' % node_id]['Name'].values[0] for node_id in members]
    for members in clusters_sorted
]

### Try Bokeh

In [5]:
from bokeh.plotting import figure, gridplot, show, output_file, output_notebook
from bokeh.sampledata.les_mis import data

from bokeh.layouts import row, widgetbox, column
from bokeh.models import CustomJS, Select, HoverTool
from bokeh.plotting import figure, show, ColumnDataSource, curdoc
from bokeh.models.widgets import Paragraph, Div

In [6]:
from bokeh.application.handlers import FunctionHandler
from bokeh.application import Application

In [7]:
from sklearn.metrics.pairwise import pairwise_distances
from scipy.spatial import distance
from scipy.cluster import hierarchy

In [8]:
# Configure Bokeh so that subsequent show() calls render inside the notebook.
output_notebook()

#### Cluster by cluster similarity matrices

In [9]:
# Disease-by-disease similarity matrices, one per measure. Later cells index them with
# (disease id - 1), i.e. row/column k is assumed to belong to disease id k + 1.
comorbidity_mat = np.load('RRmatrix.npy')
# log10 of the comorbidity matrix; entries that are 0 become -inf here and are turned into NaN
# (and their rows dropped) later, when the per-community frames are built.
comorbidity_mat_log = np.log10(comorbidity_mat) 

# presumably MimMiner phenotype similarity (name-based guess, confirm against the data source)
mim_mat = np.load('mimmatrix.npy')

# presumably GO Biological Process similarity (labelled "GS2" in the plot tooltips further down)
GOBP_gs2_mat = np.load('GOBP_py2matrix.npy')

# gene overlap between diseases (labelled "Jaccard" in the plot tooltips further down)
geneoverlap_mat = np.load('geneoverlapmatrix.npy')

  


In [10]:
# Re-order every similarity matrix so that rows and columns follow `comorbidity_arr_clu`, i.e.
# diseases are grouped community by community (largest community first).
# Disease ids are 1-based while matrix axes are 0-based, hence the "- 1".
_clu_order = np.asarray(comorbidity_arr_clu, dtype=int) - 1
# Open mesh selecting the full (row, column) cross product in one vectorised indexing operation;
# this replaces four Python-level double loops over every pair of diseases.
_clu_sel = np.ix_(_clu_order, _clu_order)

# Fancy indexing returns a copy; the cast keeps the float dtype the np.zeros-based version produced.
comorbidity_mat_log_clu = np.asarray(comorbidity_mat_log[_clu_sel], dtype=float)
mim_mat_clu = np.asarray(mim_mat[_clu_sel], dtype=float)
GOBP_gs2_mat_clu = np.asarray(GOBP_gs2_mat[_clu_sel], dtype=float)
geneoverlap_mat_clu = np.asarray(geneoverlap_mat[_clu_sel], dtype=float)

In [11]:
# Boundaries of each community inside the community-ordered matrices:
# community n occupies rows/columns cluidx[n] .. cluidx[n + 1] - 1.
_cluster_sizes = [len(clu) for clu in clu_disease_names]
cluidx = np.concatenate(([0], np.cumsum(_cluster_sizes)))

In [12]:
def exclude_nan_rows_from_df(df):
    """Greedily remove labels until the frame contains no missing values.

    On each pass the row holding the most NaN entries is located (the first such row on ties)
    and its label is dropped from both the index and the columns, so a square, identically
    labelled frame stays square. The input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index labels also appear as column labels (e.g. a similarity matrix).

    Returns
    -------
    pandas.DataFrame
        The reduced frame. It is returned unchanged when it has no NaN, and may be empty when
        every row had to be removed.
    """
    while len(df.index) > 0:
        # Number of missing entries in each row, in index order.
        nan_count = df.isna().sum(axis=1).values
        # Check BEFORE dropping: once no NaN is left nothing more may be removed. (Dropping first
        # and testing afterwards would always discard one additional, perfectly clean row.)
        if nan_count.sum() == 0:
            break
        nan_excl = df.index[int(np.argmax(nan_count))]
        df = df.drop(index=nan_excl, columns=nan_excl)
    return df

In [13]:
def df_hierarchical_cluster(df, distance_metric, linkage_type):
    """Re-order a square frame by the leaf order of a hierarchical clustering of its rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Square frame; rows and columns are permuted with the same order.
    distance_metric : str
        Metric name forwarded to ``pairwise_distances`` to compare rows.
    linkage_type : str
        Linkage method forwarded to ``scipy.cluster.hierarchy.linkage``.

    Returns
    -------
    (pandas.DataFrame, list, list)
        The re-ordered frame plus the dendrogram's ``icoord`` and ``dcoord`` lists. Frames with
        fewer than two rows cannot be clustered and are returned unchanged with empty lists.
    """
    if len(df.index) < 2:
        # A dendrogram needs at least two observations.
        return df, [], []

    X = pairwise_distances(df.values, metric=distance_metric)
    # linkage() interprets a 2-D array as raw observation vectors. Handing it the square distance
    # matrix directly would therefore cluster on distances-between-distance-rows (SciPy emits a
    # ClusterWarning for exactly this). Convert to the condensed form it expects for precomputed
    # distances; checks=False tolerates tiny floating-point asymmetries in X.
    # `distance` is scipy.spatial.distance, imported at the top of the notebook.
    condensed = distance.squareform(X, checks=False)
    Z = hierarchy.linkage(condensed, linkage_type)
    results = hierarchy.dendrogram(Z, no_plot=True)
    icoord, dcoord = results['icoord'], results['dcoord']
    # Integer leaf order of the dendrogram (the same values 'ivl' holds as strings).
    dendro_order = results['leaves']
    # Positional re-ordering of rows and columns; unlike label-based column selection this stays
    # correct when labels are not unique.
    df = df.iloc[dendro_order, dendro_order]

    return df, icoord, dcoord

In [14]:
def _scaled_alphas(values, min_alpha=0.0, max_alpha=1.0):
    """Linearly rescale `values` so their minimum maps to `min_alpha` and maximum to `max_alpha`."""
    values = np.asarray(values, dtype=float)
    if values.size == 0:
        return values
    lowest, highest = values.min(), values.max()
    if highest == lowest:
        # Every cell has the same value: there is no range to scale over (the plain formula
        # would divide by zero and produce NaN), so draw all cells at full opacity.
        return np.full(values.shape, max_alpha)
    scaling_factor = (max_alpha - min_alpha) / (highest - lowest)
    return min_alpha + (values - lowest) * scaling_factor


def _measure_bokeh_df(matrix, labels, value_col, color):
    """Build the long-format Bokeh frame for one similarity measure of one community."""
    clu_df = pd.DataFrame(matrix, index=labels, columns=labels)

    # Infinite entries (e.g. log10 of 0) count as missing; drop the affected diseases.
    clu_df = clu_df.replace([np.inf, -np.inf], np.nan)
    clu_df = exclude_nan_rows_from_df(clu_df)

    # Re-order rows/columns by hierarchical clustering; the dendrogram coordinates are not used.
    clu_df, _icoord, _dcoord = df_hierarchical_cluster(clu_df, 'euclidean', 'ward')

    labels_excl = clu_df.index.values
    n_labels = len(labels_excl)
    # Row-major flattening: entry (i, j) pairs xname = label i with yname = label j.
    values = clu_df.values.flatten()
    bokeh_df = pd.DataFrame({
        'xname': list(np.repeat(labels_excl, n_labels)),
        'yname': list(np.tile(labels_excl, n_labels)),
        value_col: values,
    })
    bokeh_df[value_col + '_alpha'] = _scaled_alphas(values)
    bokeh_df['colors'] = [color] * len(bokeh_df)
    return bokeh_df


def calculate_bokeh_similarity_df_hierarchical(n):
    """Prepare the four heat-map frames (one per similarity measure) for community `n`.

    Reads the module-level community-ordered matrices (`comorbidity_mat_log_clu`, `mim_mat_clu`,
    `GOBP_gs2_mat_clu`, `geneoverlap_mat_clu`), the community boundaries `cluidx` and the disease
    names `clu_disease_names`.

    Parameters
    ----------
    n : int
        Position of the community in the size-sorted community list.

    Returns
    -------
    dict
        Keys 'comorbidity_df', 'mimminer_df', 'GOBP_df' and 'geneoverlap_df'. Each value is a
        DataFrame with one row per (disease, disease) cell and the columns 'xname', 'yname',
        '<measure>', '<measure>_alpha' (value rescaled to [0, 1]) and 'colors'. Diseases with
        missing values are removed per measure, so the four frames may differ in size.
    """
    # get similarity values for the nth cluster
    char_lim = 50
    color_list = ['#66c2a5', '#fc8d62', '#8da0cb', '#e78ac3', '#d3ecaa', '#ffd92f', '#e4c393', '#b3b3b3']

    # Axis labels are the disease names truncated to char_lim characters.
    # NOTE(review): truncation can make two names identical; duplicate labels are not handled.
    labels = [x[0:char_lim] for x in clu_disease_names[n]]
    block = slice(cluidx[n], cluidx[n + 1])
    # One colour per community, cycling through the palette.
    color = color_list[n % len(color_list)]

    measures = [
        ('comorbidity', comorbidity_mat_log_clu),
        ('mimminer', mim_mat_clu),
        ('GOBP', GOBP_gs2_mat_clu),
        ('geneoverlap', geneoverlap_mat_clu),
    ]

    bokeh_df = {}
    for measure_name, full_matrix in measures:
        bokeh_df[measure_name + '_df'] = _measure_bokeh_df(full_matrix[block, block], labels,
                                                           measure_name, color)

    return bokeh_df

In [15]:
# Pre-compute the heat-map frames of the first 30 communities of the size-sorted list and cache
# them on disk, keyed 'Community <n>'. The dict is stored through np.save, i.e. as a pickled
# object array.
bokeh_df_dict = {
    'Community %s' % n_clu: calculate_bokeh_similarity_df_hierarchical(n_clu)
    for n_clu in np.arange(0, 30)
}

np.save('/Users/ardahalu/Desktop/Similarity_df_dict_top30_hierarchical.npy', bokeh_df_dict)

  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


In [16]:
def _axis_factors(source, n_rows):
    """Return the ordered axis labels of a square heat map stored in long format.

    The frame holds n_labels * n_labels rows and its first n_labels 'yname' entries are the
    labels in display order.
    """
    n_labels = int(round(n_rows ** 0.5))
    return list(source.data['yname'][0:n_labels])


def _disease_list_html(sources):
    """Join the union of all diseases shown in `sources` into one case-insensitively sorted list."""
    names = set()
    for source in sources:
        names.update(source.data['yname'])
    return '</br>'.join(sorted(list(names), key=lambda s: s.lower())).replace('_', ' ')


def _similarity_heatmap(frame, title, value_col, tooltip_label):
    """Create the data source and figure of one similarity heat map; returns (source, figure)."""
    source = ColumnDataSource(frame)
    labels = _axis_factors(source, len(frame))

    # No "hover" in the tools string: the customised HoverTool is added below, and requesting
    # the default one as well would show two tooltips for every cell.
    fig = figure(title=title, x_axis_location="above", tools="save,wheel_zoom,pan",
                 x_range=labels, y_range=labels)

    fig.grid.grid_line_color = None
    fig.axis.axis_line_color = None
    fig.axis.major_tick_line_color = None
    fig.axis.major_label_text_font_size = "6pt"
    fig.axis.major_label_standoff = 0
    fig.xaxis.major_label_orientation = np.pi/2

    # One rectangle per (disease, disease) cell; opacity encodes the rescaled similarity.
    fig.rect('xname', 'yname', 0.9, 0.9, source=source, color='colors', alpha=value_col + '_alpha',
             line_color=None, hover_line_color='black', hover_color='colors')

    fig.add_tools(HoverTool(tooltips=[("Disease 1", "@xname"), ("Disease 2", "@yname"),
                                      (tooltip_label, "@" + value_col)]))
    return source, fig


def modify_doc_hierarchical(doc):
    """Populate a Bokeh document with four linked community heat maps and a community selector.

    The per-community frames are read from the cached .npy file. Choosing another community in
    the drop-down swaps the data of all four heat maps, resets their categorical axes and
    refreshes the disease list shown beside the plots.

    Parameters
    ----------
    doc : bokeh Document
        The document the layout is added to as a root.
    """
    # (frame key, figure title, value column, tooltip caption) of each panel
    panel_specs = [
        ('comorbidity_df', "RR Comorbidity", 'comorbidity', "Comorbidity (log RR)"),
        ('mimminer_df', "MimMiner", 'mimminer', "MiMMiner"),
        ('GOBP_df', "GO:BP", 'GOBP', "GO:BP Similarity (GS2)"),
        ('geneoverlap_df', "Gene overlap", 'geneoverlap', "Gene Overlap (Jaccard)"),
    ]

    # The cache is a dict pickled inside an object array, so NumPy must be told explicitly that
    # unpickling is allowed (recent versions refuse by default).
    # SECURITY: unpickling executes arbitrary code; only load a file produced by this notebook.
    bokeh_df_dict = np.load('/Users/ardahalu/Desktop/Similarity_df_dict_top30_hierarchical.npy',
                            allow_pickle=True).item()
    init_comm = 'Community 8'

    # key -> (ColumnDataSource, figure)
    panels = {}
    for key, title, value_col, tooltip_label in panel_specs:
        panels[key] = _similarity_heatmap(bokeh_df_dict[init_comm][key], title, value_col,
                                          tooltip_label)
    sources = [panels[key][0] for key, _title, _value_col, _tooltip in panel_specs]

    def callback(attr, old, new):
        # Swap in the frames of the community currently selected in the drop-down.
        new_bokeh_df = bokeh_df_dict[select.value]
        for key, _title, _value_col, _tooltip in panel_specs:
            source, fig = panels[key]
            frame = new_bokeh_df[key]
            source.data = ColumnDataSource(frame).data
            # The measures can keep different diseases, so every panel gets its own axes.
            factors = _axis_factors(source, len(frame))
            fig.x_range.factors = list(factors)
            fig.y_range.factors = list(factors)
        text.text = _disease_list_html(sources)

    p = gridplot([[panels['GOBP_df'][1], panels['comorbidity_df'][1]],
                  [panels['geneoverlap_df'][1], panels['mimminer_df'][1]]],
                 sizing_mode='fixed', plot_width=400, plot_height=400)

    select = Select(title="Select a community:", value=init_comm,
                    options=sorted(bokeh_df_dict.keys()))
    select.on_change('value', callback)

    div = Div(text='<b>List of diseases:</b>', width=200, height=10)
    text = Div(text=_disease_list_html(sources), width=250, height=400)

    layout = row(column(widgetbox(select), div, text), p)
    doc.add_root(layout)
    
# Set up an application: wrap the document-building function in a FunctionHandler and build a
# Bokeh Application from it.
handler = FunctionHandler(modify_doc_hierarchical)
app = Application(handler)   

In [17]:
# Display the interactive app by handing the document-building function itself to show().
# NOTE(review): the `handler` / `app` objects created in the previous cell are not used here.
show(modify_doc_hierarchical)