-----
# cartoGRAPHs - FIGURE 2B Suppl.
Precalculated large networks, e.g. the Interactome

A notebook to produce 2D and 3D network layouts from any graph,
including interactive visualizations (HTML files) and export functions
for importing the layouts into the VRNetzer analytics platform by Pirch et al.

-----

In [1]:
from cartoGRAPHs import * 

from func_load_data import *
from func_visual_properties import * 
from func_calculations import * 
from func_embed_plot import *
from func_exportVR import * 

2021-10-15 00:15:03.887686: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-10-15 00:15:03.888067: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


_____ 
# 1 | DATA PREPARATION 
_____


## HUMAN NETWORK


In [2]:
organism = 'human'

# network graph for the selected organism
G = load_graph(organism)

# node ID -> gene symbol lookup and the plain list of symbols
d_gene_sym = load_genesymbols(G, organism)
l_features = list(d_gene_sym.values())


def _load_pickle(path):
    """Unpickle the file at ``path`` and close the file handle afterwards.

    NOTE: ``pickle.load`` can execute arbitrary code -- only use it on
    trusted files such as the ones shipped in ``input/``.
    """
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Disease Ontology lookup tables
# (presumably gene -> DO terms, DO term -> genes, DO id -> name; verify against the pickles)
d_gene_do = _load_pickle("input/d_gene_do.pkl")
d_do_genes = _load_pickle("input/d_do_genes.pkl")
d_do_names = _load_pickle("input/DO_names.pkl")

# inverse of d_do_names (value -> key)
d_names_do = {y: x for x, y in d_do_names.items()}

In [3]:
# Centrality metrics: the dict is turned into a frame and transposed,
# so every key of d_centralities becomes one row with four metric columns.
centrality_columns = ['degree','closeness', 'betweeness', 'eigenvector']
d_centralities = load_centralities(G, organism)
df_centralities = pd.DataFrame(d_centralities).T
df_centralities.columns = centrality_columns

# Essentiality information, returned as three separate groups of genes.
essential_genes,non_ess_genes,notdefined_genes = load_essentiality(G, organism)

In [4]:
# Layout basis: the 'global' data matrix of the network.
DF_structural = load_datamatrix(G,organism, 'global')

# Largest entry of the matrix: column-wise maxima first, then the maximum of those.
df_max = DF_structural.max()
l_max_visprob = max(df_max.values)

# Disease feature matrix; the cells below read 'DOID:<id>' columns from it.
disease_matrix_file = 'input/Features_Disease_Dataframe_'+organism+'.csv'
FM_disease = pd.read_csv(disease_matrix_file, index_col=0)

# VECTOR : NF1 GENE + DISEASE MODULE (8712 - neurofibromatosis)
e.g.
+ DOID:8712 / Neurofibromatosis
+ DOID:962 / Neurofibroma
+ DOID:0050736 / Autosomal Dominant Disease

In [339]:
path_nf = 'input/neurofibromatosis/'

# gene of interest: the file is read as a single numeric ID and kept as a string
nf_goi_ = str(int(np.loadtxt(path_nf+'nf_gene_of_interest.txt', unpack=False)))
nf_goi = list(nf_goi_.split(" "))
nf_goi_str = ' '.join([str(item) for item in nf_goi])

# direct neighbours: for every edge touching the gene of interest keep the other endpoint
nf_neighbors = [
    target if source == nf_goi_str else source
    for source, target in G.edges()
    if nf_goi_str in (source, target)
]

nf_all = nf_neighbors + nf_goi

# get disease module genes
num_doid = '8712'

# column of the disease feature matrix for this DOID
dismod = FM_disease.loc[:,'DOID:'+num_doid]
dismod.index = FM_disease.index
dismod_id = 'DOID'+num_doid

# genes flagged with 1 in the disease column
dismod_genes = [gene for gene, flag in zip(dismod.index, dismod.values) if flag == 1]

l_genes_nf = [str(i) for i in dismod_genes]
print('Number of Genes associated to DOID', num_doid,':',len(l_genes_nf))
l_genes_neighbors = [str(i) for i in nf_neighbors]
# the neighbour count is not tied to a DOID, so the DOID is not part of this message
print('Number of NF1 neighbor genes', ':', len(l_genes_neighbors))

# create a dataframe (sorted by DF global) and fill 1 when disease-assoc. gene
DF_dismod = pd.DataFrame(dismod.reindex(DF_structural.index.copy() ,fill_value=0))
nf_gene_lookup = set(l_genes_nf)  # constant-time membership instead of scanning the list per row
for i in DF_dismod.index:
    if i in nf_gene_lookup:
        DF_dismod.loc[i] = 1

DF_dismod.columns = ['Neurofib 8712']

Number of Genes associated to DOID 8712 : 12
Number of NF1 neighbor genes 8712 : 49


## VECTOR : NF1 neighbors

In [345]:
# Indicator column for the direct NF1 neighbours, aligned to the rows of DF_structural.
# NOTE(review): the column is seeded from `dismod` (DOID:8712), so genes of the disease
# module keep their values here as well -- confirm that this union is intended.
DF_dismod_neigh = dismod.reindex(DF_structural.index.copy(), fill_value=0).to_frame()

neighbor_rows = [gene_id in l_genes_neighbors for gene_id in DF_dismod_neigh.index]
DF_dismod_neigh.loc[neighbor_rows] = 1

DF_dismod_neigh.columns = ['NF neighbours']

# VECTORS : Disease Context 
+ DOID:3165 - Skin benign neoplasm
+ DOID:10534 - Stomach Cancer
+ DOID:3620 - Central Nervous System Cancer
+ DOID:0060089 - Endocrine organ benign neoplasm
+ resources: https://disease-ontology.org/ https://en.wikipedia.org/wiki/Neurofibromatosis

### First Disease Vector | DOID:3165 - Skin benign neoplasm

In [307]:
# Disease Ontology term of the first context vector
num_doid_1 = '3165'

# column of the disease feature matrix for this term
dismod_1 = FM_disease.loc[:, 'DOID:' + num_doid_1]
dismod_1.index = FM_disease.index

# genes flagged with 1 for this disease, plus their string form
dismod_genes_1 = [dismod_1.index[pos] for pos, flag in enumerate(dismod_1.values) if flag == 1]
l_genes_dismod_1 = [str(gene) for gene in dismod_genes_1]

# disease genes that are also in nf_all (ID checked as str first, then as int)
overlap_1 = [
    str(gene)
    for gene in dismod_genes_1
    if str(gene) in nf_all or int(gene) in nf_all
]

# disease genes followed by the overlapping ones (overlap entries therefore occur twice)
l_genes_dismod_1_with = l_genes_dismod_1 + overlap_1

# indicator column aligned to the rows of DF_structural; disease genes are set to 1
DF_dismod_1 = dismod_1.reindex(DF_structural.index, fill_value=0).to_frame()
module_rows_1 = [gene_id in l_genes_dismod_1 for gene_id in DF_dismod_1.index]
DF_dismod_1.loc[module_rows_1] = 1

### Second Disease Vector | DOID:10534 - Stomach Cancer

In [308]:
# Disease Ontology term of the second context vector (earlier candidates: 3883, 1612, 3490)
num_doid_2 = '10534'

# column of the disease feature matrix for this term
dismod_2 = FM_disease.loc[:, 'DOID:' + num_doid_2]
dismod_2.index = FM_disease.index

# genes flagged with 1 for this disease, plus their string form
dismod_genes_2 = [dismod_2.index[pos] for pos, flag in enumerate(dismod_2.values) if flag == 1]
l_genes_dismod_2 = [str(gene) for gene in dismod_genes_2]

# disease genes that are also in nf_all (ID checked as str first, then as int)
overlap_2 = [
    str(gene)
    for gene in dismod_genes_2
    if str(gene) in nf_all or int(gene) in nf_all
]

# disease genes followed by the overlapping ones (overlap entries therefore occur twice)
l_genes_dismod_2_with = l_genes_dismod_2 + overlap_2

# indicator column aligned to the rows of DF_structural;
# this cell marks the rows from the "_with" list
DF_dismod_2 = dismod_2.reindex(DF_structural.index, fill_value=0).to_frame()
module_rows_2 = [gene_id in l_genes_dismod_2_with for gene_id in DF_dismod_2.index]
DF_dismod_2.loc[module_rows_2] = 1

### Third Disease Vector | DOID:3620 - Central Nervous System Cancer

In [309]:
# Disease Ontology term of the third context vector (earlier candidate: 0060115)
num_doid_3 = '3620'

# column of the disease feature matrix for this term
dismod_3 = FM_disease.loc[:, 'DOID:' + num_doid_3]
dismod_3.index = FM_disease.index

# genes flagged with 1 for this disease, plus their string form
dismod_genes_3 = [dismod_3.index[pos] for pos, flag in enumerate(dismod_3.values) if flag == 1]
l_genes_dismod_3 = [str(gene) for gene in dismod_genes_3]

# disease genes that are also in nf_all (ID checked as str first, then as int)
overlap_3 = [
    str(gene)
    for gene in dismod_genes_3
    if str(gene) in nf_all or int(gene) in nf_all
]

# disease genes followed by the overlapping ones (overlap entries therefore occur twice)
l_genes_dismod_3_with = l_genes_dismod_3 + overlap_3

# indicator column aligned to the rows of DF_structural; disease genes are set to 1
DF_dismod_3 = dismod_3.reindex(DF_structural.index, fill_value=0).to_frame()
module_rows_3 = [gene_id in l_genes_dismod_3 for gene_id in DF_dismod_3.index]
DF_dismod_3.loc[module_rows_3] = 1

### Fourth Disease Vector | DOID:0060089 - Endocrine Organ benign neoplasm

In [310]:
# Disease Ontology term of the fourth context vector
num_doid_4 = '0060089'

# column of the disease feature matrix for this term
dismod_4 = FM_disease.loc[:, 'DOID:' + num_doid_4]
dismod_4.index = FM_disease.index
dismod_id_4 = 'DOID' + num_doid_4

# genes flagged with 1 for this disease, plus their string form
dismod_genes_4 = [dismod_4.index[pos] for pos, flag in enumerate(dismod_4.values) if flag == 1]
l_genes_dismod_4 = [str(gene) for gene in dismod_genes_4]
print('Number of Genes associated to DOID', num_doid_4,':',len(l_genes_dismod_4))

# disease genes that are also in nf_all (ID checked as str first, then as int)
overlap_4 = [
    str(gene)
    for gene in dismod_genes_4
    if str(gene) in nf_all or int(gene) in nf_all
]

# disease genes followed by the overlapping ones (overlap entries therefore occur twice)
l_genes_dismod_4_with = l_genes_dismod_4 + overlap_4

# indicator column aligned to the rows of DF_structural; disease genes are set to 1
DF_dismod_4 = dismod_4.reindex(DF_structural.index, fill_value=0).to_frame()
module_rows_4 = [gene_id in l_genes_dismod_4 for gene_id in DF_dismod_4.index]
DF_dismod_4.loc[module_rows_4] = 1

Number of Genes associated to DOID 0060089 : 122


# ALL DISEASE VECTORS / MATRIX PREP 

In [369]:
# Tunable scalar for the weight of the disease-context columns (earlier value: 2).
scalar_val = 3

# Enhancing factor = (1 - largest entry of the structural matrix) * scalar,
# rounded to 10 decimals.
headroom = 1-l_max_visprob
enhance_factor_discontext = round(headroom*scalar_val,10)
print('ENHANCING FACTOR Disease Context : ', enhance_factor_discontext)

ENHANCING FACTOR Disease Context :  0.281271


In [370]:
# Scale every indicator vector by the enhancing factor.
DF_dismod_scal = DF_dismod * enhance_factor_discontext                  # NF1 / Neurofibromatosis module
DF_dismod_neighbors_scal = DF_dismod_neigh * enhance_factor_discontext  # NF1 neighbor genes

DF_dismod_1_scal, DF_dismod_2_scal, DF_dismod_3_scal, DF_dismod_4_scal = (
    vector * enhance_factor_discontext
    for vector in (DF_dismod_1, DF_dismod_2, DF_dismod_3, DF_dismod_4)
)

# Column-wise merge: the global matrix first, then disease context vectors 1-4.
# DF_dismod_scal and DF_dismod_neighbors_scal are computed above but not merged.
discontext_parts = [
    DF_structural,
    DF_dismod_1_scal,
    DF_dismod_2_scal,
    DF_dismod_3_scal,
    DF_dismod_4_scal,
]
DF_merge_discontext = pd.concat(discontext_parts, axis=1)

DF_merge_discontext

Unnamed: 0,66008,8473,2561,3759,22906,4928,1994,8481,81610,51361,...,51458,143903,10861,51471,221044,29965,DOID:3165,DOID:10534,DOID:3620,DOID:0060089
66008,0.900242,0.000687,0.012865,0.006433,0.005347,0.000460,0.000051,0.000345,0.003464,0.002905,...,0.0,0.000000,0.000000,0.000000,0.00000,0.000000,0.0,0.0,0.0,0.0
8473,0.002519,0.900245,0.000036,0.000018,0.005316,0.000005,0.000002,0.000003,0.000012,0.000008,...,0.0,0.000000,0.000000,0.000000,0.00000,0.000000,0.0,0.0,0.0,0.0
2561,0.002502,0.000002,0.900367,0.000018,0.000015,0.000002,0.000000,0.000001,0.000010,0.000008,...,0.0,0.000001,0.000001,0.000000,0.00000,0.000000,0.0,0.0,0.0,0.0
3759,0.002502,0.000002,0.000036,0.900389,0.000015,0.000002,0.000000,0.000001,0.000010,0.000008,...,0.0,0.000000,0.000000,0.000000,0.00000,0.000000,0.0,0.0,0.0,0.0
22906,0.002525,0.000685,0.000036,0.000018,0.900120,0.000459,0.000000,0.000001,0.000010,0.000009,...,0.0,0.000000,0.000000,0.000000,0.00000,0.000000,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
143903,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.900070,0.000070,0.000000,0.00000,0.000000,0.0,0.0,0.0,0.0
10861,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.000070,0.900070,0.000000,0.00000,0.000000,0.0,0.0,0.0,0.0
51471,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.000000,0.000000,0.900052,0.00000,0.000000,0.0,0.0,0.0,0.0
221044,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.000000,0.000000,0.000000,0.90003,0.000000,0.0,0.0,0.0,0.0


_____ 
# 2 | SPATIAL EMBEDDING
_____

### 2D PORTRAIT POSITIONING 

In [371]:
# Parameters handed to layout_portrait_umap in the next cell.
metr = 'cosine'   # passed as metric
n_n = 10          # passed as n_neighbors (earlier values: 6, 20)
spr = 1.0         # passed as spread
md = 0.0          # passed as min_dist (earlier value: .4)

In [None]:
%%time 
# Compute the 2D portrait layout from the merged matrix (structural columns plus
# the four scaled disease-context columns) with the parameters set in the cell above.
# NOTE(review): no random seed is passed here; if layout_portrait_umap does not fix
# one internally, coordinates will differ between runs and the hand-picked circle
# radii used further down may select different genes -- confirm.
posG_umap2D = layout_portrait_umap(G,DF_merge_discontext, 2, 
                                   n_neighbors = n_n, 
                                   spread = spr, 
                                   min_dist = md, 
                                   metric = metr) 
# generic alias: downstream cells iterate posG.items() as (node, coords) pairs
posG = posG_umap2D

____________
# 3 | VISUAL SETTINGS + PLOT PREPARATION
____________

#### GENERAL NODE + EDGE COLORS 

In [None]:
# node outline and opacity
node_linewidth = 2
node_edge_col = '#696969'
opacity_nodes = 0.8

# node sizes as returned by draw_node_degree for the scaled-down factor
scale_factor = 0.55
d_node_sizes = draw_node_degree(G, scale_factor/100)
size_plotly = list(d_node_sizes.values())

### NF SPECIFIC

In [None]:
# Neurofibromatosis DOID8712 + NF1 neighbor genes

node_col_nf = '#00D9FC' #'#008792' #'#00E3DF' #'#FF8E00'
dismod_all = l_genes_dismod_1 + l_genes_dismod_2 + l_genes_dismod_3 + l_genes_dismod_4

center_nf_sym = 'NF1'

# node ID carrying the centre gene symbol (the last match wins if the symbol is not unique);
# raise explicitly instead of running into a NameError or silently reusing a stale
# `center_nf_id` left in the kernel by an earlier run
symbol_hits = [ix for ix, sym in d_gene_sym.items() if sym == center_nf_sym]
if not symbol_hits:
    raise ValueError('gene symbol not found in d_gene_sym: ' + center_nf_sym)
center_nf_id = symbol_hits[-1]

# layout coordinates of the centre node (direct lookup instead of scanning posG)
if center_nf_id not in posG:
    raise KeyError('centre node has no layout position: ' + str(center_nf_id))
center_nf_coords = posG[center_nf_id]

# make shapely point from defined center of cluster
point_nf = geometry.Point(center_nf_coords[0], center_nf_coords[1])

# buffer polygon of the given radius around the centre point
distance = 0.25
circle_buffer= point_nf.buffer(distance)

# NF1-associated genes (disease module + direct neighbours) that lie within the buffer;
# the membership set is built once instead of rebuilding the list for every node
nf_associated_ids = set(l_genes_nf+nf_neighbors)
l_genes_circlebuffer_nf = []
for node,coords in posG.items():
    if node not in nf_associated_ids:
        continue
    point = geometry.Point(coords[0], coords[1])
    if point.within(circle_buffer):
        l_genes_circlebuffer_nf.append(node)
print('Number of genes within radius around Center - NF:' ,len(l_genes_circlebuffer_nf))

# color edges within circle buffer
d_edge_col_circlebuffer_nf = color_edges_from_nodelist_specific(G, l_genes_circlebuffer_nf, node_col_nf)
print('Number of links in circle buffer - NF:', len(d_edge_col_circlebuffer_nf))

# color edges of all NF1-associated genes
d_edge_col_nf = color_edges_from_nodelist_specific(G, (l_genes_nf+nf_neighbors), node_col_nf)

### DISEASE SPECIFIC

In [None]:
#
# DISEASE 1 | ORANGE
#

# color nodes
node_col_1 = '#FF7001' # (orange) #'#B00160'

center_dis_1_sym = 'PTEN'

# node ID carrying the centre gene symbol (the last match wins if the symbol is not unique);
# raise explicitly instead of running into a NameError or reusing a stale kernel variable
symbol_hits = [ix for ix, sym in d_gene_sym.items() if sym == center_dis_1_sym]
if not symbol_hits:
    raise ValueError('gene symbol not found in d_gene_sym: ' + center_dis_1_sym)
center_dis_1_id = symbol_hits[-1]

# layout coordinates of the centre node (direct lookup instead of scanning posG)
if center_dis_1_id not in posG:
    raise KeyError('centre node has no layout position: ' + str(center_dis_1_id))
center_dis_1_coords = posG[center_dis_1_id]

# make shapely point from defined center of cluster
point_1 = geometry.Point(center_dis_1_coords[0], center_dis_1_coords[1])

# buffer polygon of the given radius around the centre point
distance = 0.08
circle_buffer = point_1.buffer(distance)

# disease genes that lie within the buffer (set lookup instead of a list scan per node)
module_ids_1 = set(l_genes_dismod_1)
l_genes_circlebuffer_1 = []
for node,coords in posG.items():
    if node not in module_ids_1:
        continue
    point = geometry.Point(coords[0], coords[1])
    if point.within(circle_buffer):
        l_genes_circlebuffer_1.append(node)

print('Number of genes within radius around Center - Disease 1:' ,len(l_genes_circlebuffer_1))

# edges whose two endpoints are both inside the buffer; every buffered gene is a
# disease gene by construction, so no separate disease-module check is needed
buffer_ids_1 = set(l_genes_circlebuffer_1)
l_links_circlebuffer_1 = [
    edge for edge in G.edges()
    if edge[0] in buffer_ids_1 and edge[1] in buffer_ids_1
]

# color edges within circle buffer
d_edge_col_circlebuffer_1 = color_edges_from_nodelist_specific(G, l_genes_circlebuffer_1, node_col_1)
print('Number of links in circle buffer - Disease 1:', len(d_edge_col_circlebuffer_1))

# color all dismod edges
d_edge_col_1 = color_edges_from_nodelist_specific(G, l_genes_dismod_1_with, node_col_1)

In [None]:
#
# DISEASE 2 | YELLOW
#

# color nodes
node_col_2 = '#FFC433' #(yellow)

center_dis_2_sym = 'TBX3'

# node ID carrying the centre gene symbol (the last match wins if the symbol is not unique);
# raise explicitly instead of running into a NameError or reusing a stale kernel variable
symbol_hits = [ix for ix, sym in d_gene_sym.items() if sym == center_dis_2_sym]
if not symbol_hits:
    raise ValueError('gene symbol not found in d_gene_sym: ' + center_dis_2_sym)
center_dis_2_id = symbol_hits[-1]

# layout coordinates of the centre node (direct lookup instead of scanning posG)
if center_dis_2_id not in posG:
    raise KeyError('centre node has no layout position: ' + str(center_dis_2_id))
center_dis_2_coords = posG[center_dis_2_id]

# make shapely point from defined center of cluster
point_2 = geometry.Point(center_dis_2_coords[0], center_dis_2_coords[1])

# buffer polygon of the given radius around the centre point
distance = 0.05
circle_buffer = point_2.buffer(distance)

# disease genes that lie within the buffer (set lookup instead of a list scan per node)
module_ids_2 = set(l_genes_dismod_2)
l_genes_circlebuffer_2 = []
for node,coords in posG.items():
    if node not in module_ids_2:
        continue
    point = geometry.Point(coords[0], coords[1])
    if point.within(circle_buffer):
        l_genes_circlebuffer_2.append(node)

print('Number of genes within radius around Center - Disease 2:' ,len(l_genes_circlebuffer_2))

# edges whose two endpoints are both inside the buffer; every buffered gene is a
# disease gene by construction, so no separate disease-module check is needed
buffer_ids_2 = set(l_genes_circlebuffer_2)
l_links_circlebuffer_2 = [
    edge for edge in G.edges()
    if edge[0] in buffer_ids_2 and edge[1] in buffer_ids_2
]

# color edges within circle buffer
d_edge_col_circlebuffer_2 = color_edges_from_nodelist_specific(G, l_genes_circlebuffer_2, node_col_2)
print('Number of links in circle buffer - Disease 2:', len(d_edge_col_circlebuffer_2))

# color all dismod edges
d_edge_col_2 = color_edges_from_nodelist_specific(G, l_genes_dismod_2_with, node_col_2)

In [None]:
#
# DISEASE 3 | GREEN
#

# color nodes
node_col_3 = '#BEDC00' # (yellowgreen)

center_dis_3_sym = 'MTR'

# node ID carrying the centre gene symbol (the last match wins if the symbol is not unique);
# raise explicitly instead of running into a NameError or reusing a stale kernel variable
symbol_hits = [ix for ix, sym in d_gene_sym.items() if sym == center_dis_3_sym]
if not symbol_hits:
    raise ValueError('gene symbol not found in d_gene_sym: ' + center_dis_3_sym)
center_dis_3_id = symbol_hits[-1]

# layout coordinates of the centre node (direct lookup instead of scanning posG)
if center_dis_3_id not in posG:
    raise KeyError('centre node has no layout position: ' + str(center_dis_3_id))
center_dis_3_coords = posG[center_dis_3_id]

# make shapely point from defined center of cluster
point_3 = geometry.Point(center_dis_3_coords[0], center_dis_3_coords[1])

# buffer polygon of the given radius around the centre point
distance = 0.08
circle_buffer = point_3.buffer(distance)

# disease genes that lie within the buffer (set lookup instead of a list scan per node)
module_ids_3 = set(l_genes_dismod_3)
l_genes_circlebuffer_3 = []
for node,coords in posG.items():
    if node not in module_ids_3:
        continue
    point = geometry.Point(coords[0], coords[1])
    if point.within(circle_buffer):
        l_genes_circlebuffer_3.append(node)

print('Number of genes within radius around Center - Disease 3:' ,len(l_genes_circlebuffer_3))

# edges whose two endpoints are both inside the buffer; every buffered gene is a
# disease gene by construction, so no separate disease-module check is needed
buffer_ids_3 = set(l_genes_circlebuffer_3)
l_links_circlebuffer_3 = [
    edge for edge in G.edges()
    if edge[0] in buffer_ids_3 and edge[1] in buffer_ids_3
]

# color edges within circle buffer
d_edge_col_circlebuffer_3 = color_edges_from_nodelist_specific(G, l_genes_circlebuffer_3, node_col_3)
print('Number of links in circle buffer - Disease 3:', len(d_edge_col_circlebuffer_3))

# color all dismod edges
d_edge_col_3 = color_edges_from_nodelist_specific(G, l_genes_dismod_3_with, node_col_3)

In [None]:
#
# DISEASE 4 | DARK RED
#

# color nodes
node_col_4 = '#992121' # '#A60C0C' # (darkred)

center_dis_4_sym = 'SLC26A3'

# node ID carrying the centre gene symbol (the last match wins if the symbol is not unique);
# raise explicitly instead of running into a NameError or reusing a stale kernel variable
symbol_hits = [ix for ix, sym in d_gene_sym.items() if sym == center_dis_4_sym]
if not symbol_hits:
    raise ValueError('gene symbol not found in d_gene_sym: ' + center_dis_4_sym)
center_dis_4_id = symbol_hits[-1]

# layout coordinates of the centre node (direct lookup instead of scanning posG)
if center_dis_4_id not in posG:
    raise KeyError('centre node has no layout position: ' + str(center_dis_4_id))
center_dis_4_coords = posG[center_dis_4_id]

# make shapely point from defined center of cluster
point_4 = geometry.Point(center_dis_4_coords[0], center_dis_4_coords[1])

# buffer polygon of the given radius around the centre point
distance = 0.1
circle_buffer = point_4.buffer(distance)

# disease genes that lie within the buffer (set lookup instead of a list scan per node)
module_ids_4 = set(l_genes_dismod_4)
l_genes_circlebuffer_4 = []
for node,coords in posG.items():
    if node not in module_ids_4:
        continue
    point = geometry.Point(coords[0], coords[1])
    if point.within(circle_buffer):
        l_genes_circlebuffer_4.append(node)

print('Number of genes within radius around Center - Disease 4:' ,len(l_genes_circlebuffer_4))

# edges whose two endpoints are both inside the buffer; every buffered gene is a
# disease gene by construction, so no separate disease-module check is needed
buffer_ids_4 = set(l_genes_circlebuffer_4)
l_links_circlebuffer_4 = [
    edge for edge in G.edges()
    if edge[0] in buffer_ids_4 and edge[1] in buffer_ids_4
]

# color edges within circle buffer
d_edge_col_circlebuffer_4 = color_edges_from_nodelist_specific(G, l_genes_circlebuffer_4, node_col_4)
print('Number of links in circle buffer - Disease 4:', len(d_edge_col_circlebuffer_4))

# color all dismod edges
d_edge_col_4 = color_edges_from_nodelist_specific(G, l_genes_dismod_4_with, node_col_4)

#### MERGE NODE COLORS > 4 diseases = 4 different colors 

In [None]:
# merge all node colors (and sort like G.nodes)

# background color: every node is colored by its closeness value on a light-grey ramp
col_param = list(df_centralities['closeness'].values)
d_node_colors = dict(zip(list(G.nodes()), col_param))

c1='#E6E6E6'
c2='#9B9B9B'
n=len(set(col_param))

# n+1 colors faded from c1 to c2
colors_for_palette = [colorFader(c1,c2,x/n) for x in range(n+1)]

# NOTE(review): sns.set_palette() returns None, so `customPalette` is None and the
# helper below presumably picks up the palette that was just set globally -- confirm.
customPalette = sns.set_palette(sns.color_palette(colors_for_palette))
d_colors = color_nodes_from_dict(G, d_node_colors, palette = customPalette)

# Highlight colors override the grey background color. The NF1 gene set and color are
# the ones defined in this notebook (`l_genes_nf`, `node_col_nf`); the previously used
# names `l_genes_dismod` / `node_col_dismod` are not defined anywhere in the visible cells.
nf_associated_ids = set(l_genes_nf) | set(nf_neighbors)
module_ids_1 = set(l_genes_dismod_1)
module_ids_2 = set(l_genes_dismod_2)
module_ids_3 = set(l_genes_dismod_3)
module_ids_4 = set(l_genes_dismod_4)

d_col_all = {}
for k,v in d_colors.items():

    # color for all NF1 associated genes (i.e. neighbors, NF1 disease module)
    if k in nf_associated_ids:
        d_col_all[k] = node_col_nf

    # disease module colors; a gene in several modules gets the first match (4, 2, 3, 1)
    elif k in module_ids_4:
        d_col_all[k] = node_col_4

    elif k in module_ids_2:
        d_col_all[k] = node_col_2

    elif k in module_ids_3:
        d_col_all[k] = node_col_3

    elif k in module_ids_1:
        d_col_all[k] = node_col_1

    else:
        d_col_all[k]=v

colors = list(d_col_all.values())

## PLOT 

In [None]:
# plot nodes based on focus > separate to background/foreground

def _split_by_focus(mapping, focus):
    """Split ``mapping`` into (entries whose key is in ``focus``, remaining entries), keeping order."""
    inside = {key: val for key, val in mapping.items() if key in focus}
    outside = {key: val for key, val in mapping.items() if key not in focus}
    return inside, outside


# all highlighted genes: NF1 module, the four disease modules and the NF1 neighbors
l_genes_all = l_genes_nf + l_genes_dismod_1 + l_genes_dismod_2 + l_genes_dismod_3 + l_genes_dismod_4 + nf_neighbors

# positions decide what is foreground; colors, labels and sizes follow that split
posG_foreground, posG_background = _split_by_focus(posG, l_genes_all)

d_colors_foreground, d_colors_background = _split_by_focus(d_col_all, posG_foreground)
colors_foreground = list(d_colors_foreground.values())
colors_background = list(d_colors_background.values())

d_feat_foreground, d_feat_background = _split_by_focus(d_gene_sym, posG_foreground)
feat_foreground = list(d_feat_foreground.values())
feat_background = list(d_feat_background.values())

d_size_plotly = draw_node_degree(G, scale_factor/100)
d_size_plotly_foreground, d_size_plotly_background = _split_by_focus(d_size_plotly, posG_foreground)
size_plotly_foreground = list(d_size_plotly_foreground.values())
size_plotly_background = list(d_size_plotly_background.values())

# -----------
# N O D E S
# -----------
# each group is drawn twice: once with its own sizes and once with a fixed size
umap_nodes_foreground = get_trace_nodes_2D(
    posG_foreground, feat_foreground, colors_foreground, size_plotly_foreground,
    node_linewidth*0.25, 0.8)
umap_nodes_foreground_ = get_trace_nodes_2D(
    posG_foreground, feat_foreground, colors_foreground, 5,
    None, 0.15)
umap_nodes_background = get_trace_nodes_2D(
    posG_background, feat_background, colors_background, size_plotly_background,
    None, 0.5)
umap_nodes_background_ = get_trace_nodes_2D(
    posG_background, feat_background, colors_background, 2,
    None, 0.5)

# -----------
# E D G E S
# -----------
# buffer edges are drawn as a thin line plus a wide, faint "shine" line
opac_shine = 0.08
lw_shine = 3.0
lw_buffer = 0.25
opac_buffer = 0.5

umap_edges_circlebuffer_1 = get_trace_edges_specific2D(d_edge_col_circlebuffer_1, posG, linew=lw_buffer, opac=opac_buffer)
umap_edges_circlebuffer_2 = get_trace_edges_specific2D(d_edge_col_circlebuffer_2, posG, linew=lw_buffer, opac=opac_buffer)
umap_edges_circlebuffer_3 = get_trace_edges_specific2D(d_edge_col_circlebuffer_3, posG, linew=lw_buffer, opac=opac_buffer)
umap_edges_circlebuffer_4 = get_trace_edges_specific2D(d_edge_col_circlebuffer_4, posG, linew=lw_buffer, opac=opac_buffer)

umap_edges_circlebuffer_1_shine = get_trace_edges_specific2D(d_edge_col_circlebuffer_1, posG, linew=lw_shine, opac=opac_shine)
umap_edges_circlebuffer_2_shine = get_trace_edges_specific2D(d_edge_col_circlebuffer_2, posG, linew=lw_shine, opac=opac_shine)
umap_edges_circlebuffer_3_shine = get_trace_edges_specific2D(d_edge_col_circlebuffer_3, posG, linew=lw_shine, opac=opac_shine)
umap_edges_circlebuffer_4_shine = get_trace_edges_specific2D(d_edge_col_circlebuffer_4, posG, linew=lw_shine, opac=opac_shine)

umap_edges_circlebuffer_nf = get_trace_edges_specific2D(d_edge_col_circlebuffer_nf, posG, linew=0.35, opac=0.65)
umap_edges_circlebuffer_nf_shine = get_trace_edges_specific2D(d_edge_col_circlebuffer_nf, posG, linew=lw_shine, opac=opac_shine)

# edges of the complete gene sets (only the NF one is part of the figure below)
opacity_edges = 0.09
width_edges = 0.25

umap_edges_nf = get_trace_edges_specific2D(d_edge_col_nf, posG, linew=0.25, opac=0.25)

umap_edges_1 = get_trace_edges_specific2D(d_edge_col_1, posG, linew=width_edges, opac=opacity_edges)
umap_edges_2 = get_trace_edges_specific2D(d_edge_col_2, posG, linew=width_edges, opac=opacity_edges)
umap_edges_3 = get_trace_edges_specific2D(d_edge_col_3, posG, linew=width_edges, opac=opacity_edges)
umap_edges_4 = get_trace_edges_specific2D(d_edge_col_4, posG, linew=width_edges, opac=opacity_edges)

# traces in list order: background nodes, shine edges, thin buffer edges,
# NF edges, foreground nodes
data = [
    # nodes background
    umap_nodes_background_,
    umap_nodes_background,

    # all edges of the disease modules (umap_edges_1 .. umap_edges_4) are left out

    # edges within radius
    umap_edges_circlebuffer_1_shine,
    umap_edges_circlebuffer_2_shine,
    umap_edges_circlebuffer_3_shine,
    umap_edges_circlebuffer_4_shine,

    umap_edges_circlebuffer_1,
    umap_edges_circlebuffer_2,
    umap_edges_circlebuffer_3,
    umap_edges_circlebuffer_4,

    umap_edges_nf,
    umap_edges_circlebuffer_nf_shine,
    umap_edges_circlebuffer_nf,

    # nodes foreground
    umap_nodes_foreground_,
    umap_nodes_foreground,
]

path = 'output_plots/2Dlayouts/'
schema = 'light' #'dark'
# A descriptive file name could be assembled from the DOIDs, scalar_val, n_n, spr, md,
# metr and organism; a placeholder name is used here.
fname = ' test'

plot_2D(data,path,fname)

# EXPORT POSITIONS > VRNetzer format

In [259]:
# Hand the current 2D layout (posG) and node colours to export_to_csv2D.
# NOTE(review): `colors` and `posG` are whatever earlier cells left in the
# kernel; the execution count of this cell is out of sequence, so confirm
# which colour list is meant to be exported.
path, layout_nmspce = 'output_plots/VRlayouts/', '2Dportrait_NF1'
cols = colors
export_to_csv2D(path, layout_nmspce, posG, cols)

# FIGURE - NF1 + 8712 + NEIGHBORS OF NF1
Highlighting the neurofibromatosis disease module (8712) and the neighbors of gene NF1.

In [175]:
# Build one colour per node: a grey gradient keyed on closeness centrality,
# overridden with node_col_dismod for the focus genes.

# Pair every node with its closeness value.
# assumes df_centralities rows are ordered like G.nodes() — TODO confirm
col_param = list(df_centralities['closeness'].values)
d_node_colors = dict(zip(G.nodes(), col_param))

# Grey ramp endpoints; one step per distinct closeness value (plus one).
c1, c2 = '#E6E6E6', '#9B9B9B'
n = len(set(col_param))
colors_for_palette = [colorFader(c1, c2, x/n) for x in range(n+1)]

# NOTE(review): sns.set_palette() returns None, so customPalette is None here.
# The palette therefore only takes effect through seaborn's global state;
# verify that color_nodes_from_dict handles palette=None as intended.
customPalette = sns.set_palette(sns.color_palette(colors_for_palette))
d_colors = color_nodes_from_dict(G, d_node_colors, palette = customPalette)

# Disease-module genes and NF1 neighbours share the same highlight colour;
# every other node keeps its gradient colour.
d_col_all = {
    k: (node_col_dismod if (k in l_genes_dismod or k in nf_neighbors) else v)
    for k, v in d_colors.items()
}

colors = list(d_col_all.values())

In [107]:
# Split positions, colours, labels and sizes into a foreground group (genes in
# l_genes_dismod) and a background group (all other nodes), then plot.
# NOTE(review): nf_neighbors receive the highlight colour in the previous cell
# but are drawn with the background traces here — confirm this is intended.

posG_foreground = {k: v for k, v in posG.items() if k in l_genes_dismod}
posG_background = {k: v for k, v in posG.items() if k not in posG_foreground}

d_colors_foreground = {i: c for i, c in d_col_all.items() if i in posG_foreground}
d_colors_background = {i: c for i, c in d_col_all.items() if i not in posG_foreground}
colors_foreground = list(d_colors_foreground.values())
colors_background = list(d_colors_background.values())

d_feat_foreground = {k: v for k, v in d_gene_sym.items() if k in posG_foreground}
d_feat_background = {k: v for k, v in d_gene_sym.items() if k not in posG_foreground}
feat_foreground = list(d_feat_foreground.values())
feat_background = list(d_feat_background.values())

d_size_plotly = draw_node_degree(G, scale_factor/100)
d_size_plotly_foreground = {k: v for k, v in d_size_plotly.items() if k in posG_foreground}
d_size_plotly_background = {k: v for k, v in d_size_plotly.items() if k not in posG_foreground}
size_plotly_foreground = list(d_size_plotly_foreground.values())
size_plotly_background = list(d_size_plotly_background.values())

# Node traces. The trailing-underscore variants use a fixed marker size
# instead of the per-node size list.
# The last three positional arguments are presumably (size, outline width,
# opacity) — confirm against get_trace_nodes_2D.
umap_nodes_foreground = get_trace_nodes_2D(
    posG_foreground, feat_foreground, colors_foreground,
    size_plotly_foreground, node_linewidth*0.25, 0.8)
umap_nodes_foreground_ = get_trace_nodes_2D(
    posG_foreground, feat_foreground, colors_foreground,
    5, None, 0.2)
umap_nodes_background = get_trace_nodes_2D(
    posG_background, feat_background, colors_background,
    size_plotly_background, None, 0.5)
umap_nodes_background_ = get_trace_nodes_2D(
    posG_background, feat_background, colors_background,
    2, None, 0.5)

# Edge traces: a fixed opacity of 0.5 is used here rather than opacity_edges.
umap_edges_dismod = get_trace_edges_specific2D(
    d_edge_col_dismod, posG, linew=width_edges, opac=0.5)
umap_edges_dismod_neigh = get_trace_edges_specific2D(
    d_edge_col_neigh, posG, linew=width_edges, opac=0.5)

# List order is kept from the original: edges, then background, then foreground.
data = [
    umap_edges_dismod,
    umap_edges_dismod_neigh,
    umap_nodes_background_,
    umap_nodes_background,
    umap_nodes_foreground_,
    umap_nodes_foreground,
]

path = 'output_plots/2Dlayouts/'
schema = 'light' #'dark'

# File name encodes the DOIDs and every layout parameter used for this figure.
fname_NF1links = ''.join([
    '2Dportrait_NF8712_', 'DOIDs',
    num_doid_1, '_', num_doid_2, '_', num_doid_3, '_', num_doid_4,
    '_enhDismod', str(scalar_val_dismod),
    '_enhContext', str(scalar_val_discontext),
    '_nn', str(n_n),
    '_spr', str(spr),
    '_md', str(md),
    '_', str(metr),
    '_', organism,
    '_NF1links',
])

plot_2D(data,path,fname_NF1links)

'output_plots/2Dlayouts/2Dportrait_NF8712_DOIDs3165_10534_3620_0060089_enhDismod2_enhContext2_nn10_spr1.0_md0.0_cosine_human_NF1links.html'

# FIGURE - ONLY NF1 + 8712
Highlighting only the neurofibromatosis disease module (8712).

In [109]:
# Build one colour per node: a grey gradient keyed on closeness centrality,
# overridden with node_col_dismod for the genes in l_genes_dismod.

# assumes df_centralities rows are ordered like G.nodes() — TODO confirm
col_param = list(df_centralities['closeness'].values)
d_node_colors = dict(zip(G.nodes(), col_param))

# Grey ramp endpoints; one step per distinct closeness value (plus one).
c1, c2 = '#E6E6E6', '#9B9B9B'
n = len(set(col_param))
colors_for_palette = [colorFader(c1, c2, x/n) for x in range(n+1)]

# NOTE(review): sns.set_palette() returns None, so customPalette is None here.
# The palette therefore only takes effect through seaborn's global state;
# verify that color_nodes_from_dict handles palette=None as intended.
customPalette = sns.set_palette(sns.color_palette(colors_for_palette))
d_colors = color_nodes_from_dict(G, d_node_colors, palette = customPalette)

# Only the disease-module genes are recoloured in this variant.
d_col_all = {
    k: (node_col_dismod if k in l_genes_dismod else v)
    for k, v in d_colors.items()
}

colors = list(d_col_all.values())

In [111]:
# Split positions, colours, labels and sizes into a foreground group (genes in
# l_genes_dismod) and a background group (all other nodes), then plot the
# nodes without any edge trace.

posG_foreground = {k: v for k, v in posG.items() if k in l_genes_dismod}
posG_background = {k: v for k, v in posG.items() if k not in posG_foreground}

d_colors_foreground = {i: c for i, c in d_col_all.items() if i in posG_foreground}
d_colors_background = {i: c for i, c in d_col_all.items() if i not in posG_foreground}
colors_foreground = list(d_colors_foreground.values())
colors_background = list(d_colors_background.values())

d_feat_foreground = {k: v for k, v in d_gene_sym.items() if k in posG_foreground}
d_feat_background = {k: v for k, v in d_gene_sym.items() if k not in posG_foreground}
feat_foreground = list(d_feat_foreground.values())
feat_background = list(d_feat_background.values())

d_size_plotly = draw_node_degree(G, scale_factor/100)
d_size_plotly_foreground = {k: v for k, v in d_size_plotly.items() if k in posG_foreground}
d_size_plotly_background = {k: v for k, v in d_size_plotly.items() if k not in posG_foreground}
size_plotly_foreground = list(d_size_plotly_foreground.values())
size_plotly_background = list(d_size_plotly_background.values())

# Node traces. The trailing-underscore variants use a fixed marker size
# instead of the per-node size list.
# The last three positional arguments are presumably (size, outline width,
# opacity) — confirm against get_trace_nodes_2D.
umap_nodes_foreground = get_trace_nodes_2D(
    posG_foreground, feat_foreground, colors_foreground,
    size_plotly_foreground, node_linewidth*0.25, 0.8)
umap_nodes_foreground_ = get_trace_nodes_2D(
    posG_foreground, feat_foreground, colors_foreground,
    5, None, 0.2)
umap_nodes_background = get_trace_nodes_2D(
    posG_background, feat_background, colors_background,
    size_plotly_background, None, 0.5)
umap_nodes_background_ = get_trace_nodes_2D(
    posG_background, feat_background, colors_background,
    2, None, 0.5)

# The module edge trace is still built (fixed width 0.5, opacity 0.9) but is
# left out of `data` below, matching the "_nolinks" file name.
umap_edges_dismod = get_trace_edges_specific2D(
    d_edge_col_dismod, posG, linew=0.5, opac=0.9)

data = [
    umap_nodes_background_,
    umap_nodes_background,
    umap_nodes_foreground_,
    umap_nodes_foreground,
]

path = 'output_plots/2Dlayouts/'
schema = 'light' #'dark'

# File name encodes the DOIDs and every layout parameter used for this figure.
fname_NF1module = ''.join([
    '2Dportrait_NF8712_', 'DOIDs',
    num_doid_1, '_', num_doid_2, '_', num_doid_3, '_', num_doid_4,
    '_enhDismod', str(scalar_val_dismod),
    '_enhContext', str(scalar_val_discontext),
    '_nn', str(n_n),
    '_spr', str(spr),
    '_md', str(md),
    '_', str(metr),
    '_', organism,
    '_NF1module_nolinks',
])

plot_2D(data,path,fname_NF1module)

'output_plots/2Dlayouts/2Dportrait_NF8712_DOIDs3165_10534_3620_0060089_enhDismod2_enhContext2_nn10_spr1.0_md0.0_cosine_human_NF1module_nolinks.html'

# VERY MUCH WORK IN PROGRESS 

# 4. | STRUC + FUNC (complete disease matrix)

In [None]:
# Disease feature matrix for the selected organism; the first CSV column
# becomes the row index.
FM_disease = pd.read_csv(
    'input/Features_Disease_Dataframe_'+organism+'.csv',
    index_col=0,
)

In [None]:
# --- tunable -----------------------------------------------------------
# Multiplier for the functional-feature weight computed below.
scalar_val = 4
# -----------------------------------------------------------------------

# Weight for emphasizing functional features: should be >= the max of the
# structural matrix values. If it is significantly higher than that max, nodes
# with enhanced functional features become isolated in the layout.
enhance_factor = round(scalar_val*(1-l_max_visprob), 10)
print('ENHANCING FACTOR functional features: ',enhance_factor)

# UMAP parameters, passed to layout_portrait_umap further down as
# n_neighbors, spread, min_dist and metric.
n_n, spr, md, metr = 14, 1, 0.0, 'cosine'

In [None]:
# Build an all-zero feature block for the graph nodes that have no row in the
# disease feature matrix (genes not associated with any disease).

# A node counts as present if its id matches the index either as str or as int.
# int(i) is only evaluated when the str lookup fails (short-circuit `or`).
rest = [
    str(i)
    for i in G.nodes()
    if not (str(i) in FM_disease.index or int(i) in FM_disease.index)
]

# One zero row per remaining gene, one column per disease feature.
empty = np.zeros(shape=(len(rest), len(FM_disease.columns)))

# The original passed `empty` through np.vstack(), which returns an equal 2-D
# array for non-empty input but raises ValueError when `rest` is empty (every
# node already has a row). The array is used directly instead; the old name is
# kept as an alias in case other cells refer to it.
empty_stacked = empty

df_empty = pd.DataFrame(empty_stacked, index=rest, columns=FM_disease.columns)

In [None]:
# Stack the zero rows under the disease feature matrix so that every graph
# node has a feature row; the last line displays the result.
df_diseases_complete = pd.concat([FM_disease, df_empty], axis=0)
df_diseases_complete

In [None]:
# Bring the disease features into the row order of the structural matrix.
DF_diseases_reorder = df_diseases_complete.reindex(index=DF_structural.index)

# Labels that were not found during the reindex come back as NaN rows and are
# filled with 1.
# NOTE(review): such rows become all-ones (then scaled), not zeros — confirm
# this is intended, e.g. if index dtypes (str vs int) do not match.
DF_diseases_new = DF_diseases_reorder.replace(np.nan, 1)

# Scale the functional features by the enhancement factor.
DF_diseases_scal = DF_diseases_new.mul(enhance_factor)

# Place structural (transposed) and scaled disease features side by side.
DF_merge_diseases = pd.concat(
    [DF_structural.T, DF_diseases_scal],
    axis=1,
)

DF_merge_diseases

### 2D PORTRAIT

In [None]:
# UMAP parameters for the 2D portrait. n_n was set to 14 a few cells earlier;
# when the notebook is run top to bottom, the value assigned here (12) is the
# one the layout call sees.
n_n, spr, md, metr = 12, 1, 0.0, 'cosine'

In [None]:
%%time 

posG_umap2D = layout_portrait_umap(G,DF_merge_diseases, 2,
                                   n_neighbors = n_n,
                                   spread = spr,
                                   min_dist = md,
                                   metric = metr) 
posG = posG_umap2D

## VISUAL SETTINGS + PLOT PREPARATION

#### NODES - GENERAL

In [None]:
# General node styling used by the plotting cells.
opacity_nodes, node_edge_col, node_linewidth = 0.8, '#696969', 1

# Per-node marker sizes from draw_node_degree, called with scale_factor/100.
scale_factor = 0.5
size_plotly = [*draw_node_degree(G, scale_factor/100).values()]

In [None]:
# Colour setup: genes in `enhance_genes` get the highlight colour.
color_method = 'NF1'
l_genes = enhance_genes

# Highlight colour (alternative tried earlier: '#FF8E00'); edges reuse it.
node_col = '#00E3DF'
edge_color = node_col

# Grey values for undefined / remaining nodes (not referenced within this cell).
undefined_col = rest_col_nodes = '#DEDEDE'

# Edge styling consumed by the edge trace in the plotting cell.
width_edges, opacity_edges = 0.25, 0.5

d_col_all = color_nodes_from_list(G, l_genes, node_col)
colors = list(d_col_all.values())

d_edge_col = color_edges_from_nodelist_specific(G, l_genes, node_col)

## PLOT NETWORK PORTRAIT - plotly 

In [None]:
# Split positions, colours, labels and sizes into a foreground group (genes in
# l_genes) and a background group (all other nodes) for layered plotting.

posG_foreground = {k: v for k, v in posG.items() if k in l_genes}
posG_background = {k: v for k, v in posG.items() if k not in posG_foreground}

d_colors_foreground = {i: c for i, c in d_col_all.items() if i in posG_foreground}
d_colors_background = {i: c for i, c in d_col_all.items() if i not in posG_foreground}
colors_foreground = list(d_colors_foreground.values())
colors_background = list(d_colors_background.values())

d_feat_foreground = {k: v for k, v in d_gene_sym.items() if k in posG_foreground}
d_feat_background = {k: v for k, v in d_gene_sym.items() if k not in posG_foreground}
feat_foreground = list(d_feat_foreground.values())
feat_background = list(d_feat_background.values())

d_size_plotly = draw_node_degree(G, scale_factor/100)
d_size_plotly_foreground = {k: v for k, v in d_size_plotly.items() if k in posG_foreground}
d_size_plotly_background = {k: v for k, v in d_size_plotly.items() if k not in posG_foreground}
size_plotly_foreground = list(d_size_plotly_foreground.values())
size_plotly_background = list(d_size_plotly_background.values())

In [None]:
# Node traces: foreground with the full outline width, background with a
# thinner one, plus a fixed-size (0.5) background layer.
umap_nodes_foreground = get_trace_nodes_2D(
    posG_foreground, feat_foreground, colors_foreground,
    size_plotly_foreground, node_linewidth)
umap_nodes_background = get_trace_nodes_2D(
    posG_background, feat_background, colors_background,
    size_plotly_background, node_linewidth*0.25)
umap_nodes_background_ = get_trace_nodes_2D(
    posG_background, feat_background, colors_background,
    0.5, node_linewidth*0.5)

# Edges coloured via d_edge_col, drawn with the configured width and opacity.
umap_edges = get_trace_edges_specific2D(
    d_edge_col, posG, linew=width_edges, opac=opacity_edges)

# List order is kept from the original: edges, background nodes, foreground nodes.
data = [
    umap_edges,
    umap_nodes_background_,
    umap_nodes_background,
    umap_nodes_foreground,
]

path = 'output_plots/2Dlayouts/'
schema = 'light' #'dark'

# File name encodes the enhancement factor, colour method and UMAP parameters.
fname = ''.join([
    '2Dportrait_NetlayoutDisease_DiseaseFeatureMatrix_enhance', str(enhance_factor),
    '_', color_method,
    '_nn', str(n_n),
    '_spr', str(spr),
    '_md', str(md),
    '_', str(metr),
    '_', organism,
])

plot_2D(data,path,fname)