-----
# cartoGRAPHs - FIGURE 2A - Spring addition
Precalculated large networks, e.g. the Interactome

A notebook to produce 2D and 3D network layouts from any graph,
including interactive visualizations (html files) and export functions
for importing the layouts into the VRNetzer analytics platform by Pirch et al.

-----

In [1]:
from cartoGRAPHs import * 

DEBUG: in init: import modules
DEBUG:in cartographs
DEBUG:in cartographs - import done
DEBUG: in init: import done


_____
# HUMAN NETWORK
_____

In [2]:
# Load the network for the chosen organism together with its gene symbols.
organism = 'human'

G = load_graph(organism)

d_gene_sym = load_genesymbols(G, organism)
l_features = list(d_gene_sym.values())


def _load_pickle(path):
    """Return the object unpickled from `path`; the file handle is closed afterwards."""
    # SECURITY: pickle.load can execute arbitrary code - only load trusted input files.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Pickled lookup tables from the input/ folder
# (the "do" in the names presumably stands for Disease Ontology - TODO confirm).
d_gene_do = _load_pickle("input/d_gene_do.pkl")
d_do_genes = _load_pickle("input/d_do_genes.pkl")
d_do_names = _load_pickle("input/DO_names.pkl")
# Inverse of d_do_names: its values become the keys.
d_names_do = {y: x for x, y in d_do_names.items()}

In [3]:
# Centrality values are loaded as a dict and turned into a transposed DataFrame
# whose four columns are named below.
centrality_columns = ['degree','closeness', 'betweeness', 'eigenvector']
d_centralities = load_centralities(G, organism)
df_centralities = pd.DataFrame(d_centralities).T
df_centralities.columns = centrality_columns

# Three gene groups returned by load_essentiality.
(essential_genes,
 non_ess_genes,
 notdefined_genes) = load_essentiality(G, organism)

_____ 
# 2 | FIGURE 2A supplementary | Highlighted functional genes with Force-based Layout
_____

In [4]:
%%time 

# NOTE(review): `iteration` is not passed to nx.spring_layout in this cell, so the
# layout runs with the library's default number of iterations - confirm the intent.
iteration = 5

# Fixed seed: spring_layout starts from random positions, so without a seed the
# portrait differs on every "Restart Kernel -> Run All".
layout_seed = 42
posG_spring2D = nx.spring_layout(G, dim = 2, seed = layout_seed)

# posG is the layout that the plotting cells further down read.
posG = posG_spring2D

CPU times: user 10min 28s, sys: 7.31 s, total: 10min 36s
Wall time: 10min 20s


## VISUAL SETTINGS + PLOT PREPARATION

#### NODES - GENERAL

In [5]:
# --- node outline and transparency ---
node_linewidth = 1
node_edge_col = '#696969'
opacity_nodes = 0.8

# --- node sizes for the 2D (plotly) plot, taken from draw_node_degree ---
scale_factor = 0.5
#size_nx = list(draw_node_degree(G, scale_factor).values())
node_sizes_2d = draw_node_degree(G, scale_factor/100)
size_plotly = list(node_sizes_2d.values())

# --- node sizes for the 3D plot, taken from draw_node_degree_3D ---
scale_factor3D = 0.025
node_sizes_3d = draw_node_degree_3D(G, scale_factor3D)
size3d = list(node_sizes_3d.values())

# --- glow settings ---
nodesglow_transparency = 0.01 # 0.01
nodesglow_diameter = 8.0

#### ESSENTIALITY color settings 

In [6]:
# Highlight the essential genes and the links between them.
color_method = 'essentiality'
l_genes = essential_genes

# NOTE(review): both grey values end in a blank ('#DEDEDE ') - confirm this is intended.
node_col = '#00abff'
undefined_col = '#DEDEDE '
rest_col_nodes = '#DEDEDE '

# Per-node colours; l_genes is the same list object as essential_genes here.
d_col_all = color_nodes_from_list(G, l_genes, node_col)
colors = list(d_col_all.values())

# Edge styling: highlighted edges share the node highlight colour.
edge_color = node_col
opacity_edges = 0.25
width_edges = 0.25
d_edge_col = color_edges_from_nodelist_specific(G, l_genes, node_col)

n_essential_genes = len(essential_genes)
n_essential_links = len(d_edge_col)
print('Amount of Essential Genes', n_essential_genes)
print('Links inbetween Essential Genes', n_essential_links)

Amount of Essential Genes 2911
Links inbetween Essential Genes 9627


#### DEVELOPMENTAL color settings 

In [24]:
# Pick one developmental stage; the cell does nothing for any other value.
color_method = '8cell'

# Stages for which a gene list is read from input/EmExplorer/.
DEV_STAGES = ('oocyte', '2cell', '4cell', '8cell')

if color_method in DEV_STAGES:

    node_col = '#0181A3'
    devgenes_selection = color_method
    df_devgenes = pd.read_csv('input/EmExplorer/'+devgenes_selection+'_Homo-sapiens.txt', delimiter = "\t")

    # NOTE(review): the gene symbols are taken from the column named 'stage' - confirm
    # against the EmExplorer file header.
    devgenes = list(df_devgenes['stage'])
    # Set copy for constant-time membership tests in the mapping below.
    devgene_symbols = set(devgenes)

    # symbol -> key of d_gene_sym (used as Entrez ID further down); if several keys
    # share one symbol the last one wins, exactly as a plain loop assignment would.
    d_devgenes_entrezid = {
        symbol: entrez_id
        for entrez_id, symbol in d_gene_sym.items()
        if symbol in devgene_symbols
    }
    devgenes_entrezid = list(d_devgenes_entrezid.values())

    l_genes = devgenes_entrezid

    d_col_all = color_nodes_from_list(G, l_genes, node_col)
    colors = list(d_col_all.values())

    # Highlighted edges share the node highlight colour.
    edge_color = node_col
    d_edge_col = color_edges_from_nodelist_specific(G, l_genes, node_col)
    width_edges = 0.25
    opacity_edges = 0.5

In [17]:
# Summary of the developmental selection; the recorded output of this cell comes
# from a run with color_method = '2cell'.
n_dev_genes = len(devgenes_entrezid)
n_dev_links = len(d_edge_col)
print('Amount of Developmental Genes ('+ color_method + ')', n_dev_genes)
print('Links inbetween Developmental Genes ('+ color_method + ')', n_dev_links)

Amount of Developmental Genes (2cell) 49
Links inbetween Developmental Genes (2cell) 93


In [21]:
# Summary of the developmental selection; the recorded output of this cell comes
# from a run with color_method = '4cell'.
n_dev_genes = len(devgenes_entrezid)
n_dev_links = len(d_edge_col)
print('Amount of Developmental Genes ('+ color_method + ')', n_dev_genes)
print('Links inbetween Developmental Genes ('+ color_method + ')', n_dev_links)

Amount of Developmental Genes (4cell) 106
Links inbetween Developmental Genes (4cell) 342


In [25]:
# Summary of the developmental selection; the recorded output of this cell comes
# from a run with color_method = '8cell'.
n_dev_genes = len(devgenes_entrezid)
n_dev_links = len(d_edge_col)
print('Amount of Developmental Genes ('+ color_method + ')', n_dev_genes)
print('Links inbetween Developmental Genes ('+ color_method + ')', n_dev_links)

Amount of Developmental Genes (8cell) 38
Links inbetween Developmental Genes (8cell) 84


#### CANCER color settings 

In [10]:
# Highlight the genes returned by get_disease_genes for the chosen category.
disease_category = 'cancer'
color_method = disease_category

disease_genes = get_disease_genes(G, d_names_do, d_do_genes, disease_category)
l_genes = disease_genes

# Node colours; l_genes is the same list object as disease_genes here.
node_col = '#00C9FF'
d_col_all = color_nodes_from_list(G, l_genes, node_col)
colors = list(d_col_all.values())

# Edge styling: highlighted edges share the node highlight colour.
edge_color = node_col
width_edges = 0.25
opacity_edges = 0.25
d_edge_col = color_edges_from_nodelist_specific(G, l_genes, node_col)

n_cancer_genes = len(disease_genes)
n_cancer_links = len(d_edge_col)
print('Amount of Cancer genes:' , n_cancer_genes)
print('Links inbetween Cancer genes', n_cancer_links)

Amount of Cancer genes: 2499
Links inbetween Cancer genes 26135


#### RARE DISEASE genes color settings 

In [13]:
import xml.etree.ElementTree as ET
tree = ET.parse('input/orphadata.xml')
root = tree.getroot()

# retrieve genes from all rare diseases from database/file
# disorder_to_genes: first attribute value of each <Disorder> -> list of Ensembl IDs
disorder_to_genes = {}
for disorder in root.iter('Disorder'):
    dis = list(disorder.attrib.values())[0]
    # Start with an empty list for every disorder, so that a disorder without a
    # gene association list does not inherit the genes of the previous disorder.
    genes = []
    for elem in disorder.findall('DisorderGeneAssociationList'):
        for disorderassoc in elem.findall('DisorderGeneAssociation'):
            # Reset per association, so that a gene without an Ensembl reference
            # does not re-append the ID found for an earlier association.
            ensgID = None
            for refs in disorderassoc.findall('Gene/ExternalReferenceList/ExternalReference'):
                # Only external references whose <Source> is 'Ensembl' carry the ID.
                if any(source.text == 'Ensembl' for source in refs.findall('Source')):
                    for e in refs.findall('Reference'):
                        ensgID = e.text
            if ensgID is not None:
                genes.append(ensgID)
    disorder_to_genes[dis] = genes

    
# Flatten the per-disorder gene lists into one list (order of disorder_to_genes kept)
all_raredisease_genes = [
    gene_id
    for gene_ids in disorder_to_genes.values()
    for gene_id in gene_ids
]

# Same IDs without duplicates
set_all_raredisease_genes = set(all_raredisease_genes)


# convert ensgID to entrezID and get gene symbols
# The mapping table is tab-separated with the columns 'From' and 'To'.
ensg_to_entrez = pd.read_csv('input/ensg_to_entrezid.txt', sep="\t")

ensg = list(ensg_to_entrez['From'])
# NOTE(review): the last 'To' value is dropped ([:-1]); because zip() stops at the
# shorter list, the last 'From' entry is left out of the mapping as well - confirm
# that this is intended (e.g. an unusable final row).
entrez = [str(int(i)) for i in ensg_to_entrez['To'][:-1]]
d_ensg_entrez = dict(zip(ensg, entrez))


# match ensgID with entrezID
# Despite its name this is a dict: ensgID -> entrezID for all rare disease genes.
# The comprehension keeps the `entrez` list above intact instead of overwriting it
# with a loop variable of the same name.
set_all_raredisease_genes_id = {
    ensg_id: entrez_id
    for ensg_id, entrez_id in d_ensg_entrez.items()
    if ensg_id in set_all_raredisease_genes
}

        
# Highlight all rare disease genes and the links between them
color_method = 'rarediseasegenes'
l_rarediseasegenes = list(set_all_raredisease_genes_id.values())
l_genes = l_rarediseasegenes

# NOTE(review): both grey values end in a blank ('#DEDEDE ') - confirm this is intended.
node_col =  '#004EAB'#'#0A81A1'
undefined_col = '#DEDEDE '
rest_col_nodes = '#DEDEDE '

d_col_all = color_nodes_from_list(G, l_genes, node_col)
colors = list(d_col_all.values())

# Edge styling: highlighted edges share the node highlight colour.
edge_color = node_col
opacity_edges = 0.25
width_edges = 0.25
d_edge_col = color_edges_from_nodelist_specific(G, l_genes, node_col)

n_rare_genes = len(l_rarediseasegenes)
n_rare_links = len(d_edge_col)
print('Amount of Rare disease genes:' , n_rare_genes)
print('Links inbetween Rare disease genes:' , n_rare_links)

Amount of Rare disease genes: 4181
Links inbetween Rare disease genes: 33744


## PLOT NETWORK PORTRAIT - plotly 

In [26]:
# plot nodes based on focus > separate to background/foreground
# Nodes listed in l_genes go to the foreground, all others to the background.
# l_genes is a list; a set copy gives constant-time lookups instead of scanning
# the whole list once per node.
focus_nodes = set(l_genes)

posG_foreground = {}
posG_background = {}
for k, v in posG.items():
    if k in focus_nodes:
        posG_foreground[k] = v
    else:
        posG_background[k] = v
    
# Split the node colours the same way as the positions: keys that are present in
# posG_foreground belong to the foreground, every other key to the background.
# NOTE(review): the value lists line up with the position dicts only if d_col_all
# iterates its keys in the same order as posG - confirm.
d_colors_foreground = {
    node: col for node, col in d_col_all.items() if node in posG_foreground
}
d_colors_background = {
    node: col for node, col in d_col_all.items() if node not in posG_foreground
}

colors_foreground = list(d_colors_foreground.values())
colors_background = list(d_colors_background.values())

# Split the entries of d_gene_sym into foreground / background by the same key test.
# NOTE(review): the value lists line up with the position dicts only if d_gene_sym
# iterates its keys in the same order as posG - confirm.
d_feat_foreground = {
    node: symbol for node, symbol in d_gene_sym.items() if node in posG_foreground
}
d_feat_background = {
    node: symbol for node, symbol in d_gene_sym.items() if node not in posG_foreground
}

feat_foreground = list(d_feat_foreground.values())
feat_background = list(d_feat_background.values())

# Node sizes from draw_node_degree, split into foreground / background by the same key test.
d_size_plotly = draw_node_degree(G, scale_factor/100)

d_size_plotly_foreground = {
    node: size for node, size in d_size_plotly.items() if node in posG_foreground
}
d_size_plotly_background = {
    node: size for node, size in d_size_plotly.items() if node not in posG_foreground
}

size_plotly_foreground = list(d_size_plotly_foreground.values())
size_plotly_background = list(d_size_plotly_background.values())

In [27]:
# Output folder, colour schema and file name for the 2D portrait.
path = 'output_plots/2Dlayouts/'
schema = 'light' #'dark'
fname = '2Dportrait_NetlayoutImportance_'+color_method+'_'+organism

# Background nodes get two traces: one with the constant value 0.5 in place of the
# size list, one with the per-node sizes and a quarter of the foreground line width.
nodes_background_ = get_trace_nodes_2D(posG_background, feat_background, colors_background, 0.5, node_linewidth*0.5)
nodes_background = get_trace_nodes_2D(
    posG_background, feat_background, colors_background, size_plotly_background,
    node_linewidth*0.25)
nodes_foreground = get_trace_nodes_2D(
    posG_foreground, feat_foreground, colors_foreground, size_plotly_foreground,
    node_linewidth)

edges = get_trace_edges_specific2D(d_edge_col, posG, linew=width_edges, opac=opacity_edges)

# List order: edges first, then both background traces, foreground nodes last.
data = [
    edges,
    nodes_background_,
    nodes_background,
    nodes_foreground,
]

plot_2D(data,path,fname)

'output_plots/2Dlayouts/2Dportrait_NetlayoutImportance_8cell_human.html'