-----
# cartoGRAPHs - FIGURE 2D
-----

In [1]:
from cartoGRAPHs import *

  from .autonotebook import tqdm as notebook_tqdm
2023-10-20 19:40:38.627029: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


DEBUG: in init: import done


_____
# HUMAN PPI NETWORK
_____

In [2]:
# Load the interactome together with its precomputed per-node annotations.
organism = 'human'
G = load_graph(organism)

# Centralities: each value holds four entries, in the order named by the
# column list below (index 1 is the closeness centrality).
d_centralities = load_centralities(G, organism)
df_centralities = pd.DataFrame(d_centralities).T
df_centralities.columns = ['degree','closeness', 'betweeness', 'eigenvector']
essential_genes,non_ess_genes,notdefined_genes = load_essentiality(G, organism)

d_gene_sym = load_genesymbols(G, organism)
l_features = list(d_gene_sym.values())

# Human specific > to include Disease Ontology.
# The mappings default to empty dicts so that later cells referencing them do
# not fail with a NameError when `organism` is changed.
d_gene_do, d_do_genes, d_do_names = {}, {}, {}
if organism == 'human':
    # NOTE: pickle can execute arbitrary code while loading; only use trusted files.
    # `with` guarantees the file handles are closed again.
    with open("input/d_gene_do.pkl", "rb") as fh:
        d_gene_do = pickle.load(fh)
    with open("input/d_do_genes.pkl", "rb") as fh:
        d_do_genes = pickle.load(fh)
    with open("input/DO_names.pkl", "rb") as fh:
        d_do_names = pickle.load(fh)
d_names_do = {y:x for x,y in d_do_names.items()}

# Closeness centrality per key of d_centralities (used later for node colouring).
d_clos = {k: v[1] for k, v in d_centralities.items()}

In [3]:
# Graph metadata. G.name is reused further down to build the VR export file name.
G.name = "Human Interactome"
# Free-text description stored in the graph attribute dict.
# NOTE(review): presumably picked up by the VR exporters — confirm against exportVR_*.
G.graph["desc"] = "The human interactome is a complex, dynamic network of protein-protein interactions that are essential for many biological processes. It is estimated that the human interactome consists of hundreds of thousands of interactions between thousands of proteins. These interactions can take many forms, including direct physical binding, allosteric regulation, and post-translational modifications. Many proteins participate in multiple interactions, forming highly interconnected clusters or modules within the network. The human interactome is not static and can change in response to cellular and environmental cues. "

____________
## VISUAL SETTINGS + PLOT PREPARATION
____________

### NODES GENERAL 

In [4]:
# Node marker styling.
opacity_nodes = 0.8
node_edge_col = '#696969' 
node_linewidth = 0.1

# Per-node sizes: the values of the dict returned by draw_node_degree_3D.
# NOTE(review): the plot cell currently passes a fixed size instead of `size` — confirm intent.
scale_factor3D = 0.025
size = list(draw_node_degree_3D(G, scale_factor3D).values())

# Glow settings (not referenced by any other cell visible in this notebook).
nodesglow_diameter = 8.0
nodesglow_transparency = 0.01 # 0.01

# Edge styling.
opacity_edges = 0.5
width = 0.5
edge_color = '#696969' 

### DATA SPECIFIC : PATIENT VARIANTS + SEEDS 


In [6]:
# Text identifier for file output name
rad_param = 'r-varseeds'

# Load Gene Information for Gene lists.
# Variant IDs and variant symbols come from two files that are paired by row position.
variants = pd.read_csv('input/IBD_variants_mod.txt')

variant_list = [str(i) for i in list(variants['variants'])]
variant_sym = pd.read_csv('input/IBD_variants_sym.txt')
genes_sym_list = [str(i) for i in list(variant_sym['variant_sym'])]
d_gene_sym_variants = dict(zip(variant_list,genes_sym_list))

# Seed genes are given as gene symbols and translated to graph IDs below.
df_seeds = pd.read_csv('input/seeds_from_genelists.txt')
df_seeds.columns = ['seeds']
# Loaded but not referenced by any other cell visible in this notebook.
df_seeds_z = pd.read_csv('input/seeds_from_zNumbers.txt')#, sep='\n')
seeds_list_sym = [str(i) for i in list(df_seeds['seeds'])]

# Invert d_gene_sym once (symbol -> IDs carrying that symbol) instead of
# scanning the whole dict for every seed symbol. Only string symbols are
# indexed because the seed symbols they are compared against are strings.
d_sym_to_ids = {}
for gene_id, symbol in d_gene_sym.items():
    if isinstance(symbol, str):
        d_sym_to_ids.setdefault(symbol, []).append(gene_id)

# Same content and order as the nested scan: seed order first, then
# d_gene_sym order; seed symbols without a match are skipped.
seeds_list = [gene_id
              for seed_sym in seeds_list_sym
              for gene_id in d_sym_to_ids.get(seed_sym, [])]

print("number of seeds: ", len(seeds_list))
print("number of variants: ", len(variant_list))

number of seeds:  246
number of variants:  13


In [7]:
feature = 'bioproc'
# NOTE: read_pickle unpickles the file; only load trusted inputs.
FM_BP = pd.read_pickle('input/Features_GO_BiolProc_Dataframe_human.pickle')
FM_BP.index = [str(i) for i in FM_BP.index]
bioproc_list = list(FM_BP.index)

# From obtained Gene lists select those to be mapped onto different shells 
# select genes for shells 
shell_one = variant_list
shell_two = seeds_list
shell_three = [str(i) for i in bioproc_list]
print('Number of genes on shell 1:', len(shell_one))
print('Number of genes on shell 2:', len(shell_two))
print('Number of genes on shell 3:', len(shell_three))

# Radius assigned to the nodes of each shell (innermost to outermost).
small_rad = 1
mid_rad = 5
large_rad = 20
outershell_rad = 25

# Set copies for the membership tests below: the lists hold thousands of
# entries and are probed once per graph node, which is quadratic with lists.
variant_set = set(variant_list)
seed_set = set(seeds_list)
bioproc_set = set(bioproc_list)

# d_param: node -> shell radius; l_features_new: one hover label per node,
# in G.nodes() order. Precedence is variant > seed > bioproc > rest.
d_param = {}
l_features_new = []
for i in G.nodes():
    if i in variant_set:
        d_param[str(i)] = small_rad
        l_features_new.append(("variant: " + str(i)))

    elif i in seed_set:
        d_param[str(i)] = mid_rad 
        l_features_new.append(("seed gene: " + str(i)))

    elif i in bioproc_set:
        d_param[str(i)] = large_rad 
        # Label text kept as-is; the value appended is the node ID itself.
        l_features_new.append(("GO term: " + str(i)))

    else:
        d_param[str(i)] = outershell_rad
        l_features_new.append(i)

# check how many shells of spheres there are :
print('Number of Spherical Shells:', len(set(d_param.values())))

# Specifying coloring based on genes on different shells 
# Base colours come from closeness centrality mapped onto the 'Blues' palette.
d_nodecol = d_clos 
col_pal = 'Blues'
d_colours = color_nodes_from_dict(G, d_nodecol, palette = col_pal)

node_col_shell1 = '#8b0000' # red
node_col_shell2 = '#FFF000' # yellow
node_col_shellrest = '#d3d3d3' # grey

# Set copies so each membership test is a hash lookup rather than a scan of
# a list with thousands of entries, repeated for every node.
shell_one_set = set(shell_one)
shell_two_set = set(shell_two)
shell_three_set = set(shell_three)

# Override the base colour per shell: shell 1 and 2 get fixed colours,
# shell 3 keeps its palette colour, everything else is greyed out.
d_colours_spec_genes = {}
for k,v in d_colours.items():
    if k in shell_one_set:
        d_colours_spec_genes[k]=node_col_shell1
    elif k in shell_two_set:
        d_colours_spec_genes[k]=node_col_shell2
    elif k in shell_three_set:
        d_colours_spec_genes[k]=v
    else: 
        d_colours_spec_genes[k]=node_col_shellrest

# Re-key in G.nodes() order so `colours` lines up with node positions and labels.
d_colours_spec_genes_sort = {key:d_colours_spec_genes[key] for key in G.nodes()}
colours = list(d_colours_spec_genes_sort.values())

Number of genes on shell 1: 13
Number of genes on shell 2: 246
Number of genes on shell 3: 14380
Number of Spherical Shells: 4


In [8]:
# Edge colours for the variant + seed genes, drawn in the shell-1 node colour.
# The result is later used as a mapping keyed by edge.
# NOTE(review): which edges qualify is decided inside color_edges_from_nodelist_specific — confirm.
d_edge_col = color_edges_from_nodelist_specific(G, variant_list+seeds_list, node_col_shell1)

---
## GEODESIC MAP
---

In [9]:
%%time 

# Embedding parameters handed to embed_umap_sphere; they are also written
# into the output file name of the plot cell.
n_neighbors = 20
spread = 1.0
min_dist = 0.0

# Embed the GO biological-process feature matrix; the result feeds get_posG_sphere_norm.
umap_sphere = embed_umap_sphere(FM_BP, n_neighbors, spread, min_dist)

  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
OMP: Info #271: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


In [None]:
%%time 

# Node positions for the plot and the VR export, built from the embedding
# and the per-node shell radii in d_param.
# NOTE(review): how `radius` combines with the values in d_param is defined
# inside get_posG_sphere_norm — confirm.
radius = 1 
posG_complete_sphere_norm = get_posG_sphere_norm(G, FM_BP, 
                                                 umap_sphere, d_param, radius)

CPU times: user 15.8 s, sys: 88.3 ms, total: 15.8 s
Wall time: 15.9 s


---
## PLOT 
---

In [None]:
# Colour scheme handed to plot_3D (alternative value: 'light').
scheme = 'dark'

# Node trace: positions, hover labels, colours, a fixed marker size of 2, opacity.
umap_nodes = get_trace_nodes_3D(
    posG_complete_sphere_norm, l_features_new, colours, 2, opacity_nodes
)
# Edge trace over the whole graph; built here but not added to the figure below.
umap_edges = get_trace_edges_3D(
    G, posG_complete_sphere_norm, edge_color, opac = 0.05, linewidth=0.5
)

# Only the nodes are drawn. To also draw edges, put `umap_edges` (or a trace from
# get_trace_edges_specific3D(d_edge_col, posG_complete_sphere_norm, linew=width, opac=opacity_edges))
# in front of `umap_nodes`.
umap_data = [umap_nodes]
data = umap_data

# Output path prefix encoding the feature set and the embedding parameters.
fname = f'output/GeodesicMap_umap_{feature}_neigh-{n_neighbors}_spr-{spread}_mindist-{min_dist}'

plot_3D(data, fname, scheme)

'output/GeodesicMap_umap_bioproc_neigh-20_spr-1.0_mindist-0.0dark.html'

---
## VRNETZER2 (multiplayer version) EXPORT
---

In [11]:
# annotations 

# Gene -> list of Disease Ontology term names, for every gene in d_gene_do.
# The assignment happens once per gene, so a gene also gets an (empty) entry
# when no DO name matches.
# NOTE(review): `str(do) in doid` is list membership if doid is a list but
# substring matching if doid is a str — confirm the type stored in d_gene_do.
d_gene_donames = {}
for node, doid in d_gene_do.items():
    d_gene_donames[node] = [name for do, name in d_do_names.items() if str(do) in doid]

# Same mapping extended to every graph node, in G.nodes() order.
d_allgene_donames = {
    g: d_gene_donames.get(g, 'No Disease associated.') for g in G.nodes()
}

# node annotations
# Built per node, so symbol, ID and diseases always belong to the same node.
# Zipping d_gene_sym against G.nodes() by position would silently misalign
# or truncate if the two orders or lengths differ.
d_annotations_csv = {}
d_annotations_json = {}
for g, diseases in d_allgene_donames.items():
    # Fall back to the node ID when no gene symbol is known for this node.
    sym = d_gene_sym.get(g, g)
    d_annotations_csv[g] = 'Gene: '+str(sym)+'; ID:  '+str(g)+'; Diseases: '+str(diseases)
    d_annotations_json[g] = ["Gene: "+str(sym), "ID: "+str(g), "Diseases: "+str(diseases)]

# List views in G.nodes() order, kept under their previous names.
l_annotations_csv = list(d_annotations_csv.values())
l_annotations_json = list(d_annotations_json.values())

In [12]:
# link colors 

# Every link of G gets a colour: links listed in d_edge_col keep the colour
# assigned there, all remaining links receive (0,0,0,0).
# NOTE(review): presumably (0,0,0,0) is read as fully transparent RGBA by the
# exporter, which hides those links — confirm.
d_links_colors = {
    link: d_edge_col.get(link, (0,0,0,0))
    for link in G.edges()
}


In [13]:
# no clusters
# Placeholder cluster inputs for the VR export: `clusterlist` is passed to
# exportVR_CSV and `dict_for_cluster` to exportVR_JSON.
# NOTE(review): the ["", 0] layout is presumably the "empty" form exportVR_CSV expects — confirm.
clusterlist = ["", 0]
dict_for_cluster = {}

In [14]:
# ---------------
# 3D
#----------------  
# Common path prefix for all exported files; it embeds the graph name.
# NOTE(review): the recorded output shows file names without the space from
# G.name, so the exporters apparently strip it — confirm.
filename = 'VR/temp/'+'2D-'+G.name+'-geodesic3D'
# Both exports share positions, node colours and link colours; they differ in
# the annotation format (CSV strings vs. JSON lists) and the cluster argument.
exportVR_CSV(filename, G, posG_complete_sphere_norm, d_colours_spec_genes_sort, d_annotations_csv, d_links_colors, clusterlist)
exportVR_JSON(filename, G, posG_complete_sphere_norm, dict_for_cluster, d_colours_spec_genes_sort, d_annotations_json, d_links_colors)


Exported Files: 
 ['VR/temp/2D-HumanInteractome-geodesic3D_nodepositions.csv', 'VR/temp/2D-HumanInteractome-geodesic3D_nodecolors.csv', 'VR/temp/2D-HumanInteractome-geodesic3D_nodeproperties.csv', 'VR/temp/2D-HumanInteractome-geodesic3D_links.csv', 'VR/temp/2D-HumanInteractome-geodesic3D_linkcolors.csv', 'VR/temp/2D-HumanInteractome-geodesic3D_clusterlabels.csv']
Exported File: 
 ['VR/temp/2D-HumanInteractome-geodesic3D.json']
