# Supplemental Notebook - Interactive Hierarchy Creation

This notebook shows how to generate hierarchy networks in HCX format so they can be explored interactively with [web.cytoscape.org](https://web.cytoscape.org).

### Set Up

In [1]:
import os
import sys

import matplotlib.pyplot as plt
import ndex2 as ndex
import networkx as nx
import numpy as np
import pandas as pd
import seaborn as sns
from ndex2.cx2 import NetworkXToCX2NetworkFactory, CX2Network,  PandasDataFrameToCX2NetworkFactory

In [2]:
# Make the local `carva` directory importable; the path is resolved relative to
# the directory the notebook is run from.
cwd = os.getcwd()
sys.path.append(os.path.join(cwd, '../carva'))
# NOTE(review): the star imports hide where helpers such as `load_network` and
# `create_hierarchy` are defined - presumably in these three modules; confirm
# and consider importing the names explicitly.
from network_utils import *
from geneset_utils import *
from hierarchy_utils import *

In [3]:
from getpass import getpass

# Import the client submodule explicitly. The imports cell binds the package
# only as `ndex`, so the name `ndex2` used below would otherwise exist only if
# one of the star imports happened to leak it.
import ndex2.client

# Prompt for the NDEx login without echoing it, then open an authenticated
# client that the upload cells further down use.
user = getpass('Username:')
password = getpass('Password:')
client = ndex2.client.Ndex2(username=user, password=password)

Username:········
Password:········


## Load the subnetworks

In [5]:
# UUIDs of the per-trait subnetworks, keyed by trait name; they are passed to
# `load_network` in the next cell.
subnetworks = {
    'autism spectrum disorder': '94590325-4ed4-11f0-a218-005056ae3c32',
    'Alzheimer disease': 'a25cd00e-4ed4-11f0-a218-005056ae3c32',
    'bipolar disorder': 'a454f6a0-4ed4-11f0-a218-005056ae3c32',
}

In [6]:
# Load the parent (interaction) network for each trait, using the login entered
# above. The calls run in the order ASD, BIP, AZD.
asd_parentG = load_network(
    uuid=subnetworks['autism spectrum disorder'],
    use_password=True,
    ndex_password=password,
    ndex_user=user,
)
bip_parentG = load_network(
    uuid=subnetworks['bipolar disorder'],
    use_password=True,
    ndex_password=password,
    ndex_user=user,
)
azd_parentG = load_network(
    uuid=subnetworks['Alzheimer disease'],
    use_password=True,
    ndex_password=password,
    ndex_user=user,
)

Network Name:autism spectrum disorder
Number of nodes: 871
Number of edges: 30131
Network Name:bipolar disorder
Number of nodes: 387
Number of edges: 7711
Network Name:Alzheimer disease
Number of nodes: 526
Number of edges: 9574


## Create hierarchy via community detection

Example for creation of network hierarchy using HiDeF

In [None]:
# Network to build the example hierarchy from, and the directory the hierarchy
# helpers below receive as their output location.
# NOTE(review): `outdir` is an absolute, user-specific path - adjust before running elsewhere.
uuid = '3cc84672-1653-11f0-9806-005056ae3c32'
outdir = '/cellar/users/snwright/Data/RareCommon/outputs/subnetworks/hierarchies/'

In [None]:
# Load the network as a CX object (return_cx=True). The login is passed with
# the same variable (`user`) and keyword names (`ndex_user`, `ndex_password`)
# as every other `load_network` call in this notebook; `username` is never
# defined here.
G_cx = load_network(uuid, use_password=True, return_cx=True, ndex_user=user, ndex_password=password)

In [None]:
# Name stored on the loaded network; passed to the naming, writing and upload
# helpers below.
name = G_cx.get_name()

In [None]:
# Build the hierarchy from the loaded network, then derive the table the
# annotation steps below operate on.
# NOTE(review): presumably one row per community - confirm in hierarchy_utils.
G_hier = create_hierarchy(G_cx, verbose=True)
hier_df = create_hier_df(G_hier)

In [None]:
# Rebinds `hier_df` to the helper's result (note: re-running this cell feeds the
# already-processed table back in).
# NOTE(review): presumably adds per-community seed-gene fractions taken from G_cx - confirm.
hier_df = add_seed_gene_fractions(hier_df, G_cx)

In [None]:
# Produce the annotated table, reading gene symbols from the 'SymbolList' column.
# NOTE(review): with write=True the helper presumably writes its results under `outdir` - confirm.
hier_df_annot = name_hierarchy_systems(hier_df, outdir=outdir, gene_col='SymbolList', write=True, hier_name=name)

In [None]:
# Hand the hierarchy graph and its annotated table to the writer, with `outdir`
# and the network name as the output location/label.
write_nx_hierarchy(G_hier, hier_df_annot, outdir, name)

In [None]:
# NOTE(review): `G_out` is not used by any later cell shown here, and this call
# receives `hier_df` rather than the annotated `hier_df_annot` - confirm both are intended.
G_out = add_annotations_to_hierarchy(G_hier, hier_df)

In [None]:
# Upload the annotated hierarchy. `username` is not defined anywhere in this
# notebook; the login entered at the top is stored in `user`.
upload_cx_hierarchy(G_hier, hier_df_annot, outdir, name, user, password)

### Load Pre-computed Hierarchies

In [7]:
# UUIDs of the pre-computed hierarchy networks (ASD, AZD, BIP) loaded in the
# following cells.
asd_uuid = '4c4ffe8a-3cc5-11f0-a469-005056ae3c32'
azd_uuid = '44345758-3cc5-11f0-a469-005056ae3c32'
bip_uuid = '3f551676-3cc5-11f0-a469-005056ae3c32'

In [8]:
# ASD hierarchy; return_cx=False because the result is used below as a graph
# (`.nodes(data=True)`, `nx.to_pandas_edgelist`).
asdG = load_network(uuid=asd_uuid, ndex_password=password, ndex_user=user, verbose=True, use_password=True,
                return_cx=False)

Network Name:hidef_(none)_ASD BRAIN
Number of nodes: 57
Number of edges: 58


In [9]:
# AZD hierarchy, loaded the same way as the ASD one.
azdG = load_network(uuid=azd_uuid, ndex_password=password, ndex_user=user, verbose=True, use_password=True,
                return_cx=False)

Network Name:hidef_(none)_AZD BRAIN
Number of nodes: 57
Number of edges: 56


In [10]:
# BIP hierarchy, loaded the same way as the ASD one.
bipG = load_network(uuid=bip_uuid, ndex_password=password, ndex_user=user, verbose=True, use_password=True,
                return_cx=False)

Network Name:hidef_(none)_BIP BRAIN
Number of nodes: 57
Number of edges: 56


In [11]:
def load_hierarchy_info(G):
    """Collect the community annotations stored on a hierarchy graph's nodes.

    Args:
        G: Graph-like object whose ``nodes(data=True)`` yields
            ``(node, attribute_dict)`` pairs. Every node must carry the six
            ``CD_*`` attributes selected below.

    Returns:
        tuple: ``(info, members)`` where ``info`` is a DataFrame indexed by
        node with the columns 'Genes', 'Pvalue', 'Name', 'LogSize',
        'SourceTerm' and 'SourceDB', and ``members`` maps each node to its
        'Genes' string split on single spaces and converted to ``int``.
    """
    # Source attribute on the node -> column name in the returned table.
    renamed = {
        'CD_MemberList': 'Genes',
        'CD_AnnotatedMembers_Pvalue': 'Pvalue',
        'CD_CommunityName': 'Name',
        'CD_MemberList_LogSize': 'LogSize',
        'CD_AnnotatedMembers_SourceTerm': 'SourceTerm',
        'CD_AnnotatedMembers_SourceDB': 'SourceDB',
    }
    attrs_by_node = {node: attrs for node, attrs in G.nodes(data=True)}
    # Nodes arrive as columns of the dict-of-dicts frame; transpose to one row per node.
    info = pd.DataFrame(attrs_by_node).T
    info = info.loc[:, list(renamed.keys())]
    info.columns = list(renamed.values())
    members = {
        comm: [int(token) for token in gene_str.split(' ')]
        for comm, gene_str in zip(info.index, info.Genes)
    }
    return info, members

In [12]:
# For each trait: the community table and a {community: [member ids as int]} mapping.
asd_df, asd_genes = load_hierarchy_info(asdG)
azd_df, azd_genes = load_hierarchy_info(azdG)
bip_df, bip_genes = load_hierarchy_info(bipG)

## Construct the HCX Object

In [13]:
def get_cx2_networks(hierG, parentG, comm_df):
    """Convert a hierarchy graph and its parent graph into CX2 networks.

    The parent graph is converted directly from networkx. The hierarchy is
    rebuilt from the edge list returned by ``nx.to_pandas_edgelist`` and each
    resulting node then receives its community's member list.

    Args:
        hierG: networkx graph of the hierarchy.
        parentG: networkx graph of the parent (interaction) network.
        comm_df (pd.DataFrame): Table with a 'Genes' column, indexed by the
            value found in each hierarchy node's 'name' attribute.

    Returns:
        tuple: ``(hier_net, parent_net)`` as CX2 networks.
    """
    parent_net = NetworkXToCX2NetworkFactory().get_cx2network(parentG)

    edge_table = nx.to_pandas_edgelist(hierG)
    hier_net = PandasDataFrameToCX2NetworkFactory().get_cx2network(
        edge_table, source_field='source', target_field='target'
    )

    # Attach the space-separated member list of the matching community to every node.
    for node_id in hier_net.get_nodes():
        community = hier_net.get_node(node_id).get('v', {}).get('name')
        member_list = comm_df.loc[community, 'Genes']
        hier_net.add_node_attribute(node_id, 'CD_MemberList', member_list, datatype='string')
    return hier_net, parent_net

In [14]:
# CX2 versions of each trait's hierarchy and parent network.
asd_hier, asd_parent = get_cx2_networks(asdG, asd_parentG, asd_df)
azd_hier, azd_parent = get_cx2_networks(azdG, azd_parentG, azd_df)
bip_hier, bip_parent = get_cx2_networks(bipG, bip_parentG, bip_df)

In [15]:
def get_hcx(hier_net, parent_net, parent_uuid, hier_name):
    """Add the HCX network and node attributes to a hierarchy network, in place.

    Args:
        hier_net: Hierarchy network. Must provide ``add_network_attribute``,
            ``set_name``, ``get_nodes``, ``get_edges``, ``get_node`` and
            ``add_node_attribute``; edges are dicts with a target under 't' and
            node attributes live under each node's 'v' key.
        parent_net: Parent (interaction) network. Currently unused; kept so
            existing callers keep working.
        parent_uuid (str): Stored as 'HCX::interactionNetworkUUID'.
        hier_name (str): Name given to the hierarchy network.

    Returns:
        The same ``hier_net`` object, with 'HCX::isRoot' and 'HCX::members'
        set on every node.

    Raises:
        ValueError: If a token in a node's 'CD_MemberList' is not an integer.
    """
    hier_net.add_network_attribute('ndexSchema', 'hierarchy_v0.1', datatype='string')
    hier_net.add_network_attribute('HCX::modelFileCount', '2', datatype='integer')
    hier_net.set_name(hier_name)
    hier_net.add_network_attribute('HCX::interactionNetworkUUID', parent_uuid, datatype='string')

    # A root is any node that is never the target of an edge.
    edge_targets = {edge['t'] for edge in hier_net.get_edges().values()}

    # Iterate over a snapshot of the ids, since attributes are added while looping.
    for node_id in list(hier_net.get_nodes().keys()):
        is_root = node_id not in edge_targets
        hier_net.add_node_attribute(node_id, 'HCX::isRoot', str(is_root).lower(), datatype='boolean')

        # A missing or empty member list yields no members instead of failing on
        # int(''); split() without an argument also tolerates repeated spaces.
        member_str = hier_net.get_node(node_id).get('v', {}).get('CD_MemberList') or ''
        member_ids = [int(token) for token in member_str.split()]
        # NOTE(review): the ids are copied from 'CD_MemberList' unchanged; whether
        # they match node ids of the interaction network is not visible here - confirm.
        hier_net.add_node_attribute(node_id, 'HCX::members', member_ids, datatype='list_of_integer')
    return hier_net

In [None]:
# Add the HCX attributes to each hierarchy (get_hcx modifies and returns the same object).
# NOTE(review): the parent UUIDs given here differ from the ones in `subnetworks`
# that the parent networks were loaded from - confirm which interaction
# networks the hierarchies should point to.
asd_HCX = get_hcx(asd_hier, asd_parent, 'ccd5e0d3-31ac-11f0-a469-005056ae3c32', hier_name='ASD Hierarchy HCX')
azd_HCX = get_hcx(azd_hier, azd_parent, 'cd0ad385-31ac-11f0-a469-005056ae3c32', hier_name='AZD Hierarchy HCX')
bip_HCX = get_hcx(bip_hier, bip_parent, 'cd515269-31ac-11f0-a469-005056ae3c32', hier_name='BIP Hierarchy HCX')

### Upload hierarchies

In [None]:
# Save the ASD hierarchy as a new network with PRIVATE visibility, using the
# authenticated client created at the top of the notebook.
client.save_new_cx2_network(asd_HCX.to_cx2(), visibility='PRIVATE')

In [None]:
# Save the AZD hierarchy as a new network with PRIVATE visibility.
client.save_new_cx2_network(azd_HCX.to_cx2(), visibility='PRIVATE')

In [None]:
# Save the BIP hierarchy as a new network with PRIVATE visibility.
client.save_new_cx2_network(bip_HCX.to_cx2(), visibility='PRIVATE')

## Add hierarchy annotations

In [None]:
def annotate_and_clean_hierarchy(hier_df, parentG):
    """Summarize each hierarchy community from the attributes of its member genes.

    Args:
        hier_df (pd.DataFrame): One row per community, indexed by community id,
            with at least the columns 'Genes' (space-separated node keys of
            ``parentG``), 'Pvalue', 'Name' and 'SourceDB'. Not modified.
        parentG: Graph-like object whose ``nodes(data=True)`` result can be
            indexed by node key to get that node's attribute dict. Every member
            gene must carry 'gene_class', 'z_C', 'z_R', 'Z_coloc' and 'HGNC'.

    Returns:
        pd.DataFrame: ``hier_df`` without 'Pvalue' and 'SourceDB', with cleaned
        'Name' values and the added columns 'logp' (-log10 of 'Pvalue'),
        'shared_fraction', 'rare_fraction', 'common_fraction',
        'network_fraction', 'NPSc', 'NPSr', 'NPSrc' (means of 'z_C', 'z_R' and
        'Z_coloc' over the members), 'c_vs_r' and 'HGNC' (space-joined symbols).

    Raises:
        AssertionError: If the four class fractions of a community do not sum
            to 1, e.g. when a member has a 'gene_class' other than 'shared',
            'rare', 'common' or 'Network'.
        KeyError: If a member gene is not a node of ``parentG`` or lacks one of
            the required attributes.
    """
    # 'gene_class' value -> name of the fraction column it feeds.
    fraction_cols = {'shared': 'shared_fraction', 'rare': 'rare_fraction',
                     'common': 'common_fraction', 'Network': 'network_fraction'}
    # Node score attribute -> name of the per-community mean column.
    score_cols = {'z_C': 'NPSc', 'z_R': 'NPSr', 'Z_coloc': 'NPSrc'}

    # Build a new frame rather than writing columns into the caller's table.
    annotated = hier_df.assign(
        logp=hier_df.Pvalue.apply(lambda x: -1 * np.log10(float(x))),
        Name=clean_names(hier_df['Name'].values),
    ).drop(columns=['Pvalue', 'SourceDB'])

    node_data = parentG.nodes(data=True)
    comm_features = {}
    for comm in annotated.index.values:
        # NOTE(review): members are looked up by their string form - confirm that
        # the node keys of `parentG` are strings.
        genes = annotated.at[comm, 'Genes'].split(' ')
        features = {}

        # Fraction of members in each seed-gene class (0 when the class is absent).
        gene_classes = [node_data[gene]['gene_class'] for gene in genes]
        for gene_class, col in fraction_cols.items():
            n_in_class = gene_classes.count(gene_class)
            features[col] = n_in_class / len(genes) if n_in_class else 0
        # Compare with a tolerance: exact equality fails for valid inputs such as
        # 0.3 + 0.6 + 0.1, which is 0.9999999999999999 in floating point.
        total_fraction = sum(features[col] for col in fraction_cols.values())
        assert np.isclose(total_fraction, 1), 'Fractions do not add up to ...'

        # Mean score over the members.
        for z, nps in score_cols.items():
            scores = [float(node_data[gene][z]) for gene in genes]
            features[nps] = np.mean(np.array(scores))
        # Share of NPSc in NPSc + NPSr, shifted by 0.5. The result is not finite
        # when NPSc + NPSr is 0.
        features['c_vs_r'] = features['NPSc'] / (features['NPSc'] + features['NPSr']) - 0.5

        features['HGNC'] = ' '.join(node_data[gene]['HGNC'] for gene in genes)
        comm_features[comm] = features

    comm_df = pd.DataFrame.from_dict(comm_features, orient='index')
    return annotated.join(comm_df)


def clean_names(names):
    """Abbreviate community names and capitalize their first character.

    Args:
        names: Iterable of name strings.

    Returns:
        list: One cleaned string per input name, in the same order. Empty
        names are returned as empty strings.
    """
    replace = {'calcium': 'Ca', 'Calcium':'Ca', 'regulation':'reg.', 'Regulation':'Reg.', 
          'activity': 'activ.', 'organization':'org.', '(none)': 'NA'}
    names_out = []
    for name in names:
        for before, after in replace.items():
            name = name.replace(before, after)
        # Slicing instead of name[0] so an empty name does not raise IndexError.
        names_out.append(name[:1].capitalize() + name[1:])
    return names_out

In [None]:
# Copies are passed so the loaded ASD table and parent graph stay unchanged.
asd_out = annotate_and_clean_hierarchy(asd_df.copy(), asd_parentG.copy())

In [None]:
# Copies are passed so the loaded AZD table and parent graph stay unchanged.
azd_out = annotate_and_clean_hierarchy(azd_df.copy(), azd_parentG.copy())

In [None]:
# Copies are passed so the loaded BIP table and parent graph stay unchanged.
bip_out = annotate_and_clean_hierarchy(bip_df.copy(), bip_parentG.copy())

In [None]:
# Write the annotated community tables as tab-separated files.
# NOTE(review): the output paths are hard-coded to a user-specific directory -
# adjust before running elsewhere.
asd_out.to_csv('~/Data/Transfer/RVC/figures/NPD/ASD_hier_info.tsv', sep='\t')
azd_out.to_csv('~/Data/Transfer/RVC/figures/NPD/AZD_hier_info.tsv', sep='\t')
bip_out.to_csv('~/Data/Transfer/RVC/figures/NPD/BIP_hier_info.tsv', sep='\t')