Load datasets used in the manuscript "A Swiss-Army Knife for Hierarchical Modeling of Biological Systems" (Yu et al.)

In [None]:
import ddot
from ddot import Ontology
import pandas as pd

# Gene-disease associations from the Monarch Initiative

In [None]:
# Fetch the gene-disease association table published by the Monarch Initiative.
# A reformatted copy is stored on NDEx; the second return value is not needed here.
monarch, _ = ddot.ndex_to_sim_matrix(
    ddot.MONARCH_DISEASE_GENE_SLIM_URL, similarity=None, input_fmt='cx', output_fmt='sparse'
)
# Preview the first few rows of the table
monarch.head()

In [None]:
# Example: collect the genes known to be associated with "Caffey Disease"
is_caffey = monarch['disease'] == 'caffey_disease'  # boolean row mask for this disease
seed = monarch.loc[is_caffey, 'gene'].tolist()
print('Seed:', seed)

# Human gene-gene similarity network

In [None]:
# Install the simplejson package (it is recommend you run this in a separate bash terminal, not in this Jupyter notebook. If you want to use a conda virtual environment, then you first need to activate the environment)
! pip install simplejson

In [None]:
## Download the human gene-gene similarity network from NDEx
## -- WARNING: This network is very large (19,009-by-19,009 matrix). Downloading will take ~10 min for a fast internet connection.
sim, sim_names = ddot.ndex_to_sim_matrix(
    ndex_url=ddot.HUMAN_GENE_SIMILARITIES_URL,
    input_fmt='cx_matrix',
    output_fmt='matrix',
    subset=None)

# Wrap the downloaded matrix in a DataFrame, using the returned names as both row and column labels
sim = pd.DataFrame(sim, index=sim_names, columns=sim_names)

# Preview the first few rows
sim.head()

# The Gene Ontology

In [None]:
# Load the Gene Ontology from NDEx.
# -- This copy was pre-processed to keep a non-redundant set of GO terms and connections that are relevant to human genes (see Process_the_Gene_Ontology.ipynb)
go_human_url = ddot.GO_HUMAN_URL
go_human = Ontology.from_ndex(go_human_url)
print(go_human)

# Fanconi Anemia gene ontology (FanGO)

In [None]:
# Load the Fanconi Anemia gene ontology (FanGO) from NDEx and print it
fango_url = ddot.FANGO_URL
fango = Ontology.from_ndex(fango_url)
print(fango)

# Other disease gene ontologies (based on gene-disease associations in the Monarch Initiative)

In [None]:
# Load the table of disease gene ontologies from the local file 'disease_gene_ontologies.txt'.
# The first line is used as the header, no column is taken as the index while parsing
# (index_col=False), and the table is then indexed by its 'Disease' column so that rows
# can be looked up by disease name.
df = pd.read_table('disease_gene_ontologies.txt', header=0, index_col=False).set_index('Disease')
df.head()

In [None]:
# Example: look up the URL for viewing the disease "hydronephrosis" on HiView, then print it
hiview_url = df.loc['hydronephrosis', 'HiView_URL']
print(hiview_url)