Load datasets used in the manuscript "A Swiss-Army Knife for Hierarchical Modeling of Biological Systems" (Yu et al.)

In [1]:
import ddot
import pandas as pd
from ddot import Ontology

# Gene-disease associations from the Monarch Initiative

In [7]:
# Retrieve a table of gene-disease associations from the Monarch Initiative (reformatted and stored on NDEx).
# The call returns a pair; only the first element (the association table) is kept and the
# second is discarded into `_`.
# With output_fmt='sparse' the result is displayed below as a long-format table with one
# row per gene-disease pair, so `similarity=None` presumably means "do not pick a single
# edge attribute" -- TODO confirm against the ddot documentation.
monarch, _ = ddot.ndex_to_sim_matrix(
    ddot.MONARCH_DISEASE_GENE_SLIM_URL,
    similarity=None,
    input_fmt='cx',
    output_fmt='sparse')
# Preview the first rows only; the full table is not dumped into the notebook.
monarch.head()




Unnamed: 0,Node1,Node2,disease_original_name,gene,disease,similarity
0,MIR6084,parkinson_disease_6__autosomal_recessive_early...,"Parkinson Disease 6, Autosomal Recessive Early...",MIR6084,parkinson_disease_6__autosomal_recessive_early...,1.0
1,ELANE,autosomal_dominant_severe_congenital_neutropenia,Autosomal dominant severe congenital neutropenia,ELANE,autosomal_dominant_severe_congenital_neutropenia,1.0
2,B2M,bare_lymphocyte_syndrome_type_1,Bare lymphocyte syndrome type 1,B2M,bare_lymphocyte_syndrome_type_1,1.0
3,PDCD10,familial_cerebral_cavernous_malformation,Familial cerebral cavernous malformation,PDCD10,familial_cerebral_cavernous_malformation,1.0
4,PDCD10,cerebral_cavernous_malformation,Cerebral cavernous malformation,PDCD10,cerebral_cavernous_malformation,1.0


In [8]:
# Example: collect the genes annotated to "Caffey Disease" as a seed gene list.
# Build the row mask first, then pull the matching 'gene' values out as a plain list.
caffey_rows = monarch['disease'] == 'caffey_disease'
seed = monarch.loc[caffey_rows, 'gene'].tolist()
print('Seed:', seed)

Seed: ['COL1A1', 'A4GALT']


# Human gene-gene similarity network

In [None]:
# Install the simplejson package (it may be easier to run this in a separate bash terminal).
! pip install simplejson

In [5]:
## Download the human gene-gene similarity network from NDEx
## -- WARNING: This network is very large (19,009-by-19,009 matrix). Downloading will take ~10 min on a fast internet connection.
## -- In the recorded run below, nearly all of that time was spent fetching the HTTP response and parsing it as JSON.
# The call returns the similarity values together with the gene names that label them.
# `subset=None` presumably requests every gene rather than a subset -- TODO confirm against the ddot documentation.
sim, sim_names = ddot.ndex_to_sim_matrix(
    ndex_uuid=ddot.HUMAN_GENE_SIMILARITIES_URL,
    similarity='similarity',
    input_fmt='cx_matrix',
    output_fmt='matrix',
    subset=None)

# Label both axes with the gene names so rows/columns can be selected by gene symbol.
# `pd` is imported in the notebook's first (imports) cell.
# `sim` is deliberately rebound so that no second name keeps a reference to the raw matrix.
sim = pd.DataFrame(sim, columns=sim_names, index=sim_names)

# Preview only the first rows; the full matrix is far too large to display.
sim.head()

NDEx download time (sec): 0.07517433166503906
response.content time: 231.18325209617615
Read HTTP response as JSON simplejson time (sec): 326.2878167629242
loop time (sec): 13.906715154647827


Unnamed: 0,A1BG,A1CF,A2M,A2ML1,A3GALT2,A4GALT,A4GNT,AAAS,AACS,AADAC,...,ZWILCH,ZWINT,ZXDA,ZXDB,ZXDC,ZYG11A,ZYG11B,ZYX,ZZEF1,ZZZ3
A1BG,0.0,3.431698,2.995258,2.485376,1.232452,2.96318,2.740923,2.636584,3.164647,3.111191,...,3.030626,2.13346,2.139856,4.836614,2.500669,5.75361,3.179621,3.085719,1.398557,2.737073
A1CF,3.431698,0.0,4.359057,2.530033,1.346039,3.3762,3.739248,3.486164,2.975972,5.348464,...,3.125845,3.449779,2.975913,5.248934,2.748682,4.152,5.086524,3.503827,4.535737,3.060234
A2M,2.995258,4.359057,0.0,7.321767,1.265758,4.137097,3.941315,3.487693,3.333286,4.303947,...,3.101827,3.386778,2.540423,3.380266,2.677951,2.743995,2.966277,4.337028,4.838756,3.218988
A2ML1,2.485376,2.530033,7.321767,0.0,1.209089,2.809288,2.544417,2.969902,2.728513,2.532099,...,2.383435,2.437612,2.401979,5.131732,1.858816,4.38724,2.501511,2.807124,3.867039,2.168479
A3GALT2,1.232452,1.346039,1.265758,1.209089,0.0,1.252855,1.192584,1.275544,1.301369,1.316169,...,1.403003,1.536927,1.336344,2.543513,1.316635,2.621008,4.016145,1.420911,3.459424,1.265704


# The Gene Ontology

In [5]:
# Read the Gene Ontology from NDEx.
# -- This version has been pre-processed to contain a non-redundant set of GO terms and connections that are relevant to human genes (see Process_the_Gene_Ontology.ipynb).
go_human = Ontology.from_ndex(ddot.GO_HUMAN_URL)
# Printing the Ontology shows its text summary: gene/term/relation counts and the node and edge attribute names.
print(go_human)


19015 genes, 19343 terms, 215488 gene-term relations, 36362 term-term relations
node_attributes: ['Branch', 'name', 'Vis:Shape', 'Vis:Fill Color', 'Vis:Border Paint', 'Term_Description', 'Vis:Size']
edge_attributes: ['Vis:Visible']
