In [2]:
import networkx
import obonet

### Read the Gene Ontology

In [3]:
%%time
url = 'https://raw.githubusercontent.com/obophenotype/human-phenotype-ontology/master/hp.obo'
graph = obonet.read_obo(url)

CPU times: user 1.85 s, sys: 165 ms, total: 2.02 s
Wall time: 5.41 s


In [4]:
# Number of nodes
len(graph)

14586

In [5]:
# Number of edges
graph.number_of_edges()

18416

In [6]:
# Check if the ontology is a DAG
networkx.is_directed_acyclic_graph(graph)

True

### Lookup node properties

Returns a dictionary.

In [7]:
# Retreive properties of HP:3000074
graph.node['HP:3000074']

{'name': 'Abnormal lingual artery morphology',
 'def': '"Any structural abnormality of a lingual artery." [GOC:TermGenie]',
 'synonym': ['"Abnormality of lingual artery" EXACT []'],
 'xref': ['UMLS:C4073281'],
 'is_a': ['HP:0011004', 'HP:0030809', 'HP:3000036'],
 'created_by': 'vasilevs',
 'creation_date': '2015-08-07T03:39:06Z'}

In [8]:
# Retreive properties of HP:0001287
graph.node['HP:0001287']

{'name': 'Meningitis',
 'def': '"Inflammation of the meninges." [HPO:probinson]',
 'xref': ['MSH:D008581', 'SNOMEDCT_US:7180009', 'UMLS:C0025289'],
 'is_a': ['HP:0011450']}

### Create name mappings


In [9]:
id_to_name = {id_: data.get('name') for id_, data in graph.nodes(data=True)}
name_to_id = {data['name']: id_ for id_, data in graph.nodes(data=True) if 'name' in data}

In [10]:
# Get the name for HP:0001287
id_to_name['HP:0001287']

'Meningitis'

In [12]:
id_to_name['HP:0000135']

'Hypogonadism'

In [None]:
id_to_name

In [10]:
# Get the id for Meningitis
name_to_id['Meningitis']

'HP:0001287'

### Find parent or child relationships

In [31]:
# Find edges to parent terms
node = name_to_id['Abnormality of cartilage of nasal septum']
for child, parent, key in graph.out_edges(node, keys=True):
    print(f'• {id_to_name[child]} ⟶ {key} ⟶ {id_to_name[parent]}')

• Abnormality of cartilage of nasal septum ⟶ is_a ⟶ Abnormality of the nasal septum
• Abnormality of cartilage of nasal septum ⟶ is_a ⟶ Abnormality of the nasal skeleton
• Abnormality of cartilage of nasal septum ⟶ is_a ⟶ Abnormality of cartilage morphology


In [12]:
# Find edges to children terms
node = name_to_id['Meningitis']
for parent, child, key in graph.in_edges(node, keys=True):
    print(f'• {id_to_name[child]} ⟵ {key} ⟵ {id_to_name[parent]}')

• Meningitis ⟵ is_a ⟵ Chronic lymphocytic meningitis
• Meningitis ⟵ is_a ⟵ Fungal meningitis


### Find all superterms of myelination

In [17]:
sorted(id_to_name[superterm] for superterm in networkx.descendants(graph, 'HP:0001287'))

['Abnormality of immune system physiology',
 'Abnormality of nervous system morphology',
 'Abnormality of the immune system',
 'Abnormality of the nervous system',
 'All',
 'Morphological abnormality of the central nervous system',
 'Phenotypic abnormality',
 'Unusual CNS infection',
 'Unusual infection',
 'Unusual infection by anatomical site']

### Find all subterms of myelination

In [18]:
sorted(id_to_name[subterm] for subterm in networkx.ancestors(graph, 'HP:0001287'))

['Chronic lymphocytic meningitis',
 'Coccidioidal meningitis',
 'Cryptococcal meningitis',
 'Fungal meningitis']

### Find all paths to the root

In [24]:
paths = networkx.all_simple_paths(
    graph,
    source=name_to_id['Meningitis'],
    target=name_to_id['All']
)
for path in paths:
    print('•', ' ⟶ '.join(id_to_name[node] for node in path))

• Meningitis ⟶ Unusual CNS infection ⟶ Morphological abnormality of the central nervous system ⟶ Abnormality of nervous system morphology ⟶ Abnormality of the nervous system ⟶ Phenotypic abnormality ⟶ All
• Meningitis ⟶ Unusual CNS infection ⟶ Unusual infection by anatomical site ⟶ Unusual infection ⟶ Abnormality of immune system physiology ⟶ Abnormality of the immune system ⟶ Phenotypic abnormality ⟶ All


### See the ontology metadata

In [25]:
graph.graph

{'name': 'hp.obo',
 'typedefs': [],
 'instances': [],
 'format-version': '1.2',
 'data-version': 'hp/releases/2019-09-06',
 'saved-by': 'Peter Robinson, Sebastian Koehler, Sandra Doelken, Chris Mungall, Melissa Haendel, Nicole Vasilevsky, Monarch Initiative, et al.',
 'subsetdef': ['hposlim_core "Core clinical terminology"',
  'secondary_consequence "Consequence of a disorder in another organ system."'],
 'synonymtypedef': ['HP:0031859 "obsolete synonym"',
  'HP:0045076 "UK spelling"',
  'HP:0045077 "abbreviation"',
  'HP:0045078 "plural form"',
  'layperson "layperson term"'],
 'default-namespace': ['human_phenotype'],
 'remark': ['Please see license of HPO at http://www.human-phenotype-ontology.org'],
 'ontology': 'hp.obo',
 'property_value': ['http://purl.org/dc/elements/1.1/creator "Human Phenotype Ontology Consortium" xsd:string',
  'http://purl.org/dc/elements/1.1/creator "Monarch Initiative" xsd:string',
  'http://purl.org/dc/elements/1.1/creator "Peter Robinson" xsd:string',
  

### Phenotype-Disease Table

In [15]:
import pandas as pd
data = pd.read_csv("phenotype.txt", sep='\t', header=None)
data.head(3)

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,DECIPHER,1,Wolf-Hirschhorn Syndrome,,HP:0000252,DECIPHER:1,IEA,,,,P,WOLF-HIRSCHHORN SYNDROME,HPO:skoehler[2013-05-29],
1,DECIPHER,1,Wolf-Hirschhorn Syndrome,,HP:0001249,DECIPHER:1,IEA,,,,P,WOLF-HIRSCHHORN SYNDROME,HPO:skoehler[2013-05-29],
2,DECIPHER,1,Wolf-Hirschhorn Syndrome,,HP:0001250,DECIPHER:1,IEA,,,,P,WOLF-HIRSCHHORN SYNDROME,HPO:skoehler[2013-05-29],


In [20]:
syn_disease = data.iloc[:, [2, 4]]
syn_disease.columns = ['syndrome', 'disease_id']
syn_disease.head(3)

Unnamed: 0,syndrome,disease_id
0,Wolf-Hirschhorn Syndrome,HP:0000252
1,Wolf-Hirschhorn Syndrome,HP:0001249
2,Wolf-Hirschhorn Syndrome,HP:0001250


In [21]:
len(syn_disease)

182022

In [24]:
# link disease id to disease
syn_disease['disease'] = syn_disease['disease_id'].map(id_to_name)
syn_disease.head(3)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,syndrome,disease_id,disease
0,Wolf-Hirschhorn Syndrome,HP:0000252,Microcephaly
1,Wolf-Hirschhorn Syndrome,HP:0001249,Intellectual disability
2,Wolf-Hirschhorn Syndrome,HP:0001250,Seizures


In [25]:
syn_disease[syn_disease['disease'] == 'Microcephaly']

Unnamed: 0,syndrome,disease_id,disease
0,Wolf-Hirschhorn Syndrome,HP:0000252,Microcephaly
36,Cri du Chat Syndrome (5p deletion),HP:0000252,Microcephaly
44,Miller-Dieker syndrome (MDS),HP:0000252,Microcephaly
91,Angelman syndrome (Type 1),HP:0000252,Microcephaly
116,Xq28 (MECP2) duplication,HP:0000252,Microcephaly
...,...,...,...
180385,PAGOD syndrome,HP:0000252,Microcephaly
180968,Amish lethal microcephaly,HP:0000252,Microcephaly
181072,Mosaic trisomy 9,HP:0000252,Microcephaly
181165,LIG4 syndrome,HP:0000252,Microcephaly


In [26]:
syn_disease[syn_disease['syndrome'] == 'Microcephaly']

Unnamed: 0,syndrome,disease_id,disease


In [28]:
syn_disease

Unnamed: 0,syndrome,disease_id,disease
0,Wolf-Hirschhorn Syndrome,HP:0000252,Microcephaly
1,Wolf-Hirschhorn Syndrome,HP:0001249,Intellectual disability
2,Wolf-Hirschhorn Syndrome,HP:0001250,Seizures
3,Wolf-Hirschhorn Syndrome,HP:0001252,Muscular hypotonia
4,Wolf-Hirschhorn Syndrome,HP:0001518,Small for gestational age
...,...,...,...
182017,Klatskin tumor,HP:0002716,Lymphadenopathy
182018,Klatskin tumor,HP:0004936,Venous thrombosis
182019,Klatskin tumor,HP:0012334,Extrahepatic cholestasis
182020,Klatskin tumor,HP:0012378,Fatigue


In [31]:
import pandas as pd
network = pd.read_csv("data3.txt", sep='\t')
network.head(3)

Unnamed: 0,MeSH Symptom Term,MeSH Disease Term,PubMed occurrence,TFIDF score
0,"Aging, Premature",Respiratory Syncytial Virus Infections,1,3.464551
1,"Aging, Premature",Orthomyxoviridae Infections,1,3.464551
2,"Aging, Premature",HIV Infections,3,10.393654


In [30]:
len(network)

147979

In [32]:
network[network['MeSH Disease Term'] == 'HIV Infections']

Unnamed: 0,MeSH Symptom Term,MeSH Disease Term,PubMed occurrence,TFIDF score
2,"Aging, Premature",HIV Infections,3,10.393654
180,Asthenia,HIV Infections,4,8.252877
1044,Fever,HIV Infections,153,94.697458
3075,Fever of Unknown Origin,HIV Infections,59,86.916140
3892,Hypothermia,HIV Infections,2,3.890872
...,...,...,...,...
144434,Proteinuria,HIV Infections,57,71.191733
145401,Albuminuria,HIV Infections,16,33.041387
145934,Hemoglobinuria,HIV Infections,1,2.883522
146322,Urinary Incontinence,HIV Infections,1,1.617529
