In [3]:
from ontobio import OntologyFactory
# Factory whose create(...) method is used below to build the icd and hp ontology objects.
ofa = OntologyFactory()

In [4]:
# Ontology object for the ICD-10 side of the mapping; later cells call icd.label(...)
# on the ICD10 IRIs taken from the alignment table.
# NOTE(review): "icd10.json" is presumably a local file next to the notebook — confirm.
icd = ofa.create("icd10.json")

In [5]:
# Ontology object for the HP side; later cells use hp.label(...) on HP ids and pass hp
# to the association-set factory.
hp = ofa.create('hp')

In [44]:
# Sub-ontology of hp requested with relations='subClassOf'; rollup() calls
# hpsimple.parents(...) on it to climb from a term to its parents.
# NOTE(review): `relations` is passed as a bare string — confirm the API accepts a
# string here rather than expecting a list of relation ids.
hpsimple = hp.subontology(relations='subClassOf')

In [8]:
# Load the mappings written by an earlier alignment run (tab-separated file)
import pandas as pd
df = pd.read_csv('ontobio-align-hp.tsv', delimiter="\t")

In [10]:
# Preview the first ten rows of the alignment table
df.head(10)

Unnamed: 0,left,left_label,right,right_label,left_match_type,right_match_type,left_match_val,right_match_val,score,left_simscore,right_simscore,reciprocal_score,conditional_pr_equiv,equiv_clique_size
0,http://purl.obolibrary.org/obo/ICD10_H16.0,Corneal ulcer,HP:0012804,Corneal ulceration,label,hasExactSynonym,Corneal ulcer,Corneal ulcer,90.0,0.5,1.0,4,1.0,2
1,http://purl.obolibrary.org/obo/ICD10_M26.01,Maxillary hyperplasia,HP:0430028,Hyperplasia of the maxilla,label,hasExactSynonym,Maxillary hyperplasia,Maxillary hyperplasia,90.0,1.0,1.0,4,1.0,2
2,http://purl.obolibrary.org/obo/ICD10_L94.6,Ainhum,HP:0031009,Ainhum,label,label,Ainhum,Ainhum,100.0,1.0,1.0,4,1.0,2
3,http://purl.obolibrary.org/obo/ICD10_I44.4,Left anterior fascicular block,HP:0011711,Left anterior fascicular block,label,label,Left anterior fascicular block,Left anterior fascicular block,100.0,1.0,1.0,4,1.0,2
4,http://purl.obolibrary.org/obo/ICD10_R47.81,Slurred speech,HP:0001350,Slurred speech,label,label,Slurred speech,Slurred speech,100.0,1.0,1.0,4,1.0,2
5,http://purl.obolibrary.org/obo/ICD10_Q17.5,Prominent ear,HP:0000411,Protruding ear,label,hasExactSynonym,Prominent ear,Prominent ear,90.0,1.0,1.0,4,1.0,2
6,http://purl.obolibrary.org/obo/ICD10_Q25.3,Supravalvular aortic stenosis,HP:0004381,Supravalvular aortic stenosis,label,label,Supravalvular aortic stenosis,Supravalvular aortic stenosis,100.0,1.0,1.0,4,1.0,2
7,http://purl.obolibrary.org/obo/ICD10_Q79.0,Congenital diaphragmatic hernia,HP:0000776,Congenital diaphragmatic hernia,label,label,Congenital diaphragmatic hernia,Congenital diaphragmatic hernia,100.0,0.5,1.0,4,1.0,2
8,http://purl.obolibrary.org/obo/ICD10_H05.4,Enophthalmos,HP:0000490,Deeply set eye,label,hasExactSynonym,Enophthalmos,Enophthalmos,90.0,1.0,1.0,4,1.0,2
9,http://purl.obolibrary.org/obo/ICD10_F31,bipolar disorder,HP:0007302,Bipolar affective disorder,label,hasExactSynonym,bipolar disorder,Bipolar disorder,90.0,0.5,1.0,4,1.0,2


In [27]:
# Alignment rows whose HP side ('right' column) is HP:0004381
is_svas = df['right'].eq('HP:0004381')
svas = df[is_svas]
svas

Unnamed: 0,left,left_label,right,right_label,left_match_type,right_match_type,left_match_val,right_match_val,score,left_simscore,right_simscore,reciprocal_score,conditional_pr_equiv,equiv_clique_size
6,http://purl.obolibrary.org/obo/ICD10_Q25.3,Supravalvular aortic stenosis,HP:0004381,Supravalvular aortic stenosis,label,label,Supravalvular aortic stenosis,Supravalvular aortic stenosis,100.0,1.0,1.0,4,1.0,2


In [36]:
from collections import defaultdict

# Index the alignment table by its 'right' column (HP id): each key maps to the list
# of 'left' values (ICD10 IRIs) from the rows that carry that HP id, in row order.
hp2icd = defaultdict(list)
# Walk the two columns in lockstep rather than df.iterrows(), which constructs a
# full Series for every row only to read two cells from it.
for hp_id, icd_iri in zip(df['right'], df['left']):
    hp2icd[hp_id].append(icd_iri)

hp2icd['HP:0004381']

['http://purl.obolibrary.org/obo/ICD10_Q25.3']

In [12]:
from ontobio import AssociationSetFactory
# Factory whose create(...) method is used below to build the association set `aset`.
afa = AssociationSetFactory()

In [79]:
# Association set requested over the hp ontology with subject category 'disease',
# object category 'phenotype' and taxon NCBITaxon:9606. Later cells read
# aset.subjects, aset.annotations(d) and aset.label(d) from it.
aset = afa.create(ontology=hp, subject_category='disease', object_category='phenotype', taxon='NCBITaxon:9606')

In [66]:
def rollup(hpterm):
    """Collect the terms mapped to ``hpterm`` or to its nearest mapped ancestors.

    If ``hpterm`` has entries in ``hp2icd`` they are returned directly.
    Otherwise the search climbs through ``hpsimple.parents(...)``; each upward
    path stops at the first ancestor that has a mapping, and the mappings
    found along all paths are merged.

    Args:
        hpterm: identifier of the term to start from (e.g. ``'HP:0004381'``).

    Returns:
        set: the mapped identifiers; empty when neither the term nor any
        ancestor reachable through ``hpsimple`` has an entry in ``hp2icd``.
    """
    found = set()
    visited = set()
    pending = [hpterm]
    while pending:
        term = pending.pop()
        if term in visited:
            # Reached again via another parent (or a cycle): already handled,
            # and revisiting would only repeat the same work.
            continue
        visited.add(term)
        # .get() instead of [] so that probing the defaultdict does not insert
        # an empty list for every unmapped term that is looked at.
        direct = hp2icd.get(term)
        if direct:
            found.update(direct)
        else:
            pending.extend(hpsimple.parents(term))
    return found

def test_rollup(t):
    """Print term ``t`` with its hp label, then the terms ``rollup(t)`` returns with their icd labels."""
    header = "ROLLING UP: {} {}".format(t, hp.label(t))
    print(header)
    rolled = rollup(t)
    described = []
    for x in rolled:
        described.append("{} {}".format(x, icd.label(x)))
    print(" --> {}".format(described))
    
# Exercise the roll-up on a term with a direct mapping (HP:0004381)
# and then on one with no direct mapping (HP:0005173)
for example_term in ('HP:0004381', 'HP:0005173'):
    test_rollup(example_term)

ROLLING UP: HP:0004381 Supravalvular aortic stenosis
 --> ['http://purl.obolibrary.org/obo/ICD10_Q25.3 Supravalvular aortic stenosis']
ROLLING UP: HP:0005173 Calcific aortic valve stenosis
 --> []


In [80]:
# For each subject of the association set, merge the rolled-up terms of all of
# its annotations into a single set.
d2icd = {}
for subject in aset.subjects:
    per_term = [rollup(term) for term in aset.annotations(subject)]
    d2icd[subject] = set().union(*per_term)
    

In [81]:
def show_phekb(d):
    """Print a short report for subject ``d``.

    Shows the subject id with its label from ``aset``, the subject's
    annotations with their ``hp`` labels, and the entries stored for it in
    ``d2icd`` with their ``icd`` labels.
    """
    print("\nTESTING: {} {}".format(d, aset.label(d)))

    profile = []
    for t in aset.annotations(d):
        profile.append("{} {}".format(t, hp.label(t)))
    print("  HPO PROFILE: {}".format(profile))

    rolled = []
    for t in d2icd[d]:
        rolled.append("{} {}".format(t, icd.label(t)))
    print("  ROLLED UP: {}".format(rolled))

# Report on two example subjects
for disease_id in ('OMIM:118450', 'OMIM:610205'):
    show_phekb(disease_id)


TESTING: OMIM:118450 Alagille syndrome 1
  HPO PROFILE: ['HP:0001394 Cirrhosis', 'HP:0001738 Exocrine pancreatic insufficiency', 'HP:0001328 Specific learning disability', 'HP:0000627 Posterior embryotoxon', 'HP:0003189 Long nose', 'HP:0004969 Peripheral pulmonary artery stenosis', 'HP:0006571 Reduced number of intrahepatic bile ducts', 'HP:0005280 Depressed nasal bridge', 'HP:0001284 Areflexia', 'HP:0002910 Elevated hepatic transaminases', 'HP:0000482 Microcornea', 'HP:0001680 Coarctation of aorta', 'HP:0000400 Macrotia', 'HP:0000585 Band keratopathy', 'HP:0000076 Vesicoureteral reflux', 'HP:0001256 Intellectual disability, mild', 'HP:0000772 Abnormality of the ribs', 'HP:0000533 Chorioretinal atrophy', 'HP:0001492 Axenfeld anomaly', 'HP:0000518 Cataract', 'HP:0006579 Prolonged neonatal jaundice', 'HP:0001297 Stroke', 'HP:0000316 Hypertelorism', 'HP:0000337 Broad forehead', 'HP:0000545 Myopia', 'HP:0001631 Atrial septal defect', 'HP:0000089 Renal hypoplasia', 'HP:0002155 Hypertriglyc