In [1]:
import pandas as pd
from oaklib import get_adapter
from oaklib.implementations.ubergraph.ubergraph_implementation import UbergraphImplementation

# Get all HP terms that are a subclass of dental abnormality using UberGraph

In [2]:
foi = UbergraphImplementation()

In [13]:
dental_subtree = [x['hp'] for x in list(foi.query('SELECT ?hp WHERE {?hp rdfs:subClassOf* <http://purl.obolibrary.org/obo/HP_0000164>.}'))]

Query has no LIMIT clause: SELECT ?hp WHERE {?hp rdfs:subClassOf* <http://purl.obolibrary.org/obo/HP_0000164>.} LIMIT 100


# Get the Monarch KG

In [14]:
!wget https://data.monarchinitiative.org/monarch-kg-dev/latest/monarch-kg.tar.gz
!gunzip monarch-kg.tar.gz
!tar -xvzf monarch-kg.tar

--2023-09-14 06:27:01--  https://data.monarchinitiative.org/monarch-kg-dev/latest/monarch-kg.tar.gz
Resolving data.monarchinitiative.org (data.monarchinitiative.org)... 35.208.191.193
Connecting to data.monarchinitiative.org (data.monarchinitiative.org)|35.208.191.193|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 111339995 (106M) [application/x-tar]
Saving to: ‘monarch-kg.tar.gz’


2023-09-14 06:27:05 (27.7 MB/s) - ‘monarch-kg.tar.gz’ saved [111339995/111339995]

monarch-kg.tar already exists -- do you wish to overwrite (y or n)? ^C
x monarch-kg_nodes.tsv
x monarch-kg_edges.tsv


In [16]:
edges = pd.read_csv('monarch-kg_edges.tsv', sep='\t', dtype=str)

In [17]:
nodes = pd.read_csv('monarch-kg_nodes.tsv', sep='\t', dtype=str)

In [18]:
node_label_map = dict(zip(nodes['id'], nodes['name']))
edges['subject_label'] = edges['subject'].apply(lambda x: node_label_map[x])
edges['object_label'] = edges['object'].apply(lambda x: node_label_map[x])

# Get all disease to phenotype associations that are a subclass of dental anomaly

In [19]:
dental_d2p = edges[
    (edges['category'] == 'biolink:DiseaseToPhenotypicFeatureAssociation') &
    (edges['subject'].str.contains("MONDO")) &
    (edges['object'].isin(dental_subtree))
]

# Show the dental phenotype associated diseases.

In [20]:
dental_d2p[['subject_label', 'subject']].drop_duplicates()

Unnamed: 0,subject_label,subject
15180,cerebrocostomandibular syndrome,MONDO:0007301
15241,van Maldergem syndrome 2,MONDO:0014242
15602,neurodevelopmental disorder with absent speech...,MONDO:0859519
15743,"ectodermal dysplasia 13, hair/tooth type",MONDO:0044305
16014,mandibulofacial dysostosis-macroblepharon-macr...,MONDO:0011255
...,...,...
254530,"Ehlers-Danlos syndrome, spondylodysplastic typ...",MONDO:0014139
254621,"autosomal recessive cutis laxa type 2, classic...",MONDO:0009054
254978,"acrofacial dysostosis, Palagonia type",MONDO:0011154
255042,Usher syndrome type 1,MONDO:0010168


In [21]:
dental_d2p[['subject_label', 'subject']].drop_duplicates().to_csv("dental_diseases.tsv", sep="\t", index=False)

# Get all the d2p associations for the above diseases (including non-dental abnormalities)

In [22]:
dental_abnormality_d2p_associations = edges[edges['subject'].isin(dental_d2p['subject'].tolist())]

In [23]:
dental_abnormality_d2p_associations[['subject_label', 'object_label']]

Unnamed: 0,subject_label,object_label
15142,cerebrocostomandibular syndrome,Anal stenosis (HPO)
15143,cerebrocostomandibular syndrome,Epicanthus (HPO)
15144,cerebrocostomandibular syndrome,Congenital hip dislocation (HPO)
15145,cerebrocostomandibular syndrome,Thoracic hypoplasia (HPO)
15146,cerebrocostomandibular syndrome,Ectopic kidney (HPO)
...,...,...
5222217,auriculocondylar syndrome 2,Autosomal recessive inheritance (HPO)
5222218,auriculocondylar syndrome 2,Autosomal dominant inheritance (HPO)
5222221,lethal acantholytic epidermolysis bullosa,Autosomal recessive inheritance (HPO)
5222222,bardet-biedl syndrome 21,Autosomal recessive inheritance (HPO)


In [24]:
edges['category'].value_counts().reset_index().to_csv('category_counts.tsv', sep="\t", index=False)