In [2]:
import pandas as pd
from oaklib import get_adapter
from oaklib.implementations.ubergraph.ubergraph_implementation import UbergraphImplementation

# Get all HP terms that are a subclass of dental anomaly using UberGraph

In [3]:
foi = UbergraphImplementation()

In [4]:
dental_subtree = [x['hp'] for x in list(foi.query('SELECT ?hp WHERE {?hp rdfs:subClassOf* <http://purl.obolibrary.org/obo/HP_0011061>.}'))]

Query has no LIMIT clause: SELECT ?hp WHERE {?hp rdfs:subClassOf* <http://purl.obolibrary.org/obo/HP_0011061>.} LIMIT 100


# Get the Monarch KG

In [4]:
!wget https://data.monarchinitiative.org/monarch-kg-dev/latest/monarch-kg.tar.gz
!gunzip monarch-kg.tar.gz
!tar -xvzf monarch-kg.tar

--2023-09-11 22:15:12--  https://data.monarchinitiative.org/monarch-kg-dev/latest/monarch-kg.tar.gz
Resolving data.monarchinitiative.org (data.monarchinitiative.org)... 35.208.191.193
Connecting to data.monarchinitiative.org (data.monarchinitiative.org)|35.208.191.193|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 111339995 (106M) [application/x-tar]
Saving to: ‘monarch-kg.tar.gz’


2023-09-11 22:15:16 (26.1 MB/s) - ‘monarch-kg.tar.gz’ saved [111339995/111339995]

x monarch-kg_nodes.tsv
x monarch-kg_edges.tsv


In [5]:
edges = pd.read_csv('monarch-kg_edges.tsv', sep='\t', dtype=str)

In [13]:
nodes = pd.read_csv('monarch-kg_nodes.tsv', sep='\t', dtype=str)

In [20]:
node_label_map = dict(zip(nodes['id'], nodes['name']))
edges['subject_label'] = edges['subject'].apply(lambda x: node_label_map[x])
edges['object_label'] = edges['object'].apply(lambda x: node_label_map[x])

# Get all disease to phenotype associations that are a subclass of dental anomaly

In [21]:
dental_d2p = edges[
    (edges['category'] == 'biolink:DiseaseToPhenotypicFeatureAssociation') &
    (edges['subject'].str.contains("MONDO")) &
    (edges['object'].isin(dental_subtree))
]

# Show the dental phenotype associated diseases.

In [22]:
dental_d2p['subject_label'].value_counts()

subject_label
regional odontodysplasia                                               13
amelogenesis imperfecta                                                 9
dentinogenesis imperfecta                                               8
otodental syndrome                                                      8
oculodentodigital dysplasia                                             6
                                                                       ..
neurodevelopmental disorder with hypotonia and dysmorphic facies        1
ectodermal dysplasia-cutaneous syndactyly syndrome                      1
tuberous sclerosis 1                                                    1
short-rib thoracic dysplasia 16 with or without polydactyly             1
ectrodactyly, ectodermal dysplasia, and cleft lip-palate syndrome 3     1
Name: count, Length: 375, dtype: int64

# Get all the d2p associations for the above diseases (including non-dental abnormalities)

In [25]:
dental_abnormality_d2p_associations = edges[edges['subject'].isin(dental_d2p['subject'].tolist())]

In [27]:
dental_abnormality_d2p_associations[['subject_label', 'object_label']]

Unnamed: 0,subject_label,object_label
15142,cerebrocostomandibular syndrome,Anal stenosis (HPO)
15143,cerebrocostomandibular syndrome,Epicanthus (HPO)
15144,cerebrocostomandibular syndrome,Congenital hip dislocation (HPO)
15145,cerebrocostomandibular syndrome,Thoracic hypoplasia (HPO)
15146,cerebrocostomandibular syndrome,Ectopic kidney (HPO)
...,...,...
5222088,AREDYLD syndrome,Autosomal recessive inheritance (HPO)
5222094,combined immunodeficiency due to STIM1 deficiency,Autosomal recessive inheritance (HPO)
5222122,"tumoral calcinosis, hyperphosphatemic, familia...",Autosomal recessive inheritance (HPO)
5222198,"tooth agenesis, selective, 7",Autosomal dominant inheritance (HPO)
