This is an example script for converting MeSH disease identifiers to MONDO IDs using the MONDO ontology file (mondo.owl).

In your virtual environment of choice, install the dependencies:

    pip install rdflib prefixcommons

In [6]:
from prefixcommons import contract_uri, expand_uri
from rdflib import Graph, URIRef, OWL, RDFS
import logging

# Show INFO-level records so the progress messages below are visible.
logging.basicConfig(level=logging.INFO)

# Where the MONDO ontology is fetched from; parsed below as RDF/XML.
MONDO_OWL_URL = "http://purl.obolibrary.org/obo/mondo.owl"

# Read the whole ontology into an in-memory rdflib graph.
# This is slow: expect roughly 7 minutes.
logging.info("loading mondo into memory")
mondo = Graph()
mondo.parse(MONDO_OWL_URL, format="xml")
logging.info("finished loading mondo")

INFO:root:loading mondo into memory
INFO:root:finished loading mondo


In [21]:
# Example MESH ids
mesh_curies = [
    'MESH:D003550',
    'MESH:C537335',
    'MESH:D031249',
    'MESH:12345',  # Made up ID
]

# For each MESH CURIE, print every MONDO CURIE that the ontology declares
# equivalent to it (tab-separated), or log a message when there is none.
for mesh_curie in mesh_curies:
    # strict=True: an unknown prefix is an input error and should fail loudly.
    mesh_iri = URIRef(expand_uri(mesh_curie, strict=True))
    mondo_curie = None
    # Only owl:equivalentClass triples that have the MESH IRI as their
    # subject are followed here.
    for equivalent in mondo.objects(mesh_iri, OWL['equivalentClass']):
        # Skip anything that is not a named class (e.g. blank nodes);
        # such nodes have no CURIE form.
        if not isinstance(equivalent, URIRef):
            continue
        # Non-strict contraction returns a (possibly empty) list instead of
        # raising, so one unrecognised IRI cannot abort the whole run.
        for curie in contract_uri(str(equivalent)):
            if curie.startswith('MONDO:'):
                mondo_curie = curie
                mondo_iri = equivalent
                print("{}\t{}".format(mesh_curie, mondo_curie))
    if mondo_curie is None:
        logging.info("No mondo id for {}".format(mesh_curie))


INFO:root:No mondo id for MESH:12345


MESH:D003550	MONDO:0009061
MESH:C537335	MONDO:0017849
MESH:D031249	MONDO:0018153
