# Investigation into `has_disease_location` field of EFO3

Some diseases in EFO3 have a `has_disease_location` attribute, whose values are tissue locations mapped to UBERON terms. Open Targets does not currently use this information.

This notebook checks the coverage of `has_disease_location` across diseases, to help decide whether to incorporate this field into Open Targets data. Open Targets already uses UBERON elsewhere, as the identifiers for baseline expression tissues.

Some possible benefits of adding `has_disease_location` are:
* as a way to filter diseases on the target associations page
* as a way to suggest similar diseases (those having `has_disease_location` in common)
* as a way to prioritise targets based on baseline expression (in relevant tissues)

In [1]:
# imports
import rdflib
from rdflib.namespace import RDF
from opentargets_ontologyutils import URLZSource
import opentargets_ontologyutils.efo
from opentargets_ontologyutils.rdf_utils import OntologyClassReader, DiseaseUtils

In [2]:
# Open Targets' view of the disease ontology (the diseases used by Open
# Targets), loaded through ontology-utils from the EFO release pinned below.
efo_uri = 'https://github.com/EBISPOT/efo/releases/download/v3.3.0/efo.owl'
disease_ontology = OntologyClassReader()
opentargets_ontologyutils.efo.load_open_targets_disease_ontology(disease_ontology, efo_uri)

In [3]:
# Separately from the ontology-utils disease ontology, parse the same EFO
# release into a plain rdflib graph so its triples can be queried directly.
g = rdflib.Graph()
with URLZSource(efo_uri).open() as efo_stream:
    g.parse(file=efo_stream, format='xml')

In [5]:
# Shorthands for the two URIs the graph queries in this notebook rely on:
# the rdfs:subClassOf predicate and the EFO has_disease_location property.
subclass_of = rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#subClassOf')
has_disease_location = rdflib.URIRef('http://www.ebi.ac.uk/efo/EFO_0000784')

In [6]:
# A handful of disease URIs used to spot-check the helper functions
gallbladder_carcinoma = rdflib.URIRef('http://www.ebi.ac.uk/efo/EFO_1001956')
rheumatoid_arthritis = rdflib.URIRef('http://www.ebi.ac.uk/efo/EFO_0000685')
rickets = rdflib.URIRef('http://www.ebi.ac.uk/efo/EFO_0005583')

In [7]:
# check if a (disease) node in the efo graph has the has_disease_location field
def has_disease_location_attribute(node):
    """Report whether `node` directly carries a `has_disease_location` axiom.

    Every `subclass_of` object of `node` in the module-level graph `g` is
    examined. Objects that are blank nodes are checked for any triple whose
    object is the `has_disease_location` property; one such triple is enough
    for the node to count as having the attribute.

    Only statements made about `node` itself are considered; its named
    superclasses are not followed.

    Args:
        node: rdflib term identifying the class to inspect.

    Returns:
        True if a matching blank node is found, otherwise False.
    """
    for superclass in g.objects(subject=node, predicate=subclass_of):
        # Named superclasses (URIRefs) are ordinary parents, not axioms
        if not isinstance(superclass, rdflib.term.BNode):
            continue
        # Any predicate is accepted, as long as it points at the property.
        # NOTE(review): presumably this is always owl:onProperty on an OWL
        # restriction -- confirm against the EFO serialisation.
        linking_predicates = g.predicates(subject=superclass, object=has_disease_location)
        if next(linking_predicates, None) is not None:
            # One hit is sufficient; no need to look at remaining superclasses
            return True
    return False

In [8]:
# Spot-check the helper on the example diseases
for prefix, example_node in (
    ('gallbladder carcinoma = ', gallbladder_carcinoma),
    ('rheumatoid arthritis = ', rheumatoid_arthritis),
    ('rickets = ', rickets),
):
    print(prefix, has_disease_location_attribute(example_node))

gallbladder carcinoma =  True
rheumatoid arthritis =  True
rickets =  False


In [9]:
# check for each disease if it has the has_disease_location field
def check_direct(diseases_dict):
    """Flag each disease that directly has a `has_disease_location` axiom.

    Args:
        diseases_dict: mapping of disease URI (string) to disease label.

    Returns:
        A `(label_dict, uri_dict)` tuple. Both map to the boolean result of
        `has_disease_location_attribute`; the first is keyed by label, the
        second by URI. If several URIs share a label, the label-keyed dict
        keeps only the result of the last one iterated.
    """
    # Evaluate every URI once, in iteration order
    uri_dict = {
        uri: has_disease_location_attribute(rdflib.term.URIRef(uri))
        for uri in diseases_dict
    }
    # Re-key the same results by label
    label_dict = {diseases_dict[uri]: flag for uri, flag in uri_dict.items()}
    return (label_dict, uri_dict)

In [10]:
# Apply the direct check to every Open Targets disease and report coverage
(direct_label_dict, direct_uri_dict) = check_direct(disease_ontology.current_classes)
print('total diseases considered = ', len(direct_label_dict))
print('diseases with direct has_disease_location = ', sum(1 for flag in direct_label_dict.values() if flag))

total diseases considered =  14425
diseases with direct has_disease_location =  1315


In [11]:
# helper function to traverse graph (finds parent diseases)
def parents(node, g):
    """Yield the named superclasses of `node` in graph `g`.

    Takes `(node, graph)` so that it can be handed to
    `Graph.transitiveClosure` as the step function. The `g` parameter
    shadows the module-level graph of the same name inside this function.

    Only `subclass_of` objects whose type is exactly `URIRef` are yielded;
    everything else is skipped.
    """
    yield from (
        candidate
        for candidate in g.objects(subject=node, predicate=subclass_of)
        if type(candidate) == rdflib.term.URIRef
    )

In [12]:
# check for each disease if it (or one of its parents) has the has_disease_location field
def check_indirect(disease_dict, direct_uri_dict, direct_label_dict):
    """Flag each disease that has, or inherits, a `has_disease_location`.

    A disease is flagged when any of its ancestors (found by following
    `parents` transitively through the module-level graph `g`) is marked
    truthy in `direct_uri_dict`, or when the disease's own label is marked
    truthy in `direct_label_dict`.

    Args:
        disease_dict: mapping of disease URI (string) to disease label.
        direct_uri_dict: mapping of URI (string) to the direct-check result.
            Ancestors missing from this mapping count as not having a
            location.
        direct_label_dict: mapping of label to the direct-check result; must
            contain the label of any disease without a flagged ancestor.

    Returns:
        Dict keyed by disease label holding the combined result. If several
        URIs share a label, the last one iterated wins.

    Raises:
        KeyError: if no ancestor is flagged and the disease's label is
            missing from `direct_label_dict`.
    """
    indirect_label_dict = {}
    for uri in disease_dict:
        label = disease_dict[uri]
        node = rdflib.term.URIRef(uri)
        # Stream the ancestors and stop at the first one with a direct
        # location, rather than collecting the whole closure up front.
        inherited = any(
            direct_uri_dict.get(str(ancestor), False)
            for ancestor in g.transitiveClosure(parents, node)
        )
        # The disease's own direct result is looked up by label
        indirect_label_dict[label] = inherited or direct_label_dict[label]
    return indirect_label_dict

In [13]:
# Apply the inherited check to every Open Targets disease and report coverage
indirect_label_dict = check_indirect(disease_ontology.current_classes, direct_uri_dict, direct_label_dict)
print('total diseases considered = ', len(indirect_label_dict))
print('diseases with indirect has_disease_location = ', sum(1 for flag in indirect_label_dict.values() if flag))

total diseases considered =  14425
diseases with indirect has_disease_location =  8554


In [14]:
# Spot-check transitiveClosure on one disease: print each ancestor URI,
# followed by its Open Targets label when it has one (blank otherwise)
discrete_subaortic_stenosis = rdflib.URIRef('http://www.ebi.ac.uk/efo/EFO_1000901')
known_labels = disease_ontology.current_classes
for p in g.transitiveClosure(parents, discrete_subaortic_stenosis):
    print(str(p), known_labels[str(p)] if str(p) in known_labels else '')

http://www.ebi.ac.uk/efo/EFO_1001199 subvalvular aortic stenosis
http://www.ebi.ac.uk/efo/EFO_0000266 aortic stenosis
http://www.ebi.ac.uk/efo/EFO_0009531 aortic valve disease
http://purl.obolibrary.org/obo/MONDO_0045001 cardiac ventricle disease
http://www.ebi.ac.uk/efo/EFO_0003777 heart disease
http://purl.obolibrary.org/obo/MONDO_0000651 thoracic disease
http://purl.obolibrary.org/obo/MONDO_0024505 disorder by anatomical region
http://www.ebi.ac.uk/efo/EFO_0000408 
http://www.ifomis.org/bfo/1.1/snap#Disposition 
http://www.ifomis.org/bfo/1.1/snap#SpecificallyDependentContinuant 
http://www.ebi.ac.uk/efo/EFO_0000001 
http://www.w3.org/2002/07/owl#Thing 
http://www.ebi.ac.uk/efo/EFO_0000319 cardiovascular disease
http://purl.obolibrary.org/obo/MONDO_0021199 disease by anatomical system
http://www.ebi.ac.uk/efo/EFO_0000408 
http://www.ebi.ac.uk/efo/EFO_0005775 aortic disease
http://purl.obolibrary.org/obo/MONDO_0000473 arterial disorder
http://www.ebi.ac.uk/efo/EFO_0004264 vascular dis