In [4]:
pip install neo4j

Note: you may need to restart the kernel to use updated packages.


In [5]:
pip install rdflib

Collecting rdflib
  Obtaining dependency information for rdflib from https://files.pythonhosted.org/packages/d4/b0/7b7d8b5b0d01f1a0b12cc2e5038a868ef3a15825731b8a0d776cf47566c0/rdflib-7.0.0-py3-none-any.whl.metadata
  Downloading rdflib-7.0.0-py3-none-any.whl.metadata (11 kB)
Collecting isodate<0.7.0,>=0.6.0 (from rdflib)
  Obtaining dependency information for isodate<0.7.0,>=0.6.0 from https://files.pythonhosted.org/packages/b6/85/7882d311924cbcfc70b1890780763e36ff0b140c7e51c110fc59a532f087/isodate-0.6.1-py2.py3-none-any.whl.metadata
  Downloading isodate-0.6.1-py2.py3-none-any.whl.metadata (9.6 kB)
Downloading rdflib-7.0.0-py3-none-any.whl (531 kB)
   ---------------------------------------- 0.0/531.9 kB ? eta -:--:--
   -- ------------------------------------ 30.7/531.9 kB 660.6 kB/s eta 0:00:01
   ------ --------------------------------- 92.2/531.9 kB 1.3 MB/s eta 0:00:01
   ----------------- ---------------------- 235.5/531.9 kB 2.0 MB/s eta 0:00:01
   -----------------------------

In [7]:
import getpass
import os
import re

from neo4j import GraphDatabase
from rdflib import Graph, URIRef, Literal, Namespace, BNode
from rdflib.namespace import RDF, RDFS, OWL, XSD

In [8]:
# Connection settings are read from the environment so that no credential is
# stored in the notebook. NEO4J_URI and NEO4J_USER fall back to the local
# defaults used during development.
uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
user = os.environ.get("NEO4J_USER", "neo4j")
# The password has no default: it is taken from NEO4J_PASSWORD, or prompted for
# interactively (without echo) when that variable is unset or empty.
password = os.environ.get("NEO4J_PASSWORD") or getpass.getpass("Neo4j password: ")
driver = GraphDatabase.driver(uri, auth=(user, password))

In [46]:
def fetch_data(session, limit=100):
    """Fetch safety reports with each patient's drugs and adverse events.

    Runs a single Cypher query that, for every (report id, patient id) pair,
    gathers the distinct adverse-event names and the distinct drugs reached
    through ``TOOK``.

    Drug names and active substances are collected together as
    ``{name, activeSubstance}`` pairs and only split into two lists afterwards,
    so ``drugNames[i]`` and ``activeSubstances[i]`` always describe the same
    drug. Collecting the two properties with independent
    ``collect(DISTINCT ...)`` calls drops nulls and duplicates separately,
    which lets the lists differ in length and order and makes a later ``zip``
    pair the wrong values or silently drop drugs.

    Args:
        session: object exposing ``run(query, **parameters)`` whose result
            iterates over records that provide ``data()`` (here, a Neo4j
            session).
        limit: maximum number of rows to return. Defaults to 100, the value
            that used to be hard-coded in the query.

    Returns:
        A list with one dict per row, with the keys ``reportId``,
        ``patientId``, ``aeNames``, ``drugNames`` and ``activeSubstances``.
        ``drugNames`` and ``activeSubstances`` have the same length;
        ``activeSubstances[i]`` is ``None`` when drug ``i`` has no
        ``activeSubstanceName``. Drugs without a name are omitted.
    """
    query = """
        MATCH (sr:SafetyReport)-[:HAS_PATIENT]->(p:Patient)
        OPTIONAL MATCH (p)-[:EXPERIENCED]->(ae:AdverseEvent)
        OPTIONAL MATCH (p)-[:TOOK]->(d:Drug)
        RETURN sr.safetyreportid AS reportId, p.id AS patientId,
               collect(DISTINCT ae.name) AS aeNames,
               collect(DISTINCT CASE
                   WHEN d.name IS NULL THEN null
                   ELSE {name: d.name, activeSubstance: d.activeSubstanceName}
               END) AS drugs
        LIMIT $limit
    """
    rows = []
    for record in session.run(query, limit=limit):
        row = record.data()
        # Split the aligned pairs back into the two parallel lists that the
        # rest of the notebook consumes.
        drugs = row.pop("drugs", None) or []
        row["drugNames"] = [drug["name"] for drug in drugs]
        row["activeSubstances"] = [drug["activeSubstance"] for drug in drugs]
        rows.append(row)
    return rows

# Graph that accumulates every triple of the ontology built in this cell.
g = Graph()
# Base namespace of the ontology: n["X"] yields the URI
# "http://Koncordant.org/CIS890/FDA/X".
n = Namespace("http://Koncordant.org/CIS890/FDA/")

def sanitize_for_uri(name):
    """Turn a free-text name into a token usable as the local part of a URI.

    Every space becomes an underscore; afterwards each character other than an
    ASCII letter, a digit, ``_`` or ``-`` is removed.
    """
    underscored = '_'.join(name.split(' '))
    return re.sub(r"[^a-zA-Z0-9_-]", '', underscored)

# --- Top-level classes -------------------------------------------------------
safetyReportClass, patientClass, drugClass, adverseEventClass = (
    URIRef(n[label]) for label in ("SafetyReport", "Patient", "Drug", "AdverseEvent")
)
for class_uri in (safetyReportClass, patientClass, drugClass, adverseEventClass):
    g.add((class_uri, RDF.type, RDFS.Class))

# --- Object properties -------------------------------------------------------
is_partOf_causing = URIRef(n["is_partOf_causing"])
has_patient = URIRef(n["has_patient"])
took = URIRef(n["took"])
has_reported = URIRef(n["has_reported"])
for object_property in (is_partOf_causing, has_patient, took, has_reported):
    g.add((object_property, RDF.type, OWL.ObjectProperty))

# --- Data property -----------------------------------------------------------
has_activesubstance = URIRef(n["has_activesubstance"])
g.add((has_activesubstance, RDF.type, OWL.DatatypeProperty))

# --- Domain and range of each property ---------------------------------------
# Rows are (property, domain, range), listed in the order they are asserted.
property_signatures = (
    (has_patient, safetyReportClass, patientClass),
    (took, patientClass, drugClass),
    (has_reported, patientClass, adverseEventClass),
    (has_activesubstance, drugClass, XSD.string),
    (is_partOf_causing, drugClass, adverseEventClass),
)
for prop, domain_class, range_class in property_signatures:
    g.add((prop, RDFS.domain, domain_class))
    g.add((prop, RDFS.range, range_class))


# Pull the report rows from Neo4j; the session is released as soon as the rows
# have been materialised into a list.
with driver.session() as session:
    data = fetch_data(session)

# (class, property, quantifier, filler) combinations already asserted, so a
# class that shows up in many reports does not accumulate identical
# blank-node restrictions.
_asserted_restrictions = set()


def _add_restriction(class_uri, on_property, quantifier, filler):
    """Assert ``class_uri rdfs:subClassOf [owl:Restriction ...]`` at most once.

    Args:
        class_uri: class that receives the restriction as a superclass.
        on_property: property the restriction is placed on.
        quantifier: ``OWL.someValuesFrom`` or ``OWL.allValuesFrom``.
        filler: class or datatype used as the restriction's filler.
    """
    key = (class_uri, on_property, quantifier, filler)
    if key in _asserted_restrictions:
        return
    _asserted_restrictions.add(key)

    restriction = BNode()  # anonymous node carrying the restriction
    g.add((restriction, RDF.type, OWL.Restriction))
    g.add((restriction, OWL.onProperty, on_property))
    g.add((restriction, quantifier, filler))
    g.add((class_uri, RDFS.subClassOf, restriction))


for row in data:
    # SafetyReport subclass for this report
    sr_class_uri = URIRef(n[f"{row['reportId']}"])
    g.add((sr_class_uri, RDF.type, RDFS.Class))
    g.add((sr_class_uri, RDFS.subClassOf, safetyReportClass))

    # Patient subclass for this patient
    p_class_uri = URIRef(n[f"{row['patientId']}"])
    g.add((p_class_uri, RDF.type, RDFS.Class))
    g.add((p_class_uri, RDFS.subClassOf, patientClass))

    # The report only has patients of this patient class.
    _add_restriction(sr_class_uri, has_patient, OWL.allValuesFrom, p_class_uri)

    # AdverseEvent subclasses. These are handled once per patient, outside the
    # drug loop, so a patient with no recorded drug still gets its adverse
    # events and the has_reported restriction is not repeated for every drug.
    ae_class_uris = []
    for ae_name in row['aeNames']:
        sanitized_ae_name = sanitize_for_uri(ae_name)
        if not sanitized_ae_name:
            # Nothing usable is left of the name; n[""] would be the bare
            # namespace URI, so skip it.
            continue
        ae_class_uri = URIRef(n[sanitized_ae_name])
        # Graph.add is idempotent, so no membership check is needed first.
        g.add((ae_class_uri, RDF.type, RDFS.Class))
        g.add((ae_class_uri, RDFS.subClassOf, adverseEventClass))

        # Apply the restriction to the specific patient subclass
        _add_restriction(p_class_uri, has_reported, OWL.someValuesFrom, ae_class_uri)
        ae_class_uris.append(ae_class_uri)

    # Drug subclasses. zip() stops at the shorter list, so the pairing is only
    # meaningful when fetch_data returns the two lists index-aligned.
    for drug_name, active_substance in zip(row['drugNames'], row['activeSubstances']):
        sanitized_drug_name = sanitize_for_uri(drug_name)
        if not sanitized_drug_name:
            continue
        drug_class_uri = URIRef(n[sanitized_drug_name])
        g.add((drug_class_uri, RDF.type, RDFS.Class))
        g.add((drug_class_uri, RDFS.subClassOf, drugClass))

        # Every drug class has some string-valued active substance.
        _add_restriction(drug_class_uri, has_activesubstance, OWL.someValuesFrom, XSD.string)

        # Record the concrete value only when one is known; Literal(None)
        # would otherwise be stored as the text "None".
        if active_substance is not None:
            g.add((drug_class_uri, has_activesubstance, Literal(active_substance)))

        # Apply the restriction to the specific patient subclass
        _add_restriction(p_class_uri, took, OWL.someValuesFrom, drug_class_uri)

        # Link the drug class to every adverse event reported for this patient.
        # The restriction is applied to the drug subclass, not the patient.
        for ae_class_uri in ae_class_uris:
            _add_restriction(drug_class_uri, is_partOf_causing, OWL.someValuesFrom, ae_class_uri)

In [47]:
# Write the ontology as RDF/XML. The path is held in one variable so that the
# confirmation message always names the file that was actually written.
output_path = "ontology_with_subclasses2.owl"
g.serialize(destination=output_path, format="xml")

print(f"Ontology with subclasses created and saved to {output_path}.")

Ontology with subclasses created and saved to ontology_with_subclasses.owl.
