In [1]:
from neo4j import GraphDatabase
import pandas as pd

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import re
import datetime 
import numpy as np

In [2]:
events = pd.read_csv("patients.csv")

In [3]:
events

Unnamed: 0,patient_id,event_id,start_date,end_date,event_type,opcs4,opcs4_desc,icd10,icd10_desc,test_type,notes
0,patient_001,event_001,10/10/2021 12:30,10/10/2021 12:30,test,,,,,FIT,
1,patient_001,event_002,15/10/2021 10:00,15/10/2021 10:00,referral,,,,,,
2,patient_001,event_003,15/10/2021 10:00,15/10/2021 10:00,diagnosis,,,C183,Malignant neoplasm of colon: Hepatic flexure,,
3,patient_001,event_004,30/10/2021 12:00,30/10/2021 12:00,procedure,H083,Transverse colectomy and anastomosis NEC,,,,surgery
4,patient_001,event_005,03/11/2021 13:15,03/11/2021 13:15,procedure,X352,Intraveneous chemotherapy,,,,chemo
5,patient_001,event_006,05/11/2021 14:00,05/11/2021 14:00,procedure,X352,Intraveneous chemotherapy,,,,chemo
6,patient_001,event_007,01/12/2021 14:00,01/12/2021 14:00,procedure,X352,Intraveneous chemotherapy,,,,chemo
7,patient_002,event_008,01/01/20001 12:00,01/01/2001 12:00,test,,,,,FIT,
8,patient_002,event_009,01/01/20001 12:01,02/01/2001 12:00,referral,,,,,,
9,patient_002,event_010,01/01/20001 12:02,03/01/2001 12:00,diagnosis,,,C20,Malignan neoplasm of rectum,,


In [4]:
hasEvent = pd.DataFrame({ 'subject':events['patient_id'],
                          'predicate':"hasEvent",
                          'object':events['event_id']})
hasEvent = hasEvent.drop_duplicates()
hasEvent.head()

hasDiagnosis = pd.DataFrame({ 'subject':events['event_id'],
                          'predicate':"hasDiagnosis",
                          'object':events['icd10']})
hasDiagnosis = hasDiagnosis.drop_duplicates()
hasDiagnosis.head()

ofType = pd.DataFrame({ 'subject':events['event_id'],
                          'predicate':"ofType",
                          'object':events['opcs4']})
ofType = ofType.drop_duplicates()
ofType.head()

Unnamed: 0,subject,predicate,object
0,patient_001,hasEvent,event_001
1,patient_001,hasEvent,event_002
2,patient_001,hasEvent,event_003
3,patient_001,hasEvent,event_004
4,patient_001,hasEvent,event_005


Unnamed: 0,subject,predicate,object
0,event_001,hasDiagnosis,
1,event_002,hasDiagnosis,
2,event_003,hasDiagnosis,C183
3,event_004,hasDiagnosis,
4,event_005,hasDiagnosis,


Unnamed: 0,subject,predicate,object
0,event_001,ofType,
1,event_002,ofType,
2,event_003,ofType,
3,event_004,ofType,H083
4,event_005,ofType,X352


In [5]:
OPCS_DICT = {
    'A':'A Nervous System',
    'B':'B Endocrine System and Breast',
    'C':'C Eye',
    'D':'E Ear',
    'E':'F Respiratory Tract',
    'G':'G Upper Digestive Tract',
    'H':'H Lower Digestive Tract',
    'J':'J Other Abdominal Organs – Principally Digestive',
    'K':'K Heart',
    'L':'L Arteries and Veins',
    'M':'M Urinary', 
    'N':'N Male Genital Organs',
    'P':'P Lower Female Genital Tract',
    'Q':'Q Upper Female Genital Tract',
    'R':'R Female Genital Tract Associated with Pregnancy, Childbirth and Puerperium',
    'S':'S Skin',
    'T':'T Soft Tissue',
    'U':'U Diagnostic Imaging, Testing and Rehabilitation',
    'V':'V Bones and Joints of Skull and Spine',
    'W':'W Other Bones and Joints',
    'X':'X Miscellaneous Operations',
    'Y':'Y Subsidiary Classification of Methods of Operation',
    'Z':'Z Subsidiary Classification of Sites of Operation'
}

def createOPCSTriples(df):
    df = df.loc[ ~df.procedure_position.isna() ]

    codeInCategory = []
    for level_3 in df['procedure_code_opcs'].unique():
        try:
            level_1 = OPCS_DICT[level_3[0]] # First character = 1st level
            level_2 = level_3[0:3]          #First 3 characters = 2nd level

            codeInCategory.append([level_3,'inCategory',level_2])
            codeInCategory.append([level_2,'inCategory','procedures_'+level_1])
        except:
            # do nothing if the code isn't found in the dictionary
            pass

    codeInCategory = pd.DataFrame(columns=['subject','predicate','object'], data=codeInCategory)
    return codeInCategory

events['procedure_code_opcs'] = events['opcs4']
events['procedure_position'] = 1
ontologyTriples = createOPCSTriples(events)
ontologyTriples.head()

Unnamed: 0,subject,predicate,object
0,H083,inCategory,H08
1,H08,inCategory,procedures_H Lower Digestive Tract
2,X352,inCategory,X35
3,X35,inCategory,procedures_X Miscellaneous Operations
4,X701,inCategory,X70


In [6]:
opcs_cats = ontologyTriples[ ontologyTriples['object'].str.startswith('procedures_')]['object'] #Get highest level OPCS codes

ontologyRoot = pd.DataFrame({ 'subject':opcs_cats,
                          'predicate':"ofType",
                          'object':'procedures'} )
ontologyTriples = ontologyTriples.append(ontologyRoot)

ontologyTriples['object'] = ontologyTriples['object'].str.replace("[^0-9a-zA-Z]+", "_")
ontologyTriples['subject'] = ontologyTriples['subject'].str.replace("[^0-9a-zA-Z]+", "_")

ontologyTriples


  ontologyTriples['object'] = ontologyTriples['object'].str.replace("[^0-9a-zA-Z]+", "_")
  ontologyTriples['subject'] = ontologyTriples['subject'].str.replace("[^0-9a-zA-Z]+", "_")


Unnamed: 0,subject,predicate,object
0,H083,inCategory,H08
1,H08,inCategory,procedures_H_Lower_Digestive_Tract
2,X352,inCategory,X35
3,X35,inCategory,procedures_X_Miscellaneous_Operations
4,X701,inCategory,X70
5,X70,inCategory,procedures_X_Miscellaneous_Operations
6,H073,inCategory,H07
7,H07,inCategory,procedures_H_Lower_Digestive_Tract
8,H333,inCategory,H33
9,H33,inCategory,procedures_H_Lower_Digestive_Tract


In [7]:
uri             = "neo4j+s://1069e392.databases.neo4j.io"

userName        = "neo4j"

password        = "C315QwpDKsTlYkp1vC78I0kZTvO5x8RuHcHPhyUla7Y"

graphDB_Driver  = GraphDatabase.driver(uri, auth=(userName, password)) 

In [8]:

def getPrecedesTriples(all_events):
    
    all_patients = all_events['patient_id'].unique()
    eventPrecedesEvent = pd.DataFrame({ 'subject':[], 'predicate':[], 'object':[]})

    for p in all_patients:
        #patient_events = all_events[ (all_events['patient_id']==p) & (~all_events['procedure_code_opcs'].isna()) ]
        #patient_codes = patient_events['procedure_code_opcs'].tolist()

        patient_events = all_events[ (all_events['patient_id']==p) ]

        triples = pd.DataFrame({ 'subject':patient_events['event_id'].astype(str),
                             'predicate':"precedes",
                             'object':patient_events['event_id'].astype(str) })

        triples['object'] = triples['object'].shift(-1)
        triples = triples.drop(triples.tail(1).index) 

        eventPrecedesEvent = eventPrecedesEvent.append(triples)
    return eventPrecedesEvent

sequenceTriples = getPrecedesTriples(events)
sequenceTriples.head()

Unnamed: 0,subject,predicate,object
0,event_001,precedes,event_002
1,event_002,precedes,event_003
2,event_003,precedes,event_004
3,event_004,precedes,event_005
4,event_005,precedes,event_006


In [9]:


def addTriples(triple_df, subject_type, object_type  ):
    with graphDB_Driver.session() as graphDB_Session:
        for index,row in triple_df.iterrows():
            a = """MERGE (%(subject_val)s:%(subject_type)s {name:"%(subject_val)s"}) """  % {'subject_val':  row['subject'],
           'subject_type': subject_type}

            b = """MERGE (%(object_val)s:%(object_type)s {name:"%(object_val)s"}) """% {'object_val':  row['object'],
                               'object_type': object_type}

            c = """ MATCH (a:%(subject_type)s),(b:%(object_type)s) 
                    WHERE a.name = "%(subject_val)s" 
                      AND b.name = "%(object_val)s"
                    MERGE (a)
                        -[r:%(relationship)s]->(b) 
                    RETURN type(r)
                """% {'subject_val':  row['subject'],
                   'object_val': row['object'],
                   'subject_type':subject_type,
                   'object_type': object_type,
                   'relationship': row['predicate']}
            print(a)
            graphDB_Session.run(a)
            print(b)
            graphDB_Session.run(b)
            print(c)
            graphDB_Session.run(c)

            
            #print(a)
            #print(b)


In [10]:
addTriples(hasEvent, 'patient', 'event')


hasDiagnosis = hasDiagnosis.dropna()
addTriples(hasDiagnosis, 'event', 'diagnosis')

ofType = ofType.dropna()
addTriples(ofType, 'event', 'opcs4')

addTriples(ontologyTriples, 'opcs4', 'opcs4')
addTriples(sequenceTriples, 'event', 'event')

MERGE (patient_001:patient {name:"patient_001"}) 
MERGE (event_001:event {name:"event_001"}) 
 MATCH (a:patient),(b:event) 
                    WHERE a.name = "patient_001" 
                      AND b.name = "event_001"
                    MERGE (a)
                        -[r:hasEvent]->(b) 
                    RETURN type(r)
                
MERGE (patient_001:patient {name:"patient_001"}) 
MERGE (event_002:event {name:"event_002"}) 
 MATCH (a:patient),(b:event) 
                    WHERE a.name = "patient_001" 
                      AND b.name = "event_002"
                    MERGE (a)
                        -[r:hasEvent]->(b) 
                    RETURN type(r)
                
MERGE (patient_001:patient {name:"patient_001"}) 
MERGE (event_003:event {name:"event_003"}) 
 MATCH (a:patient),(b:event) 
                    WHERE a.name = "patient_001" 
                      AND b.name = "event_003"
                    MERGE (a)
                        -[r:hasEvent]->(b) 
           

In [None]:
#MATCH (n) DETACH DELETE n
# wipe neo4j datanase