In [7]:
pip install neo4j pandas torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [7]:
from neo4j_connections import Neo4jConnection
import yaml
import pandas as pd 

In [9]:
class GraphFetcher(object):
    """Reads a YAML configuration file and exposes the parsed settings.

    Attributes:
        configuration: Path of the YAML file, exactly as passed to the constructor.
        config: Parsed contents of the file, or ``None`` until
            :meth:`load_config` has been called.
    """

    def __init__(self, configuration):
        """Remember which file to read; nothing is opened here.

        Args:
            configuration: Path to the YAML configuration file.
        """
        self.configuration = configuration
        self.config = None

    def load_config(self):
        """Parse the YAML file and return its contents.

        The parsed value is returned and stored on ``self.config`` so callers
        do not have to rely on hidden state. For backward compatibility it is
        still also published as the module-level global ``config``, which
        other cells of this notebook read.

        Returns:
            Whatever ``yaml.load`` produces for the file.

        Raises:
            OSError: If the file cannot be opened.
        """
        global config
        with open(self.configuration, 'r') as config_file:
            # NOTE(review): yaml.load with FullLoader is meant for trusted
            # input only; consider yaml.safe_load if this file can ever come
            # from an untrusted source.
            config = yaml.load(config_file, Loader=yaml.FullLoader)
        self.config = config
        return config

In [13]:
# YAML file that is parsed and then handed to Neo4jConnection below.
configure = 'config.yaml'
graphFetcher = GraphFetcher(configure)
# load_config() publishes the parsed settings as the module-level global
# `config`; the next line and the node-loading functions further down read it.
graphFetcher.load_config()
# NOTE(review): the node-loading functions in this notebook build their own
# Neo4jConnection, so this instance is not used by them.
connection = Neo4jConnection(config)

In [20]:
# Read the eICU patient and diagnosis CSV tables into DataFrames.
# Both paths are relative, so they resolve against the kernel's working directory.
patient_df = pd.read_csv('../../../PyG-Neo4j/dataset/eicu-collaborative-research-database-2.0/patient.csv')
diagnosis_df = pd.read_csv('../../../PyG-Neo4j/dataset/eicu-collaborative-research-database-2.0/diagnosis.csv')

In [23]:
# Column names of the patient table; the patient node query maps each of these to a property.
patient_df.columns

Index(['patientunitstayid', 'patienthealthsystemstayid', 'gender', 'age',
       'ethnicity', 'hospitalid', 'wardid', 'apacheadmissiondx',
       'admissionheight', 'hospitaladmittime24', 'hospitaladmitoffset',
       'hospitaladmitsource', 'hospitaldischargeyear',
       'hospitaldischargetime24', 'hospitaldischargeoffset',
       'hospitaldischargelocation', 'hospitaldischargestatus', 'unittype',
       'unitadmittime24', 'unitadmitsource', 'unitvisitnumber', 'unitstaytype',
       'admissionweight', 'dischargeweight', 'unitdischargetime24',
       'unitdischargeoffset', 'unitdischargelocation', 'unitdischargestatus',
       'uniquepid'],
      dtype='object')

In [22]:
# Column names of the diagnosis table (all lowercase).
diagnosis_df.columns

Index(['diagnosisid', 'patientunitstayid', 'activeupondischarge',
       'diagnosisoffset', 'diagnosisstring', 'icd9code', 'diagnosispriority'],
      dtype='object')

In [28]:
def create_patient_nodes_neo4j(node_list, batch_len=5000):
    """Create one ``patient`` node in Neo4j for every row of ``node_list``.

    Rows are sent in batches; each batch is passed to the query as the
    ``$node_list`` parameter and expanded with ``UNWIND``. The query uses
    ``CREATE``, so calling this function again inserts the rows again.

    Missing cells (NaN/NaT/None in the DataFrame) are sent as ``None`` so the
    query receives a Cypher null instead of a float NaN.

    Args:
        node_list: pandas DataFrame whose columns match the property names
            referenced in the query (the patient table's columns).
        batch_len: Maximum number of rows sent per query. Defaults to 5000,
            the value that was previously hard-coded.

    Raises:
        ValueError: If ``batch_len`` is not positive.

    Reads the module-level global ``config`` to build the Neo4jConnection.
    """
    if batch_len <= 0:
        raise ValueError("batch_len must be a positive integer")

    # Numeric columns are cast explicitly with toInteger/toFloat; the other
    # columns are stored as they arrive.
    patients_node_query = """
    UNWIND $node_list as node
    CREATE( p: patient {
        patientunitstayid: toInteger(node.patientunitstayid),
        patienthealthsystemstayid: toInteger(node.patienthealthsystemstayid),
        gender: node.gender,
        age: node.age,
        ethnicity: node.ethnicity,
        hospitalid: toInteger(node.hospitalid),
        wardid: toInteger(node.wardid),
        apacheadmissiondx: node.apacheadmissiondx,
        admissionheight: toFloat(node.admissionheight),
        hospitaladmittime24: node.hospitaladmittime24,
        hospitaladmitoffset: toInteger(node.hospitaladmitoffset),
        hospitaladmitsource: node.hospitaladmitsource,
        hospitaldischargeyear: toInteger(node.hospitaldischargeyear),
        hospitaldischargetime24: node.hospitaldischargetime24,
        hospitaldischargeoffset: toInteger(node.hospitaldischargeoffset),
        hospitaldischargelocation: node.hospitaldischargelocation,
        hospitaldischargestatus: node.hospitaldischargestatus,
        unittype: node.unittype,
        unitadmittime24: node.unitadmittime24,
        unitadmitsource: node.unitadmitsource,
        unitvisitnumber: toInteger(node.unitvisitnumber),
        unitstaytype: node.unitstaytype,
        admissionweight: toFloat(node.admissionweight),
        dischargeweight: toFloat(node.dischargeweight),
        unitdischargetime24: node.unitdischargetime24,
        unitdischargeoffset: toInteger(node.unitdischargeoffset),
        unitdischargelocation: node.unitdischargelocation,
        unitdischargestatus: node.unitdischargestatus,
        uniquepid: node.uniquepid
    })
    """

    connection = Neo4jConnection(config)

    for batch_start in range(0, len(node_list), batch_len):
        batch = node_list.iloc[batch_start:batch_start + batch_len]
        # pandas marks missing cells with float NaN; replace them with None
        # so they arrive in Cypher as null rather than as a NaN float.
        records = [
            {key: (None if pd.isna(value) else value) for key, value in row.items()}
            for row in batch.to_dict("records")
        ]
        connection.query(patients_node_query, {"node_list": records})

In [29]:
# Write every row of patient_df to Neo4j as a `patient` node.
# The query uses CREATE, so re-running this cell inserts the rows again.
create_patient_nodes_neo4j(patient_df)

In [56]:
# Same column check as the earlier diagnosis_df.columns cell, repeated before writing the diagnosis loader.
diagnosis_df.columns

Index(['diagnosisid', 'patientunitstayid', 'activeupondischarge',
       'diagnosisoffset', 'diagnosisstring', 'icd9code', 'diagnosispriority'],
      dtype='object')

In [57]:
# Preview the diagnosis table; a bare DataFrame expression renders a truncated head/tail view.
diagnosis_df

Unnamed: 0,diagnosisid,patientunitstayid,activeupondischarge,diagnosisoffset,diagnosisstring,icd9code,diagnosispriority
0,4222318,141168,False,72,cardiovascular|chest pain / ASHD|coronary arte...,"414.00, I25.10",Other
1,3370568,141168,True,118,cardiovascular|ventricular disorders|cardiomyo...,,Other
2,4160941,141168,False,72,pulmonary|disorders of the airways|COPD,"491.20, J44.9",Other
3,4103261,141168,True,118,pulmonary|disorders of the airways|COPD,"491.20, J44.9",Other
4,3545241,141168,True,118,cardiovascular|ventricular disorders|congestiv...,"428.0, I50.9",Other
...,...,...,...,...,...,...,...
2710667,46330138,3353251,False,11304,renal|disorder of urinary tract / renal system...,"599.0, N39.0",Other
2710668,46150971,3353251,False,4080,cardiovascular|shock / hypotension|sepsis,"038.9, A41.9",Major
2710669,46259796,3353254,True,41,renal|disorder of kidney|acute renal failure|d...,"584.9, N17.9",Major
2710670,46204273,3353254,True,41,gastrointestinal|GI bleeding / PUD|lower GI bl...,"578.9, K92.2",Primary


In [58]:
# Same preview as the previous cell (diagnosis_df has not been modified in between).
diagnosis_df

Unnamed: 0,diagnosisid,patientunitstayid,activeupondischarge,diagnosisoffset,diagnosisstring,icd9code,diagnosispriority
0,4222318,141168,False,72,cardiovascular|chest pain / ASHD|coronary arte...,"414.00, I25.10",Other
1,3370568,141168,True,118,cardiovascular|ventricular disorders|cardiomyo...,,Other
2,4160941,141168,False,72,pulmonary|disorders of the airways|COPD,"491.20, J44.9",Other
3,4103261,141168,True,118,pulmonary|disorders of the airways|COPD,"491.20, J44.9",Other
4,3545241,141168,True,118,cardiovascular|ventricular disorders|congestiv...,"428.0, I50.9",Other
...,...,...,...,...,...,...,...
2710667,46330138,3353251,False,11304,renal|disorder of urinary tract / renal system...,"599.0, N39.0",Other
2710668,46150971,3353251,False,4080,cardiovascular|shock / hypotension|sepsis,"038.9, A41.9",Major
2710669,46259796,3353254,True,41,renal|disorder of kidney|acute renal failure|d...,"584.9, N17.9",Major
2710670,46204273,3353254,True,41,gastrointestinal|GI bleeding / PUD|lower GI bl...,"578.9, K92.2",Primary


In [59]:
def create_diagnosis_nodes_neo4j(node_list, batch_len=5000):
    """Create one ``diagnosis`` node in Neo4j for every row of ``node_list``.

    Rows are sent in batches; each batch is passed to the query as the
    ``$node_list`` parameter and expanded with ``UNWIND``. The query uses
    ``CREATE``, so calling this function again inserts the rows again.

    Missing cells (NaN/NaT/None in the DataFrame, e.g. an empty ``icd9code``)
    are sent as ``None`` so the query receives a Cypher null instead of a
    float NaN.

    Args:
        node_list: pandas DataFrame with the diagnosis table's columns
            (``diagnosisid``, ``patientunitstayid``, ``activeupondischarge``,
            ``diagnosisoffset``, ``diagnosisstring``, ``icd9code``,
            ``diagnosispriority``).
        batch_len: Maximum number of rows sent per query. Defaults to 5000,
            the value that was previously hard-coded.

    Raises:
        ValueError: If ``batch_len`` is not positive.

    Reads the module-level global ``config`` to build the Neo4jConnection.
    """
    if batch_len <= 0:
        raise ValueError("batch_len must be a positive integer")

    # The last property used to read `node.diagnosisPriority`. Map keys are
    # case-sensitive and the DataFrame column is `diagnosispriority`, so the
    # lookup always produced null and the property was never stored. Both the
    # lookup and the property name now use the lowercase column name, matching
    # every other property in this query.
    diagnosis_node_query = """
    UNWIND $node_list as node
    CREATE( d: diagnosis {
        diagnosisid: toInteger(node.diagnosisid),
        patientunitstayid: toInteger(node.patientunitstayid),
        activeupondischarge: node.activeupondischarge,
        diagnosisoffset: toInteger(node.diagnosisoffset),
        diagnosisstring: node.diagnosisstring,
        icd9code: node.icd9code,
        diagnosispriority: node.diagnosispriority
    })
    """

    connection = Neo4jConnection(config)

    for batch_start in range(0, len(node_list), batch_len):
        batch = node_list.iloc[batch_start:batch_start + batch_len]
        # pandas marks missing cells with float NaN; replace them with None
        # so they arrive in Cypher as null rather than as a NaN float.
        records = [
            {key: (None if pd.isna(value) else value) for key, value in row.items()}
            for row in batch.to_dict("records")
        ]
        connection.query(diagnosis_node_query, {"node_list": records})