In [9]:
import csv
import os
import pickle
import sys
import time

import sklearn.model_selection as ms
import torch
from torch.utils.data import TensorDataset
from tqdm import tqdm

In [1]:
import pyhealth
from pyhealth.data import Event, Visit, Patient

import numpy as np
np.random.seed(1234)

In [2]:
from pyhealth.datasets import eICUDataset
dataset_const = eICUDataset(
    root='../eicu_csv',
    tables=["diagnosis", "treatment", "admissionDx"],
    refresh_cache = False
)

dataset = eICUDataset(
    root='../eicu_csv',
    tables=["diagnosis", "treatment", "admissionDx"],
    refresh_cache = False
)

In [3]:
dataset.stat()
dataset.info()


Statistics of base dataset (dev=False):
	- Dataset: eICUDataset
	- Number of patients: 166355
	- Number of visits: 200859
	- Number of visits per patient: 1.2074
	- Number of events per visit in diagnosis: 22.6781
	- Number of events per visit in treatment: 18.3648
	- Number of events per visit in admissionDx: 3.1209


dataset.patients: patient_id -> <Patient>

<Patient>
    - visits: visit_id -> <Visit> 
    - other patient-level info
    
    <Visit>
        - event_list_dict: table_name -> List[Event]
        - other visit-level info
    
        <Event>
            - code: str
            - other event-level info



In [4]:
dataset.patients.values()

dict_values([Patient 002-10009+193705 with 1 visits, Patient 002-10018+178200 with 1 visits, Patient 002-10034+141169 with 1 visits, Patient 002-10050+183274 with 1 visits, Patient 002-10050+190893 with 2 visits, Patient 002-10052+137239 with 1 visits, Patient 002-10063+189145 with 1 visits, Patient 002-10066+185872 with 1 visits, Patient 002-10067+168546 with 1 visits, Patient 002-1007+178462 with 1 visits, Patient 002-10076+187781 with 2 visits, Patient 002-10079+136669 with 1 visits, Patient 002-10086+153868 with 1 visits, Patient 002-10094+196194 with 2 visits, Patient 002-1010+154941 with 1 visits, Patient 002-1012+162659 with 1 visits, Patient 002-10122+140376 with 1 visits, Patient 002-10129+188447 with 2 visits, Patient 002-10145+142615 with 2 visits, Patient 002-10145+180353 with 1 visits, Patient 002-10148+172762 with 5 visits, Patient 002-1015+176710 with 1 visits, Patient 002-10156+169654 with 1 visits, Patient 002-10157+145878 with 2 visits, Patient 002-10157+152760 with 1

In [5]:
patient_id = '035-957+2742991'
patient = dataset.patients[patient_id]
visits = dataset.patients[patient_id].visits
visits

OrderedDict([('3353127',
              Visit 3353127 from patient 035-957+2742991 with 17 events from tables ['diagnosis', 'treatment', 'admissionDx'])])

In [6]:
visit_id = '3353127'
dir(visits[visit_id])

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 'add_event',
 'attr_dict',
 'available_tables',
 'discharge_status',
 'discharge_time',
 'encounter_time',
 'event_list_dict',
 'get_code_list',
 'get_event_list',
 'num_events',
 'patient_id',
 'set_event_list',
 'visit_id']

In [7]:
visit = visits[visit_id]
print("### Accessing the diagnosis events ###")
print(visit.get_event_list('diagnosis'))
visit.get_code_list('diagnosis')

print("### Accessing the admissionDx events ###")
print(visit.get_event_list('admissionDx'))

print("### Accessing the treatment events ###")
print(visit.get_event_list('treatment'))

### Accessing the diagnosis events ###
[Event with ICD9CM code 518.81 from table diagnosis, Event with ICD10CM code J96.00 from table diagnosis, Event with ICD9CM code 518.83 from table diagnosis, Event with ICD10CM code J96.10 from table diagnosis, Event with ICD9CM code 584.9 from table diagnosis, Event with ICD10CM code N17.9 from table diagnosis, Event with ICD9CM code 518.81 from table diagnosis, Event with ICD10CM code J96.00 from table diagnosis, Event with ICD9CM code 518.83 from table diagnosis, Event with ICD10CM code J96.10 from table diagnosis, Event with ICD9CM code 584.9 from table diagnosis, Event with ICD10CM code N17.9 from table diagnosis]
### Accessing the admissionDx events ###
[Event with eICU_ADMITDXPATH code admission diagnosis|Was the patient admitted from the O.R. or went to the O.R. within 4 hours of admission?|No from table admissionDx, Event with eICU_ADMITDXPATH code admission diagnosis|Non-operative Organ Systems|Organ System|Respiratory from table admissi

In [8]:
# Dropping patient with less than 24 hours duration minute
# should be stated in the data entry 'unitdischargeoffset'
# aka visit.discharge_time - visit.encounter_time
def process_patient(ds, hour_threshold=24):
    dataset_processed = ds
    encounter_processed_count = 0
    encounter_deleted_count = 0

    for patient_id, patient in ds.patients.items():
        visits = patient.visits.copy()
        for visit_id, visit in visits.items():
            encounter_processed_count += 1
            if (visit.discharge_time - visit.encounter_time) < np.timedelta64(hour_threshold, 'h'):
                # print("Dropping patient {} visit {} due to less than {} hours duration".format(patient_id, visit_id, hour_threshold))
                encounter_deleted_count += 1
                del dataset_processed.patients[patient_id].visits[visit_id]

    print("Processed {} encounters, deleted {} encounters".format(encounter_processed_count, encounter_deleted_count))
    return dataset_processed

dataset_processed = process_patient(dataset)


Processed 200859 encounters, deleted 67959 encounters
