In [1]:
import os 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


# Read the three raw tables into dataframes, in this order:
# item definitions, chart events, input (fluid) events.
BASE_DIR = os.path.expanduser("~/Fluid-Solutions-ML/data/raw/")
items_df, chart_df, fluid_df = (
    pd.read_csv(os.path.join(BASE_DIR, csv_name))
    for csv_name in ("d_items.csv", "chartevents.csv", "inputevents.csv")
)

In [None]:
# Keywords identifying the vitals/items we want to pull out of the items dataframe.
features = [
    'central venous pressure',
    'mean arterial pressure',
    'spo2',
    'ppv',
    'blood pressure',
    'heart rate',
    'cvp',
    'map',
]

# Flag every row in which at least one cell, lower-cased and rendered as text,
# contains one of the keywords.
# NOTE(review): this is a plain substring test over every column, so short keys
# such as 'map' or 'ppv' also match longer words that merely contain them.
mask = items_df.apply(
    lambda item_row: any(
        keyword in cell_text
        for cell_text in (str(cell).lower() for cell in item_row)
        for keyword in features
    ),
    axis=1,
)

# Keep the flagged rows, retaining only the first row seen for each itemid.
filtered_items_df = items_df.loc[mask].drop_duplicates(subset=['itemid'])

# filtered_items_df

In [None]:
# Placeholder value only: `fluid_events` is reassigned to a filtered slice of
# `fluid_df` in a later cell, so this one-element list is not used as data.
fluid_events = [""]

In [4]:
# Item ids of every row kept in the filtered items dataframe, as a plain list.
filtered_items_ids = [*filtered_items_df['itemid']]

In [5]:
# The filtered items describe *which* vitals we care about; the chart events hold
# the recorded rows for them. Keep only chart events whose itemid is one of
# the filtered items and whose numeric value is present (not NaN).
vitals_events = chart_df.loc[
    chart_df['itemid'].isin(filtered_items_ids) & chart_df['valuenum'].notna()
]

# vitals_events

In [2]:
# Fluid input events that have a non-NaN amount and whose unit of measure is "ml".
fluid_events = fluid_df.loc[
    fluid_df['amount'].notna() & fluid_df['amountuom'].astype(str).eq("ml")
]

# fluid_events
# len(list(fluid_events['starttime'].unique()))

# Subset of those events whose status description is 'FinishedRunning'.
final_fluid_events = fluid_events.loc[
    fluid_events['statusdescription'].astype(str).eq('FinishedRunning')
]
# len(final_fluid_events)
# len(final_fluid_events['starttime'].unique())

# Display the ml-filtered events (not the FinishedRunning-only subset).
fluid_events

Unnamed: 0,subject_id,hadm_id,stay_id,caregiver_id,starttime,endtime,storetime,itemid,amount,amountuom,...,ordercomponenttypedescription,ordercategorydescription,patientweight,totalamount,totalamountuom,isopenbag,continueinnextdept,statusdescription,originalamount,originalrate
8,10005817,20626031,32604416,6770,2132-12-16 06:23:00,2132-12-17 00:37:00,2132-12-17 00:16:00,220949,91.233010,ml,...,Main order parameter,Continuous IV,91.0,250.0,ml,0,0,FinishedRunning,91.233002,5.003638
9,10005817,20626031,32604416,92805,2132-12-15 18:58:00,2132-12-15 19:12:00,2132-12-15 18:56:00,220949,9.543986,ml,...,Mixed solution,Continuous Med,91.0,250.0,ml,0,0,ChangeDose/Rate,214.739685,40.946400
10,10005817,20626031,32604416,20310,2132-12-17 09:15:00,2132-12-17 09:16:00,2132-12-17 09:28:00,220949,500.000000,ml,...,Mixed solution,Drug Push,91.0,500.0,ml,0,0,FinishedRunning,500.000000,0.000000
11,10005817,20626031,32604416,92805,2132-12-15 16:07:00,2132-12-15 16:20:00,2132-12-15 16:30:00,220949,4.437870,ml,...,Mixed solution,Continuous Med,91.0,250.0,ml,0,0,ChangeDose/Rate,248.520706,20.473200
12,10005817,20626031,32604416,12929,2132-12-15 14:34:00,2132-12-16 06:23:00,2132-12-15 14:34:00,220949,158.166671,ml,...,Main order parameter,Continuous IV,91.0,250.0,ml,0,0,ChangeDose/Rate,250.000000,10.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20394,10019003,29279905,34107647,68979,2153-03-28 21:11:00,2153-03-28 21:58:00,2153-03-28 23:22:00,227522,50.000000,ml,...,Main order parameter,Continuous IV,96.0,50.0,ml,0,0,FinishedRunning,50.000000,63.829784
20395,10019003,29279905,34107647,60023,2153-03-28 11:00:00,2153-03-28 12:00:00,2153-03-28 12:21:00,227522,49.999999,ml,...,Main order parameter,Continuous IV,96.0,50.0,ml,0,0,FinishedRunning,50.000000,50.000000
20396,10019003,29279905,34107647,83144,2153-03-30 06:38:00,2153-03-30 07:38:00,2153-03-30 06:38:00,227523,49.999999,ml,...,Main order parameter,Continuous IV,96.0,50.0,ml,0,0,FinishedRunning,50.000000,50.000000
20397,10019003,29279905,34107647,8034,2153-03-31 05:35:00,2153-03-31 06:35:00,2153-03-31 06:03:00,227523,49.999999,ml,...,Main order parameter,Continuous IV,96.0,50.0,ml,0,0,FinishedRunning,50.000000,50.000000


In [7]:
# Distinct subject ids (patients) that appear in the fluid events.
unique_patients_with_fluid_admin = pd.unique(fluid_events['subject_id'])

# unique_patients_with_fluid_admin

In [8]:
# Distinct subject ids that both received fluids AND have rows in the vitals events.
patients = (
    vitals_events
    .loc[
        vitals_events['subject_id'].isin(list(unique_patients_with_fluid_admin)),
        'subject_id',
    ]
    .unique()
)

# NOTE: this list is the same as the one above, but it is just safer to do this
# patients

In [9]:
# Restrict the vitals/chart events to rows belonging to the selected patients.
unique_patients_vitals_events = vitals_events.loc[
    vitals_events['subject_id'].isin(list(patients))
]

# unique_patients_vitals_events

In [10]:
# Rows whose itemid, compared as text, is one of 220072, 220073 or 220074.
has_cvp = unique_patients_vitals_events.loc[
    unique_patients_vitals_events['itemid']
    .astype(str)
    .isin(["220072", "220073", "220074"])
]
len(has_cvp)

1605

In [11]:
# Count distinct timestamps in each time column. dropna=False keeps a missing
# value as its own entry, matching len(<column>.unique()).
num_chart_times = unique_patients_vitals_events['charttime'].nunique(dropna=False)
num_store_times = unique_patients_vitals_events['storetime'].nunique(dropna=False)

print("number of unique chart times: ", num_chart_times)
print("number of unique store times: ", num_store_times)

number of unique chart times:  20366
number of unique store times:  13928


In [None]:
# TODO: attempt to clump the data into modules (data points for the ml) which will be used to train the model.
# Remember, you have to somehow clump these by the timestamps in the data itself. These timestamps may not line up, so
# you may have to do some sort of interpolation to get the data to line up.

# "cvp_mmHg"
# "ppv_percent"
# "heart_rate_bpm"
# "spo2_percent"
# "systolic_bp_mmHg"
# "diastolic_bp_mmHg"
# "map_mmHg"
# "label"


# filtered_items_df - contains the item ids for the features we want to use
# fluid_events - contains the fluid input events from inputevents.csv that have a non-NaN amount recorded in ml
# unique_patients_with_fluid_admin - contains a unique list of patients who have received or removed fluids
# unique_patients_vitals_events - list of vitals/chart events involving only patients who have received/removed fluids


Before we do this step, we have to merge/condense/clump the vitals information from the chart df down into pairs that represent what we will train the model with. To do this, we either need to clump the item ids into groups of specific vitals or choose specific item ids to query on.

In [None]:
# TODO: update this clumped data to include the summary statistics for each of the features. I am not sure how I want to do this yet

In [None]:
# NOTE(review): everything in this cell is commented out, so nothing here runs.
# If it is re-enabled: the merge_asof call below passes no `by=` key, so each
# vitals row would be matched to the most recent fluid event at or before its
# storetime regardless of which subject it belongs to. Presumably
# by='subject_id' is wanted here - confirm before using df_merged.

# Convert datetime safely using assign() to avoid modifying a slice
# unique_patients_vitals_events = unique_patients_vitals_events.assign(
#     storetime=pd.to_datetime(unique_patients_vitals_events['storetime'])
# )
# fluid_events = fluid_events.assign(
#     starttime=pd.to_datetime(fluid_events['starttime'])
# )

# # Sort and merge
# vitals_events_sorted_by_time = unique_patients_vitals_events.sort_values('storetime')
# fluid_events_sorted_by_time = fluid_events.sort_values('starttime')

# df_merged = pd.merge_asof(
#     vitals_events_sorted_by_time, 
#     fluid_events_sorted_by_time, 
#     left_on='storetime', 
#     right_on='starttime'
# )

# # df_merged

# columns = df_merged.columns
# columns

Index(['subject_id_x', 'hadm_id_x', 'stay_id_x', 'caregiver_id_x', 'charttime',
       'subject_id_y', 'hadm_id_y', 'stay_id_y', 'caregiver_id_y', 'starttime',
       'endtime', 'storetime_y', 'itemid_y', 'amount', 'amountuom', 'rate',
       'rateuom', 'orderid', 'linkorderid', 'ordercategoryname',
       'secondaryordercategoryname', 'ordercomponenttypedescription',
       'ordercategorydescription', 'patientweight', 'totalamount',
       'totalamountuom', 'isopenbag', 'continueinnextdept',
       'statusdescription', 'originalamount', 'originalrate'],
      dtype='object')