In [1]:
import pandas as pd
import os
import numpy as np
from datetime import datetime


In [2]:
def dataframe_from_csv(path, header= 0, index_col = 0):
    return pd.read_csv(path, header = header, index_col = index_col)

def read_table(path, file):
    all_rows = dataframe_from_csv(os.path.join(path, file), 
                                  header = 0, index_col= None)
    return all_rows

all_rows = read_table('./', 'invasive_ventilation.csv')


all_patients_with_dod = read_table('./', 'patients_with_dod.csv')

all_patients_with_dod = all_patients_with_dod[['subject_id', 'dod']]

print(all_rows[0:6])
print(all_patients_with_dod[:5])




   subject_id   hadm_id   stay_id            starttime              endtime  \
0    13859862  25015072  32980067  2152-04-07 01:50:00  2152-04-10 18:57:00   
1    18917458  28038802  30704530  2185-12-15 21:11:00  2185-12-20 09:59:00   
2    19704964  21790335  37623730  2140-03-10 04:44:00  2140-03-11 13:44:00   
3    19004463  24357541  39101627  2177-01-14 02:35:00  2177-02-10 14:00:00   
4    19350792  23255460  36419473  2172-11-17 23:53:00  2172-11-20 08:06:00   
5    11538389  22188993  37455091  2145-10-29 19:00:00  2145-11-04 18:40:00   

                 storetime  itemid                 label       category  \
0      2152-04-10 19:07:00  225792  Invasive Ventilation  2-Ventilation   
1      2185-12-21 08:31:00  225792  Invasive Ventilation  2-Ventilation   
2  2140-03-11 13:44:47.817  225792  Invasive Ventilation  2-Ventilation   
3      2177-02-11 13:33:00  225792  Invasive Ventilation  2-Ventilation   
4  2172-11-20 08:06:46.433  225792  Invasive Ventilation  2-Ventilation

In [3]:
def create_subject_dod_map(all_patients_with_dod):
    subject_id_2_dod = {}
    for index, row in all_patients_with_dod.iterrows():
        subject_id = row['subject_id']
        dod = datetime.fromisoformat(row['dod'])
        subject_id_2_dod[subject_id] = dod
        
    return subject_id_2_dod
        
subject_id_2_dod = create_subject_dod_map(all_patients_with_dod)


In [4]:
'''
stay_map structure:
{
   stay_id: [
       [invasive ventilation 1],
       [invasive ventilation 2],
       [invasive ventilation 3]
   ]
}

invasive ventilation structure: starttime, endtime, duration(in min)

example:
{
19321668-32652025   // subject_id-stay_id
[
[Timestamp('2144-03-10 17:40:00'), Timestamp('2144-03-16 13:10:00'), 8370.0], 
[Timestamp('2144-03-17 14:30:00'), Timestamp('2144-03-18 16:00:00'), 1530.0], 
[Timestamp('2144-03-18 20:31:00'), Timestamp('2144-03-24 19:09:00'), 8558.0], 
[Timestamp('2144-03-21 00:00:00'), Timestamp('2144-03-24 19:00:00'), 5460.0]]

}
'''
def split_rows(all_rows):
    stay_map = {}
    for index, row in all_rows.iterrows():
        stay_id = str(row['stay_id'])
        subject_id = str(row['subject_id'])
        key = subject_id + '-' + stay_id
        if key is not None and key not in stay_map.keys():
            stay_map[key] = []
        
        starttime = pd.to_datetime(row['starttime'])
        endtime = pd.to_datetime(row['endtime'])
        itemid = row['itemid']
        value = row['value']
        valueuom = row['valueuom'] 
        if itemid == 225792: # Invasive Ventilation
            if value == None:
                continue
            if valueuom == 'hour':
                value = value * 60
            if valueuom == 'day':
                value = value * 60 * 24
            stay_info = [starttime, endtime, value]
            stay_map[key].append(stay_info)
        
    return stay_map
        
stay_map = split_rows(all_rows)


count_0 = 0
count_1 = 0
count_2 = 0
count_3 = 0
for key, value in stay_map.items():
    if len(value) == 0:
        count_0 = count_0 + 1
    elif len(value) == 1:
        count_1 = count_1 + 1
    elif len(value) == 2:
        count_2 = count_2 + 1
    else:
        count_3 = count_3 + 1
        
print(count_0, count_1, count_2, count_3, count_1 + count_2 + count_3)

0 26591 2001 527 29119


In [5]:
def process_mv(stay_map):
    duplication_info = []
    re_intubation_info = []
    abnormal_info = []
    total_inv_count = 0
    temp = {}
    all_invasive_ventilation_info = {}
      
    iv_info = {}
    new_stay_map = {}
    for key, value in stay_map.items():     
        # dedeup
        value = sorted(value, key = lambda x: x[0])    
        dedup_value = []
        for i in range(0, len(value)):
            if len(dedup_value) == 0:
                dedup_value.append(value[i])
            else:
                if dedup_value[-1][0] == value[i][0] and dedup_value[-1][1] == value[i][1] and dedup_value[-1][2] == value[i][2]:
                    duplication_info.append([key, i])
                else:
                    dedup_value.append(value[i])
             
        # merge the overlapped intervals
        merged_value = []            
        for i in range(0, len(dedup_value)):
            if len(merged_value) == 0:
                merged_value.append(dedup_value[i])
            else:
                if (merged_value[-1][1] < dedup_value[i][0]):
                    # there is no overlap
                    merged_value.append(dedup_value[i])
                else:
                    new_start = merged_value[-1][0] if merged_value[-1][0] <= dedup_value[i][0] else dedup_value[i][0]
                    new_end = merged_value[-1][1] if merged_value[-1][1] <= dedup_value[i][1] else dedup_value[i][1]
                    new_diff = (new_end - new_start) / np.timedelta64(1,'m')
                    
                    merged_value[-1] = [new_start, new_end, new_diff]
                    

        iv_info[key] = merged_value
        
        
    return iv_info

clean_iv_info = process_mv(stay_map)



In [6]:
def filter_mv(clean_iv_info, subject_id_2_dod):
    filtered_iv_info = {}
    threshold_iv = 24 * 60
    for key, value in clean_iv_info.items():
        
        # filter the patient whose invasive ventilation less than 24 hours
        total_iv_time_min = 0
        
        for i in range(0, len(value)):
            total_iv_time_min = total_iv_time_min + value[i][2]
            
        if total_iv_time_min < threshold_iv:
            # skip this record
            continue
        
        # Exclude all patients who died during their initial session on the ventilator
        subject_id = int(key.split("-")[0])
        
        if subject_id in subject_id_2_dod.keys():
            dod = subject_id_2_dod[subject_id]
            #print(subject_id, "in dod", dod)
            if dod < value[0][1]:
                #print("dod", dod, "first ventilation", value[0][1])
                continue
            
        filtered_iv_info[key] = value
    
    return filtered_iv_info
        
        
filtered_iv_info = filter_mv(clean_iv_info, subject_id_2_dod)

print(len(filtered_iv_info.keys()))
    
    
    

12125


In [7]:

def analyze_iv(filtered_iv_info, threshold_in_hour = 24):
    re_intubation_info = []
    abnormal_info = []
    total_inv_count = 0
    threshold_in_min = threshold_in_hour * 60
    
    for key, value in filtered_iv_info.items():     
        total_inv_count = total_inv_count + len(value)
        for i in range(1, len(value)):
            diff_in_min = (value[i][0] - value[i - 1][1]) / np.timedelta64(1,'m')
            if diff_in_min < 0:
                print(key, i, diff_in_min)
            if diff_in_min > 0 and diff_in_min < threshold_in_min:
                re_intubation_info.append([key, i])
        
        
        
    print("------------------------------------------------------------------------------------")
    print("count of re-intubation within", threshold_in_hour, "hours: ", len(re_intubation_info))
    print("total invasive ventilation count: ",total_inv_count)
    print("extubation failure rate:", "{0:.2%}".format(len(re_intubation_info)/total_inv_count))
    print("------------------------------------------------------------------------------------")
    



analyze_iv(filtered_iv_info, 24)
analyze_iv(filtered_iv_info, 48)
analyze_iv(filtered_iv_info, 72)



------------------------------------------------------------------------------------
count of re-intubation within 24 hours:  1354
total invasive ventilation count:  14931
extubation failure rate: 9.07%
------------------------------------------------------------------------------------
------------------------------------------------------------------------------------
count of re-intubation within 48 hours:  1905
total invasive ventilation count:  14931
extubation failure rate: 12.76%
------------------------------------------------------------------------------------
------------------------------------------------------------------------------------
count of re-intubation within 72 hours:  2213
total invasive ventilation count:  14931
extubation failure rate: 14.82%
------------------------------------------------------------------------------------


In [28]:
# def write_all_invasive_ventilation_info(info):
   
#     rows = []
#     for key, value in info.items(): 
#         for v in value:
#             new_row = [key, v[0], v[1], v[2]]
#             rows.append(new_row)
# #     print(rows)
    
#     column_names = ["stay_id", "starttime", "endtime", "value"]
#     df = pd.DataFrame(rows, columns = column_names)
#     print(df)
#     df.to_csv('./merged_invasive_ventilation.csv')
    
# write_all_invasive_ventilation_info(all_invasive_ventilation_info)


# def write_dedup_info(temp):
   
#     rows = []
#     for key, value in temp.items(): 
#         for v in value:
#             new_row = [key, v[0], v[1], v[2]]
#             rows.append(new_row)
# #     print(rows)
    
#     column_names = ["stay_id", "starttime", "endtime", "value"]
#     df = pd.DataFrame(rows, columns = column_names)
#     print(df)
#     df.to_csv('./dedup.csv')

# write_dedup_info(temp)
    