In [1]:
import pandas as pd
import os
import numpy as np


In [2]:
def dataframe_from_csv(path, header= 0, index_col = 0):
    """Read the CSV file at ``path`` into a pandas DataFrame.

    Parameters
    ----------
    path : path of the CSV file to read.
    header : forwarded to ``pd.read_csv`` as ``header`` (default 0).
    index_col : forwarded to ``pd.read_csv`` as ``index_col`` (default 0).

    Returns
    -------
    The DataFrame produced by ``pd.read_csv``.
    """
    read_options = {'header': header, 'index_col': index_col}
    return pd.read_csv(path, **read_options)

def read_table(path):
    """Load '3_and_more_invasive_v_with_sbt.csv' from the directory ``path``.

    The file is read with its first line as the header and without using
    any column as the index.
    """
    csv_path = os.path.join(path, '3_and_more_invasive_v_with_sbt.csv')
    return dataframe_from_csv(csv_path, header = 0, index_col= None)

# Load the event table from the current working directory.
all_rows = read_table('./')

# Preview the first six rows.
print(all_rows.head(6))


   subject_id   hadm_id   stay_id       starttime         endtime  \
0    18756985  21715366  30006983  10/12/59 18:00   11/4/59 13:00   
1    18756985  21715366  30006983  10/12/59 20:07  10/12/59 20:08   
2    18756985  21715366  30006983   10/31/59 5:00             NaN   
3    18756985  21715366  30006983   10/31/59 5:00             NaN   
4    18756985  21715366  30006983   11/9/59 19:43   11/13/59 8:00   
5    18756985  21715366  30006983  11/18/59 21:15   11/22/59 9:29   

        storetime  itemid                       label  \
0   11/4/59 16:01  225792        Invasive Ventilation   
1   10/13/59 4:08  224385                  Intubation   
2   10/31/59 5:38  224715                 SBT Started   
3   10/31/59 5:39  224717  SBT Successfully Completed   
4  11/13/59 17:11  225792        Invasive Ventilation   
5         29:37.6  225792        Invasive Ventilation   

0            2-Ventilation  32820.0       NaN      min      NaN  
1  1-Intubation/Extubation      1.0       NaN     

In [3]:
def split_rows(all_rows):
    '''
    Group the "Invasive Ventilation" rows (itemid 225792) by stay_id.

    stay_map structure:
    {
       stay_id: [
           [invasive ventilation 1],
           [invasive ventilation 2],
           [invasive ventilation 3]
       ]
    }

    invasive ventilation structure: starttime, endtime, duration(in min)

    example:
    {
    30006983: [
    [Timestamp('2059-10-12 18:00:00'), Timestamp('2059-11-04 13:00:00'), 32820.0],
    [Timestamp('2059-11-09 19:43:00'), Timestamp('2059-11-13 08:00:00'), 5057.0],
    [Timestamp('2059-11-18 21:15:00'), Timestamp('2059-11-22 09:29:00'), 5054.0]]

    }

    Parameters
    ----------
    all_rows : DataFrame with the columns 'stay_id', 'starttime', 'endtime',
        'itemid', 'value' and 'valueuom'.

    Returns
    -------
    dict mapping every stay_id seen in ``all_rows`` to its list of
    [starttime, endtime, duration] entries, in row order. A stay without any
    usable invasive ventilation row maps to an empty list.

    Notes
    -----
    * Durations with valueuom 'hour' or 'day' are converted to minutes; any
      other unit is kept as is.
    * Rows whose stay_id or duration is missing are skipped. Missing cells
      read by pandas are NaN, not None, so they are detected with pd.isna.
    '''
    invasive_ventilation_itemid = 225792
    minutes_per_unit = {'hour': 60, 'day': 60 * 24}

    stay_map = {}
    for _index, row in all_rows.iterrows():
        stay_id = row['stay_id']
        if pd.isna(stay_id):
            # A row without a stay cannot be attributed to anything.
            continue
        if stay_id not in stay_map:
            stay_map[stay_id] = []

        if row['itemid'] != invasive_ventilation_itemid:
            continue

        value = row['value']
        if pd.isna(value):
            continue
        factor = minutes_per_unit.get(row['valueuom'])
        if factor is not None:
            value = value * factor

        # Parse the timestamps only for the rows that are actually kept.
        starttime = pd.to_datetime(row['starttime'])
        endtime = pd.to_datetime(row['endtime'])
        stay_map[stay_id].append([starttime, endtime, value])

    return stay_map
        
# Build the per-stay map of invasive ventilation intervals from the loaded rows.
stay_map = split_rows(all_rows)

In [8]:
def analyze_mv(stay_map, re_intub_threshold_in_hour = 6, verbose = True):
    """Clean the ventilation intervals of every stay and count re-intubations.

    For each stay the intervals are processed in their given order:

    1. dedup - an interval identical (start, end and duration) to the one
       kept just before it is dropped.
    2. merge - an interval that contains, or is contained in, the previously
       kept interval is merged with it. The merged interval spans from the
       earlier start to the later end and its duration is recomputed in
       minutes.
    3. gaps - for each pair of consecutive merged intervals the gap between
       the end of the first and the start of the second is measured in
       minutes. A negative gap is recorded as abnormal; a gap strictly
       between 0 and the threshold is recorded as a re-intubation. A gap of
       exactly 0 is recorded as neither.

    Parameters
    ----------
    stay_map : dict mapping a stay key to a list of
        [starttime, endtime, duration] entries.
    re_intub_threshold_in_hour : gap, in hours, below which a new interval
        counts as a re-intubation.
    verbose : when True (default) one "merged" line is printed per merge.
        The final summary is always printed.

    Returns
    -------
    tuple of
        re_intubation_info : list of [key, index into the merged intervals],
        total_inv_count : number of merged intervals over all stays,
        abnormal_info : list of [key, index into the merged intervals],
        all_invasive_ventilation_info : dict key -> merged intervals,
        temp : dict key -> de-duplicated (not merged) intervals.
    """
    one_minute = np.timedelta64(1, 'm')
    threshold_in_min = re_intub_threshold_in_hour * 60

    re_intubation_info = []
    abnormal_info = []
    total_inv_count = 0
    temp = {}
    all_invasive_ventilation_info = {}

    for key, value in stay_map.items():
        # dedup: only consecutive identical entries are removed.
        dedup_value = []
        for episode in value:
            if dedup_value:
                last = dedup_value[-1]
                if last[0] == episode[0] and last[1] == episode[1] and last[2] == episode[2]:
                    continue
            dedup_value.append(episode)
        temp[key] = dedup_value

        # merge: fold nested intervals into one covering interval.
        merged_value = []
        for i, episode in enumerate(dedup_value):
            if merged_value:
                prev_start, prev_end = merged_value[-1][0], merged_value[-1][1]
                start, end = episode[0], episode[1]
                nested = ((prev_start <= start and prev_end >= end)
                          or (prev_start >= start and prev_end <= end))
                if nested:
                    if verbose:
                        print("merged: ", key, i)
                    # The union of two nested intervals is the outer one:
                    # earliest start, latest end.
                    new_start = min(prev_start, start)
                    new_end = max(prev_end, end)
                    new_diff = (new_end - new_start) / one_minute
                    merged_value[-1] = [new_start, new_end, new_diff]
                    continue
            merged_value.append(episode)

        total_inv_count += len(merged_value)

        # gaps between consecutive merged intervals
        for i in range(1, len(merged_value)):
            diff_in_min = (merged_value[i][0] - merged_value[i - 1][1]) / one_minute
            if diff_in_min < 0:
                abnormal_info.append([key, i])
            elif 0 < diff_in_min < threshold_in_min:
                re_intubation_info.append([key, i])

        all_invasive_ventilation_info[key] = merged_value

    # Avoid a ZeroDivisionError when there is nothing to analyze.
    if total_inv_count:
        failure_rate = len(re_intubation_info) / total_inv_count
    else:
        failure_rate = float('nan')

    print("-----------------------------------")
    print("count of re-intubation within", re_intub_threshold_in_hour, "hours: ", len(re_intubation_info))
    print("total invasive ventilation count: ",total_inv_count)
    print("extubation failure rate:", failure_rate)
    print("abnormal info: ", len(abnormal_info))
    print("-----------------------------------")
    return re_intubation_info, total_inv_count, abnormal_info, all_invasive_ventilation_info, temp

# Print the re-intubation summary for a 6-hour threshold; the returned values are discarded.
_ = analyze_mv(stay_map, 6)

# Same summary for a 12-hour threshold.
_ = analyze_mv(stay_map, 12)

# 24-hour threshold: keep the merged intervals (4th return value) and the
# de-duplicated intervals (5th return value).
_,_,_, all_invasive_ventilation_info, temp = analyze_mv(stay_map, 24)

# print(all_invasive_ventilation_info)


merged:  30015010 2
merged:  30244200 1
merged:  30340121 1
merged:  30373971 2
merged:  30391427 2
merged:  30551860 1
merged:  30560788 2
merged:  30613655 2
merged:  30739656 1
merged:  30949610 1
merged:  31072931 1
merged:  31141989 2
merged:  31217827 3
merged:  31217827 4
merged:  31234262 3
merged:  31272962 1
merged:  31320690 1
merged:  31320690 2
merged:  31320690 3
merged:  31320690 4
merged:  31320690 5
merged:  31612206 2
merged:  31701999 2
merged:  31732032 2
merged:  31773980 2
merged:  31801230 1
merged:  31803488 2
merged:  32080893 2
merged:  32147981 1
merged:  32285806 1
merged:  32470453 1
merged:  32652025 3
merged:  32685379 2
merged:  32697087 2
merged:  32719878 1
merged:  32905514 2
merged:  33230724 2
merged:  33547419 3
merged:  33641904 2
merged:  33976324 1
merged:  34259942 2
merged:  34664331 1
merged:  34681735 2
merged:  34711898 2
merged:  34841654 2
merged:  35102151 3
merged:  35235760 2
merged:  35315630 2
merged:  35589420 1
merged:  35611212 1


In [7]:
def write_all_invasive_ventilation_info(info, path = './merged_invasive_ventilation.csv'):
    """Flatten the per-stay intervals into a table, print it and save it as CSV.

    Parameters
    ----------
    info : dict mapping a stay key to a list of entries whose first three
        items are written as starttime, endtime and value.
    path : destination of the CSV file; defaults to
        './merged_invasive_ventilation.csv'.

    The CSV has the columns stay_id, starttime, endtime and value, preceded
    by the DataFrame's integer index (``to_csv`` default).
    """
    column_names = ["stay_id", "starttime", "endtime", "value"]
    rows = [
        [key, v[0], v[1], v[2]]
        for key, value in info.items()
        for v in value
    ]
    df = pd.DataFrame(rows, columns = column_names)
    print(df)
    df.to_csv(path)
    
# Print and save the merged intervals of every stay.
write_all_invasive_ventilation_info(all_invasive_ventilation_info)


def write_dedup_info(temp, path = './dedup.csv'):
    """Flatten the per-stay intervals into a table, print it and save it as CSV.

    Parameters
    ----------
    temp : dict mapping a stay key to a list of entries whose first three
        items are written as starttime, endtime and value.
    path : destination of the CSV file; defaults to './dedup.csv'.

    The CSV has the columns stay_id, starttime, endtime and value, preceded
    by the DataFrame's integer index (``to_csv`` default).
    """
    column_names = ["stay_id", "starttime", "endtime", "value"]
    rows = [
        [key, v[0], v[1], v[2]]
        for key, value in temp.items()
        for v in value
    ]
    df = pd.DataFrame(rows, columns = column_names)
    print(df)
    df.to_csv(path)

# Print and save the de-duplicated intervals of every stay.
write_dedup_info(temp)
    

       stay_id           starttime             endtime    value
0     30006983 2059-10-12 18:00:00 2059-11-04 13:00:00  32820.0
1     30006983 2059-11-09 19:43:00 2059-11-13 08:00:00   5057.0
2     30006983 2059-11-18 21:15:00 2059-11-22 09:29:00   5054.0
3     30015010 2054-07-27 07:30:00 2054-07-27 08:16:00     46.0
4     30015010 2054-07-27 08:30:00 2054-07-27 08:32:00      2.0
...        ...                 ...                 ...      ...
1682  39965149 2047-12-11 13:00:00 2047-12-16 09:00:00   6960.0
1683  39986206 1983-06-20 00:40:00 1983-07-01 16:14:00  16774.0
1684  39986206 1983-07-04 01:54:00 1983-07-07 15:58:00   5164.0
1685  39986206 1983-07-09 15:37:00 1983-07-13 20:00:00   6023.0
1686  39986206 1983-07-16 00:00:00 1983-07-24 08:32:00  12032.0

[1687 rows x 4 columns]
       stay_id           starttime             endtime    value
0     30006983 2059-10-12 18:00:00 2059-11-04 13:00:00  32820.0
1     30006983 2059-11-09 19:43:00 2059-11-13 08:00:00   5057.0
2     30006983 