# Generate group data
- Generate group data **by hour** for three models:
    - A: vs_decision_model (the doctor's current decision-making): given the vital signs from the past n hours up to the present, how should the doctor set the ventilator so that weaning ultimately succeeds?
        - input: baseline + current vitalsign + previous n hr vitalsign (e.g., n=1) + previous ventilator settings
		- output: current ventilator settings
		- cohort: only use label = 1 stay_id for training
    - B: vs_affect_model
        - input: baseline + current vitalsign + previous n hr vitalsign + current ventilator settings
		- output: next 1 hr vitalsign
		- cohort: all
    - C: weaning_prediction_model
        - input: 24 rows flatten into 1 row
		- output: weaning label 0 / 1
		- cohort: all

## Generate data for A: vs_decision_model
- Total **30 + (n * 15)** columns for each row (45 columns when n = 1):
    - id_info (6 cols): subject_id, stay_id, hadm_id, charttime, before_weaning_hr, label
    - baseline (9 cols): age_now, gender, insurance, race, first_careunit, admission_type, weight_kg, height_cm, tobacco
    - current vitalsign (9 cols): heart_rate, sbp, dbp, mbp, spo2, resp_rate, tidal_volume_observed, RSBI, minute_ventilation
    - previous n hr vitalsign (n * 9 cols): same as above, suffixed with `_prev_{k}h` for k = 1..n
    - current ventilator settings (6 cols): peep, fio2, respiratory_rate_set, plateau_pressure, (ventilator_mode), ventilator_mode_group
    - previous n hr ventilator settings (n * 6 cols): same as above, suffixed with `_prev_{k}h` for k = 1..n
- At most **24 - n** rows for each stay_id: the first n hours of a stay have no complete history and are removed by `dropna()` (as is any other row that contains a missing value)
- Only take label = 1 stay_id

In [4]:
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', None)


In [5]:
# Load the merged per-stay hourly table that every later cell derives its data from.
pre_24_rows_df = pd.read_csv("data/data_by_table/pre_24_merged_30_rows_12_07.csv") # TODO: change dataset
# Load the stay_id lists of the train / val / test splits; only their 'stay_id'
# column is used below (for membership filtering).
train_stay_id = pd.read_csv("data/split_cohort_stay_id/train_data_id.csv")
val_stay_id = pd.read_csv("data/split_cohort_stay_id/val_data_id.csv")
test_stay_id = pd.read_csv("data/split_cohort_stay_id/test_data_id.csv")
# Display the loaded table as the cell output.
pre_24_rows_df

Unnamed: 0,subject_id,stay_id,hadm_id,charttime,before_weaning_hr,age_now,gender,insurance,race,first_careunit,admission_type,weight_kg,height_cm,tobacco,RSBI,minute_ventilation,ventilator_mode,ventilator_mode_group,peep,fio2,tidal_volume_observed,respiratory_rate_set,plateau_pressure,heart_rate,sbp,dbp,mbp,resp_rate,spo2,label
0,14569364,30005707,20870580,2144-01-11 20:00:00,23.0,83,M,Medicare,WHITE,Surgical Intensive Care Unit (SICU),EW EMER.,72.0,169.333333,0,20.754717,5.830,MMV/PSV/AutoFlow,Complete Support,5.0,40.0,530.0,10.0,17.0,67.0,115.0,54.0,74.0,11.0,94.0,1
1,14569364,30005707,20870580,2144-01-11 21:00:00,22.0,83,M,Medicare,WHITE,Surgical Intensive Care Unit (SICU),EW EMER.,72.0,169.333333,0,32.075472,9.010,MMV/PSV/AutoFlow,Complete Support,5.0,40.0,530.0,10.0,17.0,73.0,112.0,41.0,67.0,17.0,95.0,1
2,14569364,30005707,20870580,2144-01-11 22:00:00,21.0,83,M,Medicare,WHITE,Surgical Intensive Care Unit (SICU),EW EMER.,72.0,169.333333,0,28.301887,7.950,MMV/PSV/AutoFlow,Complete Support,5.0,40.0,530.0,10.0,17.0,72.0,117.0,51.0,72.0,15.0,96.0,1
3,14569364,30005707,20870580,2144-01-11 23:00:00,20.0,83,M,Medicare,WHITE,Surgical Intensive Care Unit (SICU),EW EMER.,72.0,169.333333,0,28.301887,7.950,MMV/PSV/AutoFlow,Complete Support,5.0,40.0,530.0,10.0,17.0,73.0,123.0,50.0,74.0,15.0,95.0,1
4,14569364,30005707,20870580,2144-01-12 00:00:00,19.0,83,M,Medicare,WHITE,Surgical Intensive Care Unit (SICU),EW EMER.,72.0,169.333333,0,34.136546,8.466,MMV/PSV/AutoFlow,Complete Support,5.0,40.0,498.0,10.0,11.0,72.0,133.0,57.0,83.0,17.0,97.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63907,11539827,39995735,21192405,2124-08-21 05:00:00,4.0,62,M,Other,WHITE,Cardiac Vascular Intensive Care Unit (CVICU),EW EMER.,86.4,173.000000,0,60.209424,8.786,CPAP/PSV,Minimal Support,5.0,40.0,382.0,18.0,20.0,80.0,133.0,61.0,84.0,23.0,95.0,1
63908,11539827,39995735,21192405,2124-08-21 06:00:00,3.0,62,M,Other,WHITE,Cardiac Vascular Intensive Care Unit (CVICU),EW EMER.,86.4,173.000000,0,70.680628,10.314,Standby,Minimal Support,5.0,50.0,382.0,18.0,20.0,80.0,130.0,57.0,80.0,27.0,94.0,1
63909,11539827,39995735,21192405,2124-08-21 07:00:00,2.0,62,M,Other,WHITE,Cardiac Vascular Intensive Care Unit (CVICU),EW EMER.,86.4,173.000000,0,54.973822,8.022,Standby,Minimal Support,5.0,50.0,382.0,18.0,20.0,80.0,131.0,56.0,79.0,21.0,95.0,1
63910,11539827,39995735,21192405,2124-08-21 08:00:00,1.0,62,M,Other,WHITE,Cardiac Vascular Intensive Care Unit (CVICU),EW EMER.,86.4,173.000000,0,65.445026,9.550,Standby,Minimal Support,5.0,40.0,382.0,18.0,20.0,82.0,141.0,58.0,82.0,25.0,97.0,1


In [6]:
# Keep, for each split, only the hourly rows whose stay_id is listed in that split's id table.
pre_24_rows_train_df = pre_24_rows_df.loc[pre_24_rows_df['stay_id'].isin(train_stay_id['stay_id'])]
pre_24_rows_val_df = pre_24_rows_df.loc[pre_24_rows_df['stay_id'].isin(val_stay_id['stay_id'])]
pre_24_rows_test_df = pre_24_rows_df.loc[pre_24_rows_df['stay_id'].isin(test_stay_id['stay_id'])]

In [7]:
# Number of distinct stay_id values in the full table (NaN, if present, counts as one value).
pre_24_rows_df["stay_id"].nunique(dropna=False)

2663

In [12]:
# Print the number of distinct stays in the train, val and test splits, one per line.
print(
    *(
        len(split_rows["stay_id"].unique())
        for split_rows in (pre_24_rows_train_df, pre_24_rows_val_df, pre_24_rows_test_df)
    ),
    sep="\n",
)

1860
274
529


In [11]:
# List the columns of the source table; the grouping function below selects from these by name.
pre_24_rows_df.columns

Index(['subject_id', 'stay_id', 'hadm_id', 'charttime', 'before_weaning_hr',
       'age_now', 'gender', 'insurance', 'race', 'first_careunit',
       'admission_type', 'weight_kg', 'height_cm', 'tobacco', 'RSBI',
       'minute_ventilation', 'ventilator_mode', 'ventilator_mode_group',
       'peep', 'fio2', 'tidal_volume_observed', 'respiratory_rate_set',
       'plateau_pressure', 'heart_rate', 'sbp', 'dbp', 'mbp', 'resp_rate',
       'spo2', 'label'],
      dtype='object')

In [12]:
import pandas as pd

def group_pre_vitalsign_and_ventilator_settings(df, label, n=1):
    """Attach the previous ``n`` hours of vital signs and ventilator settings to each hourly row.

    Rows of ``df`` whose ``label`` column equals ``label`` are ordered by
    ``stay_id`` and then ``charttime``. For every row the function emits the
    id, baseline, current vital-sign and current ventilator-setting columns,
    followed, for each ``j`` in ``1..n``, by the vital signs and ventilator
    settings of the row ``j`` positions earlier *within the same stay*, named
    ``<column>_prev_<j>h``. Positions without such an earlier row are NaN, so
    a caller can remove incomplete histories with ``dropna()``.

    Parameters
    ----------
    df : pandas.DataFrame
        Hourly table containing the id, baseline, vital-sign and
        ventilator-setting columns listed below. It is not modified.
    label : scalar
        Value of the ``label`` column selecting the cohort to keep.
    n : int, default 1
        Number of previous rows (hours) to attach to each row.

    Returns
    -------
    pandas.DataFrame
        A new frame with a fresh ``RangeIndex`` and ``30 + 15 * n`` columns.
        The lag columns are always present, even if no row has any history
        or the cohort is empty.

    Raises
    ------
    KeyError
        If ``df`` lacks one of the required columns.
    """
    id_info_cols = ['subject_id', 'stay_id', 'hadm_id', 'charttime', 'before_weaning_hr', 'label']
    baseline_cols = ['age_now', 'gender', 'insurance', 'race', 'first_careunit',
                     'admission_type', 'weight_kg', 'height_cm', 'tobacco']
    vitalsign_cols = ['heart_rate', 'sbp', 'dbp', 'mbp', 'spo2', 'resp_rate',
                      'tidal_volume_observed', 'RSBI', 'minute_ventilation']
    vent_settings_cols = ['peep', 'fio2', 'respiratory_rate_set', 'plateau_pressure',
                          'ventilator_mode', 'ventilator_mode_group']

    current_cols = id_info_cols + baseline_cols + vitalsign_cols + vent_settings_cols
    # For each lag the vital signs come first, then the ventilator settings.
    lagged_cols = vitalsign_cols + vent_settings_cols

    # Keep the requested cohort. Rows without a stay_id cannot be assigned to
    # a stay, so they are excluded (a groupby on stay_id would drop them too).
    cohort = df[(df['label'] == label) & df['stay_id'].notna()]

    if cohort.empty:
        # Return the full schema so downstream dropna()/to_csv() see the same
        # columns as for a non-empty cohort.
        lag_names = [f"{col}_prev_{j}h" for j in range(1, n + 1) for col in lagged_cols]
        return pd.DataFrame(columns=current_cols + lag_names)

    # sort_values returns a new frame, so the caller's data is never reordered
    # in place; reset_index gives the output a clean 0..N-1 index.
    ordered = cohort.sort_values(by=['stay_id', 'charttime']).reset_index(drop=True)
    per_stay = ordered.groupby('stay_id', sort=False)

    pieces = [ordered[current_cols]]
    for j in range(1, n + 1):
        # Shifting inside each stay guarantees that a stay's first hours never
        # pick up values from the stay that happens to precede it in the table.
        pieces.append(per_stay[lagged_cols].shift(j).add_suffix(f"_prev_{j}h"))

    # All pieces share ``ordered``'s index, so this is a plain column-wise join.
    return pd.concat(pieces, axis=1)


In [13]:
# Build the model-A table on the full dataset: label == 1 rows with the previous 1 hour attached.
group_pre_vitalsign_and_ventilator_settings_df = group_pre_vitalsign_and_ventilator_settings(pre_24_rows_df, label=1, n=1)

In [14]:
# Inspect the generated schema: the current-hour columns followed by the *_prev_1h lag columns.
group_pre_vitalsign_and_ventilator_settings_df.columns

Index(['subject_id', 'stay_id', 'hadm_id', 'charttime', 'before_weaning_hr',
       'label', 'age_now', 'gender', 'insurance', 'race', 'first_careunit',
       'admission_type', 'weight_kg', 'height_cm', 'tobacco', 'heart_rate',
       'sbp', 'dbp', 'mbp', 'spo2', 'resp_rate', 'tidal_volume_observed',
       'RSBI', 'minute_ventilation', 'peep', 'fio2', 'respiratory_rate_set',
       'plateau_pressure', 'ventilator_mode', 'ventilator_mode_group',
       'heart_rate_prev_1h', 'sbp_prev_1h', 'dbp_prev_1h', 'mbp_prev_1h',
       'spo2_prev_1h', 'resp_rate_prev_1h', 'tidal_volume_observed_prev_1h',
       'RSBI_prev_1h', 'minute_ventilation_prev_1h', 'peep_prev_1h',
       'fio2_prev_1h', 'respiratory_rate_set_prev_1h',
       'plateau_pressure_prev_1h', 'ventilator_mode_prev_1h',
       'ventilator_mode_group_prev_1h'],
      dtype='object')

In [15]:
# Show all columns when rendering wide frames (same option value as in the import cell).
pd.set_option('display.max_columns', None)
group_pre_vitalsign_and_ventilator_settings_df  # built with label=1 only

Unnamed: 0,subject_id,stay_id,hadm_id,charttime,before_weaning_hr,label,age_now,gender,insurance,race,first_careunit,admission_type,weight_kg,height_cm,tobacco,heart_rate,sbp,dbp,mbp,spo2,resp_rate,tidal_volume_observed,RSBI,minute_ventilation,peep,fio2,respiratory_rate_set,plateau_pressure,ventilator_mode,ventilator_mode_group,heart_rate_prev_1h,sbp_prev_1h,dbp_prev_1h,mbp_prev_1h,spo2_prev_1h,resp_rate_prev_1h,tidal_volume_observed_prev_1h,RSBI_prev_1h,minute_ventilation_prev_1h,peep_prev_1h,fio2_prev_1h,respiratory_rate_set_prev_1h,plateau_pressure_prev_1h,ventilator_mode_prev_1h,ventilator_mode_group_prev_1h
0,14569364,30005707,20870580,2144-01-11 20:00:00,23.0,1,83,M,Medicare,WHITE,Surgical Intensive Care Unit (SICU),EW EMER.,72.0,169.333333,0,67.0,115.0,54.0,74.0,94.0,11.0,530.0,20.754717,5.830,5.0,40.0,10.0,17.0,MMV/PSV/AutoFlow,Complete Support,,,,,,,,,,,,,,,
1,14569364,30005707,20870580,2144-01-11 21:00:00,22.0,1,83,M,Medicare,WHITE,Surgical Intensive Care Unit (SICU),EW EMER.,72.0,169.333333,0,73.0,112.0,41.0,67.0,95.0,17.0,530.0,32.075472,9.010,5.0,40.0,10.0,17.0,MMV/PSV/AutoFlow,Complete Support,67.0,115.0,54.0,74.0,94.0,11.0,530.0,20.754717,5.830,5.0,40.0,10.0,17.0,MMV/PSV/AutoFlow,Complete Support
2,14569364,30005707,20870580,2144-01-11 22:00:00,21.0,1,83,M,Medicare,WHITE,Surgical Intensive Care Unit (SICU),EW EMER.,72.0,169.333333,0,72.0,117.0,51.0,72.0,96.0,15.0,530.0,28.301887,7.950,5.0,40.0,10.0,17.0,MMV/PSV/AutoFlow,Complete Support,73.0,112.0,41.0,67.0,95.0,17.0,530.0,32.075472,9.010,5.0,40.0,10.0,17.0,MMV/PSV/AutoFlow,Complete Support
3,14569364,30005707,20870580,2144-01-11 23:00:00,20.0,1,83,M,Medicare,WHITE,Surgical Intensive Care Unit (SICU),EW EMER.,72.0,169.333333,0,73.0,123.0,50.0,74.0,95.0,15.0,530.0,28.301887,7.950,5.0,40.0,10.0,17.0,MMV/PSV/AutoFlow,Complete Support,72.0,117.0,51.0,72.0,96.0,15.0,530.0,28.301887,7.950,5.0,40.0,10.0,17.0,MMV/PSV/AutoFlow,Complete Support
4,14569364,30005707,20870580,2144-01-12 00:00:00,19.0,1,83,M,Medicare,WHITE,Surgical Intensive Care Unit (SICU),EW EMER.,72.0,169.333333,0,72.0,133.0,57.0,83.0,97.0,17.0,498.0,34.136546,8.466,5.0,40.0,10.0,11.0,MMV/PSV/AutoFlow,Complete Support,73.0,123.0,50.0,74.0,95.0,15.0,530.0,28.301887,7.950,5.0,40.0,10.0,17.0,MMV/PSV/AutoFlow,Complete Support
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40603,11539827,39995735,21192405,2124-08-21 05:00:00,4.0,1,62,M,Other,WHITE,Cardiac Vascular Intensive Care Unit (CVICU),EW EMER.,86.4,173.000000,0,80.0,133.0,61.0,84.0,95.0,23.0,382.0,60.209424,8.786,5.0,40.0,18.0,20.0,CPAP/PSV,Minimal Support,80.0,118.0,51.0,70.0,97.0,20.0,382.0,52.356021,7.640,5.0,40.0,18.0,20.0,CPAP/PSV,Minimal Support
40604,11539827,39995735,21192405,2124-08-21 06:00:00,3.0,1,62,M,Other,WHITE,Cardiac Vascular Intensive Care Unit (CVICU),EW EMER.,86.4,173.000000,0,80.0,130.0,57.0,80.0,94.0,27.0,382.0,70.680628,10.314,5.0,50.0,18.0,20.0,Standby,Minimal Support,80.0,133.0,61.0,84.0,95.0,23.0,382.0,60.209424,8.786,5.0,40.0,18.0,20.0,CPAP/PSV,Minimal Support
40605,11539827,39995735,21192405,2124-08-21 07:00:00,2.0,1,62,M,Other,WHITE,Cardiac Vascular Intensive Care Unit (CVICU),EW EMER.,86.4,173.000000,0,80.0,131.0,56.0,79.0,95.0,21.0,382.0,54.973822,8.022,5.0,50.0,18.0,20.0,Standby,Minimal Support,80.0,130.0,57.0,80.0,94.0,27.0,382.0,70.680628,10.314,5.0,50.0,18.0,20.0,Standby,Minimal Support
40606,11539827,39995735,21192405,2124-08-21 08:00:00,1.0,1,62,M,Other,WHITE,Cardiac Vascular Intensive Care Unit (CVICU),EW EMER.,86.4,173.000000,0,82.0,141.0,58.0,82.0,97.0,25.0,382.0,65.445026,9.550,5.0,40.0,18.0,20.0,Standby,Minimal Support,80.0,131.0,56.0,79.0,95.0,21.0,382.0,54.973822,8.022,5.0,50.0,18.0,20.0,Standby,Minimal Support


In [16]:
# dropna() with default arguments removes every row that has a NaN in ANY column:
# this drops rows whose *_prev_* columns are empty (no earlier hour in the stay)
# and also any row with a missing baseline / vital-sign / setting value.
group_visi_vese_without_miss_df = group_pre_vitalsign_and_ventilator_settings_df.dropna()

In [23]:
# Table before dropna(), shown for comparison with the filtered table in the next cell.
group_pre_vitalsign_and_ventilator_settings_df

Unnamed: 0,subject_id,stay_id,hadm_id,charttime,before_weaning_hr,label,age_now,gender,insurance,race,first_careunit,admission_type,weight_kg,height_cm,tobacco,heart_rate,sbp,dbp,mbp,spo2,resp_rate,tidal_volume_observed,RSBI,minute_ventilation,peep,fio2,respiratory_rate_set,plateau_pressure,ventilator_mode,ventilator_mode_group,heart_rate_prev_1h,sbp_prev_1h,dbp_prev_1h,mbp_prev_1h,spo2_prev_1h,resp_rate_prev_1h,tidal_volume_observed_prev_1h,RSBI_prev_1h,minute_ventilation_prev_1h,peep_prev_1h,fio2_prev_1h,respiratory_rate_set_prev_1h,plateau_pressure_prev_1h,ventilator_mode_prev_1h,ventilator_mode_group_prev_1h
0,14569364,30005707,20870580,2144-01-11 20:00:00,23.0,1,83,M,Medicare,WHITE,Surgical Intensive Care Unit (SICU),EW EMER.,72.0,169.333333,0,67.0,115.0,54.0,74.0,94.0,11.0,530.0,20.754717,5.830,5.0,40.0,10.0,17.0,MMV/PSV/AutoFlow,Complete Support,,,,,,,,,,,,,,,
1,14569364,30005707,20870580,2144-01-11 21:00:00,22.0,1,83,M,Medicare,WHITE,Surgical Intensive Care Unit (SICU),EW EMER.,72.0,169.333333,0,73.0,112.0,41.0,67.0,95.0,17.0,530.0,32.075472,9.010,5.0,40.0,10.0,17.0,MMV/PSV/AutoFlow,Complete Support,67.0,115.0,54.0,74.0,94.0,11.0,530.0,20.754717,5.830,5.0,40.0,10.0,17.0,MMV/PSV/AutoFlow,Complete Support
2,14569364,30005707,20870580,2144-01-11 22:00:00,21.0,1,83,M,Medicare,WHITE,Surgical Intensive Care Unit (SICU),EW EMER.,72.0,169.333333,0,72.0,117.0,51.0,72.0,96.0,15.0,530.0,28.301887,7.950,5.0,40.0,10.0,17.0,MMV/PSV/AutoFlow,Complete Support,73.0,112.0,41.0,67.0,95.0,17.0,530.0,32.075472,9.010,5.0,40.0,10.0,17.0,MMV/PSV/AutoFlow,Complete Support
3,14569364,30005707,20870580,2144-01-11 23:00:00,20.0,1,83,M,Medicare,WHITE,Surgical Intensive Care Unit (SICU),EW EMER.,72.0,169.333333,0,73.0,123.0,50.0,74.0,95.0,15.0,530.0,28.301887,7.950,5.0,40.0,10.0,17.0,MMV/PSV/AutoFlow,Complete Support,72.0,117.0,51.0,72.0,96.0,15.0,530.0,28.301887,7.950,5.0,40.0,10.0,17.0,MMV/PSV/AutoFlow,Complete Support
4,14569364,30005707,20870580,2144-01-12 00:00:00,19.0,1,83,M,Medicare,WHITE,Surgical Intensive Care Unit (SICU),EW EMER.,72.0,169.333333,0,72.0,133.0,57.0,83.0,97.0,17.0,498.0,34.136546,8.466,5.0,40.0,10.0,11.0,MMV/PSV/AutoFlow,Complete Support,73.0,123.0,50.0,74.0,95.0,15.0,530.0,28.301887,7.950,5.0,40.0,10.0,17.0,MMV/PSV/AutoFlow,Complete Support
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40603,11539827,39995735,21192405,2124-08-21 05:00:00,4.0,1,62,M,Other,WHITE,Cardiac Vascular Intensive Care Unit (CVICU),EW EMER.,86.4,173.000000,0,80.0,133.0,61.0,84.0,95.0,23.0,382.0,60.209424,8.786,5.0,40.0,18.0,20.0,CPAP/PSV,Minimal Support,80.0,118.0,51.0,70.0,97.0,20.0,382.0,52.356021,7.640,5.0,40.0,18.0,20.0,CPAP/PSV,Minimal Support
40604,11539827,39995735,21192405,2124-08-21 06:00:00,3.0,1,62,M,Other,WHITE,Cardiac Vascular Intensive Care Unit (CVICU),EW EMER.,86.4,173.000000,0,80.0,130.0,57.0,80.0,94.0,27.0,382.0,70.680628,10.314,5.0,50.0,18.0,20.0,Standby,Minimal Support,80.0,133.0,61.0,84.0,95.0,23.0,382.0,60.209424,8.786,5.0,40.0,18.0,20.0,CPAP/PSV,Minimal Support
40605,11539827,39995735,21192405,2124-08-21 07:00:00,2.0,1,62,M,Other,WHITE,Cardiac Vascular Intensive Care Unit (CVICU),EW EMER.,86.4,173.000000,0,80.0,131.0,56.0,79.0,95.0,21.0,382.0,54.973822,8.022,5.0,50.0,18.0,20.0,Standby,Minimal Support,80.0,130.0,57.0,80.0,94.0,27.0,382.0,70.680628,10.314,5.0,50.0,18.0,20.0,Standby,Minimal Support
40606,11539827,39995735,21192405,2124-08-21 08:00:00,1.0,1,62,M,Other,WHITE,Cardiac Vascular Intensive Care Unit (CVICU),EW EMER.,86.4,173.000000,0,82.0,141.0,58.0,82.0,97.0,25.0,382.0,65.445026,9.550,5.0,40.0,18.0,20.0,Standby,Minimal Support,80.0,131.0,56.0,79.0,95.0,21.0,382.0,54.973822,8.022,5.0,50.0,18.0,20.0,Standby,Minimal Support


In [17]:
# Table after dropna(); rows lacking a previous hour are gone.
group_visi_vese_without_miss_df

Unnamed: 0,subject_id,stay_id,hadm_id,charttime,before_weaning_hr,label,age_now,gender,insurance,race,first_careunit,admission_type,weight_kg,height_cm,tobacco,heart_rate,sbp,dbp,mbp,spo2,resp_rate,tidal_volume_observed,RSBI,minute_ventilation,peep,fio2,respiratory_rate_set,plateau_pressure,ventilator_mode,ventilator_mode_group,heart_rate_prev_1h,sbp_prev_1h,dbp_prev_1h,mbp_prev_1h,spo2_prev_1h,resp_rate_prev_1h,tidal_volume_observed_prev_1h,RSBI_prev_1h,minute_ventilation_prev_1h,peep_prev_1h,fio2_prev_1h,respiratory_rate_set_prev_1h,plateau_pressure_prev_1h,ventilator_mode_prev_1h,ventilator_mode_group_prev_1h
1,14569364,30005707,20870580,2144-01-11 21:00:00,22.0,1,83,M,Medicare,WHITE,Surgical Intensive Care Unit (SICU),EW EMER.,72.0,169.333333,0,73.0,112.0,41.0,67.0,95.0,17.0,530.0,32.075472,9.010,5.0,40.0,10.0,17.0,MMV/PSV/AutoFlow,Complete Support,67.0,115.0,54.0,74.0,94.0,11.0,530.0,20.754717,5.830,5.0,40.0,10.0,17.0,MMV/PSV/AutoFlow,Complete Support
2,14569364,30005707,20870580,2144-01-11 22:00:00,21.0,1,83,M,Medicare,WHITE,Surgical Intensive Care Unit (SICU),EW EMER.,72.0,169.333333,0,72.0,117.0,51.0,72.0,96.0,15.0,530.0,28.301887,7.950,5.0,40.0,10.0,17.0,MMV/PSV/AutoFlow,Complete Support,73.0,112.0,41.0,67.0,95.0,17.0,530.0,32.075472,9.010,5.0,40.0,10.0,17.0,MMV/PSV/AutoFlow,Complete Support
3,14569364,30005707,20870580,2144-01-11 23:00:00,20.0,1,83,M,Medicare,WHITE,Surgical Intensive Care Unit (SICU),EW EMER.,72.0,169.333333,0,73.0,123.0,50.0,74.0,95.0,15.0,530.0,28.301887,7.950,5.0,40.0,10.0,17.0,MMV/PSV/AutoFlow,Complete Support,72.0,117.0,51.0,72.0,96.0,15.0,530.0,28.301887,7.950,5.0,40.0,10.0,17.0,MMV/PSV/AutoFlow,Complete Support
4,14569364,30005707,20870580,2144-01-12 00:00:00,19.0,1,83,M,Medicare,WHITE,Surgical Intensive Care Unit (SICU),EW EMER.,72.0,169.333333,0,72.0,133.0,57.0,83.0,97.0,17.0,498.0,34.136546,8.466,5.0,40.0,10.0,11.0,MMV/PSV/AutoFlow,Complete Support,73.0,123.0,50.0,74.0,95.0,15.0,530.0,28.301887,7.950,5.0,40.0,10.0,17.0,MMV/PSV/AutoFlow,Complete Support
5,14569364,30005707,20870580,2144-01-12 01:00:00,18.0,1,83,M,Medicare,WHITE,Surgical Intensive Care Unit (SICU),EW EMER.,72.0,169.333333,0,63.0,115.0,48.0,71.0,96.0,14.0,498.0,28.112450,6.972,5.0,40.0,10.0,11.0,MMV/PSV/AutoFlow,Complete Support,72.0,133.0,57.0,83.0,97.0,17.0,498.0,34.136546,8.466,5.0,40.0,10.0,11.0,MMV/PSV/AutoFlow,Complete Support
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40603,11539827,39995735,21192405,2124-08-21 05:00:00,4.0,1,62,M,Other,WHITE,Cardiac Vascular Intensive Care Unit (CVICU),EW EMER.,86.4,173.000000,0,80.0,133.0,61.0,84.0,95.0,23.0,382.0,60.209424,8.786,5.0,40.0,18.0,20.0,CPAP/PSV,Minimal Support,80.0,118.0,51.0,70.0,97.0,20.0,382.0,52.356021,7.640,5.0,40.0,18.0,20.0,CPAP/PSV,Minimal Support
40604,11539827,39995735,21192405,2124-08-21 06:00:00,3.0,1,62,M,Other,WHITE,Cardiac Vascular Intensive Care Unit (CVICU),EW EMER.,86.4,173.000000,0,80.0,130.0,57.0,80.0,94.0,27.0,382.0,70.680628,10.314,5.0,50.0,18.0,20.0,Standby,Minimal Support,80.0,133.0,61.0,84.0,95.0,23.0,382.0,60.209424,8.786,5.0,40.0,18.0,20.0,CPAP/PSV,Minimal Support
40605,11539827,39995735,21192405,2124-08-21 07:00:00,2.0,1,62,M,Other,WHITE,Cardiac Vascular Intensive Care Unit (CVICU),EW EMER.,86.4,173.000000,0,80.0,131.0,56.0,79.0,95.0,21.0,382.0,54.973822,8.022,5.0,50.0,18.0,20.0,Standby,Minimal Support,80.0,130.0,57.0,80.0,94.0,27.0,382.0,70.680628,10.314,5.0,50.0,18.0,20.0,Standby,Minimal Support
40606,11539827,39995735,21192405,2124-08-21 08:00:00,1.0,1,62,M,Other,WHITE,Cardiac Vascular Intensive Care Unit (CVICU),EW EMER.,86.4,173.000000,0,82.0,141.0,58.0,82.0,97.0,25.0,382.0,65.445026,9.550,5.0,40.0,18.0,20.0,Standby,Minimal Support,80.0,131.0,56.0,79.0,95.0,21.0,382.0,54.973822,8.022,5.0,50.0,18.0,20.0,Standby,Minimal Support


In [36]:
# Write the NaN-free table to CSV without the index column. When the notebook is run
# top to bottom this is the label=1, n=1 table built above.
group_visi_vese_without_miss_df.to_csv("data/data_by_table/group_pre_1hr_visi_vese.csv", index=False) # TODO: change file name

Generate the negative (label = 0) rows in the same format, e.g. for use in the validation set.

In [39]:
# Same pipeline as for the positives, but for label == 0 rows of the full dataset:
# build the n=1 table, drop rows containing any NaN, and write the result to CSV.
group_pre_vitalsign_and_ventilator_settings_negative_df = group_pre_vitalsign_and_ventilator_settings(pre_24_rows_df, label=0, n=1)
group_visi_vese_negative_without_miss_df = group_pre_vitalsign_and_ventilator_settings_negative_df.dropna()
group_visi_vese_negative_without_miss_df.to_csv("data/data_by_table/group_pre_1hr_visi_vese_negative.csv", index=False) # TODO: change file name

In [47]:
# Build the n=1 tables separately for every (split, label) combination and drop rows
# containing any NaN. Each call re-runs the grouping function on that split's rows.
group_pre_1hr_visi_vese_positive_train_df = group_pre_vitalsign_and_ventilator_settings(pre_24_rows_train_df, label=1, n=1).dropna()
group_pre_1hr_visi_vese_negative_train_df = group_pre_vitalsign_and_ventilator_settings(pre_24_rows_train_df, label=0, n=1).dropna()
group_pre_1hr_visi_vese_positive_val_df = group_pre_vitalsign_and_ventilator_settings(pre_24_rows_val_df, label=1, n=1).dropna()
group_pre_1hr_visi_vese_negative_val_df = group_pre_vitalsign_and_ventilator_settings(pre_24_rows_val_df, label=0, n=1).dropna()
group_pre_1hr_visi_vese_positive_test_df = group_pre_vitalsign_and_ventilator_settings(pre_24_rows_test_df, label=1, n=1).dropna()
group_pre_1hr_visi_vese_negative_test_df = group_pre_vitalsign_and_ventilator_settings(pre_24_rows_test_df, label=0, n=1).dropna()

In [56]:
# Persist the six n=1 (split, label) tables under data/model_data/, without the index column.
group_pre_1hr_visi_vese_positive_train_df.to_csv("data/model_data/group_pre_1hr_visi_vese_positive_train.csv", index=False)
group_pre_1hr_visi_vese_negative_train_df.to_csv("data/model_data/group_pre_1hr_visi_vese_negative_train.csv", index=False)
group_pre_1hr_visi_vese_positive_val_df.to_csv("data/model_data/group_pre_1hr_visi_vese_positive_val.csv", index=False)
group_pre_1hr_visi_vese_negative_val_df.to_csv("data/model_data/group_pre_1hr_visi_vese_negative_val.csv", index=False)
group_pre_1hr_visi_vese_positive_test_df.to_csv("data/model_data/group_pre_1hr_visi_vese_positive_test.csv", index=False)
group_pre_1hr_visi_vese_negative_test_df.to_csv("data/model_data/group_pre_1hr_visi_vese_negative_test.csv", index=False)

## Generate data for B: vs_affect_model

In [48]:
# Build the n=2 tables (previous 2 hours attached) for every (split, label) combination,
# dropping rows that contain any NaN.
group_pre_2hr_visi_vese_positive_train_df = group_pre_vitalsign_and_ventilator_settings(pre_24_rows_train_df, label=1, n=2).dropna()
group_pre_2hr_visi_vese_negative_train_df = group_pre_vitalsign_and_ventilator_settings(pre_24_rows_train_df, label=0, n=2).dropna()
group_pre_2hr_visi_vese_positive_val_df = group_pre_vitalsign_and_ventilator_settings(pre_24_rows_val_df, label=1, n=2).dropna()
group_pre_2hr_visi_vese_negative_val_df = group_pre_vitalsign_and_ventilator_settings(pre_24_rows_val_df, label=0, n=2).dropna()
group_pre_2hr_visi_vese_positive_test_df = group_pre_vitalsign_and_ventilator_settings(pre_24_rows_test_df, label=1, n=2).dropna()
group_pre_2hr_visi_vese_negative_test_df = group_pre_vitalsign_and_ventilator_settings(pre_24_rows_test_df, label=0, n=2).dropna()

# Stack positives on top of negatives per split; ignore_index=True renumbers the rows 0..N-1.
group_pre_2hr_visi_vese_train_df = pd.concat([group_pre_2hr_visi_vese_positive_train_df, group_pre_2hr_visi_vese_negative_train_df], ignore_index=True)
group_pre_2hr_visi_vese_val_df = pd.concat([group_pre_2hr_visi_vese_positive_val_df, group_pre_2hr_visi_vese_negative_val_df], ignore_index=True)
group_pre_2hr_visi_vese_test_df = pd.concat([group_pre_2hr_visi_vese_positive_test_df, group_pre_2hr_visi_vese_negative_test_df], ignore_index=True)

In [57]:
# Persist the combined (positive + negative) n=2 tables per split ...
group_pre_2hr_visi_vese_train_df.to_csv("data/model_data/group_pre_2hr_visi_vese_train.csv", index=False)
group_pre_2hr_visi_vese_val_df.to_csv("data/model_data/group_pre_2hr_visi_vese_val.csv", index=False)
group_pre_2hr_visi_vese_test_df.to_csv("data/model_data/group_pre_2hr_visi_vese_test.csv", index=False)
# ... and the label-specific n=2 tables per split.
group_pre_2hr_visi_vese_positive_train_df.to_csv("data/model_data/group_pre_2hr_visi_vese_positive_train.csv", index=False)
group_pre_2hr_visi_vese_negative_train_df.to_csv("data/model_data/group_pre_2hr_visi_vese_negative_train.csv", index=False)
group_pre_2hr_visi_vese_positive_val_df.to_csv("data/model_data/group_pre_2hr_visi_vese_positive_val.csv", index=False)
group_pre_2hr_visi_vese_negative_val_df.to_csv("data/model_data/group_pre_2hr_visi_vese_negative_val.csv", index=False)
group_pre_2hr_visi_vese_positive_test_df.to_csv("data/model_data/group_pre_2hr_visi_vese_positive_test.csv", index=False)
group_pre_2hr_visi_vese_negative_test_df.to_csv("data/model_data/group_pre_2hr_visi_vese_negative_test.csv", index=False)

In [7]:
# Rebuild the full-dataset tables with n=2 for both labels and drop rows containing any NaN.
# NOTE: these four names were bound to the n=1 tables earlier in the notebook; from this
# cell on they refer to the n=2 tables.
group_pre_vitalsign_and_ventilator_settings_df = group_pre_vitalsign_and_ventilator_settings(pre_24_rows_df, label=1, n=2)
group_visi_vese_without_miss_df = group_pre_vitalsign_and_ventilator_settings_df.dropna()
group_pre_vitalsign_and_ventilator_settings_negative_df = group_pre_vitalsign_and_ventilator_settings(pre_24_rows_df, label=0, n=2)
group_visi_vese_negative_without_miss_df = group_pre_vitalsign_and_ventilator_settings_negative_df.dropna()

In [None]:
# checking correctness
# Spot check: for one feature, put its current value next to its 1 h and 2 h lags
# for the first 25 rows, so the shift can be verified by eye.
feature = "heart_rate"
group_visi_vese_without_miss_df.loc[
    :, ["stay_id", "before_weaning_hr", f"{feature}", f"{feature}_prev_1h", f"{feature}_prev_2h"]
].head(25)

In [None]:
# Stack the NaN-free positive and negative tables into one frame with a fresh 0..N-1 index.
group_visi_vese_2h_df = pd.concat([group_visi_vese_without_miss_df, group_visi_vese_negative_without_miss_df], ignore_index=True)
# Display the combined table as the cell output.
group_visi_vese_2h_df

In [8]:
# Persist the full-dataset n=2 tables: positives, negatives, and their concatenation.
# Requires the cell that defines group_visi_vese_2h_df to have been run first.
group_visi_vese_without_miss_df.to_csv("data/data_by_table/group_pre_2hr_visi_vese_positive.csv", index=False) # TODO: change file name
group_visi_vese_negative_without_miss_df.to_csv("data/data_by_table/group_pre_2hr_visi_vese_negative.csv", index=False) # TODO: change file name
group_visi_vese_2h_df.to_csv("data/data_by_table/group_pre_2hr_visi_vese.csv", index=False) # TODO: change file name

In [None]:
# NOTE(review): this cell repeats, statement for statement, the earlier cell that builds the
# full-dataset n=2 tables; it recomputes the same four variables. Consider removing it.
group_pre_vitalsign_and_ventilator_settings_df = group_pre_vitalsign_and_ventilator_settings(pre_24_rows_df, label=1, n=2)
group_visi_vese_without_miss_df = group_pre_vitalsign_and_ventilator_settings_df.dropna()
group_pre_vitalsign_and_ventilator_settings_negative_df = group_pre_vitalsign_and_ventilator_settings(pre_24_rows_df, label=0, n=2)
group_visi_vese_negative_without_miss_df = group_pre_vitalsign_and_ventilator_settings_negative_df.dropna()