In [1]:
from pathlib import Path

import numpy as np
import pandas as pd

import splits_generation
import utils

## App2. Sliding window

In [2]:
# SEPSIS DEFINITION
# Question 1) How many hours do we propagate a True atb backwards and forwards?
#   -> parameters Nts_pre and Nts_post.
# Question 2) How many hours do we propagate from sepsis_onset?
#   -> parameter N_prog_sep.

sep_def = {
    # Parameters for antibiotic propagation
    'Nts_pre': 24, 'Nts_post': 24,
    # Parameter for sepsis propagation
    'N_prog_sep': 12,
    # Parameter for determining sepsis onset
    'increm_sofa': 2,
    # Time step of reference: ICU admission (True) or pre-ICU admission (False)
    'ref_sofa_icu': False
}

# Name of the propagated-sepsis label column. It is built exactly once so that
# 'keys', 'label' and 'f_tr_te' below always agree. The previous mix of
# 'sep_%2s' % ... (space-padded to width 2) and 'sep_' + str(...) only matched
# for two-digit horizons: N_prog_sep = 6 produced 'sep_ 6' versus 'sep_6'.
# NOTE(review): assumes the preprocessing step names the column without
# padding -- confirm against splits_generation before using a one-digit value.
sep_prog_label = 'sep_' + str(sep_def['N_prog_sep'])

# Wider column list (raw signals, static variables, 'abx' and 'sofa').
# It is not referenced by the configuration dictionary below.
keys_to_select_filter = ['stay_id', 'stay_time',
'hr_raw','o2sat_raw','temp_raw','sbp_raw','map_raw','dbp_raw','resp_raw','etco2_raw','fio2_raw',
'be_raw', 'bicar_raw','ph_raw','pco2_raw','cl_raw','mg_raw','phos_raw','k_raw','ast_raw','bun_raw',
'alp_raw','ca_raw','crea_raw','bildir_raw','glu_raw','lact_raw', 'bili_raw','tri_raw','hct_raw',
'hgb_raw','ptt_raw','wbc_raw','fgn_raw','plt_raw','age_static','female_static','cai_raw','na_raw',
'po2_raw','alb_raw','alt_raw','ck_raw','ckmb_raw','crp_raw','tnt_raw','urine_raw','basos_raw',
'bnd_raw','eos_raw','esr_raw','hbco_raw','inrpt_raw','lymph_raw','mch_raw','mchc_raw',
'mcv_raw','methb_raw','neut_raw','pt_raw','rbc_raw','rdw_raw','tco2_raw','weight_static','height_static', 'abx', 'sofa']

# Columns passed to the pipeline through params_to_configure['keys'].
keys_to_select = ['stay_id', 'stay_time','hr_raw', 'o2sat_raw','dbp_raw', 'map_raw', 'resp_raw', 'fio2_raw',  'crp_raw',
                  'po2_raw','bili_raw', 'plt_raw', 'crea_raw', 'temp_raw',
                  #'age_static','female_static','weight_static','height_static',
                  'sep_onset', sep_prog_label]


params_to_configure = {
    # File to load
    'path': '../datasets/hirid_0.5.6.parquet',
    'w_pre_onset':  None,  # Number of windows pre sep_onset = 1
    'w_post_onset':  None,  # Number of windows post sep_onset = 1
    'keys': keys_to_select,
    'label':  ['sep_onset', sep_prog_label],
    'f_tr_te':  ['stay_id', 'stay_time', 'sep_onset', sep_prog_label, 'w_id'],
    # sliding window
    'moving_span': 1,
    # min_length_pat
    'min_length_pat': 0, # default: 0
    # Type of imputation
    'imputationType': "LVCF",
    # Whether to filter out patients with too little information
    "filter_pat_nans": False, # if True, fix a threshold (next)
    # Threshold: remove patients with less information than the threshold value
    'th': 50,
    # Only select patients with sepsis
    "filter_pat": False,
    "length_window": 7,
}

# One random seed per output folder (paired by position).
seeds = [34, 56, 78]
folders = ["s1", "s2", "s3"]

In [3]:
# Read the parquet file named in the configuration into `df`.
df = pd.read_parquet(path=params_to_configure['path'])

In [7]:
# Show the column labels at positions 30..119 of the loaded frame.
df.columns[30:120]

Index(['hgb_raw', 'ptt_raw', 'wbc_raw', 'fgn_raw', 'plt_raw', 'age_static',
       'female_static', 'cai_raw', 'na_raw', 'po2_raw', 'alb_raw', 'alt_raw',
       'ck_raw', 'ckmb_raw', 'crp_raw', 'tnt_raw', 'urine_raw', 'basos_raw',
       'bnd_raw', 'eos_raw', 'esr_raw', 'hbco_raw', 'inrpt_raw', 'lymph_raw',
       'mch_raw', 'mchc_raw', 'mcv_raw', 'methb_raw', 'neut_raw', 'pt_raw',
       'rbc_raw', 'rdw_raw', 'tco2_raw', 'weight_static', 'height_static',
       'sresp', 'scoag', 'sliver', 'scardio', 'scns', 'srenal', 'sofa',
       'qsofa', 'sirs', 'news', 'mews', 'death', 'abx', 'ventialtion',
       'vasopressors', 'dur_var', 'all_miss', 'ts_miss', 'ts_avail',
       'onset_ind', 'onset_delta', 'is_case', 'phys_pos_utility',
       'phys_neg_utility', 'phys_cum_utility', 'phys_opt_utility',
       'class_m6_p6', 'class_m6_inf', 'reg_m8_m1', 'reg_p4_m1', 'hr_ind',
       'o2sat_ind', 'temp_ind', 'sbp_ind', 'map_ind', 'dbp_ind', 'resp_ind',
       'etco2_ind', 'fio2_ind', 'be_ind', 'b

In [None]:
# Bookkeeping read by the split-export loop later in the notebook:
# the experiment index used in output paths and the per-split key lists.
idx_exp = 2
keys_by_split = []

# Run preprocessing with the minimum patient length set to 0.
params_to_configure['min_length_pat'] = 0
df, min_length_pat = splits_generation.preprocessing(params_to_configure, sep_def, debug=False)

# Build sliding windows from the preprocessed frame, then filter them.
df_sw = utils.slidingWindow(
    df,
    params_to_configure['moving_span'],
    params_to_configure['length_window'],
)
df_filter = utils.filterWindows(df_sw)

In [None]:
# Persist the filtered sliding-window table to the working directory.
# NOTE(review): the file name says "mimic", but the configured input path
# points at a HiRID parquet file -- confirm which dataset this output holds.
df_filter.to_parquet('df_app2_mimic.parquet', index=False)

In [None]:
# Generate one train/test split per seed and write it to
# ./splits/mimic/App<idx_exp>/<folder>/ as .npy tensors plus CSV metadata.
# NOTE(review): the output tree is named "mimic" while the configured input
# path points at a HiRID parquet file -- confirm the intended dataset name.

# Fail before writing anything if a seed has no destination folder.
if len(folders) < len(seeds):
    raise ValueError("folders must provide one entry per seed")

# Start from an empty list so re-running this cell does not append duplicates.
keys_by_split = []

# Loop-invariant: the minimum patient length is set to the window length.
params_to_configure['min_length_pat'] = params_to_configure['length_window']

split_root = Path("./splits/mimic") / ("App" + str(idx_exp))

for seed, folder in zip(seeds, folders):
    print("split...", folder)
    X_train, X_test, y_train, y_test, keys, w_id_tr, w_id_te = splits_generation.get_tr_te(
        df_filter, params_to_configure, seed
    )

    keys_by_split.append(keys)
    print("X_train:", X_train.shape)
    print("X_test:", X_test.shape)
    print(y_train.shape)
    print(y_test.shape)

    # np.save / to_csv do not create missing parent directories, so make sure
    # the destination exists first.
    out_dir = split_root / folder
    out_dir.mkdir(parents=True, exist_ok=True)

    np.save(out_dir / "X_train_tensor.npy", X_train)
    np.save(out_dir / "y_train_tensor.npy", y_train)
    np.save(out_dir / "X_test_tensor.npy", X_test)
    np.save(out_dir / "y_test_tensor.npy", y_test)

    pd.DataFrame(keys).to_csv(out_dir / "keys.csv")
    w_id_tr.to_csv(out_dir / "w_id_tr.csv")
    w_id_te.to_csv(out_dir / "w_id_te.csv")