In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm, tqdm_notebook

In [None]:
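# Data cleaning
# input: path of the CSV file (anything accepted by pd.read_csv)
# output: dataframe with adm_time, proxyend_time and los columns added
# filter_outliers: remove the encounters with the top 1% longest stays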
def data_formating(file_name, filter_outliers=True):
    """Load the ICU measurement table and attach per-encounter time anchors.

    Parameters
    ----------
    file_name : str or file-like
        Passed straight to ``pd.read_csv``. The table must contain the columns
        ``dummy_encounter_id``, ``recorded_time`` and ``outcome_time``.
    filter_outliers : bool, default True
        When true, ``los`` is converted to whole hours (float) and every
        encounter whose stay is longer than the 99th percentile (computed over
        encounters, ``interpolation='lower'``) is dropped.

    Returns
    -------
    pandas.DataFrame
        The measurement rows with three added columns:

        * ``adm_time``      - earliest ``recorded_time`` of the encounter,
          rounded down to the hour;
        * ``proxyend_time`` - earliest ``outcome_time`` of the encounter,
          rounded down to the hour;
        * ``los``           - ``outcome_time`` minus the (unrounded) earliest
          ``recorded_time``. Whole hours as float when ``filter_outliers`` is
          true, otherwise left as a timedelta.
    """
    icu_df = pd.read_csv(file_name)

    # data cleaning
    icu_df = icu_df.astype({'outcome_time':'datetime64[ns]','recorded_time':'datetime64[ns]'})
    icu_df = icu_df.replace({'Female':1,'Male':0})

    by_encounter = icu_df.groupby('dummy_encounter_id')
    # first measurement of each encounter (unrounded); used for the length of stay
    first_m = by_encounter['recorded_time'].min()
    # admission / outcome time rounded down to the hour. dt.floor replaces the
    # former integer-nanosecond arithmetic, which relied on the platform's
    # default integer width.
    adm_t = first_m.dt.floor('h')
    proxyend = by_encounter['outcome_time'].min().dt.floor('h')

    # Attach the per-encounter series to every row. The series keep the names
    # 'outcome_time' / 'recorded_time', so each merge yields *_x (per-encounter
    # value) and *_y (row value) columns that are renamed right after.
    icu_df2 = pd.merge(proxyend,icu_df, left_index=True,right_on='dummy_encounter_id')
    icu_df2 = icu_df2.rename(columns={'outcome_time_x':'proxyend_time','outcome_time_y':'outcome_time'})
    icu_df2 = pd.merge(adm_t,icu_df2, left_index=True,right_on='dummy_encounter_id')
    icu_df2 = icu_df2.rename(columns={'recorded_time_x':'adm_time','recorded_time_y':'recorded_time'})
    icu_df2 = pd.merge(first_m,icu_df2, left_index=True,right_on='dummy_encounter_id')
    icu_df2 = icu_df2.rename(columns={'recorded_time_x':'first_m','recorded_time_y':'recorded_time'})

    # length of stay, measured from the first recorded measurement
    icu_df2['los'] = icu_df2['outcome_time']-icu_df2['first_m']
    icu_df2 = icu_df2.drop(columns=['first_m'])

    # filter out top 1% longest stay
    if filter_outliers:
        # Whole hours as float (NaN where the stay is unknown). This is what
        # astype('timedelta64[h]') produced before pandas 2.0; that cast now
        # raises because only s/ms/us/ns resolutions are supported.
        icu_df2['los'] = icu_df2['los'].dt.floor('h') / pd.Timedelta(hours=1)
        los_per_encounter = icu_df2.groupby('dummy_encounter_id')['los'].first()
        ninty_nine_quantile = los_per_encounter.quantile(q=0.99,interpolation='lower')
        icu_df2 = icu_df2[icu_df2['los']<=ninty_nine_quantile]

    return icu_df2

In [None]:
# sample 24 hours of data from each encounter
# input: original dataframe
# output: dataframe of 24 hours of data for each encounter
# start: "first": first 24 hours; "last": the 24 hours ending dt_end hours before the event;
#        "random": "last" for the outcome group, a randomly placed 24 hours for the survival group
# dt_end: window period (hours) before the event

def slicing(df,start,duration=24,dt_end=12):
    """Cut one ``duration``-hour observation window out of every eligible encounter.

    Only encounters with ``los >= duration + dt_end`` hours are kept.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``dummy_encounter_id``, ``recorded_time``,
        ``adm_time``, ``proxyend_time``, ``los`` and (for ``'random'``)
        ``outcome``. ``los`` may be a number of hours or a timedelta.
    start : {'first', 'last', 'random'}
        * ``'first'``  - window starts at ``adm_time``;
        * ``'last'``   - window starts ``duration + dt_end`` hours before
          ``proxyend_time``;
        * ``'random'`` - encounters with ``outcome == 1`` are cut as in
          ``'last'``; encounters with ``outcome == 0`` get a start drawn, in
          whole hours, between ``adm_time`` and the ``'last'`` start.
    duration : int, default 24
        Window length in hours.
    dt_end : int, default 12
        Gap in hours between the end of the window and ``proxyend_time``.

    Returns
    -------
    pandas.DataFrame
        Rows inside the window, with an added ``sample_start`` column. The
        input frame is not modified.

    Raises
    ------
    ValueError
        If ``start`` is not one of the three supported modes. Previously the
        unsliced frame was returned without a ``sample_start`` column.
    """
    if start not in ('first', 'last', 'random'):
        raise ValueError("start must be 'first', 'last' or 'random', got {!r}".format(start))

    one_hour = pd.Timedelta(hours=1)
    window = pd.Timedelta(hours=duration)
    lead = pd.Timedelta(hours=duration + dt_end)

    # data_formating leaves los as a timedelta when filter_outliers is False;
    # express it in hours so the eligibility test works for both forms.
    los_hours = df['los']
    if pd.api.types.is_timedelta64_dtype(los_hours):
        los_hours = los_hours / one_hour

    # eligible encounters
    icu_df = df[los_hours >= (duration + dt_end)]
    print('after slicing: ', len(icu_df['dummy_encounter_id'].unique()))

    if start == 'first':
        # assign() builds a new frame, so the caller's data is never written to
        icu_df = icu_df.assign(sample_start=icu_df['adm_time'])
        icu_df = icu_df[icu_df['recorded_time'] < icu_df['sample_start'] + window]
        return icu_df.sort_values(by=['dummy_encounter_id','recorded_time'])

    if start == 'last':
        return _window_before_end(icu_df, lead, window)

    # start == 'random': outcome group is cut exactly like 'last'
    icu_o = _window_before_end(icu_df[icu_df['outcome']==1], lead, window)

    # survival group: one row per encounter to draw the start time from
    icu_s = icu_df[icu_df['outcome']==0]
    icu_st = icu_s.groupby('dummy_encounter_id').first()
    # number of whole hours between admission and the latest admissible start
    gap_hours = (icu_st['proxyend_time'] - lead - icu_st['adm_time']) / one_hour

    # A private generator seeded with 0 draws the same numbers as
    # np.random.seed(0) + np.random.choice did, without resetting the global
    # NumPy random state for the rest of the notebook.
    rng = np.random.RandomState(0)
    sample_start = []
    for adm_time, gap in zip(icu_st['adm_time'], gap_hours):
        try:
            offset_hours = int(rng.choice(int(gap + 1)))
        except ValueError:
            # gap is NaN or leaves no room to draw from: fall back to admission time
            print(gap)
            offset_hours = 0
        sample_start.append(adm_time + offset_hours * one_hour)
    icu_st['sample_start'] = sample_start
    print('before concate; ', len(icu_st.index))

    icu_sm = pd.merge(icu_s,icu_st['sample_start'],right_index=True,left_on='dummy_encounter_id')
    print('after concate; ', len(icu_sm['dummy_encounter_id'].unique()))
    in_window = ((icu_sm['recorded_time'] >= icu_sm['sample_start'])
                 & (icu_sm['recorded_time'] < icu_sm['sample_start'] + window))
    icu_sm = icu_sm[in_window].sort_values(by=['dummy_encounter_id','recorded_time'])
    print('after slicing; ', len(icu_sm['dummy_encounter_id'].unique()))

    # outcome rows first, then survival rows
    return pd.concat([icu_o,icu_sm])


def _window_before_end(frame, lead, window):
    """Keep the rows recorded in the ``window`` that starts ``lead`` before ``proxyend_time``."""
    frame = frame.assign(sample_start=frame['proxyend_time'] - lead)
    in_window = ((frame['recorded_time'] >= frame['sample_start'])
                 & (frame['recorded_time'] < frame['sample_start'] + window))
    return frame[in_window].sort_values(by=['dummy_encounter_id','recorded_time'])

In [None]:
# Feature configuration
# input: dataframe of 24 hours of data for each encounter
# output: samples restricted to the selected feature groups
def transformer(df,dummies = True, j_block=15,base=True,vitals=True,comments=True,v_set=False,medication=False,notes=False,n_extract=False):
    """Select feature groups and add time-of-day information to a sliced frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Sliced measurements. Needs ``recorded_time``, ``sample_start``,
        ``dummy_encounter_id`` and ``outcome`` plus every column of the
        enabled feature groups. The frame is not modified.
    dummies : bool, default True
        One-hot encode the time-of-day block (``jblock_*`` columns, uint8);
        otherwise a single categorical ``jblock`` column is returned.
    j_block : int, default 15
        Width of a time-of-day block in minutes.
    base, vitals, comments, v_set, medication, notes, n_extract : bool
        Switch the corresponding group of columns on or off.

    Returns
    -------
    pandas.DataFrame
        The selected columns, in group order, followed by the time-of-day
        column(s). ``julian_minute_c`` is the minute of the day of
        ``recorded_time`` and ``dt_start`` is ``recorded_time - sample_start``.
    """
    feature_groups = [
        (base, ['dummy_encounter_id', 'adm_time', 'dt_start', 'outcome', 'julian_minute_c']),
        (vitals, ['hr_entered', 'rr_entered', 'bp_entered', 'temp_entered', 'spo2_entered']),
        (comments, ['hr_comment', 'rr_comment', 'bp_comment', 'temp_comment', 'spo2_comment']),
        (v_set, ['one_vital', 'set_vital']),
        (medication, ['prn', 'withheld']),
        (notes, ['notes']),
        (n_extract, [
            'Fall down',
            'Abnormal rate rhythm depth and effort of respirations',
            'Abnormal Mental State',
            'Communication problem',
            'cognitive defects',
            'Impaired blood oxygen',
            'Delusions',
            'General concern',
            'Hallucinations',
            'Chest Pain',
            'Mood disorder',
            'Abnormal Blood Pressure',
            'Abnormal Heart Rhythm',
            'Weight alteration',
            'Improper renal function',
            'abnormal rate rhythm depth and effort of respirations_1',
            'Violence Gesture',
            'Abnormal lab test',
            'Restraint',
            'Aspiration',
            'Suicide Risk',
            'Abnormal Temperature',
            'Monitoring',
            'Incisional pain',
            'cranial nerve palsies',
            'Musculoskeletal Pain',
            'Sign Symptoms of infection',
            'ataxic patterns',
            'hypocalcemia',
            'seizure',
            'pain duration',
            'Diagnosis related with Infection',
            'Improper Airway Clearance',
            'abnormal reflex',
            'Acute onset pain',
            'Abuse',
            'Localized pain',
            'pain killer',
            'Back Pain',
            'Fluid Volume Alteration',
            'Dysuria',
            'Arthralgia',
            'delirium',
            'Cutaneous Pain',
            'Oxygen response',
            'headache',
            'Medication related with Infection']),
    ]
    columns = [name for enabled, names in feature_groups if enabled for name in names]

    # minute of the day, computed with the datetime accessor instead of
    # formatting and re-parsing every timestamp in a Python loop
    recorded = df['recorded_time']
    minute_of_day = recorded.dt.hour * 60 + recorded.dt.minute

    # Derived columns are added to a new frame: the same sliced frame is fed to
    # this function by many cells and must not accumulate helper columns.
    source = df.assign(julian_minute_c=minute_of_day,
                       dt_start=recorded - df['sample_start'])

    # select columns
    cleaned = source.loc[:,columns].copy()

    # create Time-of-day variables: left-closed blocks [0, j_block), [j_block, 2*j_block), ...
    cleaned['jblock'] = pd.cut(minute_of_day,range(0,1441,j_block),right=False)
    if dummies:
        # Encode only 'jblock' (other text columns are passed through untouched)
        # and pin the dtype: pandas >= 2.0 would otherwise return bool columns.
        cleaned = pd.get_dummies(cleaned,columns=['jblock'],prefix='jblock',dtype='uint8')

    # check number of encounters per class (taken from df so it also works with base=False)
    encounter_ids = df['dummy_encounter_id']
    outcome = len(encounter_ids[df['outcome']==1].unique())
    survival = len(encounter_ids[df['outcome']==0].unique())
    print('with columns: ', columns)
    print('outcome group: ', outcome)
    print('survival group: ', survival)
    return cleaned

In [None]:
# Create dataset
# Partition each encounter into regularly spaced timesteps and convert the sequence of each feature into a vector of binary variables.
# input: samples with selected features
# output: training data, training labels, holdout data, holdout labels (np.array of binary variables)
# freq: length of a timestep (minutes)
def create_dataset(dataset, freq=15):
    """Turn per-measurement rows into fixed-length binary sequences per encounter.

    Every encounter becomes a ``(1440 / freq, n_features)`` matrix: one row per
    ``freq``-minute step after ``dt_start == 0``, holding 1.0 where a feature
    was non-zero at least once during the step. The last ``1440 / freq``
    columns are treated as the one-hot time-of-day block (so ``freq`` is
    expected to equal the ``j_block`` used in ``transformer``); they are
    filled for every step by continuing the clock from the first observed
    measurement.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Needs ``dummy_encounter_id``, ``adm_time``, ``outcome``,
        ``julian_minute_c`` and ``dt_start``; all remaining columns are used
        as features.
    freq : int, default 15
        Length of a time step in minutes.

    Returns
    -------
    tuple of numpy.ndarray
        ``(training_data, training_labels, holdout_data, holdout_labels)``,
        all float64. Encounters admitted before 2016-02-01 form the training
        set, the others the holdout set.

    Raises
    ------
    ValueError
        If an encounter does not resample to exactly ``1440 / freq`` steps or
        has no time-of-day flag set.
    """
    periods = int(1440/freq)
    # 'min' is the spelling accepted by every pandas version; the 'T' alias is deprecated
    freq = str(freq)+'min'

    # all-zero base table with one row per time step, so that every encounter
    # resamples to the full set of steps even where nothing was recorded
    frame = pd.DataFrame(0, columns=dataset.columns,
                         index=pd.timedelta_range(start=pd.Timedelta(0), periods=periods, freq=freq))
    frame = frame.drop(columns=['dummy_encounter_id','adm_time','outcome','julian_minute_c','dt_start'])

    # split dataset into training and holdout set by predefined time
    split_date = pd.to_datetime('2016-02-01')
    ticu_stay = dataset[dataset['adm_time']<split_date]['dummy_encounter_id'].unique()
    hicu_stay = dataset[dataset['adm_time']>=split_date]['dummy_encounter_id'].unique()

    # group once instead of scanning the whole frame for every encounter
    by_stay = dataset.groupby('dummy_encounter_id', sort=False)
    tseqs, tlabels = _encode_stays(by_stay, ticu_stay, frame, freq, periods)
    hseqs, hlabels = _encode_stays(by_stay, hicu_stay, frame, freq, periods)

    training_data, training_labels, holdout_data, holdout_labels = np.array(tseqs,dtype='float64'),np.array(tlabels,dtype='float64'), np.array(hseqs,dtype='float64'),np.array(hlabels,dtype='float64')
    print("training_data: {}, training_labels: {}, holdout_data: {}, holdout_labels: {}".format(training_data.shape, training_labels.shape, holdout_data.shape, holdout_labels.shape))
    return training_data, training_labels, holdout_data, holdout_labels


def _encode_stays(by_stay, stay_ids, frame, freq, periods):
    """Encode each encounter in ``stay_ids``; return ``(sequences, labels)`` as lists."""
    steps = np.arange(periods)
    seqs = []
    labels = []
    for idx in tqdm_notebook(stay_ids):
        stay = by_stay.get_group(idx)
        labels.append(stay['outcome'].unique()[0])
        events = stay.drop(columns=['dummy_encounter_id','adm_time','outcome','julian_minute_c'])
        events = events.set_index('dt_start')

        # concat with base table, then count events per time step
        counts = pd.concat([frame,events]).resample(freq).sum()
        if counts.shape[0] != periods:
            raise ValueError('encounter {}: expected {} time steps, got shape {}'.format(
                idx, periods, counts.shape))

        # collapse the counts within each time step to 0/1
        seq = (counts.to_numpy(dtype='float64') != 0).astype('float64')

        # impute Time-of-day variables: locate the first step (i) that has a
        # time-of-day flag and its block (j), then let the block advance by one
        # per step, wrapping around midnight
        tod = seq[:, -periods:]          # view into seq
        active_steps = np.flatnonzero(tod.any(axis=1))
        if active_steps.size == 0:
            raise ValueError('encounter {}: no time-of-day flag set'.format(idx))
        i = active_steps[0]
        j = np.flatnonzero(tod[i])[0]
        tod[:] = 0
        tod[steps, (j - i + steps) % periods] = 1
        seqs.append(seq)
    return seqs, labels

In [None]:
# Data configuration: read the raw CSV once and slice it once. The experiment
# cells further down read `random` (sliced frame) and `start` (output folder).

file_name = 'dataset_icu.csv'
# length of time blocks (minutes)
freq=60
# filter top 1% longest admissions
filter_outliers=True
# data_formating accepts (file_name, filter_outliers); the former extra `freq`
# argument made this call raise TypeError
icu_df = data_formating(file_name, filter_outliers)

# sampling parameters
start='last'
duration=24
dt_end=12
j_block=freq

# slicing accepts (df, start, duration, dt_end); the former extra `j_block`
# argument made this call raise TypeError
random = slicing(icu_df,start,duration,dt_end)
print('loaded successfully')

# output directory for the np.save calls in the cells below
# NOTE(review): the frame was sliced with start='last' but is stored in a
# variable called `random` and saved under 'first24' - confirm the naming.
start='first24'

In [None]:
#Unit 15mins

In [None]:
# Experiment: 15-minute steps, base + vitals feature groups -> '*_15Tr5.npy'.
# Uses the sliced frame `random` and the output folder `start` defined in the
# data-configuration cell.

# time-step length and time-of-day block width (minutes)
freq = j_block = 15

# one-hot encode the time-of-day block
dummies = True

# feature groups
base = vitals = True
comments = v_set = medication = notes = n_extract = False

# Data pipeline
cleaned = transformer(random, dummies=dummies, j_block=j_block, base=base, vitals=vitals,
                      comments=comments, v_set=v_set, medication=medication,
                      notes=notes, n_extract=n_extract)
training_data, training_labels, holdout_data, holdout_labels = create_dataset(cleaned, freq=freq)

np.save(start + '/tseq_15Tr5.npy', training_data)
np.save(start + '/tlabels_15Tr5.npy', training_labels)
np.save(start + '/hseq_15Tr5.npy', holdout_data)
np.save(start + '/hlabels_15Tr5.npy', holdout_labels)

In [None]:
# Experiment: 15-minute steps, base + vitals + vital-set + medication feature
# groups -> '*_15Tr10jd.npy'. Uses `random` and `start` from the
# data-configuration cell.

# time-step length and time-of-day block width (minutes)
freq = j_block = 15

# one-hot encode the time-of-day block
dummies = True

# feature groups
base = vitals = v_set = medication = True
comments = notes = n_extract = False

# Data pipeline
cleaned = transformer(random, dummies=dummies, j_block=j_block, base=base, vitals=vitals,
                      comments=comments, v_set=v_set, medication=medication,
                      notes=notes, n_extract=n_extract)
training_data, training_labels, holdout_data, holdout_labels = create_dataset(cleaned, freq=freq)

np.save(start + '/tseq_15Tr10jd.npy', training_data)
np.save(start + '/tlabels_15Tr10jd.npy', training_labels)
np.save(start + '/hseq_15Tr10jd.npy', holdout_data)
np.save(start + '/hlabels_15Tr10jd.npy', holdout_labels)

In [None]:
# Experiment: 15-minute steps, every feature group except the note extracts
# -> '*_15Tr15jd.npy'. Uses `random` and `start` from the data-configuration
# cell.

# time-step length and time-of-day block width (minutes)
freq = j_block = 15

# one-hot encode the time-of-day block
dummies = True

# feature groups
base = vitals = comments = v_set = medication = notes = True
n_extract = False

# Data pipeline
cleaned = transformer(random, dummies=dummies, j_block=j_block, base=base, vitals=vitals,
                      comments=comments, v_set=v_set, medication=medication,
                      notes=notes, n_extract=n_extract)
training_data, training_labels, holdout_data, holdout_labels = create_dataset(cleaned, freq=freq)

np.save(start + '/tseq_15Tr15jd.npy', training_data)
np.save(start + '/tlabels_15Tr15jd.npy', training_labels)
np.save(start + '/hseq_15Tr15jd.npy', holdout_data)
np.save(start + '/hlabels_15Tr15jd.npy', holdout_labels)

In [None]:
# Experiment: 15-minute steps, all feature groups -> '*_15Tr_all.npy'.
# Uses `random` and `start` from the data-configuration cell.

# time-step length and time-of-day block width (minutes)
freq = j_block = 15

# one-hot encode the time-of-day block
dummies = True

# feature groups: everything switched on
base = vitals = comments = v_set = medication = notes = n_extract = True

# Data pipeline
cleaned = transformer(random, dummies=dummies, j_block=j_block, base=base, vitals=vitals,
                      comments=comments, v_set=v_set, medication=medication,
                      notes=notes, n_extract=n_extract)
training_data, training_labels, holdout_data, holdout_labels = create_dataset(cleaned, freq=freq)

np.save(start + '/tseq_15Tr_all.npy', training_data)
np.save(start + '/tlabels_15Tr_all.npy', training_labels)
np.save(start + '/hseq_15Tr_all.npy', holdout_data)
np.save(start + '/hlabels_15Tr_all.npy', holdout_labels)

In [None]:
# Unit 30mins

In [None]:
# Experiment: 30-minute steps, base + vitals feature groups -> '*_30Tr5.npy'.
# Uses the sliced frame `random` and the output folder `start` defined in the
# data-configuration cell.

# time-step length and time-of-day block width (minutes)
freq = j_block = 30

# one-hot encode the time-of-day block
dummies = True

# feature groups
base = vitals = True
comments = v_set = medication = notes = n_extract = False

# Data pipeline
cleaned = transformer(random, dummies=dummies, j_block=j_block, base=base, vitals=vitals,
                      comments=comments, v_set=v_set, medication=medication,
                      notes=notes, n_extract=n_extract)
training_data, training_labels, holdout_data, holdout_labels = create_dataset(cleaned, freq=freq)

np.save(start + '/tseq_30Tr5.npy', training_data)
np.save(start + '/tlabels_30Tr5.npy', training_labels)
np.save(start + '/hseq_30Tr5.npy', holdout_data)
np.save(start + '/hlabels_30Tr5.npy', holdout_labels)

In [None]:
# Experiment: 30-minute steps, base + vitals + vital-set + medication feature
# groups -> '*_30Tr10jd.npy'. Uses `random` and `start` from the
# data-configuration cell.

# time-step length and time-of-day block width (minutes)
freq = j_block = 30

# one-hot encode the time-of-day block
dummies = True

# feature groups
base = vitals = v_set = medication = True
comments = notes = n_extract = False

# Data pipeline
cleaned = transformer(random, dummies=dummies, j_block=j_block, base=base, vitals=vitals,
                      comments=comments, v_set=v_set, medication=medication,
                      notes=notes, n_extract=n_extract)
training_data, training_labels, holdout_data, holdout_labels = create_dataset(cleaned, freq=freq)

np.save(start + '/tseq_30Tr10jd.npy', training_data)
np.save(start + '/tlabels_30Tr10jd.npy', training_labels)
np.save(start + '/hseq_30Tr10jd.npy', holdout_data)
np.save(start + '/hlabels_30Tr10jd.npy', holdout_labels)

In [None]:
# Experiment: 30-minute steps, every feature group except the note extracts
# -> '*_30Tr15jd.npy'. Uses `random` and `start` from the data-configuration
# cell.

# time-step length and time-of-day block width (minutes)
freq = j_block = 30

# one-hot encode the time-of-day block
dummies = True

# feature groups
base = vitals = comments = v_set = medication = notes = True
n_extract = False

# Data pipeline
cleaned = transformer(random, dummies=dummies, j_block=j_block, base=base, vitals=vitals,
                      comments=comments, v_set=v_set, medication=medication,
                      notes=notes, n_extract=n_extract)
training_data, training_labels, holdout_data, holdout_labels = create_dataset(cleaned, freq=freq)

np.save(start + '/tseq_30Tr15jd.npy', training_data)
np.save(start + '/tlabels_30Tr15jd.npy', training_labels)
np.save(start + '/hseq_30Tr15jd.npy', holdout_data)
np.save(start + '/hlabels_30Tr15jd.npy', holdout_labels)

In [None]:
# Experiment: 30-minute steps, all feature groups -> '*_30Tr_all.npy'.
# Uses `random` and `start` from the data-configuration cell.

# time-step length and time-of-day block width (minutes)
freq = j_block = 30

# one-hot encode the time-of-day block
dummies = True

# feature groups: everything switched on
base = vitals = comments = v_set = medication = notes = n_extract = True

# Data pipeline
cleaned = transformer(random, dummies=dummies, j_block=j_block, base=base, vitals=vitals,
                      comments=comments, v_set=v_set, medication=medication,
                      notes=notes, n_extract=n_extract)
training_data, training_labels, holdout_data, holdout_labels = create_dataset(cleaned, freq=freq)

np.save(start + '/tseq_30Tr_all.npy', training_data)
np.save(start + '/tlabels_30Tr_all.npy', training_labels)
np.save(start + '/hseq_30Tr_all.npy', holdout_data)
np.save(start + '/hlabels_30Tr_all.npy', holdout_labels)

In [None]:
#Unit 60mins

In [None]:
# Experiment: 60-minute steps, base + vitals feature groups -> '*_60Tr5jd.npy'.
# Uses the sliced frame `random` and the output folder `start` defined in the
# data-configuration cell.
file_name = 'dataset_icu.csv'

# time-step length and time-of-day block width (minutes)
freq = j_block = 60

# one-hot encode the time-of-day block
dummies = True

# feature groups
base = vitals = True
comments = v_set = medication = notes = n_extract = False

# Data pipeline
cleaned = transformer(random, dummies=dummies, j_block=j_block, base=base, vitals=vitals,
                      comments=comments, v_set=v_set, medication=medication,
                      notes=notes, n_extract=n_extract)
training_data, training_labels, holdout_data, holdout_labels = create_dataset(cleaned, freq=freq)

np.save(start + '/tseq_60Tr5jd.npy', training_data)
np.save(start + '/tlabels_60Tr5jd.npy', training_labels)
np.save(start + '/hseq_60Tr5jd.npy', holdout_data)
np.save(start + '/hlabels_60Tr5jd.npy', holdout_labels)

In [None]:
# Experiment: 60-minute steps, base + vitals + vital-set + medication feature
# groups -> '*_60Tr10jd.npy'. Uses `random` and `start` from the
# data-configuration cell.

# time-step length and time-of-day block width (minutes)
freq = j_block = 60

# one-hot encode the time-of-day block
dummies = True

# feature groups
base = vitals = v_set = medication = True
comments = notes = n_extract = False

# Data pipeline
cleaned = transformer(random, dummies=dummies, j_block=j_block, base=base, vitals=vitals,
                      comments=comments, v_set=v_set, medication=medication,
                      notes=notes, n_extract=n_extract)
training_data, training_labels, holdout_data, holdout_labels = create_dataset(cleaned, freq=freq)

np.save(start + '/tseq_60Tr10jd.npy', training_data)
np.save(start + '/tlabels_60Tr10jd.npy', training_labels)
np.save(start + '/hseq_60Tr10jd.npy', holdout_data)
np.save(start + '/hlabels_60Tr10jd.npy', holdout_labels)

In [None]:
# Experiment: 60-minute steps, every feature group except the note extracts
# -> '*_60Tr15jd.npy'. Uses `random` and `start` from the data-configuration
# cell.

# time-step length and time-of-day block width (minutes)
freq = j_block = 60

# one-hot encode the time-of-day block
dummies = True

# feature groups
base = vitals = comments = v_set = medication = notes = True
n_extract = False

# Data pipeline
cleaned = transformer(random, dummies=dummies, j_block=j_block, base=base, vitals=vitals,
                      comments=comments, v_set=v_set, medication=medication,
                      notes=notes, n_extract=n_extract)
training_data, training_labels, holdout_data, holdout_labels = create_dataset(cleaned, freq=freq)

np.save(start + '/tseq_60Tr15jd.npy', training_data)
np.save(start + '/tlabels_60Tr15jd.npy', training_labels)
np.save(start + '/hseq_60Tr15jd.npy', holdout_data)
np.save(start + '/hlabels_60Tr15jd.npy', holdout_labels)

In [None]:
# Experiment: 60-minute steps, all feature groups -> '*_60Tr_all.npy'.
# Uses `random` and `start` from the data-configuration cell.

# time-step length and time-of-day block width (minutes)
freq = j_block = 60

# one-hot encode the time-of-day block
dummies = True

# feature groups: everything switched on
base = vitals = comments = v_set = medication = notes = n_extract = True

# Data pipeline
cleaned = transformer(random, dummies=dummies, j_block=j_block, base=base, vitals=vitals,
                      comments=comments, v_set=v_set, medication=medication,
                      notes=notes, n_extract=n_extract)
training_data, training_labels, holdout_data, holdout_labels = create_dataset(cleaned, freq=freq)

np.save(start + '/tseq_60Tr_all.npy', training_data)
np.save(start + '/tlabels_60Tr_all.npy', training_labels)
np.save(start + '/hseq_60Tr_all.npy', holdout_data)
np.save(start + '/hlabels_60Tr_all.npy', holdout_labels)