In [None]:
import pandas as pd
from itertools import islice
import random
import numpy as np
import json
import os
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import StratifiedShuffleSplit

# Seed Python's and NumPy's global random generators so stochastic steps are repeatable.
seed = 42
random.seed(seed)
np.random.seed(seed)

# Plot styling shared by every figure in the notebook.
# NOTE(review): this import would normally sit with the other imports above.
import seaborn as sn
sn.set_theme(style="white", palette="rocket_r")

Mounted at /content/gdrive


In [None]:
# Load the label table, using its 'patient' column as the index.
labels = pd.read_csv('preprocessed_labels.csv', index_col='patient')

In [None]:
# Keep only these three label columns; everything else in the label table is discarded.
columns_that_matter = [
    'unitdischargeoffset',
    'predictedhospitalmortality',
    'actualhospitalmortality',
]
labels = labels.loc[:, columns_that_matter]

In [None]:
# Load the flat (one row per patient) feature table, indexed by 'patient'.
flats = pd.read_csv('preprocessed_flat.csv', index_col='patient')

In [None]:
# Discard every row whose '> 89' flag equals 1
# (presumably patients older than 89 -- confirm against the preprocessing step).
flats = flats.loc[flats['> 89'].ne(1)]

In [None]:
# Remove the two null-indicator columns and the '> 89' flag from the flat features.
flat_columns_to_drop = ['nullweight', 'nullheight', '> 89']
flats = flats.drop(columns=flat_columns_to_drop)

In [None]:
# Load the two diagnosis tables (`preprocessed_diagnoses.csv` and
# `preprocessed_diagnoses_post.csv`), each indexed by 'patient'.
diagnoses = pd.read_csv('preprocessed_diagnoses.csv', index_col='patient')
post_diagnoses = pd.read_csv('preprocessed_diagnoses_post.csv', index_col='patient')

In [None]:
def _any_diagnosis(frame, columns):
    """Return a 0/1 integer Series that is 1 where any of `columns` is truthy in a row.

    The columns are cast to bool on a temporary copy, so `frame` itself is never
    modified. Note that a NaN entry casts to True.
    """
    return frame[columns].astype('bool').any(axis=1).astype(int)


def diagnosis_processing_MI(diagnoses, post_diagnoses):
    """Collapse one-hot diagnosis columns into five binary per-patient flags.

    Parameters
    ----------
    diagnoses : pandas.DataFrame
        Table indexed by patient containing the myocardial-infarction related
        columns listed below; used for the ``MI`` flag.
    post_diagnoses : pandas.DataFrame
        Table indexed by patient containing the arrhythmia, atrial-fibrillation,
        heart-failure/shock/arrest and peripheral-ischemia columns listed below;
        used for the ``ARR``, ``AF``, ``HF`` and ``PAD`` flags.

    Returns
    -------
    pandas.DataFrame
        Integer columns ``['MI', 'PAD', 'HF', 'AF', 'ARR']`` for the patients
        present in both inputs (inner joins on the index), in the row order of
        ``post_diagnoses``.

    Raises
    ------
    KeyError
        If any of the expected diagnosis columns is missing from its table.

    Notes
    -----
    Neither input frame is modified. Earlier versions cast the source columns
    to bool in place, which leaked into the caller's frames.
    """
    mi_columns = [
        'cardiovascular|chest pain / ASHD|acute coronary syndrome|acute myocardial infarction (no ST elevation)',
        'cardiovascular|chest pain / ASHD|acute coronary syndrome|acute myocardial infarction (with ST elevation)',
        'cardiovascular|chest pain / ASHD|acute coronary syndrome|s/p PTCA',
        'Acute MI location',
        'Acute MI location|inferior',
        'Acute MI location|non-Q',
        'Non-operative|Diagnosis|Cardiovascular|Infarction, acute myocardial (MI)',
        'Cardiovascular (R)|Myocardial Infarction',
        'Cardiovascular (R)|Myocardial Infarction|MI - date unknown',
        'Cardiovascular (R)|Myocardial Infarction|MI - remote',
        'Cardiovascular (R)|Myocardial Infarction|MI - within 6 months',
    ]

    # (flag name, source columns in `post_diagnoses`), in the order they are merged.
    post_flag_columns = [
        ('ARR', [
            'cardiovascular|arrhythmias',
            'cardiovascular|arrhythmias|bradycardia',
            'cardiovascular|arrhythmias|sinus tachycardia',
            'cardiovascular|arrhythmias|ventricular tachycardia',
        ]),
        ('AF', [
            'cardiovascular|arrhythmias|atrial fibrillation',
            'cardiovascular|arrhythmias|atrial fibrillation|with controlled ventricular response',
            'cardiovascular|arrhythmias|atrial fibrillation|with rapid ventricular response',
        ]),
        ('HF', [
            'cardiovascular|ventricular disorders|congestive heart failure',
            'cardiovascular|shock / hypotension|cardiogenic shock',
            'cardiovascular|cardiac arrest',
            'cardiovascular|cardiac arrest|cardiac arrest|witnessed, < 15 minutes CPR',
        ]),
        ('PAD', [
            'cardiovascular|vascular disorders|peripheral vascular ischemia',
        ]),
    ]

    # Start from the MI flag alone; the wide diagnosis table is not needed downstream.
    merged_diagnosis = _any_diagnosis(diagnoses, mi_columns).to_frame('MI')

    # Inner-join one flag at a time with the post-diagnosis flag on the left,
    # which keeps the row order of `post_diagnoses`.
    for flag_name, source_columns in post_flag_columns:
        flag_frame = _any_diagnosis(post_diagnoses, source_columns).to_frame(flag_name)
        merged_diagnosis = flag_frame.merge(merged_diagnosis, left_index=True, right_index=True)

    return merged_diagnosis[['MI', 'PAD', 'HF', 'AF', 'ARR']]

In [None]:
# Derive the per-patient MI / PAD / HF / AF / ARR flags from the two diagnosis tables.
merged_diagnosis = diagnosis_processing_MI(diagnoses, post_diagnoses)

In [None]:
# Inner-join the diagnosis flags with the labels on the patient index.
final = pd.merge(
    merged_diagnosis,
    labels,
    how='inner',
    left_index=True,
    right_index=True,
)

In [None]:
# Restrict the cohort to patients with the MI flag set, then discard the flag column.
non_mi_patients = final.index[final['MI'] == 0]
final = final.drop(index=non_mi_patients).drop(columns=['MI'])

In [None]:
# Inner-join the flat features onto the cohort by patient index, then preview the result.
final = pd.merge(
    final,
    flats,
    how='inner',
    left_index=True,
    right_index=True,
)
final

Unnamed: 0_level_0,PAD,HF,AF,ARR,unitdischargeoffset,predictedhospitalmortality,actualhospitalmortality,gender,age,admissionheight,...,physicianspeciality_surgery-vascular,physicianspeciality_unknown,numbedscategory_100 - 249,numbedscategory_250 - 499,numbedscategory_<100,numbedscategory_>= 500,region_Midwest,region_Northeast,region_South,region_West
patient,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
141203,0,0,0,0,1869,0.470973,0,0.0,77.0,160.0,...,0,0,1,0,0,0,1,0,0,0
141844,0,0,0,1,4110,0.001299,0,0.0,53.0,162.6,...,0,0,0,0,0,1,1,0,0,0
142173,0,0,0,0,37333,0.112978,0,0.0,59.0,162.6,...,0,0,0,0,0,0,1,0,0,0
143687,0,0,0,0,2594,0.017727,0,1.0,53.0,182.9,...,0,0,1,0,0,0,1,0,0,0
144108,0,0,1,1,10458,0.068377,0,1.0,55.0,170.2,...,0,0,0,0,0,1,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3352895,0,0,0,0,4172,0.225746,1,0.0,83.0,147.3,...,0,0,0,0,0,1,0,0,1,0
3352914,0,0,0,0,982,0.020733,0,1.0,68.0,180.3,...,0,0,0,0,0,1,0,0,1,0
3352962,0,0,0,0,2241,0.030529,0,1.0,60.0,182.9,...,0,0,0,0,0,1,0,0,1,0
3353117,0,1,1,1,26282,-1.000000,1,1.0,85.0,162.6,...,0,0,0,0,0,1,0,0,1,0


In [None]:
# List the available column names (used to find the unit-type indicator columns).
final.columns

Index(['PAD', 'HF', 'AF', 'ARR', 'unitdischargeoffset',
       'predictedhospitalmortality', 'actualhospitalmortality', 'gender',
       'age', 'admissionheight', 'admissionweight', 'hour', 'teachingstatus',
       'ethnicity_African American', 'ethnicity_Asian', 'ethnicity_Caucasian',
       'ethnicity_Hispanic', 'ethnicity_Native American',
       'ethnicity_Other/Unknown', 'unittype_CCU-CTICU', 'unittype_CSICU',
       'unittype_CTICU', 'unittype_Cardiac ICU', 'unittype_MICU',
       'unittype_Med-Surg ICU', 'unittype_Neuro ICU', 'unittype_SICU',
       'unitadmitsource_Acute Care/Floor', 'unitadmitsource_Direct Admit',
       'unitadmitsource_Emergency Department', 'unitadmitsource_Floor',
       'unitadmitsource_Operating Room', 'unitadmitsource_Other Hospital',
       'unitadmitsource_PACU', 'unitadmitsource_Recovery Room',
       'unitadmitsource_Step-Down Unit (SDU)', 'unitadmitsource_misc',
       'unitvisitnumber_1', 'unitvisitnumber_2', 'unitvisitnumber_3',
       'unitvisit

In [None]:
# Keep only the rows where at least one of the cardiac unit-type indicators equals 1.
cardiac_unit_columns = [
    'unittype_CCU-CTICU',
    'unittype_CSICU',
    'unittype_CTICU',
    'unittype_Cardiac ICU',
]
final = final.loc[final[cardiac_unit_columns].eq(1).any(axis=1)]

In [None]:
# Preview the cohort after the unit-type filter.
final

Unnamed: 0_level_0,PAD,HF,AF,ARR,unitdischargeoffset,predictedhospitalmortality,actualhospitalmortality,gender,age,admissionheight,...,physicianspeciality_surgery-vascular,physicianspeciality_unknown,numbedscategory_100 - 249,numbedscategory_250 - 499,numbedscategory_<100,numbedscategory_>= 500,region_Midwest,region_Northeast,region_South,region_West
patient,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
141844,0,0,0,1,4110,0.001299,0,0.0,53.0,162.6,...,0,0,0,0,0,1,1,0,0,0
144989,0,0,0,0,2760,0.092861,0,0.0,78.0,158.0,...,0,0,0,0,0,1,1,0,0,0
145049,0,0,0,0,1266,0.022707,0,1.0,60.0,185.4,...,0,0,0,0,0,1,1,0,0,0
146512,0,1,0,1,11270,-1.000000,0,0.0,62.0,170.2,...,0,0,0,0,0,1,1,0,0,0
146619,0,1,0,1,7110,-1.000000,0,1.0,74.0,182.8,...,0,0,0,0,0,1,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3352721,0,0,0,0,23834,0.636440,0,0.0,88.0,157.5,...,0,0,0,0,0,1,0,0,1,0
3352781,0,0,0,0,1658,0.022316,0,1.0,60.0,170.2,...,0,0,0,0,0,1,0,0,1,0
3352914,0,0,0,0,982,0.020733,0,1.0,68.0,180.3,...,0,0,0,0,0,1,0,0,1,0
3352962,0,0,0,0,2241,0.030529,0,1.0,60.0,182.9,...,0,0,0,0,0,1,0,0,1,0


In [None]:
# Load the lab time series with a two-level (patient, time) index.
timeseries_lab = pd.read_csv('timeseries_lab.csv', index_col=['patient', 'time'])

In [None]:
# Preview the raw lab series (times are still strings such as '0 days 03:51:00').
timeseries_lab

Unnamed: 0_level_0,Unnamed: 1_level_0,-basos,-eos,-lymphs,-monos,-polys,ALT (SGPT),AST (SGOT),BUN,Base Excess,FiO2,...,paCO2,paO2,phosphate,platelets x 1000,potassium,sodium,total bilirubin,total protein,troponin - I,urinary specific gravity
patient,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
141168,0 days 03:51:00,,,,,,,,,,,...,,,,,,,,,,
141168,0 days 08:36:00,0.0,1.0,19.0,19.0,61.0,40.0,59.0,26.0,,,...,,,,209.0,4.0,139.0,2.6,7.1,,
141168,0 days 18:53:00,0.0,0.0,6.0,14.0,80.0,358.0,878.0,27.0,,,...,,,,213.0,4.2,139.0,4.1,7.1,,
141168,1 days 06:05:00,,,,,,,,,,28.0,...,46.0,41.0,,,,,,,,
141168,1 days 09:30:00,,,,,,,,,,100.0,...,44.0,42.0,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3353254,0 days 03:31:00,,,,,,,,,,,...,,,,,,,,,,
3353254,0 days 06:34:00,,,,,,,,,,,...,,,,,,,,,,
3353263,-1 days +23:23:00,,,,,,,,,,,...,,,,,,,,,,
3353263,-1 days +23:53:00,0.0,1.0,24.0,10.0,,,,13.0,,,...,,,2.5,162.5,4.1,135.0,,,,


In [None]:
# Collapse duplicate (patient, time) index entries to their mean.
timeseries_lab = timeseries_lab.groupby(level=[0, 1]).mean()

# Round every time-stamp up to the next full hour. The offsets are parsed as
# timedeltas (unparseable entries become NaT) and ceiled directly, so no
# round trip through an arbitrary anchor date is needed.
timeseries_lab = timeseries_lab.reset_index(level=1)
timeseries_lab['time'] = pd.to_timedelta(timeseries_lab['time'], errors='coerce').dt.ceil('h')

# Re-index by time, with 'patient' as the first regular column, so each
# patient can be resampled on its own hourly grid.
timeseries_lab = timeseries_lab.reset_index().set_index('time')

# Hourly grid per patient: bins are closed and labelled on their right edge,
# values inside a bin are averaged and empty bins become NaN rows.
# errors='ignore' because, depending on the pandas version, the grouping
# column may or may not be present in the aggregated result.
resampled = (
    timeseries_lab
    .groupby('patient')
    .resample('h', closed='right', label='right')
    .mean()
    .drop(columns='patient', errors='ignore')
)

In [None]:
# Preview the hourly grid before forward-filling (empty hours show up as NaN rows).
resampled

Unnamed: 0_level_0,Unnamed: 1_level_0,-basos,-eos,-lymphs,-monos,-polys,ALT (SGPT),AST (SGOT),BUN,Base Excess,FiO2,...,paCO2,paO2,phosphate,platelets x 1000,potassium,sodium,total bilirubin,total protein,troponin - I,urinary specific gravity
patient,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
141168,0 days 04:00:00,,,,,,,,,,,...,,,,,,,,,,
141168,0 days 05:00:00,,,,,,,,,,,...,,,,,,,,,,
141168,0 days 06:00:00,,,,,,,,,,,...,,,,,,,,,,
141168,0 days 07:00:00,,,,,,,,,,,...,,,,,,,,,,
141168,0 days 08:00:00,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3353263,0 days 09:00:00,,,,,,,,,,,...,,,,,,,,,,
3353263,0 days 10:00:00,,,,,,,,,,,...,,,,,,,,,,
3353263,0 days 11:00:00,,,,,,,,,,,...,,,,,,,,,,
3353263,0 days 12:00:00,,,,,,,,,,,...,,,,,,,,,,


In [None]:
# Forward-fill within each patient (index level 0), so values are never carried over
# from one patient to the next; `update` writes the filled values back in place.
# Hours before a patient's first measurement of a feature remain NaN.
resampled.update(resampled.groupby(level=0).ffill())

In [None]:
# Rebind the name to the resampled frame. This is an alias, not a copy: in-place
# operations on `timeseries_lab` in later cells also modify `resampled`.
timeseries_lab = resampled

In [None]:
# Express the time index level as minutes (float) instead of a timedelta.
# `dt.total_seconds()` avoids reinterpreting the timedelta's raw nanosecond
# integers via `astype(int)`, and keeps missing times as NaN rather than a
# huge sentinel integer. Rebinding (no `inplace`) also leaves `resampled`
# untouched.
timeseries_lab = timeseries_lab.reset_index(level=1)
timeseries_lab['time'] = (
    pd.to_timedelta(timeseries_lab['time'], errors='coerce').dt.total_seconds() / 60
)

# Restore the (patient, time) index.
timeseries_lab = timeseries_lab.set_index('time', append=True)

In [None]:
# Preview the forward-filled lab series; the time level is now expressed in minutes.
timeseries_lab

Unnamed: 0_level_0,Unnamed: 1_level_0,-basos,-eos,-lymphs,-monos,-polys,ALT (SGPT),AST (SGOT),BUN,Base Excess,FiO2,...,paCO2,paO2,phosphate,platelets x 1000,potassium,sodium,total bilirubin,total protein,troponin - I,urinary specific gravity
patient,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
141168,240.0,,,,,,,,,,,...,,,,,,,,,,
141168,300.0,,,,,,,,,,,...,,,,,,,,,,
141168,360.0,,,,,,,,,,,...,,,,,,,,,,
141168,420.0,,,,,,,,,,,...,,,,,,,,,,
141168,480.0,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3353263,540.0,0.0,1.0,24.0,10.0,,,,13.0,,,...,,,2.5,162.5,4.1,135.0,,,,
3353263,600.0,0.0,1.0,24.0,10.0,,,,13.0,,,...,,,2.5,162.5,4.1,135.0,,,,
3353263,660.0,0.0,1.0,24.0,10.0,,,,13.0,,,...,,,2.5,162.5,4.1,135.0,,,,
3353263,720.0,0.0,1.0,24.0,10.0,,,,13.0,,,...,,,2.5,162.5,4.1,135.0,,,,


In [None]:
# Widen the display limits so long per-feature listings are not truncated.
pd.set_option('display.max_columns', 1000)
pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_colwidth', 1000)

# Missing values per feature, as a percentage of the rows in the frame.
# The row count is taken from the frame itself instead of a hard-coded
# number, so the figures stay correct when the input data changes.
timeseries_lab.isnull().sum(axis=0) / len(timeseries_lab) * 100

-basos                      33.802938
-eos                        29.989089
-lymphs                     26.924785
-monos                      27.711485
-polys                      33.987222
ALT (SGPT)                  29.987380
AST (SGOT)                  29.233131
BUN                          5.427814
Base Excess                 50.143656
FiO2                        43.093454
HCO3                        42.950110
Hct                          4.906760
Hgb                          5.054406
MCH                         12.028336
MCHC                         8.010392
MCV                          7.998531
MPV                         34.484094
O2 Sat (%)                  48.858096
PT                          34.954585
PT - INR                    33.039911
PTT                         48.192488
RBC                          5.931812
RDW                         11.776894
WBC x 1000                   5.706223
albumin                     27.908580
alkaline phos.              30.670678
anion gap   

In [None]:
# Remove these lab features from the frame.
lab_columns_to_drop = [
    'urinary specific gravity', 'troponin - I', 'total bilirubin', 'total protein',
    'pH', 'paCO2', 'paO2', 'phosphate', 'PTT', 'O2 Sat (%)', 'HCO3', 'FiO2',
    'Base Excess', '-basos', '-eos', '-lymphs', '-monos', '-polys',
    'ALT (SGPT)', 'AST (SGOT)', 'MPV', 'PT', 'PT - INR', 'lactate',
    'bedside glucose', 'magnesium', 'albumin', 'alkaline phos.', 'anion gap',
    'MCH', 'RDW', 'bicarbonate',
]
timeseries_lab = timeseries_lab.drop(columns=lab_columns_to_drop)

In [None]:
# Drop every patient that has at least one row with a missing value.
# The patient ids are de-duplicated first: a patient with many incomplete
# rows would otherwise appear once per row in the list.
rows_with_gaps = timeseries_lab.isnull().any(axis=1)
missing_samples = timeseries_lab.index[rows_with_gaps].get_level_values(0).unique().tolist()

# Filter with a mask instead of an in-place drop so the cell is a plain rebind.
keep_rows = ~timeseries_lab.index.get_level_values(0).isin(missing_samples)
timeseries_lab = timeseries_lab[keep_rows]

In [None]:
# Preview the lab series after removing patients with incomplete rows.
timeseries_lab

Unnamed: 0_level_0,Unnamed: 1_level_0,BUN,Hct,Hgb,MCHC,MCV,RBC,WBC x 1000,calcium,chloride,creatinine,glucose,platelets x 1000,potassium,sodium
patient,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
141194,-900.0,41.0,29.2,9.80,33.6,80.2,3.640,11.80,9.9,102.0,2.94,138.0,298.0,4.6,134.0
141194,-840.0,41.0,29.2,9.80,33.6,80.2,3.640,11.80,9.9,102.0,2.94,138.0,298.0,4.6,134.0
141194,-780.0,41.0,29.2,9.80,33.6,80.2,3.640,11.80,9.9,102.0,2.94,138.0,298.0,4.6,134.0
141194,-720.0,41.0,29.2,9.80,33.6,80.2,3.640,11.80,9.9,102.0,2.94,138.0,298.0,4.6,134.0
141194,-660.0,41.0,29.2,9.80,33.6,80.2,3.640,11.80,9.9,102.0,2.94,138.0,298.0,4.6,134.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3353263,540.0,13.0,44.5,14.85,33.0,82.0,5.455,6.45,9.5,101.0,1.06,100.0,162.5,4.1,135.0
3353263,600.0,13.0,44.5,14.85,33.0,82.0,5.455,6.45,9.5,101.0,1.06,100.0,162.5,4.1,135.0
3353263,660.0,13.0,44.5,14.85,33.0,82.0,5.455,6.45,9.5,101.0,1.06,100.0,162.5,4.1,135.0
3353263,720.0,13.0,44.5,14.85,33.0,82.0,5.455,6.45,9.5,101.0,1.06,100.0,162.5,4.1,135.0


In [None]:
# Number of distinct patients (index level 0) remaining in the lab series.
timeseries_lab.index.get_level_values(0).unique().size

72184

In [None]:
# Attach the hourly lab values to the static per-patient features. `final` is indexed by
# patient while `timeseries_lab` is indexed by (patient, time), so each patient's static
# row is repeated for every time step (see the displayed result). The default inner join
# drops patients that are missing from either frame.
final = final.merge(timeseries_lab, left_index=True, right_index=True)

In [None]:
# Preview the cohort with the lab features attached, now indexed by (patient, time).
final

Unnamed: 0_level_0,Unnamed: 1_level_0,PAD,HF,AF,ARR,unitdischargeoffset,predictedhospitalmortality,actualhospitalmortality,gender,age,admissionheight,admissionweight,hour,teachingstatus,ethnicity_African American,ethnicity_Asian,ethnicity_Caucasian,ethnicity_Hispanic,ethnicity_Native American,ethnicity_Other/Unknown,unittype_CCU-CTICU,unittype_CSICU,unittype_CTICU,unittype_Cardiac ICU,unittype_MICU,unittype_Med-Surg ICU,unittype_Neuro ICU,unittype_SICU,unitadmitsource_Acute Care/Floor,unitadmitsource_Direct Admit,unitadmitsource_Emergency Department,unitadmitsource_Floor,unitadmitsource_Operating Room,unitadmitsource_Other Hospital,unitadmitsource_PACU,unitadmitsource_Recovery Room,unitadmitsource_Step-Down Unit (SDU),unitadmitsource_misc,unitvisitnumber_1,unitvisitnumber_2,unitvisitnumber_3,unitvisitnumber_misc,unitstaytype_admit,unitstaytype_readmit,unitstaytype_transfer,physicianspeciality_Specialty Not Specified,physicianspeciality_cardiology,physicianspeciality_critical care medicine (CCM),physicianspeciality_family practice,physicianspeciality_hospitalist,physicianspeciality_internal medicine,physicianspeciality_misc,physicianspeciality_neurology,physicianspeciality_other,physicianspeciality_pulmonary,physicianspeciality_pulmonary/CCM,physicianspeciality_surgery-cardiac,physicianspeciality_surgery-general,physicianspeciality_surgery-neuro,physicianspeciality_surgery-trauma,physicianspeciality_surgery-vascular,physicianspeciality_unknown,numbedscategory_100 - 249,numbedscategory_250 - 499,numbedscategory_<100,numbedscategory_>= 500,region_Midwest,region_Northeast,region_South,region_West,BUN,Hct,Hgb,MCHC,MCV,RBC,WBC x 1000,calcium,chloride,creatinine,glucose,platelets x 1000,potassium,sodium
patient,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1
144989,-660.0,0,0,0,0,2760,0.092861,0,0.0,78.0,158.0,60.5,3,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,15.0,37.8,13.6,36.0,89.4,4.23,4.7,8.9,97.0,0.86,111.0,228.0,3.3,132.0
144989,-600.0,0,0,0,0,2760,0.092861,0,0.0,78.0,158.0,60.5,3,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,15.0,37.8,13.6,36.0,89.4,4.23,4.7,8.9,97.0,0.86,111.0,228.0,3.3,132.0
144989,-540.0,0,0,0,0,2760,0.092861,0,0.0,78.0,158.0,60.5,3,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,15.0,37.8,13.6,36.0,89.4,4.23,4.7,8.9,97.0,0.86,111.0,228.0,3.3,132.0
144989,-480.0,0,0,0,0,2760,0.092861,0,0.0,78.0,158.0,60.5,3,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,15.0,37.8,13.6,36.0,89.4,4.23,4.7,8.9,97.0,0.86,111.0,228.0,3.3,132.0
144989,-420.0,0,0,0,0,2760,0.092861,0,0.0,78.0,158.0,60.5,3,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,15.0,37.8,13.6,36.0,89.4,4.23,4.7,8.9,97.0,0.86,111.0,228.0,3.3,132.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3352962,1440.0,0,0,0,0,2241,0.030529,0,1.0,60.0,182.9,94.5,7,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,16.0,45.0,15.0,34.0,94.0,4.78,11.5,8.9,103.0,0.94,169.0,134.0,4.9,134.0
3352962,1500.0,0,0,0,0,2241,0.030529,0,1.0,60.0,182.9,94.5,7,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,16.0,45.0,15.0,34.0,94.0,4.78,11.5,8.9,103.0,0.94,169.0,134.0,4.9,134.0
3352962,1560.0,0,0,0,0,2241,0.030529,0,1.0,60.0,182.9,94.5,7,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,16.0,45.0,15.0,34.0,94.0,4.78,11.5,8.9,103.0,0.94,169.0,134.0,4.9,134.0
3352962,1620.0,0,0,0,0,2241,0.030529,0,1.0,60.0,182.9,94.5,7,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,11.0,45.0,15.1,34.0,93.0,4.80,9.7,8.8,105.0,0.82,103.0,116.0,4.1,137.0


In [None]:
# Load the aperiodic time series with a two-level (patient, time) index.
timeseries_aperiodic = pd.read_csv('timeseries_aperiodic.csv', index_col=['patient', 'time'])

In [None]:
# Preview the raw aperiodic series (the three non-invasive blood-pressure columns).
timeseries_aperiodic

Unnamed: 0_level_0,Unnamed: 1_level_0,noninvasivesystolic,noninvasivediastolic,noninvasivemean
patient,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
141168,0 days 02:03:00,106.0,68.0,81.0
141168,0 days 02:18:00,111.0,62.0,82.0
141168,0 days 05:49:00,,,79.0
141168,0 days 07:21:00,,,62.0
141168,0 days 23:18:00,,,27.0
...,...,...,...,...
3353263,0 days 11:50:00,118.0,85.0,98.0
3353263,0 days 12:50:00,128.0,91.0,106.0
3353263,0 days 13:50:00,131.0,88.0,107.0
3353263,0 days 14:50:00,147.0,98.0,118.0


In [None]:
# Collapse duplicate (patient, time) index entries to their mean.
timeseries_aperiodic = timeseries_aperiodic.groupby(level=[0, 1]).mean()

# Round every time-stamp up to the next full hour. The offsets are parsed as
# timedeltas (unparseable entries become NaT) and ceiled directly, so no
# round trip through an arbitrary anchor date is needed.
timeseries_aperiodic = timeseries_aperiodic.reset_index(level=1)
timeseries_aperiodic['time'] = (
    pd.to_timedelta(timeseries_aperiodic['time'], errors='coerce').dt.ceil('h')
)

# Re-index by time, with 'patient' as the first regular column, so each
# patient can be resampled on its own hourly grid.
timeseries_aperiodic = timeseries_aperiodic.reset_index().set_index('time')

# Hourly grid per patient: bins are closed and labelled on their right edge,
# values inside a bin are averaged and empty bins become NaN rows.
# errors='ignore' because, depending on the pandas version, the grouping
# column may or may not be present in the aggregated result.
resampled = (
    timeseries_aperiodic
    .groupby('patient')
    .resample('h', closed='right', label='right')
    .mean()
    .drop(columns='patient', errors='ignore')
)

In [None]:
# Preview the hourly blood-pressure grid before forward-filling.
resampled

Unnamed: 0_level_0,Unnamed: 1_level_0,noninvasivesystolic,noninvasivediastolic,noninvasivemean
patient,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
141168,0 days 03:00:00,108.5,65.0,81.5
141168,0 days 04:00:00,,,
141168,0 days 05:00:00,,,
141168,0 days 06:00:00,,,79.0
141168,0 days 07:00:00,,,
...,...,...,...,...
3353263,0 days 13:00:00,128.0,91.0,106.0
3353263,0 days 14:00:00,131.0,88.0,107.0
3353263,0 days 15:00:00,147.0,98.0,118.0
3353263,0 days 16:00:00,144.0,95.0,114.0


In [None]:
# Forward-fill within each patient (index level 0), so values are never carried over
# from one patient to the next; `update` writes the filled values back in place.
# Hours before a patient's first measurement of a feature remain NaN.
resampled.update(resampled.groupby(level=0).ffill())

In [None]:
# Rebind the name to the resampled frame. This is an alias, not a copy: in-place
# operations on `timeseries_aperiodic` in later cells also modify `resampled`.
timeseries_aperiodic = resampled

In [None]:
# Express the time index level as minutes (float) instead of a timedelta.
# `dt.total_seconds()` avoids reinterpreting the timedelta's raw nanosecond
# integers via `astype(int)`, and keeps missing times as NaN rather than a
# huge sentinel integer. Rebinding (no `inplace`) also leaves `resampled`
# untouched.
timeseries_aperiodic = timeseries_aperiodic.reset_index(level=1)
timeseries_aperiodic['time'] = (
    pd.to_timedelta(timeseries_aperiodic['time'], errors='coerce').dt.total_seconds() / 60
)

# Restore the (patient, time) index.
timeseries_aperiodic = timeseries_aperiodic.set_index('time', append=True)

In [None]:
# Preview the forward-filled blood-pressure series; the time level is now in minutes.
timeseries_aperiodic

Unnamed: 0_level_0,Unnamed: 1_level_0,noninvasivesystolic,noninvasivediastolic,noninvasivemean
patient,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
141168,180.0,108.5,65.0,81.5
141168,240.0,108.5,65.0,81.5
141168,300.0,108.5,65.0,81.5
141168,360.0,108.5,65.0,79.0
141168,420.0,108.5,65.0,79.0
...,...,...,...,...
3353263,780.0,128.0,91.0,106.0
3353263,840.0,131.0,88.0,107.0
3353263,900.0,147.0,98.0,118.0
3353263,960.0,144.0,95.0,114.0


In [None]:
# Widen the display limits so long per-feature listings are not truncated.
pd.set_option('display.max_columns', 1000)
pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_colwidth', 1000)

# Missing values per feature, as a percentage of the rows in the frame.
# The row count is taken from the frame itself instead of a hard-coded
# number, so the figures stay correct when the input data changes.
timeseries_aperiodic.isnull().sum(axis=0) / len(timeseries_aperiodic) * 100

noninvasivesystolic     0.345823
noninvasivediastolic    0.344866
noninvasivemean         0.332653
dtype: float64

In [None]:
# Number of distinct patients (index level 0) in the blood-pressure series.
timeseries_aperiodic.index.get_level_values(0).unique().size

145754

In [None]:
# Drop every patient that has at least one row with a missing value.
# The patient ids are de-duplicated first: a patient with many incomplete
# rows would otherwise appear once per row in the list.
rows_with_gaps = timeseries_aperiodic.isnull().any(axis=1)
missing_samples = timeseries_aperiodic.index[rows_with_gaps].get_level_values(0).unique().tolist()

# Filter with a mask instead of an in-place drop so the cell is a plain rebind.
keep_rows = ~timeseries_aperiodic.index.get_level_values(0).isin(missing_samples)
timeseries_aperiodic = timeseries_aperiodic[keep_rows]

In [None]:
# Attach the hourly blood-pressure values. The default inner join on the index keeps only
# index entries present in both frames.
# NOTE(review): assumes `final` is already indexed by (patient, time) from the lab merge.
final = final.merge(timeseries_aperiodic, left_index=True, right_index=True)

In [None]:
# Preview the cohort with both the lab and the blood-pressure features attached.
final

Unnamed: 0_level_0,Unnamed: 1_level_0,PAD,HF,AF,ARR,unitdischargeoffset,predictedhospitalmortality,actualhospitalmortality,gender,age,admissionheight,admissionweight,hour,teachingstatus,ethnicity_African American,ethnicity_Asian,ethnicity_Caucasian,ethnicity_Hispanic,ethnicity_Native American,ethnicity_Other/Unknown,unittype_CCU-CTICU,unittype_CSICU,unittype_CTICU,unittype_Cardiac ICU,unittype_MICU,unittype_Med-Surg ICU,unittype_Neuro ICU,unittype_SICU,unitadmitsource_Acute Care/Floor,unitadmitsource_Direct Admit,unitadmitsource_Emergency Department,unitadmitsource_Floor,unitadmitsource_Operating Room,unitadmitsource_Other Hospital,unitadmitsource_PACU,unitadmitsource_Recovery Room,unitadmitsource_Step-Down Unit (SDU),unitadmitsource_misc,unitvisitnumber_1,unitvisitnumber_2,unitvisitnumber_3,unitvisitnumber_misc,unitstaytype_admit,unitstaytype_readmit,unitstaytype_transfer,physicianspeciality_Specialty Not Specified,physicianspeciality_cardiology,physicianspeciality_critical care medicine (CCM),physicianspeciality_family practice,physicianspeciality_hospitalist,physicianspeciality_internal medicine,physicianspeciality_misc,physicianspeciality_neurology,physicianspeciality_other,physicianspeciality_pulmonary,physicianspeciality_pulmonary/CCM,physicianspeciality_surgery-cardiac,physicianspeciality_surgery-general,physicianspeciality_surgery-neuro,physicianspeciality_surgery-trauma,physicianspeciality_surgery-vascular,physicianspeciality_unknown,numbedscategory_100 - 249,numbedscategory_250 - 499,numbedscategory_<100,numbedscategory_>= 500,region_Midwest,region_Northeast,region_South,region_West,BUN,Hct,Hgb,MCHC,MCV,RBC,WBC x 1000,calcium,chloride,creatinine,glucose,platelets x 1000,potassium,sodium,noninvasivesystolic,noninvasivediastolic,noninvasivemean
patient,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1
144989,60.0,0,0,0,0,2760,0.092861,0,0.0,78.0,158.0,60.5,3,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,15.0,40.3,14.3,35.5,91.2,4.42,5.2,9.3,99.0,0.95,105.0,236.0,3.8,136.0,111.00,79.0,91.00
144989,120.0,0,0,0,0,2760,0.092861,0,0.0,78.0,158.0,60.5,3,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,15.0,40.3,14.3,35.5,91.2,4.42,5.2,9.3,99.0,0.95,105.0,236.0,3.8,136.0,110.75,75.0,88.25
144989,180.0,0,0,0,0,2760,0.092861,0,0.0,78.0,158.0,60.5,3,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,15.0,40.3,14.3,35.5,91.2,4.42,5.2,9.3,99.0,0.95,105.0,236.0,3.8,136.0,122.00,70.0,88.75
144989,240.0,0,0,0,0,2760,0.092861,0,0.0,78.0,158.0,60.5,3,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,15.0,40.3,14.3,35.5,91.2,4.42,5.2,9.3,99.0,0.95,105.0,236.0,3.8,136.0,103.00,66.0,78.00
144989,300.0,0,0,0,0,2760,0.092861,0,0.0,78.0,158.0,60.5,3,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,15.0,40.3,14.3,35.5,91.2,4.42,5.2,9.3,99.0,0.95,105.0,236.0,3.8,136.0,103.00,62.0,77.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3352962,1440.0,0,0,0,0,2241,0.030529,0,1.0,60.0,182.9,94.5,7,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,16.0,45.0,15.0,34.0,94.0,4.78,11.5,8.9,103.0,0.94,169.0,134.0,4.9,134.0,101.00,72.0,82.00
3352962,1500.0,0,0,0,0,2241,0.030529,0,1.0,60.0,182.9,94.5,7,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,16.0,45.0,15.0,34.0,94.0,4.78,11.5,8.9,103.0,0.94,169.0,134.0,4.9,134.0,101.00,72.0,82.00
3352962,1560.0,0,0,0,0,2241,0.030529,0,1.0,60.0,182.9,94.5,7,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,16.0,45.0,15.0,34.0,94.0,4.78,11.5,8.9,103.0,0.94,169.0,134.0,4.9,134.0,100.00,70.0,80.00
3352962,1620.0,0,0,0,0,2241,0.030529,0,1.0,60.0,182.9,94.5,7,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,11.0,45.0,15.1,34.0,93.0,4.80,9.7,8.8,105.0,0.82,103.0,116.0,4.1,137.0,100.00,70.0,80.00


In [None]:
# Element-wise mask of `final`: True wherever a value is negative.
final.lt(0)

Unnamed: 0_level_0,Unnamed: 1_level_0,PAD,HF,AF,ARR,unitdischargeoffset,predictedhospitalmortality,actualhospitalmortality,gender,age,admissionheight,admissionweight,hour,teachingstatus,ethnicity_African American,ethnicity_Asian,ethnicity_Caucasian,ethnicity_Hispanic,ethnicity_Native American,ethnicity_Other/Unknown,unittype_CCU-CTICU,unittype_CSICU,unittype_CTICU,unittype_Cardiac ICU,unittype_MICU,unittype_Med-Surg ICU,unittype_Neuro ICU,unittype_SICU,unitadmitsource_Acute Care/Floor,unitadmitsource_Direct Admit,unitadmitsource_Emergency Department,unitadmitsource_Floor,unitadmitsource_Operating Room,unitadmitsource_Other Hospital,unitadmitsource_PACU,unitadmitsource_Recovery Room,unitadmitsource_Step-Down Unit (SDU),unitadmitsource_misc,unitvisitnumber_1,unitvisitnumber_2,unitvisitnumber_3,unitvisitnumber_misc,unitstaytype_admit,unitstaytype_readmit,unitstaytype_transfer,physicianspeciality_Specialty Not Specified,physicianspeciality_cardiology,physicianspeciality_critical care medicine (CCM),physicianspeciality_family practice,physicianspeciality_hospitalist,physicianspeciality_internal medicine,physicianspeciality_misc,physicianspeciality_neurology,physicianspeciality_other,physicianspeciality_pulmonary,physicianspeciality_pulmonary/CCM,physicianspeciality_surgery-cardiac,physicianspeciality_surgery-general,physicianspeciality_surgery-neuro,physicianspeciality_surgery-trauma,physicianspeciality_surgery-vascular,physicianspeciality_unknown,numbedscategory_100 - 249,numbedscategory_250 - 499,numbedscategory_<100,numbedscategory_>= 500,region_Midwest,region_Northeast,region_South,region_West,BUN,Hct,Hgb,MCHC,MCV,RBC,WBC x 1000,calcium,chloride,creatinine,glucose,platelets x 1000,potassium,sodium,noninvasivesystolic,noninvasivediastolic,noninvasivemean
patient,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1
144989,60.0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
144989,120.0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
144989,180.0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
144989,240.0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
144989,300.0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3352962,1440.0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
3352962,1500.0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
3352962,1560.0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
3352962,1620.0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [None]:
# Flag every column of `final` that holds at least one negative value.
# DataFrame.any() reduces column-wise natively, so the Python-level
# apply(lambda x: any(x)) loop over each column is not needed.
(final < 0).any()

PAD                                                 False
HF                                                  False
AF                                                  False
ARR                                                 False
unitdischargeoffset                                 False
predictedhospitalmortality                           True
actualhospitalmortality                             False
gender                                              False
age                                                 False
admissionheight                                     False
admissionweight                                     False
hour                                                False
teachingstatus                                      False
ethnicity_African American                          False
ethnicity_Asian                                     False
ethnicity_Caucasian                                 False
ethnicity_Hispanic                                  False
ethnicity_Nati

In [None]:
# Show the rows whose predicted hospital mortality is zero or negative.
final[final['predictedhospitalmortality'].le(0)]

Unnamed: 0_level_0,Unnamed: 1_level_0,PAD,HF,AF,ARR,unitdischargeoffset,predictedhospitalmortality,actualhospitalmortality,gender,age,admissionheight,admissionweight,hour,teachingstatus,ethnicity_African American,ethnicity_Asian,ethnicity_Caucasian,ethnicity_Hispanic,ethnicity_Native American,ethnicity_Other/Unknown,unittype_CCU-CTICU,unittype_CSICU,unittype_CTICU,unittype_Cardiac ICU,unittype_MICU,unittype_Med-Surg ICU,unittype_Neuro ICU,unittype_SICU,unitadmitsource_Acute Care/Floor,unitadmitsource_Direct Admit,unitadmitsource_Emergency Department,unitadmitsource_Floor,unitadmitsource_Operating Room,unitadmitsource_Other Hospital,unitadmitsource_PACU,unitadmitsource_Recovery Room,unitadmitsource_Step-Down Unit (SDU),unitadmitsource_misc,unitvisitnumber_1,unitvisitnumber_2,unitvisitnumber_3,unitvisitnumber_misc,unitstaytype_admit,unitstaytype_readmit,unitstaytype_transfer,physicianspeciality_Specialty Not Specified,physicianspeciality_cardiology,physicianspeciality_critical care medicine (CCM),physicianspeciality_family practice,physicianspeciality_hospitalist,physicianspeciality_internal medicine,physicianspeciality_misc,physicianspeciality_neurology,physicianspeciality_other,physicianspeciality_pulmonary,physicianspeciality_pulmonary/CCM,physicianspeciality_surgery-cardiac,physicianspeciality_surgery-general,physicianspeciality_surgery-neuro,physicianspeciality_surgery-trauma,physicianspeciality_surgery-vascular,physicianspeciality_unknown,numbedscategory_100 - 249,numbedscategory_250 - 499,numbedscategory_<100,numbedscategory_>= 500,region_Midwest,region_Northeast,region_South,region_West,BUN,Hct,Hgb,MCHC,MCV,RBC,WBC x 1000,calcium,chloride,creatinine,glucose,platelets x 1000,potassium,sodium,noninvasivesystolic,noninvasivediastolic,noninvasivemean
patient,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1
146709,120.0,0,0,1,1,15993,-1.0,0,0.0,61.0,172.7,139.0,17,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,16.0,23.1,7.6,32.9,91.7,2.52,14.4,8.2,104.0,2.49,118.0,385.0,3.6,140.0,129.333333,75.000000,93.333333
146709,180.0,0,0,1,1,15993,-1.0,0,0.0,61.0,172.7,139.0,17,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,16.0,23.1,7.6,32.9,91.7,2.52,14.4,8.2,104.0,2.49,118.0,385.0,3.6,140.0,141.750000,67.250000,89.083333
146709,240.0,0,0,1,1,15993,-1.0,0,0.0,61.0,172.7,139.0,17,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,16.0,23.1,7.6,32.9,91.7,2.52,14.4,8.2,104.0,2.49,118.0,385.0,3.6,140.0,150.500000,60.666667,88.500000
146709,300.0,0,0,1,1,15993,-1.0,0,0.0,61.0,172.7,139.0,17,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,16.0,23.1,7.6,32.9,91.7,2.52,14.4,8.2,104.0,2.49,118.0,385.0,3.6,140.0,162.166667,61.250000,91.333333
146709,360.0,0,0,1,1,15993,-1.0,0,0.0,61.0,172.7,139.0,17,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,16.0,23.1,7.6,32.9,91.7,2.52,14.4,8.2,104.0,2.49,118.0,385.0,3.6,140.0,144.500000,59.000000,86.500000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3348321,16680.0,0,1,0,0,17917,-1.0,1,0.0,67.0,172.7,108.0,3,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,33.0,32.0,10.2,32.0,78.0,4.15,9.1,7.1,98.0,3.70,114.0,257.0,4.2,127.0,79.000000,48.000000,58.000000
3348321,16740.0,0,1,0,0,17917,-1.0,1,0.0,67.0,172.7,108.0,3,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,33.0,32.0,10.2,32.0,78.0,4.15,9.1,7.1,98.0,3.70,114.0,257.0,4.2,127.0,84.000000,52.000000,63.000000
3348321,16800.0,0,1,0,0,17917,-1.0,1,0.0,67.0,172.7,108.0,3,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,33.0,32.0,10.2,32.0,78.0,4.15,9.1,7.1,98.0,3.70,114.0,257.0,4.2,127.0,94.000000,57.000000,70.000000
3348321,16860.0,0,1,0,0,17917,-1.0,1,0.0,67.0,172.7,108.0,3,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,33.0,32.0,10.2,32.0,78.0,4.15,9.1,7.1,98.0,3.70,114.0,257.0,4.2,127.0,82.000000,48.000000,58.000000


In [None]:
# Overwrite the -1 entries of predictedhospitalmortality with 0.
# The result is assigned back to the column rather than calling an in-place
# method on `final.predictedhospitalmortality`: with pandas Copy-on-Write that
# attribute access yields a temporary Series, so an in-place replace on it
# never reaches `final`.
# NOTE(review): -1 looks like a "no prediction available" sentinel — confirm
# that mapping it to a predicted mortality of 0 is the intended imputation.
final['predictedhospitalmortality'] = final['predictedhospitalmortality'].replace(-1, 0)

In [None]:
# List the column labels of the assembled `final` frame.
final.columns

Index(['PAD', 'HF', 'AF', 'ARR', 'unitdischargeoffset',
       'predictedhospitalmortality', 'actualhospitalmortality', 'gender',
       'age', 'admissionheight', 'admissionweight', 'hour', 'teachingstatus',
       'ethnicity_African American', 'ethnicity_Asian', 'ethnicity_Caucasian',
       'ethnicity_Hispanic', 'ethnicity_Native American',
       'ethnicity_Other/Unknown', 'unittype_CCU-CTICU', 'unittype_CSICU',
       'unittype_CTICU', 'unittype_Cardiac ICU', 'unittype_MICU',
       'unittype_Med-Surg ICU', 'unittype_Neuro ICU', 'unittype_SICU',
       'unitadmitsource_Acute Care/Floor', 'unitadmitsource_Direct Admit',
       'unitadmitsource_Emergency Department', 'unitadmitsource_Floor',
       'unitadmitsource_Operating Room', 'unitadmitsource_Other Hospital',
       'unitadmitsource_PACU', 'unitadmitsource_Recovery Room',
       'unitadmitsource_Step-Down Unit (SDU)', 'unitadmitsource_misc',
       'unitvisitnumber_1', 'unitvisitnumber_2', 'unitvisitnumber_3',
       'unitvisit

In [None]:
# Number of distinct patients (first index level) present in `final`.
len(final.index.get_level_values(0).unique())

1433

In [None]:
!pip install scikit-multilearn
from skmultilearn.model_selection import iterative_train_test_split



In [None]:
# Build the label frame: keep the five target columns, drop the time level of
# the index, and keep only the first row seen for each patient.
y = (
    final[['actualhospitalmortality', 'PAD', 'HF', 'ARR', 'AF']]
    .droplevel(1)
    .loc[lambda labels: ~labels.index.duplicated(keep='first')]
)

In [None]:
# Feature frame: everything in `final` except the discharge offset and the
# five label columns (returns a new frame; `final` is left untouched).
X = final.drop(
    columns=['unitdischargeoffset', 'actualhospitalmortality', 'PAD', 'HF', 'ARR', 'AF']
)

In [None]:
# Distinct patient ids in order of first appearance, laid out as a 2-D array
# of shape (n_patients, 2) with the id repeated in both columns.
X_ind = X.index.get_level_values(0).unique()
X_ind = np.column_stack((X_ind, X_ind))

In [None]:
# Split the patients into train, validation and test sets

# np.asarray accepts both the label DataFrame and an already-converted ndarray,
# so this cell can be re-run without failing on `y` having no `.to_numpy()`.
y = np.asarray(y)

# First hold out 20% of the patients for testing, then 20% of the remaining
# training patients for validation. The splits operate on patient ids (X_ind),
# not on the time-series rows.
X_train_ind, y_train, X_test_ind, y_test = iterative_train_test_split(X_ind, y, test_size = 0.2)
X_train_ind, y_train, X_val_ind, y_val = iterative_train_test_split(X_train_ind, y_train, test_size = 0.2)

In [None]:
# Move the time level into a regular column so rows can be selected by
# patient id alone, then pull out the rows belonging to each split.
X = X.reset_index(level=1)
X_train, X_val, X_test = (
    X.loc[split_ids[:, 0].tolist()]
    for split_ids in (X_train_ind, X_val_ind, X_test_ind)
)

In [None]:
# Restore the (patient, time) MultiIndex on every split.
X_train = X_train.reset_index().set_index(['patient', 'time'])
X_val = X_val.reset_index().set_index(['patient', 'time'])
X_test = X_test.reset_index().set_index(['patient', 'time'])

In [None]:
# create time-series input for LSTM of shape [n, timestep, features]
def split_sequence(dataframe, n_steps):
    """Convert a per-patient time-series frame into a fixed-length 3-D array.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame whose first index level identifies the sample (patient); every
        column is treated as a feature.
    n_steps : int
        Number of timesteps each sample must have in the output.

    Returns
    -------
    numpy.ndarray
        Float array of shape (n_samples, n_steps, n_features). Samples are in
        order of first appearance in `dataframe`, so they stay aligned with
        label arrays built in that same order. Samples longer than `n_steps`
        keep their last `n_steps` rows; shorter ones are left-padded with
        copies of their first row.
    """
    # Take the feature count from the data instead of assuming 80 columns.
    n_features = dataframe.shape[1]

    # Group on the first index level without sorting: this visits only the
    # samples actually present (index.levels may hold stale entries) and
    # preserves the row order of the input.
    samples = dataframe.groupby(level=0, sort=False)

    lstm_input = np.full((samples.ngroups, n_steps, n_features), np.nan)
    for i, (_, sample) in enumerate(samples):
        sequence = sample.to_numpy()
        time_length = sequence.shape[0]

        if n_steps > time_length:
            # Too short: repeat the earliest observation at the front.
            padding = np.repeat(sequence[:1], n_steps - time_length, axis=0)
            sequence = np.vstack((padding, sequence))
        else:
            # Long enough: keep only the most recent n_steps rows.
            sequence = sequence[-n_steps:, :]
        lstm_input[i, :, :] = sequence

    return lstm_input

In [None]:
# Convert every split into the LSTM time-series layout, keeping 24 one-hour timesteps per patient
X_train, X_val, X_test = (
    split_sequence(split_frame, 24)
    for split_frame in (X_train, X_val, X_test)
)

In [None]:
# Shape of the test tensor, laid out as (samples, timesteps, features).
X_test.shape

(287, 24, 80)

In [None]:
# Sanity check: does the test tensor still contain any NaN entries?
np.any(np.isnan(X_test))

False

In [None]:
# Sanity check: does the training tensor contain any negative values?
(X_train < 0).any()

False

In [None]:
# Persist every split (features and labels) as a .npy file in the working directory.
for target_name, split_array in (
    ('X_train_multilabel_full_42', X_train),
    ('X_val_multilabel_full_42', X_val),
    ('X_test_multilabel_full_42', X_test),
    ('y_train_multilabel_full_42', y_train),
    ('y_val_multilabel_full_42', y_val),
    ('y_test_multilabel_full_42', y_test),
):
    np.save(target_name, split_array)