In [120]:
import psycopg2
from datetime import timedelta
from sqlalchemy import create_engine
import psycopg2
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from imblearn.under_sampling import RandomUnderSampler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import cross_val_score
from sklearn.metrics import precision_score, recall_score,f1_score, accuracy_score
from sklearn.metrics import confusion_matrix

from sklearn.linear_model import RidgeClassifierCV
from sktime.transformations.panel.rocket import Rocket
from sklearn.linear_model import LogisticRegression

In [3]:
import psycopg2

conn = psycopg2.connect(
    host="localhost",
    database="mimic",
    user="postgres",
    password="postgres"
)

cur = conn.cursor()

cur.execute("SELECT version();")
print(cur.fetchone())

('PostgreSQL 15.2, compiled by Visual C++ build 1914, 64-bit',)


In [4]:
# Connect to db
conn = psycopg2.connect(host='localhost', dbname='mimic', user='postgres', password='postgres', options='-c search_path=mimiciii')
#conn = psycopg2.connect(dbname='mimic', user='postgres')
cur = conn.cursor() 

# Read in table with patients & admissions (inner join on subject_id) and icu_stays (inner joinon subject_id and hadm_id)
icustay_details = pd.read_sql_query("SELECT * FROM mimiciii.flicu_icustay_detail;", conn)

# Read in vital and lab signs
pivoted_vital = pd.read_sql_query("SELECT * FROM mimiciii.pivoted_vital;", conn)
pivoted_lab = pd.read_sql_query("SELECT * FROM mimiciii.ckd_pivoted_lab;", conn)

# Read in lab measurements
# Use flicu_pivoted_lab (as it only takes the lab tests during ICU stay)
#query = "SELECT * FROM mimiciii.flicu_pivoted_lab;"
# Alternative:  Use the lab values recorded previous to the ICU stay (although during same hospital admission!), 
# then sample them  (8h intervalls) and then forward fill plus cap at either icu admission time of first vital sign recorded
#query = "SELECT * FROM mimiciii.pivoted_lab;"
#pivoted_lab = pd.read_sql_query(query, conn)

# Close the cursor and connection to so the server can allocate bandwidth to other requests
cur.close()
conn.close()

  icustay_details = pd.read_sql_query("SELECT * FROM mimiciii.flicu_icustay_detail;", conn)
  pivoted_vital = pd.read_sql_query("SELECT * FROM mimiciii.pivoted_vital;", conn)
  pivoted_lab = pd.read_sql_query("SELECT * FROM mimiciii.ckd_pivoted_lab;", conn)


In [5]:
pivoted_vital['pedaledema'].unique()

array([nan,  3.,  2.])

In [6]:
pivoted_vital.shape

(9207039, 15)

In [7]:
pivoted_vital.columns

Index(['icustay_id', 'charttime', 'heartrate', 'sysbp', 'diasbp', 'meanbp',
       'resprate', 'tempc', 'spo2', 'glucose', 'rbc', 'specificgravity',
       'pedaledema', 'appetite_median', 'ckd'],
      dtype='object')

In [8]:
pivoted_lab.columns

Index(['icustay_id', 'subject_id', 'charttime', 'aniongap', 'albumin', 'bands',
       'bicarbonate', 'bilirubin', 'creatinine', 'chloride', 'glucose',
       'hematocrit', 'hemoglobin', 'lactate', 'platelet', 'potassium', 'ptt',
       'inr', 'pt', 'sodium', 'bun', 'wbc', 'bacteria', 'ckd'],
      dtype='object')

In [9]:
icustay_details.describe()

Unnamed: 0,subject_id,hadm_id,icustay_id,los_hospital,admission_age,hospital_expire_flag,hospstay_seq,los_icu,icustay_seq,label_death_icu,label_cor_art,diabetes_mellitus,ckd,anemia_flag
count,61051.0,61051.0,61051.0,61051.0,61051.0,61051.0,61051.0,61041.0,61051.0,61051.0,61051.0,61051.0,61051.0,61051.0
mean,33961.698989,149946.928945,249968.598696,11.320283,64.856674,0.107975,1.418568,4.931644,1.070908,0.073774,0.212838,0.170693,0.082849,0.125682
std,28153.637888,28899.070114,28891.923533,14.301661,56.970061,0.310352,1.510997,9.664428,0.301838,0.261406,0.409318,0.376244,0.275656,0.331493
min,2.0,100001.0,200001.0,-0.945139,7e-06,0.0,1.0,0.000139,1.0,0.0,0.0,0.0,0.0,0.0
25%,12085.5,124949.0,224951.0,3.910069,44.281191,0.0,1.0,1.109491,1.0,0.0,0.0,0.0,0.0,0.0
50%,24352.0,149883.0,249949.0,6.945833,62.054949,0.0,1.0,2.094815,1.0,0.0,0.0,0.0,0.0,0.0
75%,54366.0,174997.5,274974.5,13.059722,76.068514,0.0,1.0,4.502199,1.0,0.0,0.0,0.0,0.0,0.0
max,99999.0,199999.0,299999.0,294.660417,311.561027,1.0,41.0,173.072512,7.0,1.0,1.0,1.0,1.0,1.0


#### Setting window length 

In [10]:
WINDOW_LENGTH = 24*4

### Keeping records that are atleast window length

In [11]:
data= icustay_details.copy()
data = data[data.los_icu >= WINDOW_LENGTH/24.0]

In [12]:
filtered_icustay_ids = pd.DataFrame(data['icustay_id'].unique(), columns=['icustay_id'])

In [13]:
# Drop measurements with no belonging icustay_id
pivoted_vital = pivoted_vital.dropna(subset=['icustay_id'])
pivoted_lab = pivoted_lab.dropna(subset=['icustay_id'])

#check the shape 
print(pivoted_vital.shape)

# Cast icustay_id types to int
pivoted_vital['icustay_id'] = pivoted_vital['icustay_id'].astype(int)
pivoted_lab['icustay_id'] = pivoted_lab['icustay_id'].astype(int)

# Keep only values of patients in previously filtered icustay_ids in labs and vitals
pivoted_vital = pivoted_vital.merge(filtered_icustay_ids, on='icustay_id', how='right').drop_duplicates()
pivoted_lab = pivoted_lab.merge(filtered_icustay_ids, on='icustay_id', how='right').drop_duplicates()

(9207039, 15)


In [14]:
print(pivoted_vital.shape)

(6724403, 15)


In [15]:
# Min of each lab and vitals
icustay_ids_charttime_min_lab = pivoted_lab[["icustay_id", "charttime"]][pivoted_lab.groupby("icustay_id")["charttime"].rank(ascending=1,method='dense') == 1]
icustay_ids_charttime_min_vital = pivoted_vital[["icustay_id", "charttime"]][pivoted_vital.groupby("icustay_id")["charttime"].rank(ascending=1,method='dense') == 1]
# Min of both combined
icustay_ids_charttime_min_vital_lab = pd.concat([icustay_ids_charttime_min_lab, icustay_ids_charttime_min_vital], ignore_index=True)
icustay_ids_charttime_min_vital_lab = icustay_ids_charttime_min_vital_lab[["icustay_id", "charttime"]][icustay_ids_charttime_min_vital_lab.groupby("icustay_id")["charttime"].rank(ascending=1,method='dense') == 1]

# Max of each lab and vitals
icustay_ids_charttime_max_lab = pivoted_lab[["icustay_id", "charttime"]][pivoted_lab.groupby("icustay_id")["charttime"].rank(ascending=0,method='dense') == 1]
icustay_ids_charttime_max_vital = pivoted_vital[["icustay_id", "charttime"]][pivoted_vital.groupby("icustay_id")["charttime"].rank(ascending=0,method='dense') == 1]
# Max of both combined
icustay_ids_charttime_max_vital_lab = pd.concat([icustay_ids_charttime_max_lab, icustay_ids_charttime_max_vital], ignore_index=True)
icustay_ids_charttime_max_vital_lab = icustay_ids_charttime_max_vital_lab[["icustay_id", "charttime"]][icustay_ids_charttime_max_vital_lab.groupby("icustay_id")["charttime"].rank(ascending=0,method='dense') == 1]


In [16]:
# Find for which icustay_ids there exist at least WINDOW_LENGTH of data
icustay_ids_vital_lab_charttime_min_max = pd.concat([icustay_ids_charttime_max_vital_lab, icustay_ids_charttime_min_vital_lab], ignore_index=True)
time_window = timedelta(days=4, seconds=0, microseconds=0, milliseconds=0, minutes=0, hours=WINDOW_LENGTH, weeks=0)
is_time_diff_bigger_window_lab = icustay_ids_vital_lab_charttime_min_max.groupby(['icustay_id'])['charttime'].transform(lambda x: (x.max()-x.min())) >= time_window

icustay_ids_vital_lab_charttime_min_max_filtered = icustay_ids_vital_lab_charttime_min_max[is_time_diff_bigger_window_lab]
print("Unique icu stays in icustay_ids_vital_lab_charttime_min_max_filtered after filtering", icustay_ids_vital_lab_charttime_min_max_filtered['icustay_id'].nunique())

# Keep only icustay ids for which at least WINDOW_LENGTH of data exists
icustay_ids_time_filtered = pd.DataFrame(icustay_ids_vital_lab_charttime_min_max_filtered['icustay_id'].unique(), columns=['icustay_id'])
print("Unique icu stays in icustay_ids_time_filtered: ", icustay_ids_time_filtered['icustay_id'].nunique())

Unique icu stays in icustay_ids_vital_lab_charttime_min_max_filtered after filtering 8409
Unique icu stays in icustay_ids_time_filtered:  8409


In [17]:
filtered_icustay_ids = filtered_icustay_ids.merge(icustay_ids_time_filtered, on='icustay_id', how='inner').drop_duplicates()

In [18]:
demographics_filtered = data.merge(filtered_icustay_ids, on='icustay_id', how='right').drop_duplicates()
print("Number of ICU stays demographics: ", demographics_filtered['icustay_id'].nunique())

vital_filtered = pivoted_vital.merge(filtered_icustay_ids, on='icustay_id', how='right').drop_duplicates()
print("Number of ICU stays vitals: ", vital_filtered['icustay_id'].nunique())

lab_filtered = pivoted_lab.merge(filtered_icustay_ids, on='icustay_id', how='right').drop_duplicates()
print("Number of ICU stays labs: ", lab_filtered['icustay_id'].nunique())

Number of ICU stays demographics:  8409
Number of ICU stays vitals:  8409
Number of ICU stays labs:  8409


In [19]:
lab_filtered['icustay_id'].unique()

array([285731, 284866, 205010, ..., 215595, 271752, 214236], dtype=int64)

In [20]:
vital_filtered = vital_filtered.merge(lab_filtered[['icustay_id', 'charttime']], on=['icustay_id', 'charttime'], how='outer').drop_duplicates()
print("Number of ICU stays in lab_filtered: ", vital_filtered['icustay_id'].nunique())
lab_filtered = lab_filtered.merge(vital_filtered[['icustay_id', 'charttime']], on=['icustay_id', 'charttime'], how='outer').drop_duplicates()
print("Number of ICU stays in lab_filtered: ", lab_filtered['icustay_id'].nunique())

Number of ICU stays in lab_filtered:  8409
Number of ICU stays in lab_filtered:  8409


In [21]:
vital_resampled = vital_filtered.copy()

# Resample from the end of the time series (how="last")
vital_resampled = vital_resampled.assign(charttime=vital_resampled.charttime.dt.round('H'))
#vital_resampled = vital_resampled.set_index('charttime').groupby('icustay_id').resample('1H', origin="end").median().drop(['icustay_id'], axis = 1).reset_index()
# Resample from the beginning of the time series
vital_resampled = vital_resampled.set_index('charttime').groupby('icustay_id').resample('1H', origin="start").median().drop(['icustay_id'], axis = 1).reset_index()

# Forward and backwards fill (use lambda function instead of directly applying it to groupby otherwise results from one group are carreid forward to another group...BAD)
# Fill NaNs (-1)
vital_col = vital_resampled.columns.drop(['icustay_id', 'charttime'])
vital_resampled = vital_resampled.set_index(['icustay_id', 'charttime']).groupby('icustay_id')[vital_col].transform(lambda x: x.ffill().bfill()).fillna(value=vital_resampled[['icustay_id', 'charttime', 'heartrate', 'sysbp', 'diasbp', 'meanbp','resprate', 'tempc', 'spo2', 'glucose', 'rbc', 'specificgravity','pedaledema', 'appetite_median']].median()).reset_index()
#.fillna(value=vital_resampled[vital_columns].mean())#.fillna(0)#.fillna(-1)


  vital_resampled = vital_resampled.set_index(['icustay_id', 'charttime']).groupby('icustay_id')[vital_col].transform(lambda x: x.ffill().bfill()).fillna(value=vital_resampled[['icustay_id', 'charttime', 'heartrate', 'sysbp', 'diasbp', 'meanbp','resprate', 'tempc', 'spo2', 'glucose', 'rbc', 'specificgravity','pedaledema', 'appetite_median']].median()).reset_index()


In [22]:
lab_resampled = lab_filtered.copy()
# Cut out minutes and hours, so that the resampling of the 8h takes the same time span as the 1h samples (for vitals)
lab_resampled = lab_resampled.assign(charttime=lab_resampled.charttime.dt.round('H'))
# Resample from the end of the time series 
#lab_resampled = lab_resampled.set_index('charttime').groupby('icustay_id').resample('8h', origin="end").median().drop(['icustay_id'], axis = 1).reset_index()
lab_resampled = lab_resampled.set_index('charttime').groupby('icustay_id').resample('8h', origin="start").median().drop(['icustay_id'], axis = 1).reset_index()

# Forward and backwards fill (use transform instead of direct groupby otherwise results from one group are carreid forward to another group...BAD)
# Fill NaNs (-1 or 0 or mean!?)
lab_col = lab_resampled.columns.drop(['icustay_id', 'charttime'])
lab_resampled = lab_resampled.set_index(['icustay_id', 'charttime']).groupby('icustay_id')[lab_col].transform(lambda x: x.ffill().bfill()).fillna(value=lab_resampled[['icustay_id', 'subject_id', 'charttime', 'aniongap', 'albumin', 'bands','bicarbonate', 'bilirubin', 'creatinine', 'chloride', 'glucose','hematocrit', 'hemoglobin', 'lactate', 'platelet', 'potassium', 'ptt','inr', 'pt', 'sodium', 'bun', 'wbc', 'bacteria']].median()).reset_index()

print(lab_resampled.isnull().sum().sum())

  lab_resampled = lab_resampled.set_index(['icustay_id', 'charttime']).groupby('icustay_id')[lab_col].transform(lambda x: x.ffill().bfill()).fillna(value=lab_resampled[['icustay_id', 'subject_id', 'charttime', 'aniongap', 'albumin', 'bands','bicarbonate', 'bilirubin', 'creatinine', 'chloride', 'glucose','hematocrit', 'hemoglobin', 'lactate', 'platelet', 'potassium', 'ptt','inr', 'pt', 'sodium', 'bun', 'wbc', 'bacteria']].median()).reset_index()


730


### keep only uptil 4 days data

In [23]:
delta_t_data = timedelta(days=0, seconds=0, microseconds=0, milliseconds=0, minutes=0, hours=WINDOW_LENGTH, weeks=0)
demographics_windowed = demographics_filtered.copy()
demographics_windowed['predtime'] = demographics_windowed.intime + delta_t_data
demographics_windowed['delta_t_pred'] = demographics_windowed.outtime - demographics_windowed.predtime

demographics_windowed[['subject_id', 'icustay_id', 'intime', 'predtime', 'delta_t_pred']].head(5)

Unnamed: 0,subject_id,icustay_id,intime,predtime,delta_t_pred
0,2005,285731,2163-06-23 11:28:06,2163-06-27 11:28:06,5 days 08:45:56
1,12174,284866,2118-10-30 16:48:57,2118-11-03 16:48:57,13 days 00:44:12
2,13535,205010,2196-10-10 22:03:14,2196-10-14 22:03:14,88 days 19:52:36
3,21824,241223,2107-07-07 20:58:00,2107-07-11 20:58:00,31 days 15:33:00
4,24868,282673,2176-11-10 23:55:37,2176-11-14 23:55:37,13 days 21:34:58


In [24]:
cut_icustay_ids = pd.DataFrame(demographics_windowed['icustay_id'].unique(), columns=['icustay_id'])
print("Number of ICU stays: ", cut_icustay_ids['icustay_id'].count())

vitals_cut = vital_resampled.merge(cut_icustay_ids, on='icustay_id', how='right')
print("Number of ICU stays in vitals_cut: ", vitals_cut['icustay_id'].nunique())

labs_cut = lab_resampled.merge(cut_icustay_ids, on='icustay_id', how='right')
print("Number of ICU stays in labs_cut: ", labs_cut['icustay_id'].nunique())


Number of ICU stays:  8409
Number of ICU stays in vitals_cut:  8409
Number of ICU stays in labs_cut:  8409


In [25]:
print(delta_t_data)

4 days, 0:00:00


In [26]:
vitals_windowed = vital_resampled.merge(demographics_windowed[['icustay_id', 'predtime', 'delta_t_pred']], on='icustay_id', how='right')
vitals_windowed = vitals_windowed[vitals_windowed.charttime < vitals_windowed.predtime]
print("Number of ICU stays in vitals_windowed: ", vitals_windowed['icustay_id'].nunique())

labs_windowed = lab_resampled.merge(demographics_windowed[['icustay_id', 'predtime', 'delta_t_pred']], on='icustay_id', how='right')
labs_windowed = labs_windowed[labs_windowed.charttime < labs_windowed.predtime]
print("Number of ICU stays in labs_windowed: ", labs_windowed['icustay_id'].nunique())

windowed_icustay_ids = pd.DataFrame(pd.concat([vitals_windowed['icustay_id'], labs_windowed['icustay_id']]).unique(), columns=['icustay_id'])
demographics_windowed = demographics_windowed.merge(windowed_icustay_ids, on='icustay_id', how='right')

Number of ICU stays in vitals_windowed:  8405
Number of ICU stays in labs_windowed:  8405


In [27]:
labs_windowed.isna().sum()

icustay_id        0
charttime         0
subject_id        0
aniongap          0
albumin           0
bands             0
bicarbonate       0
bilirubin         0
creatinine        0
chloride          0
glucose           0
hematocrit        0
hemoglobin        0
lactate           0
platelet          0
potassium         0
ptt               0
inr               0
pt                0
sodium            0
bun               0
wbc               0
bacteria          0
ckd             162
predtime          0
delta_t_pred      0
dtype: int64

In [28]:
vitals_windowed.isna().sum()

icustay_id              0
charttime               0
heartrate               0
sysbp                   0
diasbp                  0
meanbp                  0
resprate                0
tempc                   0
spo2                    0
glucose                 0
rbc                     0
specificgravity         0
pedaledema         812438
appetite_median         0
ckd                  1481
predtime                0
delta_t_pred            0
dtype: int64

#### using icustay_id from demographics to fill missing ckd in vitals and labs

In [29]:
vitals_windowed['ckd'] = vitals_windowed['icustay_id'].map(demographics_windowed.set_index('icustay_id')['ckd'])

In [30]:
vitals_windowed.isna().sum()

icustay_id              0
charttime               0
heartrate               0
sysbp                   0
diasbp                  0
meanbp                  0
resprate                0
tempc                   0
spo2                    0
glucose                 0
rbc                     0
specificgravity         0
pedaledema         812438
appetite_median         0
ckd                     0
predtime                0
delta_t_pred            0
dtype: int64

In [31]:
labs_windowed['ckd'] = labs_windowed['icustay_id'].map(demographics_windowed.set_index('icustay_id')['ckd'])

In [32]:
labs_windowed.isna().sum()

icustay_id      0
charttime       0
subject_id      0
aniongap        0
albumin         0
bands           0
bicarbonate     0
bilirubin       0
creatinine      0
chloride        0
glucose         0
hematocrit      0
hemoglobin      0
lactate         0
platelet        0
potassium       0
ptt             0
inr             0
pt              0
sodium          0
bun             0
wbc             0
bacteria        0
ckd             0
predtime        0
delta_t_pred    0
dtype: int64

#### Some patients might not have any value for pedaledema and hence we are filling those with -1

In [33]:
demographics_windowed.isna().sum()

subject_id                        0
hadm_id                           0
icustay_id                        0
gender                            0
dod                            4663
admittime                         0
dischtime                         0
los_hospital                      0
admission_age                     0
ethnicity                         0
ethnicity_grouped                 0
hospital_expire_flag              0
hospstay_seq                      0
first_hosp_stay                   0
intime                            0
outtime                           0
los_icu                           0
icustay_seq                       0
first_icu_stay_current_hosp       0
first_icu_stay_patient            0
first_careunit                    0
deathtime_icu                  7283
label_death_icu                   0
label_cor_art                     0
diabetes_mellitus                 0
ckd                               0
anemia_flag                       0
predtime                    

In [34]:
vitals_windowed =vitals_windowed.set_index(['icustay_id', 'charttime']).groupby('icustay_id')[vital_col].transform(lambda x: x.ffill().bfill()).fillna(-1).reset_index()

In [35]:
vitals_windowed.isna().sum()

icustay_id         0
charttime          0
heartrate          0
sysbp              0
diasbp             0
meanbp             0
resprate           0
tempc              0
spo2               0
glucose            0
rbc                0
specificgravity    0
pedaledema         0
appetite_median    0
ckd                0
dtype: int64

In [36]:
print("Number of ICU stays demographics: ", demographics_windowed['icustay_id'].nunique())
print("Number of CKD demographics:\n", demographics_windowed['ckd'].value_counts())

print("Number of ICU stays vitals: ", vitals_windowed['icustay_id'].nunique())
print("Number of CKD vitals:\n", vitals_windowed['ckd'].value_counts())

print("Number of ICU stays labs: ", labs_windowed['icustay_id'].nunique())
print("Number of CKD labs:\n", labs_windowed['ckd'].value_counts())

Number of ICU stays demographics:  8405
Number of CKD demographics:
 0    7868
1     537
Name: ckd, dtype: int64
Number of ICU stays vitals:  8405
Number of CKD vitals:
 0    760776
1     51662
Name: ckd, dtype: int64
Number of ICU stays labs:  8405
Number of CKD labs:
 0    98884
1     6658
Name: ckd, dtype: int64


### Aggregating time series for static model- Random Forest

In [37]:
def aggregate_dataframe(df, groupby_key, columns_to_aggregate):
    df = df.replace(-1, np.nan)
    result = df.groupby(groupby_key)[columns_to_aggregate].mean().reset_index()    
    return result

In [38]:
columns_to_merge = ['icustay_id', 'ckd','ethnicity_grouped']

In [39]:
df_cols_vitals = ['heartrate', 'sysbp','diasbp','meanbp','resprate','tempc','spo2','specificgravity','pedaledema','appetite_median']
df_agg_vitals = aggregate_dataframe(vitals_windowed, 'icustay_id', df_cols_vitals)

df_agg_vitals = df_agg_vitals.merge(demographics_windowed[columns_to_merge], on='icustay_id', how='inner')
df_agg_vitals['ckd_ethnicity'] = df_agg_vitals['ckd'].astype(str).str.cat(df_agg_vitals['ethnicity_grouped'].astype(str))

In [40]:
df_cols_labs = ['albumin','bacteria','glucose','bun','creatinine','sodium','potassium','hemoglobin','wbc','hematocrit','platelet','ptt']
df_agg_labs = aggregate_dataframe(labs_windowed, 'icustay_id', df_cols_labs)

df_agg_labs = df_agg_labs.merge(demographics_windowed[columns_to_merge], on='icustay_id', how='inner')
df_agg_labs['ckd_ethnicity'] = df_agg_labs['ckd'].astype(str).str.cat(df_agg_labs['ethnicity_grouped'].astype(str))

In [41]:
df_agg_vitals.head()

Unnamed: 0,icustay_id,heartrate,sysbp,diasbp,meanbp,resprate,tempc,spo2,specificgravity,pedaledema,appetite_median,ckd,ethnicity_grouped,ckd_ethnicity
0,200017,155.052083,120.0,58.5,78.0,20.5,37.111111,98.0,1.02,,3.0,0,white,0white
1,200033,79.126316,122.147368,71.007895,87.844737,17.113158,36.819298,96.394737,1.02,,2.0,0,white,0white
2,200037,142.943878,120.0,58.5,78.0,20.5,37.111111,98.0,1.02,,3.0,0,white,0white
3,200045,87.118557,129.762887,47.337629,75.81186,21.729381,36.797824,98.525773,1.02,,3.0,0,white,0white
4,200046,147.221649,120.0,58.5,78.0,20.5,37.111111,98.0,1.02,,3.0,0,black,0black


In [42]:
df_agg_vitals['ckd_ethnicity'].value_counts()

0white               5376
0unknown             1251
0black                681
1white                405
0hispanic             279
0asian                245
1black                 64
1unknown               35
0alaska_native         17
1hispanic              16
1asian                 13
0portuguese            12
0middle_eastern         5
0pacific_islander       2
1middle_eastern         2
1portuguese             1
1alaska_native          1
Name: ckd_ethnicity, dtype: int64

In [43]:
df_agg_labs.head()

Unnamed: 0,icustay_id,albumin,bacteria,glucose,bun,creatinine,sodium,potassium,hemoglobin,wbc,hematocrit,platelet,ptt,ckd,ethnicity_grouped,ckd_ethnicity
0,200017,2.6,2.0,125.0,29.0,1.0,141.416667,5.066667,16.8,5.2,53.1,267.0,36.0,0,white,0white
1,200033,2.7,1.0,151.916667,15.833333,0.675,135.916667,3.670833,12.729167,8.266667,36.725,140.208333,25.1,0,white,0white
2,200037,2.6,2.0,125.0,29.0,1.0,145.230769,5.484615,9.8,11.5,29.1,212.0,36.0,0,white,0white
3,200045,2.5,2.0,74.846154,23.923077,0.861538,147.461538,4.623077,10.369231,14.730769,31.3,237.230769,26.5,0,white,0white
4,200046,2.6,2.0,125.0,29.0,1.0,138.384615,5.023077,16.392308,3.403846,50.569231,263.307692,36.0,0,black,0black


In [44]:
df_agg_vitals.shape

(8405, 14)

In [45]:
df_agg_labs.shape

(8405, 16)

In [46]:
demographics_windowed.shape

(8405, 29)

In [47]:
print("Vitals unique icustay id: ",len(df_agg_vitals['icustay_id'].unique()),"\nLabs unique icustay id: ",len(df_agg_labs['icustay_id'].unique()),"\nDemographics unique icustay id: ",len(demographics_windowed['icustay_id'].unique()))

Vitals unique icustay id:  8405 
Labs unique icustay id:  8405 
Demographics unique icustay id:  8405


In [48]:
df_agg_vitals.head()

Unnamed: 0,icustay_id,heartrate,sysbp,diasbp,meanbp,resprate,tempc,spo2,specificgravity,pedaledema,appetite_median,ckd,ethnicity_grouped,ckd_ethnicity
0,200017,155.052083,120.0,58.5,78.0,20.5,37.111111,98.0,1.02,,3.0,0,white,0white
1,200033,79.126316,122.147368,71.007895,87.844737,17.113158,36.819298,96.394737,1.02,,2.0,0,white,0white
2,200037,142.943878,120.0,58.5,78.0,20.5,37.111111,98.0,1.02,,3.0,0,white,0white
3,200045,87.118557,129.762887,47.337629,75.81186,21.729381,36.797824,98.525773,1.02,,3.0,0,white,0white
4,200046,147.221649,120.0,58.5,78.0,20.5,37.111111,98.0,1.02,,3.0,0,black,0black


In [49]:
df_agg_vitals_new=df_agg_vitals.drop(['ckd','ethnicity_grouped','ckd_ethnicity'],axis=1)

In [50]:
df_agg_labs_new=df_agg_labs.drop(['ckd','ethnicity_grouped'],axis=1)

In [51]:
df_agg_vitals_new.head()

Unnamed: 0,icustay_id,heartrate,sysbp,diasbp,meanbp,resprate,tempc,spo2,specificgravity,pedaledema,appetite_median
0,200017,155.052083,120.0,58.5,78.0,20.5,37.111111,98.0,1.02,,3.0
1,200033,79.126316,122.147368,71.007895,87.844737,17.113158,36.819298,96.394737,1.02,,2.0
2,200037,142.943878,120.0,58.5,78.0,20.5,37.111111,98.0,1.02,,3.0
3,200045,87.118557,129.762887,47.337629,75.81186,21.729381,36.797824,98.525773,1.02,,3.0
4,200046,147.221649,120.0,58.5,78.0,20.5,37.111111,98.0,1.02,,3.0


In [52]:
df_agg_labs_new.head()

Unnamed: 0,icustay_id,albumin,bacteria,glucose,bun,creatinine,sodium,potassium,hemoglobin,wbc,hematocrit,platelet,ptt,ckd_ethnicity
0,200017,2.6,2.0,125.0,29.0,1.0,141.416667,5.066667,16.8,5.2,53.1,267.0,36.0,0white
1,200033,2.7,1.0,151.916667,15.833333,0.675,135.916667,3.670833,12.729167,8.266667,36.725,140.208333,25.1,0white
2,200037,2.6,2.0,125.0,29.0,1.0,145.230769,5.484615,9.8,11.5,29.1,212.0,36.0,0white
3,200045,2.5,2.0,74.846154,23.923077,0.861538,147.461538,4.623077,10.369231,14.730769,31.3,237.230769,26.5,0white
4,200046,2.6,2.0,125.0,29.0,1.0,138.384615,5.023077,16.392308,3.403846,50.569231,263.307692,36.0,0black


#### Merging all 3 tables together

In [53]:
merged_table = df_agg_labs_new.merge(df_agg_vitals_new, on='icustay_id', how='inner').merge(demographics_windowed, on='icustay_id', how='inner')

In [54]:
merged_table.columns

Index(['icustay_id', 'albumin', 'bacteria', 'glucose', 'bun', 'creatinine',
       'sodium', 'potassium', 'hemoglobin', 'wbc', 'hematocrit', 'platelet',
       'ptt', 'ckd_ethnicity', 'heartrate', 'sysbp', 'diasbp', 'meanbp',
       'resprate', 'tempc', 'spo2', 'specificgravity', 'pedaledema',
       'appetite_median', 'subject_id', 'hadm_id', 'gender', 'dod',
       'admittime', 'dischtime', 'los_hospital', 'admission_age', 'ethnicity',
       'ethnicity_grouped', 'hospital_expire_flag', 'hospstay_seq',
       'first_hosp_stay', 'intime', 'outtime', 'los_icu', 'icustay_seq',
       'first_icu_stay_current_hosp', 'first_icu_stay_patient',
       'first_careunit', 'deathtime_icu', 'label_death_icu', 'label_cor_art',
       'diabetes_mellitus', 'ckd', 'anemia_flag', 'predtime', 'delta_t_pred'],
      dtype='object')

In [55]:
merged_table.head()

Unnamed: 0,icustay_id,albumin,bacteria,glucose,bun,creatinine,sodium,potassium,hemoglobin,wbc,...,first_icu_stay_patient,first_careunit,deathtime_icu,label_death_icu,label_cor_art,diabetes_mellitus,ckd,anemia_flag,predtime,delta_t_pred
0,200017,2.6,2.0,125.0,29.0,1.0,141.416667,5.066667,16.8,5.2,...,True,NICU,NaT,0,0,0,0,0,2138-03-21 21:54:36,53 days 19:18:05
1,200033,2.7,1.0,151.916667,15.833333,0.675,135.916667,3.670833,12.729167,8.266667,...,True,SICU,2198-08-21 11:15:00,1,0,1,0,0,2198-08-11 17:56:17,9 days 21:03:01
2,200037,2.6,2.0,125.0,29.0,1.0,145.230769,5.484615,9.8,11.5,...,True,NICU,NaT,0,0,0,0,0,2141-08-11 09:29:48,14 days 07:47:08
3,200045,2.5,2.0,74.846154,23.923077,0.861538,147.461538,4.623077,10.369231,14.730769,...,False,SICU,NaT,0,0,1,0,0,2116-07-14 15:40:58,16 days 00:56:02
4,200046,2.6,2.0,125.0,29.0,1.0,138.384615,5.023077,16.392308,3.403846,...,True,NICU,NaT,0,0,0,0,0,2154-05-05 15:52:33,82 days 03:21:51


#### Dropping other irrelevent columns

In [56]:
merged_table=merged_table.drop(['subject_id','hadm_id','dod','admittime', 'dischtime','los_hospital','ethnicity','hospital_expire_flag','hospstay_seq', 'first_hosp_stay', 'intime','outtime', 'los_icu', 'icustay_seq', 'first_icu_stay_current_hosp','first_icu_stay_patient', 'first_careunit', 'deathtime_icu','label_death_icu', 'predtime', 'delta_t_pred'],axis=1)

In [57]:
merged_table.head()

Unnamed: 0,icustay_id,albumin,bacteria,glucose,bun,creatinine,sodium,potassium,hemoglobin,wbc,...,specificgravity,pedaledema,appetite_median,gender,admission_age,ethnicity_grouped,label_cor_art,diabetes_mellitus,ckd,anemia_flag
0,200017,2.6,2.0,125.0,29.0,1.0,141.416667,5.066667,16.8,5.2,...,1.02,,3.0,M,0.002499,white,0,0,0,0
1,200033,2.7,1.0,151.916667,15.833333,0.675,135.916667,3.670833,12.729167,8.266667,...,1.02,,2.0,M,67.14657,white,0,1,0,0
2,200037,2.6,2.0,125.0,29.0,1.0,145.230769,5.484615,9.8,11.5,...,1.02,,3.0,F,0.001083,white,0,0,0,0
3,200045,2.5,2.0,74.846154,23.923077,0.861538,147.461538,4.623077,10.369231,14.730769,...,1.02,,3.0,F,73.941807,white,0,1,0,0
4,200046,2.6,2.0,125.0,29.0,1.0,138.384615,5.023077,16.392308,3.403846,...,1.02,,3.0,F,0.001811,black,0,0,0,0


In [58]:
merged_table.columns

Index(['icustay_id', 'albumin', 'bacteria', 'glucose', 'bun', 'creatinine',
       'sodium', 'potassium', 'hemoglobin', 'wbc', 'hematocrit', 'platelet',
       'ptt', 'ckd_ethnicity', 'heartrate', 'sysbp', 'diasbp', 'meanbp',
       'resprate', 'tempc', 'spo2', 'specificgravity', 'pedaledema',
       'appetite_median', 'gender', 'admission_age', 'ethnicity_grouped',
       'label_cor_art', 'diabetes_mellitus', 'ckd', 'anemia_flag'],
      dtype='object')

#### Dropping rows which have ethnicity_grouped "middle_eastern", "portuguese", "alaska_native", "pacific_islander"

In [59]:
ethnicities_to_drop = ["middle_eastern", "portuguese", "alaska_native", "pacific_islander"]
merged_table = merged_table[~merged_table['ethnicity_grouped'].isin(ethnicities_to_drop)]

In [60]:
merged_table['ckd_ethnicity'].value_counts()

0white       5376
0unknown     1251
0black        681
1white        405
0hispanic     279
0asian        245
1black         64
1unknown       35
1hispanic      16
1asian         13
Name: ckd_ethnicity, dtype: int64

Removing pedaledema because it has 8365 missing values out of total 8405 rows

In [61]:
merged_table=merged_table.drop('pedaledema',axis=1)

In [62]:
merged_table.isna().sum().sum()

0

#### Grouping Ages 

In [63]:
age_ranges = [0, 9, 19, 29, 39, 49, 59, 69, 79, 89, 400]

age_labels = ['0-9', '10-19', '20-29', '30-39', '40-49', '50-59', '60-69', '70-79', '80-89', '90+']

merged_table['age_group'] = pd.cut(merged_table['admission_age'], bins=age_ranges, labels=age_labels, right=False)

In [64]:
merged_table.columns

Index(['icustay_id', 'albumin', 'bacteria', 'glucose', 'bun', 'creatinine',
       'sodium', 'potassium', 'hemoglobin', 'wbc', 'hematocrit', 'platelet',
       'ptt', 'ckd_ethnicity', 'heartrate', 'sysbp', 'diasbp', 'meanbp',
       'resprate', 'tempc', 'spo2', 'specificgravity', 'appetite_median',
       'gender', 'admission_age', 'ethnicity_grouped', 'label_cor_art',
       'diabetes_mellitus', 'ckd', 'anemia_flag', 'age_group'],
      dtype='object')

In [65]:
merged_table.head()

Unnamed: 0,icustay_id,albumin,bacteria,glucose,bun,creatinine,sodium,potassium,hemoglobin,wbc,...,specificgravity,appetite_median,gender,admission_age,ethnicity_grouped,label_cor_art,diabetes_mellitus,ckd,anemia_flag,age_group
0,200017,2.6,2.0,125.0,29.0,1.0,141.416667,5.066667,16.8,5.2,...,1.02,3.0,M,0.002499,white,0,0,0,0,0-9
1,200033,2.7,1.0,151.916667,15.833333,0.675,135.916667,3.670833,12.729167,8.266667,...,1.02,2.0,M,67.14657,white,0,1,0,0,60-69
2,200037,2.6,2.0,125.0,29.0,1.0,145.230769,5.484615,9.8,11.5,...,1.02,3.0,F,0.001083,white,0,0,0,0,0-9
3,200045,2.5,2.0,74.846154,23.923077,0.861538,147.461538,4.623077,10.369231,14.730769,...,1.02,3.0,F,73.941807,white,0,1,0,0,70-79
4,200046,2.6,2.0,125.0,29.0,1.0,138.384615,5.023077,16.392308,3.403846,...,1.02,3.0,F,0.001811,black,0,0,0,0,0-9


In [66]:
merged_table['age_group']

0         0-9
1       60-69
2         0-9
3       70-79
4         0-9
        ...  
8400    50-59
8401      0-9
8402    70-79
8403    30-39
8404    40-49
Name: age_group, Length: 8365, dtype: category
Categories (10, object): ['0-9' < '10-19' < '20-29' < '30-39' ... '60-69' < '70-79' < '80-89' < '90+']

In [67]:
merged_table=merged_table.drop('admission_age',axis=1)

#### Train Test Split

In [68]:
X= merged_table.drop(['ckd','ckd_ethnicity'],axis=1)
y=merged_table['ckd_ethnicity']

In [69]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, stratify=y)

In [70]:
y_train.value_counts()

0white       4031
0unknown      938
0black        511
1white        304
0hispanic     209
0asian        184
1black         48
1unknown       26
1hispanic      12
1asian         10
Name: ckd_ethnicity, dtype: int64

In [71]:
y_test.value_counts()

0white       1345
0unknown      313
0black        170
1white        101
0hispanic      70
0asian         61
1black         16
1unknown        9
1hispanic       4
1asian          3
Name: ckd_ethnicity, dtype: int64

#### Random Under Sampling because of white 

In [72]:
rus = RandomUnderSampler(sampling_strategy={'0white':304,'0unknown':26,'0asian':10,'0hispanic':12,'0black':48})
X_train_rus, y_train_rus = rus.fit_resample(X_train, y_train)

In [73]:
 y_train_rus.value_counts()

0white       304
1white       304
0black        48
1black        48
0unknown      26
1unknown      26
0hispanic     12
1hispanic     12
0asian        10
1asian        10
Name: ckd_ethnicity, dtype: int64

In [74]:
X_train_rus.shape

(800, 28)

#### sepearating ckd_ethnicity again now that it has been stratified 

In [75]:
X_train_rus.columns

Index(['icustay_id', 'albumin', 'bacteria', 'glucose', 'bun', 'creatinine',
       'sodium', 'potassium', 'hemoglobin', 'wbc', 'hematocrit', 'platelet',
       'ptt', 'heartrate', 'sysbp', 'diasbp', 'meanbp', 'resprate', 'tempc',
       'spo2', 'specificgravity', 'appetite_median', 'gender',
       'ethnicity_grouped', 'label_cor_art', 'diabetes_mellitus',
       'anemia_flag', 'age_group'],
      dtype='object')

In [76]:
X_train_rus = X_train_rus.merge(merged_table[['icustay_id', 'ckd']], on='icustay_id', how='inner')

In [77]:
X_train_rus.shape

(800, 29)

In [78]:
X_train_rus.columns

Index(['icustay_id', 'albumin', 'bacteria', 'glucose', 'bun', 'creatinine',
       'sodium', 'potassium', 'hemoglobin', 'wbc', 'hematocrit', 'platelet',
       'ptt', 'heartrate', 'sysbp', 'diasbp', 'meanbp', 'resprate', 'tempc',
       'spo2', 'specificgravity', 'appetite_median', 'gender',
       'ethnicity_grouped', 'label_cor_art', 'diabetes_mellitus',
       'anemia_flag', 'age_group', 'ckd'],
      dtype='object')

In [79]:
y_train_rus= X_train_rus['ckd']
X_train_rus= X_train_rus.drop('ckd',axis=1)

In [80]:
X_train_rus=X_train_rus.drop('icustay_id',axis=1)

#### Encoding

In [81]:
X_onehot_train = pd.get_dummies(X_train_rus)

In [82]:
X_onehot_train.columns

Index(['albumin', 'bacteria', 'glucose', 'bun', 'creatinine', 'sodium',
       'potassium', 'hemoglobin', 'wbc', 'hematocrit', 'platelet', 'ptt',
       'heartrate', 'sysbp', 'diasbp', 'meanbp', 'resprate', 'tempc', 'spo2',
       'specificgravity', 'appetite_median', 'label_cor_art',
       'diabetes_mellitus', 'anemia_flag', 'gender_F', 'gender_M',
       'ethnicity_grouped_asian', 'ethnicity_grouped_black',
       'ethnicity_grouped_hispanic', 'ethnicity_grouped_unknown',
       'ethnicity_grouped_white', 'age_group_0-9', 'age_group_10-19',
       'age_group_20-29', 'age_group_30-39', 'age_group_40-49',
       'age_group_50-59', 'age_group_60-69', 'age_group_70-79',
       'age_group_80-89', 'age_group_90+'],
      dtype='object')

#### Random Forest Model

In [83]:
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 5, 10],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['sqrt', 'log2']
}

In [84]:
rf = RandomForestClassifier()

In [85]:
grid_search = GridSearchCV(rf, param_grid, cv=5, scoring='f1')
grid_search.fit(X_onehot_train, y_train_rus)

In [86]:
grid_search.best_estimator_

In [87]:
grid_search.best_score_

0.8305438061713366

#### Using best model with cross validation with ethnicity

In [88]:
rf_same = RandomForestClassifier(max_features='log2', min_samples_leaf=2,n_estimators=200)
cv_scores = cross_val_score(rf_same, X_onehot_train, y_train_rus, cv=5, scoring='f1')
cv_scores.mean()

0.8250558776663375

#### Now same model, without ethnicity 

In [89]:
rf_same = RandomForestClassifier(max_features='log2', min_samples_leaf=2,n_estimators=200)

In [90]:
X_train_no_eth= X_onehot_train.drop(['ethnicity_grouped_asian', 'ethnicity_grouped_black',
       'ethnicity_grouped_hispanic', 'ethnicity_grouped_unknown',
       'ethnicity_grouped_white'],axis=1)

In [91]:
X_train_no_eth.columns

Index(['albumin', 'bacteria', 'glucose', 'bun', 'creatinine', 'sodium',
       'potassium', 'hemoglobin', 'wbc', 'hematocrit', 'platelet', 'ptt',
       'heartrate', 'sysbp', 'diasbp', 'meanbp', 'resprate', 'tempc', 'spo2',
       'specificgravity', 'appetite_median', 'label_cor_art',
       'diabetes_mellitus', 'anemia_flag', 'gender_F', 'gender_M',
       'age_group_0-9', 'age_group_10-19', 'age_group_20-29',
       'age_group_30-39', 'age_group_40-49', 'age_group_50-59',
       'age_group_60-69', 'age_group_70-79', 'age_group_80-89',
       'age_group_90+'],
      dtype='object')

In [92]:
cv_scores = cross_val_score(rf_same, X_train_no_eth, y_train_rus, cv=5, scoring='f1')

In [93]:
cv_scores

array([0.85549133, 0.85714286, 0.80952381, 0.81395349, 0.78787879])

In [94]:
cv_scores.mean()

0.8247980544794633

#### Test Score with ethnicity

In [95]:
rf_same.fit(X_onehot_train,y_train_rus)

In [96]:
X_test_onehot=pd.get_dummies(X_test)

In [97]:
X_test_onehot.columns

Index(['icustay_id', 'albumin', 'bacteria', 'glucose', 'bun', 'creatinine',
       'sodium', 'potassium', 'hemoglobin', 'wbc', 'hematocrit', 'platelet',
       'ptt', 'heartrate', 'sysbp', 'diasbp', 'meanbp', 'resprate', 'tempc',
       'spo2', 'specificgravity', 'appetite_median', 'label_cor_art',
       'diabetes_mellitus', 'anemia_flag', 'gender_F', 'gender_M',
       'ethnicity_grouped_asian', 'ethnicity_grouped_black',
       'ethnicity_grouped_hispanic', 'ethnicity_grouped_unknown',
       'ethnicity_grouped_white', 'age_group_0-9', 'age_group_10-19',
       'age_group_20-29', 'age_group_30-39', 'age_group_40-49',
       'age_group_50-59', 'age_group_60-69', 'age_group_70-79',
       'age_group_80-89', 'age_group_90+'],
      dtype='object')

In [98]:
X_test_onehot = X_test_onehot.merge(merged_table[['icustay_id', 'ckd']], on='icustay_id', how='inner')

In [99]:
y_test= X_test_onehot['ckd']
X_test_onehot= X_test_onehot.drop('ckd',axis=1)

In [100]:
X_test_onehot=X_test_onehot.drop('icustay_id',axis=1)

In [101]:
y_pred=rf_same.predict(X_test_onehot)

In [102]:
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

In [103]:
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("Accuracy:", accuracy)

Precision: 0.1805111821086262
Recall: 0.849624060150376
F1 Score: 0.2977602108036891
Accuracy: 0.7452198852772467


In [104]:
#Observation 458 means high false positive but very low false negative which is good, true positive are very high too

#The diagonal elements of the matrix represent the number of correctly classified samples (true positives and true negatives), while the off-diagonal elements represent the number of misclassified samples (false positives and false negatives).

cm = confusion_matrix(y_test, y_pred)
print(cm)

[[1446  513]
 [  20  113]]


#### Test score without ethnicity

In [105]:
X_test_onehot_noeth=X_test_onehot.drop(['ethnicity_grouped_asian', 'ethnicity_grouped_black',
       'ethnicity_grouped_hispanic', 'ethnicity_grouped_unknown',
       'ethnicity_grouped_white'],axis=1)

In [106]:
rf_same.fit(X_train_no_eth,y_train_rus)

In [107]:
y_pred_noeth=rf_same.predict(X_test_onehot_noeth)

In [108]:
precision = precision_score(y_test, y_pred_noeth)
recall = recall_score(y_test, y_pred_noeth)
f1 = f1_score(y_test, y_pred_noeth)
accuracy = accuracy_score(y_test, y_pred_noeth)

In [109]:
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("Accuracy:", accuracy)

Precision: 0.18241042345276873
Recall: 0.8421052631578947
F1 Score: 0.29986613119143235
Accuracy: 0.75


In [110]:
cm = confusion_matrix(y_test,y_pred_noeth)
print(cm)

[[1457  502]
 [  21  112]]


### Time series on Vitals

In [203]:
ct=0
for icu_id in vitals_windowed['icustay_id'].unique():
    icu_data =  vitals_windowed[vitals_windowed['icustay_id'] == icu_id]
    diff = icu_data['charttime'].max() - icu_data['charttime'].min()
    if diff == pd.Timedelta(hours=96):
        ct+=1
        print(f"ICU stay ID {icu_id}: {diff}")
print(ct)

ICU stay ID 241223: 4 days 00:00:00
ICU stay ID 268471: 4 days 00:00:00
ICU stay ID 237632: 4 days 00:00:00
ICU stay ID 241427: 4 days 00:00:00
ICU stay ID 222922: 4 days 00:00:00
ICU stay ID 297937: 4 days 00:00:00
ICU stay ID 273316: 4 days 00:00:00
ICU stay ID 243747: 4 days 00:00:00
ICU stay ID 233463: 4 days 00:00:00
ICU stay ID 278749: 4 days 00:00:00
ICU stay ID 267595: 4 days 00:00:00
ICU stay ID 272965: 4 days 00:00:00
ICU stay ID 256818: 4 days 00:00:00
ICU stay ID 244893: 4 days 00:00:00
ICU stay ID 289064: 4 days 00:00:00
ICU stay ID 201611: 4 days 00:00:00
ICU stay ID 206136: 4 days 00:00:00
ICU stay ID 221592: 4 days 00:00:00
ICU stay ID 236471: 4 days 00:00:00
ICU stay ID 211602: 4 days 00:00:00
ICU stay ID 282294: 4 days 00:00:00
ICU stay ID 219880: 4 days 00:00:00
ICU stay ID 247022: 4 days 00:00:00
ICU stay ID 268435: 4 days 00:00:00
ICU stay ID 219996: 4 days 00:00:00
ICU stay ID 254030: 4 days 00:00:00
ICU stay ID 243656: 4 days 00:00:00
ICU stay ID 254792: 4 days 0

ICU stay ID 203781: 4 days 00:00:00
ICU stay ID 244629: 4 days 00:00:00
ICU stay ID 290237: 4 days 00:00:00
ICU stay ID 246169: 4 days 00:00:00
ICU stay ID 232178: 4 days 00:00:00
ICU stay ID 268364: 4 days 00:00:00
ICU stay ID 234413: 4 days 00:00:00
ICU stay ID 234617: 4 days 00:00:00
ICU stay ID 258214: 4 days 00:00:00
ICU stay ID 262557: 4 days 00:00:00
ICU stay ID 241318: 4 days 00:00:00
ICU stay ID 299956: 4 days 00:00:00
ICU stay ID 269109: 4 days 00:00:00
ICU stay ID 258689: 4 days 00:00:00
ICU stay ID 240294: 4 days 00:00:00
ICU stay ID 250762: 4 days 00:00:00
ICU stay ID 290261: 4 days 00:00:00
ICU stay ID 231490: 4 days 00:00:00
ICU stay ID 208813: 4 days 00:00:00
ICU stay ID 220585: 4 days 00:00:00
ICU stay ID 250276: 4 days 00:00:00
ICU stay ID 241850: 4 days 00:00:00
ICU stay ID 279446: 4 days 00:00:00
ICU stay ID 217557: 4 days 00:00:00
ICU stay ID 257693: 4 days 00:00:00
ICU stay ID 276539: 4 days 00:00:00
ICU stay ID 262244: 4 days 00:00:00
ICU stay ID 212616: 4 days 0

ICU stay ID 265397: 4 days 00:00:00
ICU stay ID 255268: 4 days 00:00:00
ICU stay ID 220469: 4 days 00:00:00
ICU stay ID 266517: 4 days 00:00:00
ICU stay ID 299674: 4 days 00:00:00
ICU stay ID 219266: 4 days 00:00:00
ICU stay ID 280310: 4 days 00:00:00
ICU stay ID 221377: 4 days 00:00:00
ICU stay ID 234272: 4 days 00:00:00
ICU stay ID 297340: 4 days 00:00:00
ICU stay ID 290865: 4 days 00:00:00
ICU stay ID 272217: 4 days 00:00:00
ICU stay ID 221778: 4 days 00:00:00
ICU stay ID 273749: 4 days 00:00:00
ICU stay ID 225206: 4 days 00:00:00
ICU stay ID 217654: 4 days 00:00:00
ICU stay ID 237626: 4 days 00:00:00
ICU stay ID 238842: 4 days 00:00:00
ICU stay ID 247137: 4 days 00:00:00
ICU stay ID 226266: 4 days 00:00:00
ICU stay ID 279495: 4 days 00:00:00
ICU stay ID 216373: 4 days 00:00:00
ICU stay ID 227631: 4 days 00:00:00
ICU stay ID 285684: 4 days 00:00:00
ICU stay ID 220559: 4 days 00:00:00
ICU stay ID 232981: 4 days 00:00:00
ICU stay ID 275655: 4 days 00:00:00
ICU stay ID 215945: 4 days 0

ICU stay ID 276047: 4 days 00:00:00
ICU stay ID 244417: 4 days 00:00:00
ICU stay ID 291557: 4 days 00:00:00
ICU stay ID 263596: 4 days 00:00:00
ICU stay ID 293960: 4 days 00:00:00
ICU stay ID 245346: 4 days 00:00:00
ICU stay ID 230395: 4 days 00:00:00
ICU stay ID 295804: 4 days 00:00:00
ICU stay ID 235557: 4 days 00:00:00
ICU stay ID 260041: 4 days 00:00:00
ICU stay ID 229040: 4 days 00:00:00
ICU stay ID 278062: 4 days 00:00:00
ICU stay ID 200912: 4 days 00:00:00
ICU stay ID 272300: 4 days 00:00:00
ICU stay ID 250668: 4 days 00:00:00
ICU stay ID 258943: 4 days 00:00:00
ICU stay ID 246977: 4 days 00:00:00
ICU stay ID 250857: 4 days 00:00:00
ICU stay ID 202033: 4 days 00:00:00
ICU stay ID 257448: 4 days 00:00:00
ICU stay ID 214654: 4 days 00:00:00
ICU stay ID 226443: 4 days 00:00:00
ICU stay ID 293947: 4 days 00:00:00
ICU stay ID 274169: 4 days 00:00:00
ICU stay ID 239117: 4 days 00:00:00
ICU stay ID 225578: 4 days 00:00:00
ICU stay ID 209038: 4 days 00:00:00
ICU stay ID 205334: 4 days 0

ICU stay ID 228436: 4 days 00:00:00
ICU stay ID 296431: 4 days 00:00:00
ICU stay ID 276506: 4 days 00:00:00
ICU stay ID 276996: 4 days 00:00:00
ICU stay ID 202824: 4 days 00:00:00
ICU stay ID 274614: 4 days 00:00:00
ICU stay ID 201649: 4 days 00:00:00
ICU stay ID 251301: 4 days 00:00:00
ICU stay ID 219750: 4 days 00:00:00
ICU stay ID 218569: 4 days 00:00:00
ICU stay ID 238952: 4 days 00:00:00
ICU stay ID 267226: 4 days 00:00:00
ICU stay ID 258386: 4 days 00:00:00
ICU stay ID 207242: 4 days 00:00:00
ICU stay ID 226807: 4 days 00:00:00
ICU stay ID 287185: 4 days 00:00:00
ICU stay ID 274727: 4 days 00:00:00
ICU stay ID 269632: 4 days 00:00:00
ICU stay ID 240351: 4 days 00:00:00
ICU stay ID 297670: 4 days 00:00:00
ICU stay ID 211965: 4 days 00:00:00
ICU stay ID 238522: 4 days 00:00:00
ICU stay ID 206376: 4 days 00:00:00
ICU stay ID 287879: 4 days 00:00:00
ICU stay ID 214731: 4 days 00:00:00
ICU stay ID 246229: 4 days 00:00:00
ICU stay ID 229355: 4 days 00:00:00
ICU stay ID 273306: 4 days 0

ICU stay ID 264271: 4 days 00:00:00
ICU stay ID 235185: 4 days 00:00:00
ICU stay ID 268729: 4 days 00:00:00
ICU stay ID 291295: 4 days 00:00:00
ICU stay ID 206247: 4 days 00:00:00
ICU stay ID 234114: 4 days 00:00:00
ICU stay ID 225332: 4 days 00:00:00
ICU stay ID 244528: 4 days 00:00:00
ICU stay ID 293738: 4 days 00:00:00
ICU stay ID 284380: 4 days 00:00:00
ICU stay ID 239575: 4 days 00:00:00
ICU stay ID 240740: 4 days 00:00:00
ICU stay ID 221009: 4 days 00:00:00
ICU stay ID 251772: 4 days 00:00:00
ICU stay ID 221591: 4 days 00:00:00
ICU stay ID 215126: 4 days 00:00:00
ICU stay ID 268712: 4 days 00:00:00
ICU stay ID 235464: 4 days 00:00:00
ICU stay ID 289598: 4 days 00:00:00
ICU stay ID 272110: 4 days 00:00:00
ICU stay ID 250887: 4 days 00:00:00
ICU stay ID 288239: 4 days 00:00:00
ICU stay ID 210682: 4 days 00:00:00
ICU stay ID 203499: 4 days 00:00:00
ICU stay ID 212078: 4 days 00:00:00
ICU stay ID 299182: 4 days 00:00:00
ICU stay ID 243011: 4 days 00:00:00
ICU stay ID 284499: 4 days 0

ICU stay ID 256942: 4 days 00:00:00
ICU stay ID 217159: 4 days 00:00:00
ICU stay ID 235337: 4 days 00:00:00
ICU stay ID 271893: 4 days 00:00:00
ICU stay ID 245039: 4 days 00:00:00
ICU stay ID 209314: 4 days 00:00:00
ICU stay ID 293051: 4 days 00:00:00
ICU stay ID 232068: 4 days 00:00:00
ICU stay ID 299884: 4 days 00:00:00
ICU stay ID 200339: 4 days 00:00:00
ICU stay ID 264669: 4 days 00:00:00
ICU stay ID 265020: 4 days 00:00:00
ICU stay ID 245644: 4 days 00:00:00
ICU stay ID 202495: 4 days 00:00:00
ICU stay ID 283359: 4 days 00:00:00
ICU stay ID 235891: 4 days 00:00:00
ICU stay ID 203942: 4 days 00:00:00
ICU stay ID 218288: 4 days 00:00:00
ICU stay ID 293931: 4 days 00:00:00
ICU stay ID 200109: 4 days 00:00:00
ICU stay ID 207502: 4 days 00:00:00
ICU stay ID 282528: 4 days 00:00:00
ICU stay ID 243515: 4 days 00:00:00
ICU stay ID 230084: 4 days 00:00:00
ICU stay ID 213317: 4 days 00:00:00
ICU stay ID 216265: 4 days 00:00:00
ICU stay ID 285317: 4 days 00:00:00
ICU stay ID 281905: 4 days 0

ICU stay ID 266930: 4 days 00:00:00
ICU stay ID 202639: 4 days 00:00:00
ICU stay ID 259860: 4 days 00:00:00
ICU stay ID 251520: 4 days 00:00:00
ICU stay ID 250151: 4 days 00:00:00
ICU stay ID 212123: 4 days 00:00:00
ICU stay ID 267420: 4 days 00:00:00
ICU stay ID 264718: 4 days 00:00:00
ICU stay ID 205017: 4 days 00:00:00
ICU stay ID 275288: 4 days 00:00:00
ICU stay ID 250338: 4 days 00:00:00
ICU stay ID 222154: 4 days 00:00:00
ICU stay ID 270135: 4 days 00:00:00
ICU stay ID 220443: 4 days 00:00:00
ICU stay ID 244131: 4 days 00:00:00
ICU stay ID 267863: 4 days 00:00:00
ICU stay ID 249199: 4 days 00:00:00
ICU stay ID 209343: 4 days 00:00:00
ICU stay ID 281572: 4 days 00:00:00
ICU stay ID 234677: 4 days 00:00:00
ICU stay ID 282747: 4 days 00:00:00
ICU stay ID 298538: 4 days 00:00:00
ICU stay ID 267918: 4 days 00:00:00
ICU stay ID 299210: 4 days 00:00:00
ICU stay ID 285264: 4 days 00:00:00
ICU stay ID 245682: 4 days 00:00:00
ICU stay ID 277323: 4 days 00:00:00
ICU stay ID 252440: 4 days 0

ICU stay ID 255570: 4 days 00:00:00
ICU stay ID 228144: 4 days 00:00:00
ICU stay ID 256911: 4 days 00:00:00
ICU stay ID 260026: 4 days 00:00:00
ICU stay ID 202631: 4 days 00:00:00
ICU stay ID 227186: 4 days 00:00:00
ICU stay ID 288621: 4 days 00:00:00
ICU stay ID 215803: 4 days 00:00:00
ICU stay ID 269154: 4 days 00:00:00
ICU stay ID 234342: 4 days 00:00:00
ICU stay ID 264625: 4 days 00:00:00
ICU stay ID 289784: 4 days 00:00:00
ICU stay ID 217356: 4 days 00:00:00
ICU stay ID 218722: 4 days 00:00:00
ICU stay ID 264273: 4 days 00:00:00
ICU stay ID 257506: 4 days 00:00:00
ICU stay ID 224510: 4 days 00:00:00
ICU stay ID 252741: 4 days 00:00:00
ICU stay ID 240312: 4 days 00:00:00
ICU stay ID 204787: 4 days 00:00:00
ICU stay ID 201386: 4 days 00:00:00
ICU stay ID 202941: 4 days 00:00:00
ICU stay ID 217821: 4 days 00:00:00
ICU stay ID 236169: 4 days 00:00:00
ICU stay ID 231236: 4 days 00:00:00
ICU stay ID 298367: 4 days 00:00:00
ICU stay ID 230386: 4 days 00:00:00
ICU stay ID 214206: 4 days 0

ICU stay ID 242198: 4 days 00:00:00
ICU stay ID 292997: 4 days 00:00:00
ICU stay ID 233810: 4 days 00:00:00
ICU stay ID 299424: 4 days 00:00:00
ICU stay ID 207193: 4 days 00:00:00
ICU stay ID 253009: 4 days 00:00:00
ICU stay ID 272132: 4 days 00:00:00
ICU stay ID 210695: 4 days 00:00:00
ICU stay ID 292595: 4 days 00:00:00
ICU stay ID 218547: 4 days 00:00:00
ICU stay ID 230144: 4 days 00:00:00
ICU stay ID 201076: 4 days 00:00:00
ICU stay ID 266008: 4 days 00:00:00
ICU stay ID 238636: 4 days 00:00:00
ICU stay ID 233792: 4 days 00:00:00
ICU stay ID 226146: 4 days 00:00:00
ICU stay ID 293607: 4 days 00:00:00
ICU stay ID 220052: 4 days 00:00:00
ICU stay ID 205173: 4 days 00:00:00
ICU stay ID 236597: 4 days 00:00:00
ICU stay ID 238528: 4 days 00:00:00
ICU stay ID 231635: 4 days 00:00:00
ICU stay ID 233596: 4 days 00:00:00
ICU stay ID 201241: 4 days 00:00:00
ICU stay ID 266648: 4 days 00:00:00
ICU stay ID 251715: 4 days 00:00:00
ICU stay ID 289643: 4 days 00:00:00
ICU stay ID 244429: 4 days 0

ICU stay ID 230366: 4 days 00:00:00
ICU stay ID 239991: 4 days 00:00:00
ICU stay ID 244821: 4 days 00:00:00
ICU stay ID 255295: 4 days 00:00:00
ICU stay ID 229332: 4 days 00:00:00
ICU stay ID 222581: 4 days 00:00:00
ICU stay ID 280570: 4 days 00:00:00
ICU stay ID 248910: 4 days 00:00:00
ICU stay ID 282222: 4 days 00:00:00
ICU stay ID 294786: 4 days 00:00:00
ICU stay ID 272655: 4 days 00:00:00
ICU stay ID 217488: 4 days 00:00:00
ICU stay ID 247848: 4 days 00:00:00
ICU stay ID 251820: 4 days 00:00:00
ICU stay ID 207381: 4 days 00:00:00
ICU stay ID 278283: 4 days 00:00:00
ICU stay ID 250054: 4 days 00:00:00
ICU stay ID 286939: 4 days 00:00:00
ICU stay ID 235327: 4 days 00:00:00
ICU stay ID 291207: 4 days 00:00:00
ICU stay ID 235312: 4 days 00:00:00
ICU stay ID 225479: 4 days 00:00:00
ICU stay ID 232405: 4 days 00:00:00
ICU stay ID 251695: 4 days 00:00:00
ICU stay ID 288891: 4 days 00:00:00
ICU stay ID 272443: 4 days 00:00:00
ICU stay ID 299834: 4 days 00:00:00
ICU stay ID 251637: 4 days 0

ICU stay ID 265412: 4 days 00:00:00
ICU stay ID 293721: 4 days 00:00:00
ICU stay ID 257835: 4 days 00:00:00
ICU stay ID 211475: 4 days 00:00:00
ICU stay ID 202252: 4 days 00:00:00
ICU stay ID 285921: 4 days 00:00:00
ICU stay ID 226481: 4 days 00:00:00
ICU stay ID 250411: 4 days 00:00:00
ICU stay ID 248982: 4 days 00:00:00
ICU stay ID 297212: 4 days 00:00:00
ICU stay ID 250506: 4 days 00:00:00
ICU stay ID 262074: 4 days 00:00:00
ICU stay ID 217448: 4 days 00:00:00
ICU stay ID 221878: 4 days 00:00:00
ICU stay ID 229297: 4 days 00:00:00
ICU stay ID 272439: 4 days 00:00:00
ICU stay ID 241160: 4 days 00:00:00
ICU stay ID 223952: 4 days 00:00:00
ICU stay ID 245292: 4 days 00:00:00
ICU stay ID 276926: 4 days 00:00:00
ICU stay ID 221438: 4 days 00:00:00
ICU stay ID 283806: 4 days 00:00:00
ICU stay ID 290198: 4 days 00:00:00
ICU stay ID 291951: 4 days 00:00:00
ICU stay ID 215757: 4 days 00:00:00
ICU stay ID 299133: 4 days 00:00:00
ICU stay ID 253734: 4 days 00:00:00
ICU stay ID 287650: 4 days 0

In [118]:
vitals_windowed.head()

Unnamed: 0,icustay_id,charttime,heartrate,sysbp,diasbp,meanbp,resprate,tempc,spo2,glucose,rbc,specificgravity,pedaledema,appetite_median,ckd
0,285731,2163-06-23 10:00:00,107.0,122.0,64.0,81.0,31.0,36.194445,100.0,312.0,3.99,1.02,-1.0,3.0,0
1,285731,2163-06-23 11:00:00,107.0,122.0,64.0,81.0,31.0,36.194445,100.0,312.0,3.99,1.02,-1.0,3.0,0
2,285731,2163-06-23 12:00:00,107.0,122.0,64.0,81.0,30.5,36.194445,100.0,312.0,3.99,1.02,-1.0,3.0,0
3,285731,2163-06-23 13:00:00,94.0,114.0,66.0,84.0,23.0,36.194445,98.0,306.5,3.99,1.02,-1.0,3.0,0
4,285731,2163-06-23 14:00:00,89.0,150.0,93.0,115.0,24.5,36.194445,98.0,306.5,3.99,1.02,-1.0,3.0,0


In [164]:
vv = vitals_windowed
vv['charttime'] = pd.to_datetime(vv['charttime'])
vv['time_diff'] = vv.groupby('icustay_id')['charttime'].diff().dt.total_seconds() / 3600
vv['time_diff'].fillna(0, inplace=True)

column_sum = vv.groupby('icustay_id')['time_diff'].sum()
column_sum_df = column_sum.to_frame().reset_index()
column_sum_df.columns = ['icustay_id', 'time_diff']
#filtered_df = vitals_windowed[vitals_windowed['icustay_id'] == 286937]
column_sum_df.describe()

Unnamed: 0,icustay_id,time_diff
count,8405.0,8405.0
mean,249984.620345,95.661273
std,28585.253432,1.806917
min,200017.0,71.0
25%,225578.0,95.0
50%,250040.0,96.0
75%,274430.0,96.0
max,299992.0,107.0


In [166]:
filtered_df = vitals_windowed[vitals_windowed['icustay_id'] == 286937]
filtered_df

Unnamed: 0,icustay_id,charttime,heartrate,sysbp,diasbp,meanbp,resprate,tempc,spo2,glucose,rbc,specificgravity,pedaledema,appetite_median,ckd,time_diff
398194,286937,2142-08-06 03:00:00,67.0,112.0,39.0,62.0,21.0,37.900002,99.0,173.0,3.99,1.02,-1.0,3.0,0,0.0
398195,286937,2142-08-06 04:00:00,67.0,112.0,39.0,62.0,21.0,37.900002,99.0,173.0,3.99,1.02,-1.0,3.0,0,1.0
398196,286937,2142-08-06 05:00:00,67.0,112.0,39.0,62.0,21.0,37.900002,99.0,173.0,3.99,1.02,-1.0,3.0,0,1.0
398197,286937,2142-08-06 06:00:00,67.0,112.0,39.0,62.0,21.0,37.900002,99.0,173.0,3.99,1.02,-1.0,3.0,0,1.0
398198,286937,2142-08-06 07:00:00,67.0,112.0,39.0,62.0,21.0,37.900002,99.0,173.0,3.99,1.02,-1.0,3.0,0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
398261,286937,2142-08-08 22:00:00,67.0,112.0,39.0,62.0,21.0,37.900002,99.0,173.0,3.99,1.02,-1.0,3.0,0,1.0
398262,286937,2142-08-08 23:00:00,67.0,112.0,39.0,62.0,21.0,37.900002,99.0,173.0,3.99,1.02,-1.0,3.0,0,1.0
398263,286937,2142-08-09 00:00:00,67.0,112.0,39.0,62.0,21.0,37.900002,99.0,173.0,3.99,1.02,-1.0,3.0,0,1.0
398264,286937,2142-08-09 01:00:00,67.0,112.0,39.0,62.0,21.0,37.900002,99.0,173.0,3.99,1.02,-1.0,3.0,0,1.0


In [200]:
def RocketForVitals(vitals_windowed):
    vitals_windowed['charttime'] = pd.to_datetime(vitals_windowed['charttime'])
    feature_columns = ['heartrate', 'sysbp', 'diasbp', 'meanbp', 'resprate', 'tempc', 'spo2', 'glucose', 'rbc', 'specificgravity', 'pedaledema', 'appetite_median']
    
    grouped_data = vitals_windowed.sort_values(['icustay_id', 'charttime']).groupby('icustay_id')[feature_columns + ['ckd']]
    
    X = []
    y = []
    for _, group in grouped_data:
        group_values = group[feature_columns].values.T
        num_timestamps = group_values.shape[1]
        
        if num_timestamps < 96:
            padded_values = np.pad(group_values, ((0, 0), (0, 96 - num_timestamps)), 'constant', constant_values=0)
        elif num_timestamps > 96:        
            padded_values = group_values[:, :96]
        else:        
            padded_values = group_values
        
        X.append(padded_values)
        y.append(group['ckd'].iloc[0])
    
    X = np.array(X)
    y = np.array(y)
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    rocket = Rocket(num_kernels=100, random_state=42)
    rocket.fit(X_train)
    X_train_transformed = rocket.transform(X_train)
    X_test_transformed = rocket.transform(X_test)
    return X_train_transformed, X_test_transformed

In [201]:
X_train_transformed, X_test_transformed = RocketForVitals(vitals_windowed)

In [202]:
def printEvaluationScores(X_train_transformed, X_test_transformed, y_train, y_test):
    clf = LogisticRegression(random_state=42, max_iter=1000)
    cv_scores = cross_val_score(clf, X_train_transformed, y_train, cv=5)
    print("Cross-validation scores:", cv_scores)
    print("Mean cross-validation score:", np.mean(cv_scores))
    
    clf.fit(X_train_transformed, y_train)
    y_pred = clf.predict(X_test_transformed)
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)
    print("\nClassification report:")
    print(classification_report(y_test, y_pred))

In [195]:
printEvaluationScores(X_train_transformed, X_test_transformed, y_train, y_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Cross-validation scores: [0.93531599 0.93531599 0.933829   0.93605948 0.9360119 ]
Mean cross-validation score: 0.9353064701717118
Accuracy: 0.9357525282569898

Classification report:
              precision    recall  f1-score   support

           0       0.94      1.00      0.97      1574
           1       0.00      0.00      0.00       107

    accuracy                           0.94      1681
   macro avg       0.47      0.50      0.48      1681
weighted avg       0.88      0.94      0.91      1681



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
