In [1]:
import numpy as np
import pandas as pd
import csv
import os
from SepsisCheck import sepsischeck_utilities_for_pkl as su
from sklearn.metrics import precision_recall_fscore_support as score
#from sklearn.metrics import average_precision_score as score
from sklearn.metrics import classification_report as report
from sklearn.metrics import roc_auc_score as auroc
from sklearn.metrics import confusion_matrix

In [2]:
# Load the preprocessed data, the per-variable mean/std table, the forecast
# predictions and the feature mapping.
path = "../data/exp1/forecasting_exp1/mimic_iii_preprocessed_forecasting1.pkl"
preds = "../data/exp1/forecasting_preds/forecasting_preds_test/content/4OBS12forecasting_preds_test.pkl"

data = pd.read_pickle(path)
# One (variable, mean, std) row per variable, taken from the first element of `data`.
mean_stds = data[0][["variable", "mean", "std"]].drop_duplicates("variable")

# `preds` is rebound here: it held the pickle path, from now on it holds the loaded object.
preds = pd.read_pickle(preds)
# NOTE: `map` shadows the builtin of the same name; the name is kept because
# later cells pass it around as the feature mapping.
map = list(preds[1].keys())

# One row per ts_ind, ordered by ts_ind, as that is how the results are sorted.
data2 = preds[0].sort_values(by=["ts_ind"]).drop_duplicates("ts_ind")
data1 = data2
IDs = list(data1["ts_ind"].unique())

# Ground truth for scoring: the index is reset after sorting so that
# index 0 corresponds to the lowest ts_ind, and so on upwards.
ground_truth = (
    data1[["sepsis_label", "ts_ind"]]
    .reset_index(drop=True)
    .sort_values(by=["ts_ind"])
)

# Empty frame for holding results.
col = [
    "experiment",
    "t_sepsis_mean",
    "24_hour_window",
    "t_ident",
    "AUROC",
    "AUROC_adj",
    "precision_raw",
    "precision_adj",
    "recall_raw",
    "recall_adj",
    "f1_raw",
    "f1_adj",
    "support",
    "support_adj",
    "cm",
    "cm_adj",
]
df = pd.DataFrame(columns=col)

In [4]:
from SepsisCheck_forecast import sepsischeck_utilities_for_pkl_forecast_pred_cutoff as scu

In [5]:
# Read one feature name per line from features.txt, stripping surrounding whitespace.
with open("./features.txt") as file:
    feats = [line.strip() for line in file]
# "Dobutamine" is added on top of the names read from the file.
feats.append("Dobutamine")
IDs = scu.get_unique_admissions(preds)

feats

['ALP',
 'ALT',
 'AST',
 'Albumin',
 'Albumin 25%',
 'Albumin 5%',
 'Amiodarone',
 'Anion Gap',
 'Antibiotics',
 'BUN',
 'Base Excess',
 'Basophils',
 'Bicarbonate',
 'Bilirubin (Direct)',
 'Bilirubin (Indirect)',
 'Bilirubin (Total)',
 'Blood Culture',
 'CRR',
 'Calcium Free',
 'Calcium Gluconate',
 'Calcium Total',
 'Cefazolin',
 'Chest Tube',
 'Chloride',
 'Colloid',
 'Creatinine Blood',
 'Creatinine Urine',
 'D5W',
 'DBP',
 'Dextrose Other',
 'Dopamine',
 'EBL',
 'Emesis',
 'Eoisinophils',
 'Epinephrine',
 'Famotidine',
 'Fentanyl',
 'FiO2',
 'Fiber',
 'Free Water',
 'Fresh Frozen Plasma',
 'Furosemide',
 'GCS_eye',
 'GCS_motor',
 'GCS_verbal',
 'GT Flush',
 'Gastric',
 'Gastric Meds',
 'Glucose (Blood)',
 'Glucose (Serum)',
 'Glucose (Whole Blood)',
 'HR',
 'Half Normal Saline',
 'Hct',
 'Height',
 'Heparin',
 'Hgb',
 'Hydralazine',
 'Hydromorphone',
 'INR',
 'Insulin Humalog',
 'Insulin NPH',
 'Insulin Regular',
 'Insulin largine',
 'Intubated',
 'Jackson-Pratt',
 'KCl',
 'KCl (B

In [None]:
# Display the first element of the loaded `data` pickle.
data[0]

Unnamed: 0,ts_ind,hour,variable,value,TABLE,mean,std
0,0,0.000000,Age,66.000000,,64.053647,56.625699
1,0,0.000000,Gender,1.000000,,0.438951,0.496263
2,0,0.033333,DBP,-0.517967,chart,59.766756,14.994705
3,0,0.033333,GCS_eye,0.679313,chart,3.274060,1.068640
4,0,0.033333,GCS_motor,0.515191,chart,5.271144,1.414728
...,...,...,...,...,...,...,...
81478793,57281,20.400000,MBP,0.195381,chart,78.552377,17.645628
81478794,57281,20.400000,O2 Saturation,-0.678068,chart,96.820961,4.160290
81478795,57281,20.400000,RR,0.179866,chart,26.278501,15.130729
81478796,57281,20.400000,SBP,-0.404061,chart,120.239648,25.341836


In [30]:
# NOTE(review): the list returned here is discarded, because `feats` is
# reassigned to the literal list further down in this cell. Confirm whether the
# call is still needed (e.g. for side effects) before removing it.
feats = scu.get_features_for_sepsischeck()
# Only run on IDs that we have predictions for.
IDs = scu.get_unique_admissions(preds)
# Hand-picked feature subset; this replaces the value assigned above.
feats = ['GCS_motor',
 'GCS_eye',
 'GCS_verbal','Platelet Count',
 'Bilirubin (Total)',
 'Creatinine Urine',
 'DBP',
 'SBP',
 'Urine']

In [7]:
# Peek at the first ten ground-truth sepsis labels.
list(ground_truth["sepsis_label"][:10])

[0, 1, 0, 0, 0, 0, 0, 0, 0, 0]

In [6]:
def restore_values(normalized, mean, std):
    """
    Undo the normalisation applied by the preprocessing script.

    The preprocessing computes normalized = (value - mean) / std, so the
    original value is recovered as value = normalized * std + mean.
    """
    rescaled = normalized * std
    return rescaled + mean

def restore_predictions(predictions, mapping=map, mean_stds=mean_stds):
    """
    Reshape flattened forecast vectors into one row per forecast step.

    Every entry of ``predictions`` is a flat sequence of
    ``steps * len(mapping)`` values. Each entry is reshaped to
    ``(steps, len(mapping))`` and all entries are stacked into one frame.

    Parameters
    ----------
    predictions : pandas.Series
        Flattened forecast vectors; entries are read positionally via ``.iloc``.
    mapping : sequence
        Column names for the reshaped frame. Its length defines the number of
        variables per step (previously hard-coded as 133).
    mean_stds : pandas.DataFrame
        Currently unused. De-normalisation with ``restore_values`` is
        deliberately disabled, so the returned values are exactly the values
        found in ``predictions``.

    Returns
    -------
    pandas.DataFrame
        Two-level index (position of the entry in ``predictions``, step within
        that entry) with ``mapping`` as columns. Empty, with the same columns,
        when ``predictions`` has no entries.

    Raises
    ------
    ValueError
        If an entry's length is not a multiple of ``len(mapping)``.
    """
    n_vars = len(mapping)
    n_entries = len(predictions)
    if n_entries == 0:
        # Nothing to reshape; pd.concat would reject an empty list.
        return pd.DataFrame(columns=mapping)

    # -1 lets numpy infer the number of steps per entry from the entry itself
    # instead of assuming every entry has the length of the first one.
    blocks = [
        pd.DataFrame(np.asarray(predictions.iloc[i]).reshape((-1, n_vars)))
        for i in range(n_entries)
    ]
    df = pd.concat(blocks, keys=np.arange(n_entries))
    df.columns = mapping
    return df

In [9]:
# Build one flat vector per admission from its forecast predictions
# (flattened time series, used as classifier input).
forecast_table = preds[0]
datalist2 = []
for ts_ind in scu.tqdm(IDs[:7000], leave=True):
    # The forecast rows ("obs_window", "forecasting_pred") live in preds[0].
    # data[0], as displayed earlier in this notebook, only has
    # ts_ind/hour/variable/value/TABLE/mean/std, so selecting these columns
    # from it raises a KeyError.
    df_raw = forecast_table.loc[forecast_table["ts_ind"] == ts_ind]
    predicted = df_raw[["obs_window", "forecasting_pred"]]
    predictions = restore_predictions(
        predicted["forecasting_pred"], mapping=map, mean_stds=mean_stds
    )
    # Fixed (sorted) column order so every flattened vector has the same layout.
    predictions = predictions.reindex(sorted(predictions.columns), axis=1)
    datalist2.append(list(predictions.values.flatten()))

# Replace the strings "True"/"False" by 1/0; every other value is kept as is.
mapping = {"True": 1, "False": 0}
datalist = [[mapping.get(x, x) for x in row] for row in datalist2]
# Display the frame of the last processed admission.
predictions

  0%|          | 0/7000 [00:00<?, ?it/s]

100%|██████████| 7000/7000 [00:08<00:00, 833.43it/s]


Unnamed: 0,Unnamed: 1,ALP,ALT,AST,Albumin,Albumin 25%,Albumin 5%,Amiodarone,Anion Gap,Antibiotics,BUN,...,Total CO2,Ultrafiltrate,Unknown,Urine,Vancomycin,Vasopressin,WBC,Weight,pH Blood,pH Urine
0,0,-1.101284,-1.014427,-0.52373,0.420757,1.200858,-0.98842,2.510878,-0.371443,0.998616,-0.41581,...,1.056103,8.69489,-0.052206,0.681915,0.028165,1.427152,-0.534683,-0.450255,-1.15307,0.518613
0,1,-1.002865,-0.663647,-0.386386,0.313671,0.604375,-0.974044,2.235154,-0.820001,0.998133,-0.795485,...,1.209445,7.175538,-0.431301,0.850762,-0.073725,0.924427,-0.358182,-0.393979,-0.834589,0.390203
0,2,-0.991415,-0.574903,-0.346113,0.329304,0.562178,-1.014654,2.154635,-0.900549,0.998101,-0.93981,...,1.296959,7.102904,-0.577562,0.868316,-0.084471,0.757153,-0.362649,-0.479172,-0.78735,0.423409
0,3,-0.993957,-0.486125,-0.275525,0.393127,0.537117,-1.022254,2.143289,-0.917201,0.998004,-1.022066,...,1.359658,6.794025,-0.643185,0.80924,-0.089363,0.698698,-0.346855,-0.537914,-0.790834,0.440414
0,4,-1.003973,-0.401307,-0.198228,0.474632,0.52417,-1.003469,2.149922,-0.885611,0.99785,-1.080612,...,1.452738,6.319776,-0.70004,0.710715,-0.089702,0.67152,-0.325992,-0.573125,-0.810235,0.419267
0,5,-0.960633,-0.320211,-0.124473,0.447396,0.533642,-0.961801,2.123804,-0.834386,0.997721,-1.081393,...,1.486663,5.664063,-0.73614,0.537817,-0.09188,0.697553,-0.37809,-0.57808,-0.780137,0.332772
0,6,-0.907821,-0.269046,-0.083177,0.33092,0.512093,-0.961738,2.069494,-0.877621,0.997663,-1.060831,...,1.480301,5.401171,-0.7257,0.428626,-0.104321,0.671695,-0.415848,-0.552633,-0.706082,0.278089
0,7,-0.839953,-0.286512,-0.071055,0.297109,0.484002,-0.98674,2.074739,-0.895762,0.997681,-1.109492,...,1.507592,5.466467,-0.72759,0.403497,-0.121525,0.587974,-0.415917,-0.556024,-0.648932,0.292267
0,8,-0.772487,-0.311277,-0.054677,0.270687,0.492043,-0.991954,2.132844,-0.884255,0.997691,-1.145228,...,1.505033,5.502867,-0.737237,0.366669,-0.133018,0.541133,-0.424946,-0.549634,-0.499363,0.298909
0,9,-0.734242,-0.314998,0.015088,0.223051,0.513029,-0.965527,2.157679,-0.830218,0.997652,-1.145533,...,1.423444,5.414445,-0.72774,0.346196,-0.128901,0.511965,-0.427956,-0.480494,-0.31535,0.262295


In [11]:
# Column names of `predictions`, i.e. of the frame left over from the last
# iteration of the loop that built the vectors.
cols = list(predictions.columns)
len(cols)

133

In [12]:
from collections import Counter

In [13]:
# Count the vector lengths and show each length's share in percent to find a
# suitable cutoff (all data needs to be the same length). The lengths are
# all features * all timesteps, since each series is flattened for classification.
list_of_lengths = [len(vector) for vector in datalist]
c = Counter(list_of_lengths)
[(length, n_vectors / len(list_of_lengths) * 100.0) for length, n_vectors in c.most_common()]



[(1596, 100.0)]

In [14]:
# Pick the cutoff based on the length distribution, then keep only the vectors
# (and their ground-truth labels) that are at least that long and trim them.
thresh = 12*len(cols) #length of forecast * number of features
labels = ground_truth["sepsis_label"]
long_enough = [(pos, vector) for pos, vector in enumerate(datalist) if len(vector) >= thresh]
cutoff = [vector for _, vector in long_enough]
ground_truth_cutoff = [labels[pos] for pos, _ in long_enough]

datalist_trimmed = [vector[:thresh] for vector in cutoff]


In [15]:
# Sanity check: number of trimmed vectors next to the number of kept labels.
print(len(datalist_trimmed),len(ground_truth_cutoff))

7000 7000


In [16]:
# Check that the shortest trimmed vector has the length we intended.
# (A comprehension is used on purpose: the builtin `map` is shadowed in this notebook.)
list_of_lengths2 = [len(vector) for vector in datalist_trimmed]
min_len2 = min(list_of_lengths2)
min_len2

1596

In [17]:
import random

# Data splitting.
# A dedicated, seeded generator makes the shuffle reproducible, so the
# train/test membership (and every metric computed from it) is the same after
# a kernel restart. It also leaves the global `random` state untouched.
SPLIT_SEED = 100
N_TRAIN = 5500  # number of shuffled samples used for training; the rest is the test set

# Shuffle vectors and labels together so that each pair stays aligned.
temp = list(zip(datalist_trimmed, ground_truth_cutoff))
random.Random(SPLIT_SEED).shuffle(temp)
dataset, truths = zip(*temp)
# zip(*...) yields tuples, so convert both back to lists.
dataset, truths = list(dataset), list(truths)

train = dataset[:N_TRAIN]
train_Y = truths[:N_TRAIN]

test = dataset[len(train):]
test_Y = truths[len(train):]

In [18]:
# Total number of labels across the train and test partitions.
len(train_Y) + len(test_Y)

7000

In [21]:
# Build one flat forecast vector per admission for the IDs from position 7000
# onward (flattened time series, used as classifier input).
forecast_rows = preds[0]
pdatalist2 = []
for ts_ind in scu.tqdm(IDs[7000:], leave=True):
    # All observation windows and forecasting predictions of this admission.
    pre = forecast_rows.loc[forecast_rows["ts_ind"] == ts_ind]
    predicted = pre[["obs_window", "forecasting_pred"]]
    predictions = restore_predictions(
        predicted["forecasting_pred"], mapping=map, mean_stds=mean_stds
    )

    # Keep only the columns also present in `cols`, in sorted order.
    shared_columns = predictions.columns.intersection(cols)
    predictions = predictions[shared_columns]
    predictions = predictions.reindex(sorted(predictions.columns), axis=1)
    pdatalist2.append(list(predictions.values.flatten()))

# Replace the strings "True"/"False" by 1/0; every other value is kept as is.
mapping = {"True": 1, "False": 0}
pdatalist = [[mapping.get(x, x) for x in row] for row in pdatalist2]


100%|██████████| 3557/3557 [00:05<00:00, 676.75it/s]


In [22]:
# Number of columns of the last `predictions` frame built by the loop.
len(predictions.columns)

133

In [23]:
# Count the vector lengths and show each length's share in percent to find a
# suitable cutoff (all data needs to be the same length). The lengths are
# all features * all timesteps, since each series is flattened for classification.
list_of_lengths = [len(vector) for vector in pdatalist]
c = Counter(list_of_lengths)
[(length, n_vectors / len(list_of_lengths) * 100.0) for length, n_vectors in c.most_common()]

[(1596, 100.0)]

In [24]:
from sklearn.svm import LinearSVC
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

In [51]:
# Previously tried configurations, kept for reference. All of them used
# class_weight="balanced", max_iter=1500, min_samples_leaf=50,
# max_leaf_nodes=150, random_state=100, validation_fraction=0.1, verbose=1,
# early_stopping="auto":
#   l2_regularization=0.01                      -> macro f1: 0.56
#   l2_regularization=0.05 (was named `clf1`)   -> macro f1: .57
#   l2_regularization=0.01, train on forecasts  -> macro f1: .6
#
# Active configuration (train on forecasts), macro f1: .6
clf = make_pipeline(
    StandardScaler(),
    HistGradientBoostingClassifier(
        class_weight="balanced",
        max_iter=500,
        min_samples_leaf=100,
        max_leaf_nodes=250,
        scoring='f1_macro',
        random_state=100,
        validation_fraction=0.1,
        verbose=1,
        l2_regularization=0.05,
        early_stopping="auto",
    ),
)

In [52]:
from sklearn.model_selection import GridSearchCV
# Disabled grid search: when these three lines are enabled, `clf` is rebound to
# a GridSearchCV over HistGradientBoostingClassifier before fitting.
#parameters = {'scoring':['f1_macro'],'class_weight':['balanced'], 'max_iter':[250, 500, 750], 'min_samples_leaf':[20, 50, 100, 150], 'max_leaf_nodes':[31, 50, 100, 200], 'l2_regularization':[0.1, 0.01], 'random_state':[100], 'learning_rate':[0.1, 0.01, 0.001]}
#hGB =  HistGradientBoostingClassifier()
#clf = GridSearchCV(hGB, parameters, verbose=1)
# Fit whatever `clf` currently is on the training vectors and their labels.
clf.fit(train, train_Y)

Binning 0.070 GB of training data: 1.107 s
Fitting gradient boosted rounds:
[1/500] 1 tree, 36 leaves, max depth = 8, in 0.307s
[2/500] 1 tree, 31 leaves, max depth = 10, in 0.254s
[3/500] 1 tree, 34 leaves, max depth = 9, in 0.253s
[4/500] 1 tree, 34 leaves, max depth = 9, in 0.253s
[5/500] 1 tree, 36 leaves, max depth = 8, in 0.294s
[6/500] 1 tree, 37 leaves, max depth = 9, in 0.319s
[7/500] 1 tree, 37 leaves, max depth = 9, in 0.277s
[8/500] 1 tree, 37 leaves, max depth = 10, in 0.306s
[9/500] 1 tree, 36 leaves, max depth = 10, in 0.294s
[10/500] 1 tree, 39 leaves, max depth = 11, in 0.306s
[11/500] 1 tree, 35 leaves, max depth = 12, in 0.284s
[12/500] 1 tree, 43 leaves, max depth = 10, in 0.327s
[13/500] 1 tree, 37 leaves, max depth = 11, in 0.290s
[14/500] 1 tree, 38 leaves, max depth = 9, in 0.279s
[15/500] 1 tree, 39 leaves, max depth = 13, in 0.320s
[16/500] 1 tree, 40 leaves, max depth = 11, in 0.322s
[17/500] 1 tree, 40 leaves, max depth = 11, in 0.321s
[18/500] 1 tree, 43 le

In [None]:
# NOTE(review): `cv_results_` is an attribute of a fitted GridSearchCV. This
# cell therefore only works when `clf` is the grid-search object from the
# disabled code, not the pipeline created with make_pipeline.
grid = pd.DataFrame(clf.cv_results_)
grid


Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_class_weight,param_l2_regularization,param_max_iter,param_max_leaf_nodes,param_min_samples_leaf,param_random_state,param_scoring,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,1.957123,0.010389,0.004400,0.000491,balanced,0.1,100,31,10,100,f1_macro,"{'class_weight': 'balanced', 'l2_regularizatio...",0.893491,0.890533,0.896450,0.893175,0.893175,0.893365,0.001878,39
1,1.751053,0.010186,0.004611,0.000499,balanced,0.1,100,31,20,100,f1_macro,"{'class_weight': 'balanced', 'l2_regularizatio...",0.905325,0.878698,0.872781,0.878338,0.878338,0.882696,0.011527,185
2,0.998498,0.008124,0.004200,0.000749,balanced,0.1,100,31,50,100,f1_macro,"{'class_weight': 'balanced', 'l2_regularizatio...",0.884615,0.878698,0.869822,0.860534,0.857567,0.870247,0.010326,231
3,0.554790,0.006007,0.003796,0.000752,balanced,0.1,100,31,100,100,f1_macro,"{'class_weight': 'balanced', 'l2_regularizatio...",0.881657,0.857988,0.837278,0.804154,0.824926,0.841201,0.026725,271
4,0.387840,0.009159,0.004202,0.000398,balanced,0.1,100,31,150,100,f1_macro,"{'class_weight': 'balanced', 'l2_regularizatio...",0.816568,0.834320,0.792899,0.780415,0.771513,0.799143,0.023213,296
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
295,9.159856,0.225768,0.019000,0.002281,balanced,0.001,750,200,10,100,f1_macro,"{'class_weight': 'balanced', 'l2_regularizatio...",0.899408,0.890533,0.887574,0.905045,0.899110,0.896334,0.006378,9
296,8.640234,0.177233,0.018328,0.002044,balanced,0.001,750,200,20,100,f1_macro,"{'class_weight': 'balanced', 'l2_regularizatio...",0.896450,0.890533,0.890533,0.910979,0.893175,0.896334,0.007639,3
297,7.549422,0.107907,0.016539,0.001766,balanced,0.001,750,200,50,100,f1_macro,"{'class_weight': 'balanced', 'l2_regularizatio...",0.905325,0.884615,0.878698,0.899110,0.887240,0.890998,0.009770,73
298,5.016244,0.122234,0.018847,0.005627,balanced,0.001,750,200,100,100,f1_macro,"{'class_weight': 'balanced', 'l2_regularizatio...",0.905325,0.890533,0.893491,0.872404,0.872404,0.886831,0.012778,142


In [None]:
# Parameter set of the best-ranked grid-search candidate.
# NOTE(review): requires `clf` to be a fitted GridSearchCV (see `best_index_`).
clf.cv_results_['params'][clf.best_index_]

{'class_weight': 'balanced',
 'l2_regularization': 0.001,
 'max_iter': 500,
 'max_leaf_nodes': 31,
 'min_samples_leaf': 20,
 'random_state': 100,
 'scoring': 'f1_macro'}

In [49]:
# Evaluate the fitted classifier on the test part of the shuffled split.
# (With a grid search, `clf.best_estimator_` would hold the refitted model.)
test_score = clf.score(test, test_Y)
print("Score:", test_score)
pr = clf.predict(test)
print(report(y_true=test_Y, y_pred=pr))

Score: 0.91
              precision    recall  f1-score   support

           0       0.92      0.99      0.95      1367
           1       0.46      0.10      0.16       133

    accuracy                           0.91      1500
   macro avg       0.69      0.54      0.56      1500
weighted avg       0.88      0.91      0.88      1500



In [50]:
# Evaluate on the vectors built from IDs[7000:].
# NOTE(review): the labels are taken as ground_truth rows from 7000 onward, which
# presumes IDs is in the same ts_ind order as ground_truth -- confirm.
holdout_Y = list(ground_truth["sepsis_label"][7000:])
pr = clf.predict(pdatalist)
print("Score:", clf.score(pdatalist, holdout_Y))
print("Prediction Preview:\n", pr[:100])
print(report(y_true=holdout_Y, y_pred=pr))

Score: 0.9111610908068597
Prediction Preview:
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
              precision    recall  f1-score   support

           0       0.92      0.98      0.95      3244
           1       0.49      0.16      0.24       313

    accuracy                           0.91      3557
   macro avg       0.70      0.57      0.60      3557
weighted avg       0.89      0.91      0.89      3557



In [68]:
# Class probabilities for a single test vector (index 5), reshaped to one row.
clf.predict_proba(np.array(test[5]).reshape(1,-1))

array([[0.91722849, 0.08277151]])

In [23]:
# Display the labels of the test partition (prints the whole list).
test_Y

[1,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,


In [27]:
# Display the ground-truth sepsis labels from index 8557 onward.
list(ground_truth["sepsis_label"][8557:])

[0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
