# Sepsis ML Real Time Implementation
Jupyter notebook created to display sepsis_ml_run.py


In [1]:
# import all libraries
import sepsis_ml as ml
import numpy as np
import pandas as pd
import argparse
import logging
from datetime import datetime, timedelta
import requests
from requests.auth import HTTPBasicAuth
import json
pd.options.display.max_columns = 500
pd.options.display.max_rows = 500
pd.options.display.float_format = '{:,.4f}'.format

In [2]:
# will not print the following columns to this notebook due to PHI
hide = ["csn", "pat_id", "age", "is_Female", "is_asian", "is_white", "is_black", "is_other", "hospital_discharge_date_time", "hospital_admission_date_time"]

## API Connection Helper Functions

In [88]:
# GET API Call
def connect(url, username, password):
    response = requests.get(url,auth=HTTPBasicAuth(username, password), verify=True)
    data = json.loads(response.content)
    
    hasMore = data["hasMore"]
    offset = data["offset"]

    df = pd.DataFrame(data['items'])
   
    while hasMore:
        querystring = {"offset": offset+ 1000}
        response = requests.get(url,auth=HTTPBasicAuth(username, password), verify=True, params = querystring)
        data = json.loads(response.content)

        hasMore = data["hasMore"]
        offset = data["offset"]

        df = pd.concat([df,pd.DataFrame(data['items'])])

    return df

In [3]:
# POST API Call
def upload(url, username, password, output):
    response = requests.post(url,headers={"Content-Type":"application/json"},auth=HTTPBasicAuth(username, password), data = output)
    print(response)

In [4]:
def select_process_data(test_data, current_time):
    
    # 1) filter only encounters within 48 hour from current time
    final_test = test_data.copy()
    
    final_test.loc[:,"keep"] = (current_time - final_test["hospital_admission_date_time"]).dt.total_seconds()/3600
    keep_csns_filter1 = final_test.loc[final_test["keep"] <= 49].csn.unique()
    final_test = final_test[final_test["csn"].isin(keep_csns_filter1)]
    
    # 2) filter data within 0-48 hour since admission
    final_test["keep"] = (final_test["recorded_time"] - final_test["hospital_admission_date_time"]).dt.total_seconds()/3600.0
    final_test["keep"] = (final_test["keep"] <=49) & (final_test["keep"] >= 0)
    final_test = final_test.copy()
    final_test = final_test.loc[final_test.keep, :].reset_index(drop = True)
    final_test.drop(["keep"], axis = 1, inplace = True)
    
    return final_test

In [68]:
total_feature_list = ['HR','O2Sat','Temp','SBP','DBP','MAP','Resp','EtCO2',\
                'AST','Alkalinephos','BUN','BaseExcess','Bilirubin_total',\
                'Calcium','Chloride','Creatinine','FiO2','Glucose',\
                'HCO3','Hct','Hgb','Lactate','Magnesium',\
                'PTT', 'PaCO2','PaO2','Phosphate','Platelets','Potassium',\
                'SaO2','Sodium','WBC','pH',\
                'gcs_total_score','age','is_female',\
                'HR_interval_f1','HR_interval_f2','HR_diff',\
                'O2Sat_interval_f1','O2Sat_interval_f2','O2Sat_diff',\
                'Temp_interval_f1','Temp_interval_f2','Temp_diff',\
                'SBP_interval_f1','SBP_interval_f2','SBP_diff',\
                'MAP_interval_f1','MAP_interval_f2','MAP_diff',\
                'DBP_interval_f1','DBP_interval_f2','DBP_diff',\
                'Resp_interval_f1','Resp_interval_f2','Resp_diff',\
                'EtCO2_interval_f1','EtCO2_interval_f2','EtCO2_diff',\
                'AST_interval_f1','AST_interval_f2','AST_diff',\
                'Alkalinephos_interval_f1','Alkalinephos_interval_f2','Alkalinephos_diff',\
                'BUN_interval_f1','BUN_interval_f2','BUN_diff',\
                'BaseExcess_interval_f1','BaseExcess_interval_f2','BaseExcess_diff',\
                'Bilirubin_total_interval_f1','Bilirubin_total_interval_f2','Bilirubin_total_diff',\
                'Calcium_interval_f1','Calcium_interval_f2','Calcium_diff',\
                'Chloride_interval_f1','Chloride_interval_f2','Chloride_diff',\
                'Creatinine_interval_f1','Creatinine_interval_f2','Creatinine_diff',\
                'FiO2_interval_f1','FiO2_interval_f2','FiO2_diff',\
                'Glucose_interval_f1','Glucose_interval_f2','Glucose_diff',\
                'HCO3_interval_f1','HCO3_interval_f2','HCO3_diff',\
                'Hct_interval_f1','Hct_interval_f2','Hct_diff',\
                'Hgb_interval_f1','Hgb_interval_f2','Hgb_diff',\
                'Lactate_interval_f1','Lactate_interval_f2','Lactate_diff',\
                'Magnesium_interval_f1','Magnesium_interval_f2','Magnesium_diff',\
                'PTT_interval_f1','PTT_interval_f2','PTT_diff',\
                'PaCO2_interval_f1','PaCO2_interval_f2','PaCO2_diff',\
                'PaO2_interval_f1','PaO2_interval_f2','PaO2_diff',\
                'Phosphate_interval_f1','Phosphate_interval_f2','Phosphate_diff',\
                'Platelets_interval_f1','Platelets_interval_f2','Platelets_diff',\
                'Potassium_interval_f1','Potassium_interval_f2','Potassium_diff',\
                'SaO2_interval_f1','SaO2_interval_f2','SaO2_diff',\
                'Sodium_interval_f1','Sodium_interval_f2','Sodium_diff',\
                'WBC_interval_f1','WBC_interval_f2','WBC_diff',\
                'pH_interval_f1','pH_interval_f2','pH_diff',\
                'HR_mean','O2Sat_mean','SBP_mean','MAP_mean','Resp_mean',\
                'HR_median','O2Sat_median','SBP_median','MAP_median','Resp_median',\
                'HR_min','O2Sat_min','SBP_min','MAP_min','Resp_min',\
                'HR_max','O2Sat_max','SBP_max','MAP_max','Resp_max',\
                'HR_std','O2Sat_std','SBP_std','MAP_std','Resp_std',\
                'HR_dstd','O2Sat_dstd','SBP_dstd','MAP_dstd','Resp_dstd',\
                'HR_score','Temp_score','Resp_score','MAP_score','Creatinine_score','qsofa','Platelets_score','Bilirubin_score','SIRS']

In [62]:
def get_hist_data(test_data, current_time):
    
    hist_data = test_data.copy()
    hist_data["past_time"] = (current_time - hist_data["recorded_time"]).dt.total_seconds()/3600
    
    g= total_feature_list[:34]
   
    hist_data[g] = hist_data[g].notnull().astype('int')

    t = hist_data["past_time"].values
    t = np.vstack([t]*len(g))

    hist_data[g] = np.where(hist_data[g] == 1, np.transpose(t), hist_data[g])

    hist_data.replace(0, np.nan, inplace = True)
    
    hist_data[g] = hist_data.groupby(by = ["csn", "pat_id"])[g].fillna(method = 'ffill')
    
    hist_data.drop_duplicates(subset = ["csn", "pat_id"], keep = "last", inplace = True)
    
    add_cols = total_feature_list[34:]
    
    nan_df = pd.DataFrame(columns = add_cols)
    hist_data = pd.concat([hist_data, nan_df], axis =1)
    hist_data.iloc[:,3:] = round(hist_data.iloc[:,3:],3)
    
    return hist_data

In [82]:
def run(test_data):
    
    dtypes = {"pat_id": int, "csn": int, "HR": float, "O2Sat": float, "Temp": float, 
             "SBP": float, "DBP": float, "Resp": float, "EtCO2": float, 
             "AST": float, "Alkalinephos": float, "BUN": float, "BaseExcess": float,
             "Bilirubin_total": float, "Calcium": float, "Creatinine": float,
             "FiO2": float, "Glucose": float, "HCO3": float, "Hct": float, 
             "Hgb": float, "Lactate": float, "Magnesium": float, "PTT": float,
             "PaCO2": float, "PaO2": float, "Phosphate": float, "Platelets": float,
             "Potassium": float, "SaO2": float, "Sodium": float, "WBC": float,
             "pH": float, "gcs_total_score": float, "age": float, "gender": int}
    
    #test_data = pd.read_csv(test_data_path, dtype = dtypes, parse_dates= ["recorded_time", "hospital_admission_date_time"])
    
    thresh = {"HR": (0,250),
                 "O2Sat": (0,100),
                 "Temp": (25,45),
                 "SBP": (0,260),
                 "DBP": (0, 220),
                 "MAP": (0,260),
                 'Resp': (0,80),
                 'EtCO2': (0, 60),
                  "pH": (6.7, 8),
                  "PaCO2": (15, 150),
                  "SaO2": (0,100),
                  "AST": (0, 10000),
                  "BUN": (0,200),
                  "Alkalinephos": (0, 10000),
                  "Calcium": (0,20),
                  "Chloride": (60,150),
                  "Creatinine": (0, 15),
                  "Glucose": (0, 1200),
                  "Lactate": (0,20),
                  "Magnesium": (0,10), 
                  "Phosphate": (0,20),
                  "Potassium": (0,10),
                  "Bilirubin_total": (0,30),
                  "Hct": (0, 75),
                   "Hgb": (0,25),
                   "PTT": (0,150),
                   "WBC": (0,150),
                   "Platelets": (0,1000)}

    test_data = ml.data_clean(test_data, thresh)
    
    variables = total_feature_list[:34]
    
    demographics = ["pat_id", "csn", "age", "is_female"]
    
    stat = test_data[demographics].drop_duplicates()
    
    test_data = test_data.drop(["age", "is_female"], axis = 1)
    
    test_data = ml.resampling(test_data)
    test_data = test_data.groupby(["pat_id", "csn"]).apply(lambda v: ml.rolling_overlap(v, 6, variables, 3))
    test_data = test_data.drop(["pat_id", "csn"], axis = 1).reset_index(drop = False).rename(columns = {"level_2" : "los"})
    test_data = test_data.merge(stat, on = ["pat_id", "csn"], how = "left")
  
    return test_data

# Main Function

## 1. Preprocess

### 1.0 Read Data

In [90]:
# getting current time
current_time = pd.to_datetime(str(pd.Timestamp.now())[:-7])
print(current_time)

2022-08-13 22:27:36


In [None]:
# credidentials for API
username = 'Sepsis_ML'
password = 'jfVDS756F$jkf&@*'

# receive input data with GET
url = 'https://prd-rta-app01.eushc.org:8443/ords/rta/sepsisml/derivedcache'
test_data = connect(url, username, password)
    
print("completed reading")

In [106]:
test_data = pd.read_csv("sample_from_API_GET.csv")

In [107]:
test_data.iloc[:,-4:]

Unnamed: 0,display,result_val,valid_from_dt_tm,event_start_dt_tm
0,Heart Rate,101.0000,2022-08-12T23:38:35Z,2022-08-12T22:00:00Z
1,Heart Rate,102.0000,2022-08-12T23:38:35Z,2022-08-12T23:00:00Z
2,Heart Rate,106.0000,2022-08-13T00:17:32Z,2022-08-13T00:16:00Z
3,Heart Rate,107.0000,2022-08-13T03:03:50Z,2022-08-13T00:00:00Z
4,Heart Rate,104.0000,2022-08-13T03:03:50Z,2022-08-13T01:00:00Z
...,...,...,...,...
4995,"MAP, Cuff",73.0000,2022-08-13T04:10:53Z,2022-08-13T04:06:00Z
4996,"MAP, Cuff",65.0000,2022-08-13T07:40:33Z,2022-08-13T07:37:00Z
4997,"MAP, Cuff",77.0000,2022-08-13T12:12:35Z,2022-08-13T12:11:00Z
4998,"MAP, Cuff",83.0000,2022-08-13T15:52:15Z,2022-08-13T15:51:00Z


In [108]:
# convert name mapping used in later step
sepsis_ml_map = pd.read_csv("sepsis_ml_map.csv")
sepsis_ml_map = sepsis_ml_map[["Category", "event_cd"]]
sepsis_ml_map

Unnamed: 0,Category,event_cd
0,PTT,1513062
1,PTT,1513062
2,Calcium,1514676
3,Calcium,1514676
4,Magnesium,257696134
5,Magnesium,257696134
6,AST,1514885
7,BaseExcess,116559600
8,HCO3,665495419
9,HCO3,665486990


### 1.1 Clean Data
- column name changes
- convert numerics, datetime
- filter only <= 48 hour data since hospital admission

In [109]:
# renaming
test_data = test_data.rename(columns = {"person_id": "pat_id", "encntr_id": "csn",
                                            "gender_disp": "is_female",
                                            "arrive_dt_tm": "hospital_admission_date_time",
                                            "event_start_dt_tm": "recorded_time"})

# convert days to ET but w/o time zone
for col in ["recorded_time", "hospital_admission_date_time", "birth_dt_tm"]:
    test_data[col] = pd.to_datetime(test_data[col]).dt.tz_convert('US/Eastern')
    test_data[col] = test_data[col].dt.tz_localize(None)
# convert numeric values
test_data["result_val"] = pd.to_numeric(test_data["result_val"], errors='coerce')
test_data["pat_id"] = pd.to_numeric(test_data["pat_id"], errors='coerce')
test_data["csn"] = pd.to_numeric(test_data["csn"], errors='coerce')

In [110]:
# filter only 48 hour data
test_data  = select_process_data(test_data, current_time)

### 1.2 Pivot Columns


In [111]:
# derive age
test_data["age"] = np.floor((test_data["hospital_admission_date_time"] - test_data["birth_dt_tm"]).dt.total_seconds() / (60 * 60 * 24 * 365))


### STATIC DATA EXTRACTION ###
static = test_data[["csn", "pat_id", "is_female", "hospital_admission_date_time","age"]].copy()
static = static.drop_duplicates()
static.loc[~(static["is_female"].isin(["Female", "Male"])), "is_female"] = np.nan
static["is_female"] = static["is_female"].replace({"Female": 1, "Male" : 0}).fillna(-1)

    
### LONGITUDINAL DATA EXTRACTION ###
test_data = test_data.merge(sepsis_ml_map, on = "event_cd", how = "left")
test_data = test_data.pivot_table(index=['csn', "pat_id", "recorded_time"], columns='Category', values='result_val')
test_data = test_data.reset_index()
    

In [112]:
test_data = test_data.merge(static, on = ["csn", "pat_id"], how = "left")
    
for c in list(sepsis_ml_map.Category.unique()):
    if not (c in test_data.columns): 
        test_data[c] = np.nan
            
            
# Priority given to line measurements

test_data["SBP"] = test_data["SBP"].fillna(test_data["SBP_Cuff"])
test_data["MAP"] = test_data["MAP"].fillna(test_data["MAP_Cuff"])
test_data["DBP"] = test_data["DBP"].fillna(test_data["DBP_Cuff"])

In [113]:
test_data.loc[:, ~test_data.columns.isin(hide)].head(5)

Unnamed: 0,recorded_time,AST,Alkalinephos,BUN,Bilirubin_total,Calcium,Chloride,Creatinine,DBP_Cuff,EtCO2,Glucose,HCO3,HR,Hct,Hgb,MAP_Cuff,Magnesium,O2Sat,PTT,PaCO2,PaO2,Phosphate,Platelets,Potassium,Resp,SBP_Cuff,Sodium,Temp,WBC,gcs_total_score,pH,is_female,BaseExcess,Lactate,SaO2,FiO2,Fibrinogen,SBP,DBP,MAP
0,2022-08-12 15:22:00,,,,,,,,92.0,,,,89.0,,,114.0,,100.0,,,,,,,16.0,159.0,,36.6,,,,0,,,,,,159.0,92.0,114.0
1,2022-08-12 15:30:00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,15.0,,0,,,,,,,,
2,2022-08-12 16:16:00,,,,,,,,85.0,,,,83.0,,,107.0,,96.0,,,,,,,17.0,153.0,,36.7,,,,0,,,,,,153.0,85.0,107.0
3,2022-08-12 21:55:00,,,,,,,,80.0,,,,86.0,,,99.0,,100.0,,,,,,,16.0,138.0,,37.3,,,,0,,,,,,138.0,80.0,99.0
4,2022-08-12 23:58:00,,,10.0,,8.6,105.0,0.89,,,131.0,,,36.3,11.5,,,,,,,,369.0,4.2,,,137.0,,7.2,,,0,,,,,,,,


## (Only for RealTime) Extract last known values for later use

In [114]:
temp_test_data = test_data.loc[:,["pat_id", "csn", "recorded_time"] + total_feature_list[:34]].copy()

In [115]:
hist_data = get_hist_data(temp_test_data, current_time).reset_index(drop = True)

In [116]:
hist_data.loc[:, ~hist_data.columns.isin(hide)].head(5)

Unnamed: 0,recorded_time,HR,O2Sat,Temp,SBP,DBP,MAP,Resp,EtCO2,AST,Alkalinephos,BUN,BaseExcess,Bilirubin_total,Calcium,Chloride,Creatinine,FiO2,Glucose,HCO3,Hct,Hgb,Lactate,Magnesium,PTT,PaCO2,PaO2,Phosphate,Platelets,Potassium,SaO2,Sodium,WBC,pH,gcs_total_score,past_time,is_female,HR_interval_f1,HR_interval_f2,HR_diff,O2Sat_interval_f1,O2Sat_interval_f2,O2Sat_diff,Temp_interval_f1,Temp_interval_f2,Temp_diff,SBP_interval_f1,SBP_interval_f2,SBP_diff,MAP_interval_f1,MAP_interval_f2,MAP_diff,DBP_interval_f1,DBP_interval_f2,DBP_diff,Resp_interval_f1,Resp_interval_f2,Resp_diff,EtCO2_interval_f1,EtCO2_interval_f2,EtCO2_diff,AST_interval_f1,AST_interval_f2,AST_diff,Alkalinephos_interval_f1,Alkalinephos_interval_f2,Alkalinephos_diff,BUN_interval_f1,BUN_interval_f2,BUN_diff,BaseExcess_interval_f1,BaseExcess_interval_f2,BaseExcess_diff,Bilirubin_total_interval_f1,Bilirubin_total_interval_f2,Bilirubin_total_diff,Calcium_interval_f1,Calcium_interval_f2,Calcium_diff,Chloride_interval_f1,Chloride_interval_f2,Chloride_diff,Creatinine_interval_f1,Creatinine_interval_f2,Creatinine_diff,FiO2_interval_f1,FiO2_interval_f2,FiO2_diff,Glucose_interval_f1,Glucose_interval_f2,Glucose_diff,HCO3_interval_f1,HCO3_interval_f2,HCO3_diff,Hct_interval_f1,Hct_interval_f2,Hct_diff,Hgb_interval_f1,Hgb_interval_f2,Hgb_diff,Lactate_interval_f1,Lactate_interval_f2,Lactate_diff,Magnesium_interval_f1,Magnesium_interval_f2,Magnesium_diff,PTT_interval_f1,PTT_interval_f2,PTT_diff,PaCO2_interval_f1,PaCO2_interval_f2,PaCO2_diff,PaO2_interval_f1,PaO2_interval_f2,PaO2_diff,Phosphate_interval_f1,Phosphate_interval_f2,Phosphate_diff,Platelets_interval_f1,Platelets_interval_f2,Platelets_diff,Potassium_interval_f1,Potassium_interval_f2,Potassium_diff,SaO2_interval_f1,SaO2_interval_f2,SaO2_diff,Sodium_interval_f1,Sodium_interval_f2,Sodium_diff,WBC_interval_f1,WBC_interval_f2,WBC_diff,pH_interval_f1,pH_interval_f2,pH_diff,HR_mean,O2Sat_mean,SBP_mean,MAP_mean,Resp_mean,HR_median,O2Sat_median,SBP_median,MAP_median,Resp_median,HR_min,O2Sat_min,SBP_min,MAP_min,Resp_min,HR_max,O2Sat_max,SBP_max,MAP_max,Resp_max,HR_std,O2Sat_std,SBP_std,MAP_std,Resp_std,HR_dstd,O2Sat_dstd,SBP_dstd,MAP_dstd,Resp_dstd,HR_score,Temp_score,Resp_score,MAP_score,Creatinine_score,qsofa,Platelets_score,Bilirubin_score,SIRS
0,2022-08-13 12:27:00,10.01,10.01,10.01,10.01,10.01,10.01,10.01,,,,18.21,,,18.21,18.21,18.21,,18.21,,18.21,18.21,,18.21,,,,18.21,18.21,18.21,,18.21,18.21,,30.96,10.01,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,2022-08-13 11:20:00,11.127,11.127,11.127,11.127,11.127,11.127,11.127,,,,18.377,,,18.377,18.377,18.377,,18.377,,18.377,18.377,,18.377,,,,18.377,18.377,18.377,,18.377,18.377,,,11.127,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,2022-08-13 12:07:00,10.343,10.343,10.343,10.343,10.343,10.343,10.343,10.343,,,,,,,,16.093,,,,,,,16.093,,,,,,16.093,,,,,,10.343,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,2022-08-13 08:30:00,13.96,13.96,13.96,13.96,13.96,13.96,13.96,,,,,,,,,,,,,,,,,,,,,,,,,,,,13.96,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,2022-08-13 14:15:00,8.21,8.21,8.21,8.21,8.21,13.66,8.21,,,,13.71,,,13.71,13.71,13.71,,13.71,,13.71,13.71,,13.71,,,,,13.71,13.71,,13.71,13.71,,,8.21,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


### 1.3 & 1.4 & 1.5 
- Resample sparse matrix to hourly longitudinal data
- Aggregate data into 3 hour bins with 6 hour sliding window (median)
- Static merge

In [117]:
test_data = test_data[["csn", "pat_id", "recorded_time", "hospital_admission_date_time"] + total_feature_list[:36]].copy()

In [118]:
test_data = run(test_data)
keep_csns = list(test_data.csn.unique())

In [126]:
test_data.loc[:, ~test_data.columns.isin(hide)].head(5)

Unnamed: 0,los,rel_time,HR,O2Sat,Temp,SBP,DBP,MAP,Resp,EtCO2,AST,Alkalinephos,BUN,BaseExcess,Bilirubin_total,Calcium,Chloride,Creatinine,FiO2,Glucose,HCO3,Hct,Hgb,Lactate,Magnesium,PTT,PaCO2,PaO2,Phosphate,Platelets,Potassium,SaO2,Sodium,WBC,pH,gcs_total_score,is_female,HR_interval_f1,HR_interval_f2,HR_diff,O2Sat_interval_f1,O2Sat_interval_f2,O2Sat_diff,Temp_interval_f1,Temp_interval_f2,Temp_diff,SBP_interval_f1,SBP_interval_f2,SBP_diff,MAP_interval_f1,MAP_interval_f2,MAP_diff,DBP_interval_f1,DBP_interval_f2,DBP_diff,Resp_interval_f1,Resp_interval_f2,Resp_diff,EtCO2_interval_f1,EtCO2_interval_f2,EtCO2_diff,AST_interval_f1,AST_interval_f2,AST_diff,Alkalinephos_interval_f1,Alkalinephos_interval_f2,Alkalinephos_diff,BUN_interval_f1,BUN_interval_f2,BUN_diff,BaseExcess_interval_f1,BaseExcess_interval_f2,BaseExcess_diff,Bilirubin_total_interval_f1,Bilirubin_total_interval_f2,Bilirubin_total_diff,Calcium_interval_f1,Calcium_interval_f2,Calcium_diff,Chloride_interval_f1,Chloride_interval_f2,Chloride_diff,Creatinine_interval_f1,Creatinine_interval_f2,Creatinine_diff,FiO2_interval_f1,FiO2_interval_f2,FiO2_diff,Glucose_interval_f1,Glucose_interval_f2,Glucose_diff,HCO3_interval_f1,HCO3_interval_f2,HCO3_diff,Hct_interval_f1,Hct_interval_f2,Hct_diff,Hgb_interval_f1,Hgb_interval_f2,Hgb_diff,Lactate_interval_f1,Lactate_interval_f2,Lactate_diff,Magnesium_interval_f1,Magnesium_interval_f2,Magnesium_diff,PTT_interval_f1,PTT_interval_f2,PTT_diff,PaCO2_interval_f1,PaCO2_interval_f2,PaCO2_diff,PaO2_interval_f1,PaO2_interval_f2,PaO2_diff,Phosphate_interval_f1,Phosphate_interval_f2,Phosphate_diff,Platelets_interval_f1,Platelets_interval_f2,Platelets_diff,Potassium_interval_f1,Potassium_interval_f2,Potassium_diff,SaO2_interval_f1,SaO2_interval_f2,SaO2_diff,Sodium_interval_f1,Sodium_interval_f2,Sodium_diff,WBC_interval_f1,WBC_interval_f2,WBC_diff,pH_interval_f1,pH_interval_f2,pH_diff,HR_mean,O2Sat_mean,SBP_mean,MAP_mean,Resp_mean,HR_median,O2Sat_median,SBP_median,MAP_median,Resp_median,HR_min,O2Sat_min,SBP_min,MAP_min,Resp_min,HR_max,O2Sat_max,SBP_max,MAP_max,Resp_max,HR_std,O2Sat_std,SBP_std,MAP_std,Resp_std,HR_dstd,O2Sat_dstd,SBP_dstd,MAP_dstd,Resp_dstd,HR_score,Temp_score,Resp_score,MAP_score,Creatinine_score,qsofa,Platelets_score,Bilirubin_score,SIRS
68,0,6.0,91.5,98.0,36.7,134.0,83.0,98.0,15.0,2.0,12.0,82.0,17.0,,0.5,9.8,106.0,0.74,,98.0,22.3,39.1,12.8,,2.1,29.7,29.0,95.0,3.6,218.0,4.2,,141.0,5.6,7.51,15.0,1,0.0,-1.0,-6.0,0.0,-1.0,0.0,0.0,-1.0,-0.3,0.0,-1.0,9.5,0.0,-1.0,8.5,0.0,-1.0,9.0,0.0,-1.0,0.0,0.0,-1.0,0.0,1.0,0.0,-5.0,1.0,0.0,1.0,1.0,0.0,0.5,0.0,-1,,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.5,1.0,0.0,-0.03,0.0,-1,,1.0,0.0,-2.5,0.0,-1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,-1,,1.0,0.0,0.0,0.0,-1.0,0.0,0.0,-1.0,0.0,0.0,-1.0,0.0,0.0,-1.0,0.0,1.0,0.0,0.0,1.0,0.0,-0.3,0.0,-1,,1.0,0.0,0.5,1.0,0.0,0.0,0.0,-1.0,0.0,91.5,98.0,134.0,98.0,15.0,91.5,98.0,134.0,98.0,15.0,91.5,98.0,134.0,98.0,15.0,91.5,98.0,134.0,98.0,15.0,,,,,,,,,,,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,False
67,6,11.0,91.5,98.0,36.7,134.0,83.0,98.0,15.0,2.0,34.0,57.0,30.0,,0.7,7.8,106.0,1.96,,114.0,22.3,30.3,9.7,,1.7,29.7,29.0,95.0,3.6,151.0,4.5,,140.0,14.6,7.51,15.0,0,1.0,0.0,-6.0,1.0,0.0,0.0,0.0,-1.0,-0.3,1.0,0.0,9.5,1.0,0.0,8.5,1.0,0.0,9.0,1.0,0.0,0.0,0.0,-1.0,0.0,3.0,0.0,-5.0,3.0,0.0,1.0,3.0,0.0,0.5,0.0,-1,,3.0,0.0,0.0,3.0,0.0,0.0,3.0,0.0,0.5,3.0,0.0,-0.03,0.0,-1,,3.0,0.0,-2.5,0.0,-1.0,0.0,2.0,0.0,0.0,2.0,0.0,0.0,0.0,-1,,0.0,-1.0,0.0,0.0,-1.0,0.0,0.0,-1.0,0.0,0.0,-1.0,0.0,0.0,-1.0,0.0,2.0,0.0,0.0,3.0,0.0,-0.3,0.0,-1,,3.0,0.0,0.5,2.0,0.0,0.0,0.0,-1.0,0.0,111.8333,97.3333,124.6667,92.6667,17.0,122.0,97.0,120.0,90.0,18.0,91.5,97.0,120.0,90.0,15.0,122.0,98.0,134.0,98.0,18.0,17.6092,0.5774,8.0829,4.6188,1.7321,,,,,,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,True
64,12,18.0,122.0,97.0,36.7,120.0,74.0,90.0,18.0,2.0,72.0,798.0,20.0,,2.7,7.4,107.0,0.63,,113.0,22.3,19.7,6.0,,1.7,29.7,29.0,95.0,3.6,159.0,3.8,,139.0,7.0,7.51,15.0,0,1.0,0.0,-6.0,1.0,0.0,0.0,1.0,0.0,-0.3,1.0,0.0,9.5,1.0,0.0,8.5,1.0,0.0,9.0,1.0,0.0,0.0,0.0,-1.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,-1,,4.0,0.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,-1,,4.0,0.0,0.0,0.0,-1.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,-1,,2.0,0.0,0.0,0.0,-1.0,0.0,0.0,-1.0,0.0,0.0,-1.0,0.0,0.0,-1.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,-1,,4.0,0.0,0.0,4.0,0.0,0.0,0.0,-1.0,0.0,79.6,98.6,113.2,80.8,18.0,69.0,99.0,111.5,78.5,18.0,69.0,97.0,111.5,78.5,18.0,122.0,99.0,120.0,90.0,18.0,23.7023,0.8944,3.8013,5.143,0.0,,,,,,2.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,False
59,12,18.0,69.0,99.0,37.1,111.5,68.5,78.5,18.0,2.0,11.0,155.0,40.0,,0.5,8.5,106.0,1.32,,346.0,22.3,23.3,7.8,,1.6,29.7,29.0,95.0,3.6,216.0,4.0,,133.0,7.5,7.51,15.0,0,4.0,0.0,-6.0,2.0,1.0,0.0,4.0,0.0,-0.3,4.0,0.0,9.5,4.0,0.0,8.5,4.0,0.0,9.0,0.0,-1.0,0.0,0.0,-1.0,0.0,2.0,3.0,0.0,2.0,3.0,0.0,4.0,0.0,0.0,0.0,-1,,2.0,3.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,-1,,4.0,0.0,0.0,0.0,-1.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,-1,,2.0,0.0,0.0,2.0,0.0,0.0,0.0,-1.0,0.0,0.0,-1.0,0.0,0.0,-1.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,-1,,4.0,0.0,0.0,4.0,0.0,0.0,0.0,-1.0,0.0,79.6,99.0,111.5,82.4,18.0,80.0,99.0,103.0,80.5,18.0,69.0,99.0,101.0,70.0,18.0,89.0,99.0,140.0,94.0,18.0,7.9246,0.0,16.4697,9.3635,0.0,,,,,,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,False
25,15,23.0,81.5,98.0,36.6,124.0,76.0,89.5,16.0,2.0,,,22.0,,,8.9,106.0,0.78,,88.0,,40.7,14.1,,1.9,,,,3.6,224.0,4.3,,140.0,7.9,,15.0,0,6.0,0.0,0.0,5.0,0.0,0.0,4.0,0.0,0.0,6.0,0.0,0.0,6.0,0.0,0.0,6.0,0.0,0.0,6.0,0.0,0.0,0.0,-1.0,0.0,0.0,-1.0,,0.0,-1.0,,0.0,-1.0,0.0,0.0,-1,,0.0,-1.0,,0.0,-1.0,0.0,0.0,-1.0,0.0,0.0,-1.0,0.0,0.0,-1,,0.0,-1.0,0.0,0.0,-1.0,,0.0,-1.0,0.0,0.0,-1.0,0.0,0.0,-1,,0.0,-1.0,0.0,0.0,-1.0,,0.0,-1.0,,0.0,-1.0,,0.0,-1.0,0.0,0.0,-1.0,0.0,0.0,-1.0,0.0,0.0,-1,,0.0,-1.0,0.0,0.0,-1.0,0.0,0.0,-1.0,,85.5,98.0,132.75,98.0,15.75,85.0,98.0,129.75,97.5,16.0,81.5,98.0,124.0,89.5,14.0,93.0,98.0,145.0,112.0,17.0,4.3704,0.0,9.4644,8.5965,1.084,6.8099,0.4082,9.0425,5.0662,0.9832,0.0,1.0,0.0,0.0,0.0,0.0,0.0,,False


In [120]:
# vm = True if running in VM
test_data = ml.preprocess(test_data, vm = False)
test_data = test_data.sort_values("rel_time", ascending = True).drop_duplicates(subset = ["csn", "pat_id"], keep = "last")
#test_data = test_data[total_feature_list]

  0%|          | 0/12 [00:00<?, ?it/s]

Extracting informative features


100%|██████████| 12/12 [00:06<00:00,  1.82it/s]


Completed Extracting informative features
Extracting Rolling features
Completed Extracting Rolling features
Extracting Score Features
Completed Extracting Score Features
Preprocessing completed with total of 178 features


In [122]:
len(test_data.columns)

178

40

## 2. TEST

In [125]:
test_set = list(test_data.csn.unique())
model_path = args.model_dir
drop_features = ['pat_id', 'csn', 'los', 'rel_time']

current_time_formatted = str(current_time.tz_localize("US/Eastern"))
current_time_formatted = current_time_formatted.replace(' ', 'T')

result = ml.predict(test_set, test_data, model_path, 0.48, vm = False, drop_features = drop_features, hist_data = hist_data, current_time = current_time_formatted)

NameError: name 'args' is not defined

## 3. Historical Test Output

In [None]:
url_output = 'https://prd-rta-app01.eushc.org:8443/ords/rta/sepsisml/outputcache'
historical_output = connect(url_output, username, password)
historical_output = historical_output.dropna(subset = ["run_date", "predictedprobability"])
 
if len(historical_output) >0 :
    historical_output = historical_output.sort_values(by = "run_date", ascending = True)
    historical_output = historical_output[["csn", "pat_id", "predictedprobability", "run_date"]]
        
    historical_output = historical_output.dropna().drop_duplicates(subset = ["csn", "pat_id"], keep = "last")
    historical_output = historical_output.rename(columns = {"predictedprobability": "PastProbability", "run_date": "PastRunDate"})

    historical_output["PastRunDate"] = pd.to_datetime(historical_output["PastRunDate"])
    historical_output["PastRunDate"] = historical_output["PastRunDate"].dt.tz_convert('US/Eastern')
    historical_output["PastRunDate"] = historical_output["PastRunDate"].dt.tz_localize(None)
    historical_output["pat_id"] = pd.to_numeric(historical_output["pat_id"], errors='coerce')
    historical_output["csn"] = pd.to_numeric(historical_output["csn"], errors='coerce')
    historical_output["PastProbability"] = pd.to_numeric(historical_output["PastProbability"], errors='coerce')
    historical_output = historical_output[historical_output.csn.isin(keep_csns)]
else:
    historical_output = pd.DataFrame(columns = {"csn", "pat_id", "PastProbability", "PastRunDate"})
    historical_output["csn"] = pd.Series(keep_csns)


    #historical_output.to_csv("hist_data.csv")
print("hist_data_saved")

## 4. Output to Output Table

In [None]:
result = result.merge(historical_output, on = ["csn", "pat_id"], how = "left")
#result.to_csv("result_debug.csv", index = False)
result["PastProbability"] = pd.to_numeric(result["PastProbability"]) 
result["prob_diff"] = round((result["PredictedProbability"] - result["PastProbability"])*100, 2)
    
result["PreviousAlert"] = round(result["PastProbability"] * 100, 2).astype(str) + '% ' + (result["prob_diff"]).map('({0:+}%)'.format)
    
    
result["run_date_relative"] = result["rel_time"]
    
result["run_date"] = current_time
result["past_curr_diff"] = (result["run_date"] - pd.to_datetime(result["PastRunDate"])).dt.total_seconds()/3600
    
result["PastRunDate"] = result["PastRunDate"].astype(str)
result["PastRunDate"] = result["PastRunDate"].str.replace(' ', 'T')
result["run_date"] = current_time_formatted
                                       
result["PriorAlertTime"] = result["PastRunDate"] + " " + (result["past_curr_diff"]).map('({:,.2f} hrs ago)'.format)
result = result.drop(["past_curr_diff", "prob_diff", "PastRunDate", "PastProbability"], axis = 1)
result = result.rename(columns = {"PreviousAlert": "previousalert", "PriorAlertTime": "prioralerttime"})
result = result.drop(["ranked_shap"], axis = 1)
  
output = result.to_dict(orient='records')
    
url_output = 'https://prd-rta-app01.eushc.org:8443/ords/rta/sepsisml/outputcache'
    
for i_output in output:
    upload(url_output, username, password, json.dumps(i_output))
    


In [None]:
logname = "sepsis_ml_execution.log"
logging.basicConfig(filename=logname,
                    filemode='a',
                    format='%(asctime)s.%(msecs)03d %(levelname)s %(module)s - %(funcName)s: %(message)s',
                        datefmt='%Y-%m-%d %H:%M:%S',
                    level=logging.DEBUG)

logging.info("Result Outputed")


print("result outputed")