# Deep Sepsis Neural Network

In [3]:
from easy_sql.io import Session as session
from easy_sql.utils import read_sql_text
from sklearn.preprocessing import Imputer, StandardScaler
import numpy as np
from keras import Model
from keras.layers import Dense, Dropout, Input, GaussianNoise
from sklearn.metrics import roc_auc_score, accuracy_score
from keras.callbacks import EarlyStopping
import pandas as pd
import pickle
# ---- run configuration ----
dsn = 'muscedw'                              # data source name handed to the session
sql_file_path = 'data/sepsis_NN.sql'         # SQL text file that is read and executed below
model_file_path = 'models/sepsis_nn.bundle'  # not referenced by the cells visible here

# Seed NumPy's global random number generator.
np.random.seed(7)

# Raise the pandas display limits so wide frames and long cells are shown in full.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('max_colwidth', 800)

# get data: open the session, load the query text, echo it, then run it
s = session(dsn)
sql = read_sql_text(sql_file_path)
print('getting data from', dsn, sql)
data = s.get_data(sql)

# Bookkeeping columns that are neither features nor targets.
time_col = 'AVAILABLE_TIME'
id_col = 'PAT_ENC_CSN_ID'

# Target columns, one output node each. Per the query, MS870/MS871/MS872 flag
# the individual DRG numbers and Sepsis_DRG flags any of the three.
y_cols = [
    'MetSIRS4_8', 'MetSIRS4_24', 'MetSIRS4_48',
    'MetSIRS4_4hr_8', 'MetSIRS4_4hr_24', 'MetSIRS4_4hr_48',
    'MS870', 'MS871', 'MS872', 'Sepsis_DRG',
]

# Every remaining column of the result set is used as a model input.
non_x_cols = [time_col, id_col] + y_cols
x_cols = list(filter(lambda col: col not in non_x_cols, data.columns))

# initialize imputer and scaler
# Both live at module level so input_fun can fit them once (fit=True) and
# reuse the fitted state on later calls (fit=False).
# NOTE(review): sklearn.preprocessing.Imputer was removed in scikit-learn 0.22
# (sklearn.impute.SimpleImputer is its replacement) -- confirm the pinned
# scikit-learn version before re-running this notebook.
imp = Imputer(strategy='median')
scaler = StandardScaler()

def input_fun(data, fit=False):
    """Build the imputed, scaled feature matrix for the network.

    Selects the module-level ``x_cols`` from ``data``, turns the value
    ``-99999`` into NaN, then runs the module-level ``imp`` and ``scaler``
    over the result.

    Args:
        data: table holding at least the ``x_cols`` columns (presumably a
            pandas DataFrame -- it must support ``data[x_cols].replace``).
        fit: when True, fit ``imp`` and ``scaler`` on ``data`` while
            transforming it (and print a notice); when False, reuse their
            previously fitted state.

    Returns:
        The output of ``scaler`` applied to the imputed features.
    """
    # -99999 marks a missing value in the source data; expose it as NaN so
    # the imputer can fill it.
    raw_features = data[x_cols].replace(-99999, np.nan)

    if not fit:
        return scaler.transform(imp.transform(raw_features))

    print('fitting imputer and scaler')
    return scaler.fit_transform(imp.fit_transform(raw_features))


# split the training and test data
# The split is positional: the first 75% of the rows train the model and the
# remaining rows are held out for testing.
split = .75
cut = int(split * data.shape[0])

# extract features
# Fit the imputer and scaler on the TRAINING rows only. Fitting them on the
# full table (as was done before) lets the medians, means and variances of the
# held-out rows leak into preprocessing and makes test metrics optimistic.
# The return value of the fitting call is not needed; every row is transformed
# in one pass below using the train-fitted statistics.
# assumes data is a pandas DataFrame (uses .iloc) -- TODO confirm
input_fun(data.iloc[:cut], fit=True)
x = input_fun(data)
y = np.array(data[y_cols])

# Row-wise views into x / y; no data is copied.
x_train = x[:cut, :]
y_train = y[:cut, :]
x_test = x[cut:, :]
y_test = y[cut:, :]

# Network dimensions: one input per feature column, one output per target.
n_input_dims = x_train.shape[1]
n_outputs = y_train.shape[1]


  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


getting data from muscedw 
SELECT TOP 1000000
       iplv. [PAT_ENC_CSN_ID]
	   ,iplv.[AVAILABLE_TIME]
      ,[AGE]
      ,[AdmissionsSixMonths]
      ,[MaxPreviousLOS]
      ,[ICUSixMonths]
      ,[PCPSixMonths]
      ,[SurgerySixMonths]
      ,[VenilatorSixMonths]
      ,[GLUCOSE, WHOLE BLOOD]
      ,[HEMOLYSIS INDEX]
      ,[SODIUM]
      ,[POTASSIUM]
      ,[GLUCOSE]
      ,[CREATININE]
      ,[CHLORIDE]
      ,[CALCIUM]
      ,[CO2 CONTENT (BICARBONATE)]
      ,[UREA NITROGEN, BLOOD (BUN)]
      ,[ANION GAP]
      ,[HEMATOCRIT]
      ,[HEMOGLOBIN]
      ,[PLATELET COUNT]
      ,[RED BLOOD CELL COUNT]
      ,[MEAN CORPUSCULAR HEMOGLOBIN]
      ,[MEAN CORPUSCULAR HEMOGLOBIN CONC]
      ,[MEAN CORPUSCULAR VOLUME]
      ,[WHITE BLOOD CELL COUNT]
      ,[RED CELL DISTRIBUTION WIDTH]
      ,[MEAN PLATELET VOLUME]
      ,[ICTERIC INDEX]
      ,[MAGNESIUM]
      ,[NUCLEATED RED BLOOD CELLS]
      ,[PHOSPHORUS (PO4)]
      ,[EGFR]
      ,[BILIRUBIN, TOTAL]
      ,[TOTAL PROTEIN]
      ,[AL

ProgrammingError: (pyodbc.ProgrammingError) ('42S02', "[42S02] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Invalid object name 'StatisticalModels.dbo.InpatientPipelineLabsVitals'. (208) (SQLExecDirectW)") [SQL: "\nSELECT TOP 1000000\n       iplv. [PAT_ENC_CSN_ID]\n\t   ,iplv.[AVAILABLE_TIME]\n      ,[AGE]\n      ,[AdmissionsSixMonths]\n      ,[MaxPreviousLOS]\n      ,[ICUSixMonths]\n      ,[PCPSixMonths]\n      ,[SurgerySixMonths]\n      ,[VenilatorSixMonths]\n      ,[GLUCOSE, WHOLE BLOOD]\n      ,[HEMOLYSIS INDEX]\n      ,[SODIUM]\n      ,[POTASSIUM]\n      ,[GLUCOSE]\n      ,[CREATININE]\n      ,[CHLORIDE]\n      ,[CALCIUM]\n      ,[CO2 CONTENT (BICARBONATE)]\n      ,[UREA NITROGEN, BLOOD (BUN)]\n      ,[ANION GAP]\n      ,[HEMATOCRIT]\n      ,[HEMOGLOBIN]\n      ,[PLATELET COUNT]\n      ,[RED BLOOD CELL COUNT]\n      ,[MEAN CORPUSCULAR HEMOGLOBIN]\n      ,[MEAN CORPUSCULAR HEMOGLOBIN CONC]\n      ,[MEAN CORPUSCULAR VOLUME]\n      ,[WHITE BLOOD CELL COUNT]\n      ,[RED CELL DISTRIBUTION WIDTH]\n      ,[MEAN PLATELET VOLUME]\n      ,[ICTERIC INDEX]\n      ,[MAGNESIUM]\n      ,[NUCLEATED RED BLOOD CELLS]\n      ,[PHOSPHORUS (PO4)]\n      ,[EGFR]\n      ,[BILIRUBIN, TOTAL]\n      ,[TOTAL PROTEIN]\n      ,[ALBUMIN]\n      ,[ASPARTATE AMINOTRANSFERASE (AST)(SGOT)]\n      ,[ALKALINE PHOSPHATASE]\n      ,[ALANINE AMINOTRANSFERASE (ALT)(SGPT)]\n      ,[FIO2, ARTERIAL]\n      ,[PO2 (CORR), ARTERIAL]\n      ,[pH (CORR), ARTERIAL]\n      ,[BICARB, ARTERIAL]\n      ,[PCO2 (CORR), ARTERIAL]\n      ,[BASE, ARTERIAL]\n      ,[O2 SAT, ARTERIAL]\n      ,[TOTAL CO2, ARTERIAL]\n      ,[PT TEMP (CORR), ARTERIAL]\n      ,[PROTHROMBIN TIME]\n      ,[INR]\n      ,[NEUTROPHILS ABSOLUTE COUNT]\n      ,[MONOCYTES RELATIVE PERCENT]\n      ,[LYMPHOCYTES ABSOLUTE COUNT]\n      ,[NEUTROPHILS RELATIVE PERCENT]\n      ,[LYMPHOCYTE RELATIVE PERCENT]\n      ,[MONOCYTES ABSOLUTE COUNT]\n      ,[EOSINOPHILS, ABSOLUTE COUNT]\n      ,CAST([PULSE] as Float) as 'PULSE'\n      ,CAST([PULSE 
OXIMETRY] as Float) as 'PULSE OXIMETRY'\n      ,CAST([RESPIRATIONS] as Float) as 'RESPIRATIONS'\n      ,CAST([TEMPERATURE] as Float) 'TEMPERATURE'\n      ,CAST([CPM S16 R AS PAIN RATING (0-10): REST] as Float) as 'PAIN RATING'\n      ,CAST([R MAINTENANCE IV VOLUME] as Float) as 'IV VOL'\n\t  ,CAST([ORAL INTAKE] as Float) as 'ORAL INTAKE'\n      ,[BLOOD PRESSURE (SYSTOLIC)]\n      ,[BLOOD PRESSURE (DIASTOLIC)]\n      ,[MUSC R SC PHLEBITIS IV DEVICE (TRANSFORMED)]\n      ,[MUSC R AS SC INFILTRATION IV DEVICE (TRANSFORMED)]\n      ,[R ARTERIAL LINE BLOOD PRESSURE (SYSTOLIC)]\n      ,[R ARTERIAL LINE BLOOD PRESSURE (DIASTOLIC)]\n      ,[CPM S16 R AS SC RASS (RICHMOND AGITATION-SEDATION SCALE) (TRANSFORMED)]\n      ,[MUSC IP R AVPU (TRANSFORMED)]\n      ,[MetTemp]\n      ,[MetHR]\n      ,[MetRR]\n      ,[MetWBC]\n      ,[MaxTemp8]\n      ,[MaxTemp24]\n      ,[MaxTemp48]\n      ,[MaxHR8]\n      ,[MaxHR24]\n      ,[MaxHR48]\n      ,[MaxRR8]\n      ,[MaxRR24]\n      ,[MaxRR48]\n      ,[MaxWBC8]\n      ,[MaxWBC24]\n      ,[MaxWBC48]\n      ,[MinTemp8]\n      ,[MinTemp24]\n      ,[MinTemp48]\n      ,[MinHR8]\n      ,[MinHR24]\n      ,[MinHR48]\n      ,[MinRR8]\n      ,[MinRR24]\n      ,[MinRR48]\n      ,[MinWBC8]\n      ,[MinWBC24]\n      ,[MinWBC48]\n      ,[MetSIRS4_8]\n      ,[MetSIRS4_24]\n      ,[MetSIRS4_48]\n      ,[MetSIRS4_4hr_8]\n      ,[MetSIRS4_4hr_24]\n      ,[MetSIRS4_4hr_48]\n\t   ,CASE WHEN drg.DRGNumber =  'MS870' THEN 1 Else 0 END AS MS870\n\t   ,CASE WHEN drg.DRGNumber =  'MS871' THEN 1 Else 0 END AS MS871\n\t   ,CASE WHEN drg.DRGNumber =  'MS872' THEN 1 Else 0 END AS MS872\n\t   ,CASE WHEN drg.DRGNumber  in ( 'MS870', 'MS871', 'MS872') THEN 1 Else 0 END AS Sepsis_DRG\n\n\nFROM [StatisticalModels].[dbo].[InpatientPipelineLabsVitals] as iplv\n\tleft join\n\t\tUMAOLAP.fact.InpatientAdmission as ipa on\n\t\tiplv.PAT_ENC_CSN_ID = ipa.PatEncCSNID\n\tleft join\n\t\tUMAOLAP.dim.DRG as drg on\n\t\tipa.MSDRGKey = drg.DRGKey\n\nwhere  drg.DRGNumber != '-1' and 
ipa.MSDRGWeight is not Null and AGE > 17\n"] (Background on this error at: http://sqlalche.me/e/f405)

In [None]:
# Preview the first rows of the raw feature columns (before the -99999
# replacement, imputation and scaling done in input_fun).
data[x_cols].head()

In [None]:
print(x_cols) # input columns: every column of data that is not in non_x_cols

In [None]:
print(y_cols) # output columns: one network output node per entry, in this order

Fit a deep feed-forward neural net, using dropout and noise layers to reduce overfitting.

In [None]:
# Stop training after 2 consecutive epochs without improvement of the
# monitored validation metric.
esm = EarlyStopping(patience=2)
# trains a multiple output model
# Architecture: input -> Gaussian noise -> two (Dense 100 relu + 50% dropout)
# stages -> one sigmoid unit per target column.
inputs = Input(shape=(n_input_dims, ))
noise = GaussianNoise(1.5)(inputs)
dense1 = Dense(100, activation='relu')(noise)
dropout1 = Dropout(.5)(dense1)
dense2 = Dense(100, activation='relu')(dropout1)
dropout2 = Dropout(.5)(dense2)
outputs = Dense(n_outputs, activation='sigmoid')(dropout2)
model = Model(inputs=inputs, outputs=outputs)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# Early stopping must not look at the test set: the stopping epoch would then
# be tuned on the very rows used to report "test auc" afterwards. Hold out the
# last 10% of the training rows for validation instead (Keras takes that
# slice before shuffling), so x_test / y_test stay untouched until evaluation.
model.fit(x_train, y_train, epochs=50, batch_size=1000, validation_split=.1, verbose=2, callbacks=[esm],
          shuffle=True)


Check the model performance

In [None]:
# Print the layer-by-layer description of the network.
model.summary()

# generate predictions for both partitions
preds_train = model.predict(x_train)
preds_test = model.predict(x_test)

# prints auc of all the output nodes
# Each entry of perf is a one-element list holding the summary line for one
# output column.
perf = []
for i, output_name in zip(range(n_outputs), y_cols):
    test_auc = roc_auc_score(y_test[:, i], preds_test[:, i])
    train_auc = roc_auc_score(y_train[:, i], preds_train[:, i])
    summary = ''.join([output_name, ' train auc: ', str(train_auc), ' test auc: ', str(test_auc)])
    perf.append([summary])

print(perf)

In [None]:
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
# Marker printed once the plotting imports above have run.
print('complete')

#### Feature Contributions
SIRS 4 within 48 Hours


In [None]:
# Output node to inspect; its column name is echoed for reference.
i = 0
print(y_cols[i])

# NOTE(review): NeuralNetDescriber is not imported in any cell visible here --
# confirm where it comes from before running this cell on a fresh kernel.
n = NeuralNetDescriber(
    x_train=x_train,
    y_train=y_train,
    x_test=x_test,
    y_test=y_test,
    model=model,
    feature_names=x_cols,
    scaler=scaler,
    output_names=y_cols,
)

# Hand over the predictions and the training-set dimensions computed earlier.
n.preds_test = preds_test
n.preds_train = preds_train
n.n_obs, n.n_dims = x_train.shape

# Decision boundaries for two feature pairs on output node i.
n.plot_decision_boundary(x_name='BLOOD PRESSURE (SYSTOLIC)', y_name='BLOOD PRESSURE (DIASTOLIC)',
                         cut_off=.15, index=i)
n.plot_decision_boundary(x_name='RESPIRATIONS', y_name='PULSE', cut_off=.15, index=i)

In [None]:
# Output node whose sensitivity is plotted; position 6 of y_cols is 'MS870'.
# The loop below used to be `for i, j in enumerate(x_cols)`, which overwrote
# this value with the feature position, so feature k was plotted against
# output k instead of against the chosen output.
# NOTE(review): assumes `index` selects the output node, as it does in the
# plot_decision_boundary calls -- confirm against NeuralNetDescriber.
i = 6
for j in x_cols:
    print('variable: ' + j)
    try:
        n.plot_variable_sensitivity(j, index=i)
    except Exception as err:
        # Best effort: a feature that cannot be plotted must not abort the
        # sweep, but report it instead of skipping it silently.
        print('could not plot ' + j + ': ' + repr(err))

In [None]:
from rnner.plot_methods import plot_auc_curve

# One plot per output column, built from that column's labels and predictions
# on the training and test partitions. Iterating over the transposed arrays
# yields the same per-column slices as indexing with [:, i].
for j, train_truth, train_score, test_truth, test_score in zip(
        y_cols, y_train.T, preds_train.T, y_test.T, preds_test.T):
    print('model ' + j)
    plot_auc_curve(train_truth, train_score, test_truth, test_score)


In [None]:
from rnner.plot_methods import plot_density

# One plot per output column, using the training-set predictions and labels of
# that column. Iterating over the transposed arrays yields the same per-column
# slices as indexing with [:, i].
for j, train_score, train_truth in zip(y_cols, preds_train.T, y_train.T):
    print('model ' + j)
    plot_density(train_score, train_truth)

In [None]:
# Draw the RESPIRATIONS / TEMPERATURE decision boundary for output node 5
# ('MetSIRS4_4hr_48' in y_cols) three times.
# NOTE(review): the three calls are identical -- this looks like a copy-paste
# where other feature pairs or output indices were intended; confirm.
for _repeat in range(3):
    n.plot_decision_boundary(x_name='RESPIRATIONS', y_name='TEMPERATURE', cut_off=.15, index=5)