In [1]:
# Import our dependencies
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import pandas as pd
import tensorflow as tf

# Load the patient data
data = Path('Resources/patient_data.csv')
df = pd.read_csv(data)

# Preview the data
df.head()

Unnamed: 0,time_in_hospital,num_lab_procedures,num_procedures,num_medications,number_outpatient,number_emergency,number_inpatient,number_diagnoses,change,diabetesMed,...,glyburide-metformin:Up,A1Cresult:>7,A1Cresult:>8,A1Cresult:None,A1Cresult:Norm,max_glu_serum:>200,max_glu_serum:>300,max_glu_serum:None,max_glu_serum:Norm,readmitted
0,4,66,3,18,0,0,1,9,1,1,...,0,0,0,1,0,0,0,1,0,1
1,2,48,0,15,4,0,0,7,0,0,...,0,0,0,1,0,0,0,1,0,0
2,4,21,3,23,1,0,2,7,0,1,...,0,0,0,1,0,0,0,1,0,0
3,5,38,0,5,0,0,0,2,1,1,...,0,0,1,0,0,0,0,1,0,0
4,1,6,0,6,0,0,0,6,0,1,...,0,0,0,1,0,0,0,1,0,1


In [2]:
# determine % of patients readmitted
print(f"{round(len(df[df['readmitted'] == 1])/len(df), 2) * 100}% of patients are readmitted")

46.0% of patients are readmitted


In [3]:
# Calculate correlation matrix and get correlation with target column
correlation_threshold = 0.05
correlations = df.corr()['readmitted'].abs().sort_values(ascending=False)

# Filter columns with low correlation
high_correlation_columns = correlations[correlations > correlation_threshold].index.tolist()
if 'readmitted' in high_correlation_columns:
    high_correlation_columns.remove('readmitted')

# Remove target if present
if 'readmitted' in high_correlation_columns:
    high_correlation_columns.remove('readmitted')
print(f"Selected {len(high_correlation_columns)} features with correlation > {correlation_threshold}:")
print(high_correlation_columns)

Selected 9 features with correlation > 0.05:
['number_inpatient', 'number_diagnoses', 'number_emergency', 'number_outpatient', 'admission_source_id:Transfer', 'admission_source_id:Emergency', 'diabetesMed', 'time_in_hospital', 'admission_type_id:Elective']


In [4]:
# Remove the low correlation columns
df_modified = df[high_correlation_columns]

df_modified.head()

Unnamed: 0,number_inpatient,number_diagnoses,number_emergency,number_outpatient,admission_source_id:Transfer,admission_source_id:Emergency,diabetesMed,time_in_hospital,admission_type_id:Elective
0,1,9,0,0,0,1,1,4,0
1,0,7,0,4,0,1,0,2,0
2,2,7,0,1,0,0,1,4,1
3,0,2,0,0,0,0,1,5,0
4,0,6,0,0,0,1,1,1,0


In [5]:
# Split target column from dataset
y = df['readmitted']
X = df_modified

In [6]:
# Use sklearn to split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y)

# Create scaler instance
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [7]:
# Create a method that creates a new Sequential model with hyperparameter options
def create_model(hp):
    nn_model = tf.keras.models.Sequential()

    # match the number of input layers
    input_features = len(X_train.columns)

    # Allow kerastuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation',['relu','tanh','sigmoid'])

    # Allow kerastuner to decide number of neurons in first layer
    nn_model.add(tf.keras.layers.Input(shape=[input_features]))

    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 20)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=1,
            max_value=20,
            step=2),
            activation=activation))

    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

    # Compile the model
    nn_model.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy", tf.keras.metrics.Precision()])

    return nn_model

In [8]:
# Import the kerastuner library
import keras_tuner as kt

tuner = kt.Hyperband(
    create_model,
    objective="val_accuracy",
    max_epochs=50,
    hyperband_iterations=2)

Reloading Tuner from .\untitled_project\tuner0.json


In [9]:
# Run the kerastuner search for best hyperparameters
tuner.search(X_train_scaled,y_train,epochs=50,validation_data=(X_test_scaled,y_test))

Trial 180 Complete [00h 02m 21s]
val_accuracy: 0.6209661364555359

Best val_accuracy So Far: 0.6236389875411987
Total elapsed time: 01h 43m 37s


In [10]:
# Get best model hyperparameters
best_hyper = tuner.get_best_hyperparameters(1)[0]
best_hyper.values

{'activation': 'tanh',
 'num_layers': 1,
 'units_0': 17,
 'units_1': 3,
 'units_2': 7,
 'units_3': 11,
 'units_4': 5,
 'units_5': 19,
 'units_6': 7,
 'units_7': 3,
 'units_8': 15,
 'units_9': 17,
 'units_10': 11,
 'units_11': 19,
 'units_12': 19,
 'units_13': 7,
 'units_14': 19,
 'units_15': 9,
 'units_16': 1,
 'units_17': 7,
 'units_18': 11,
 'units_19': 3,
 'tuner/epochs': 17,
 'tuner/initial_epoch': 0,
 'tuner/bracket': 1,
 'tuner/round': 0}

In [11]:
# Evaluate best model against full test data
best_model = tuner.get_best_models(1)[0]
accuracy = best_model.evaluate(X_test_scaled,y_test,verbose=2)
print(accuracy)

796/796 - 1s - 1ms/step - accuracy: 0.6236 - loss: 0.6506 - precision: 0.6227
[0.6506057977676392, 0.6236389875411987, 0.6226759552955627]


In [12]:
# Train the model
fit_model = best_model.fit(X_train_scaled, y_train, epochs=100)

Epoch 1/100
[1m2386/2386[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 672us/step - accuracy: 0.6239 - loss: 0.6455 - precision: 0.6231
Epoch 2/100
[1m2386/2386[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 666us/step - accuracy: 0.6225 - loss: 0.6479 - precision: 0.6250
Epoch 3/100
[1m2386/2386[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 669us/step - accuracy: 0.6256 - loss: 0.6453 - precision: 0.6260
Epoch 4/100
[1m2386/2386[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 658us/step - accuracy: 0.6252 - loss: 0.6468 - precision: 0.6273
Epoch 5/100
[1m2386/2386[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 688us/step - accuracy: 0.6246 - loss: 0.6466 - precision: 0.6276
Epoch 6/100
[1m2386/2386[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 675us/step - accuracy: 0.6246 - loss: 0.6465 - precision: 0.6271
Epoch 7/100
[1m2386/2386[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 670us/step - accuracy: 0.6208 - loss: 0.6480 - pre

In [14]:
# Evaluate the model using the test data
accuracy = best_model.evaluate(X_test_scaled,y_test,verbose=2)
print(accuracy)

796/796 - 0s - 601us/step - accuracy: 0.6213 - loss: 0.6510 - precision: 0.6181
[0.6509765982627869, 0.6213198900222778, 0.6180586814880371]


In [15]:
# Export our model to HDF5 file
best_model.save("NN_2.h5")

