In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
import keras_tuner as kt
import tensorflow_addons as tfa

## Data Preprocessing

In [2]:
url = 'https://projectheartdisease.s3.amazonaws.com/heart.csv'
df = pd.read_csv(url)

# Cleanup
# Age
bins = [0, 40, 50, 60, 70, 80]
# Create the names for the five bins
group_names = ["<40", "40-50", "50-60", "60-70", "70-80"]  
df["Age Range"] = pd.cut(df["age"], bins, labels=group_names, include_lowest=True)

#Trestbps   
bins = [0, 125, 150, 175, 200]
# Create the names for the five bins
group_names = ["<125", "125-150", "150-175", "175-200"]  
df["Trestbps Range"] = pd.cut(df["trestbps"], bins, labels=group_names, include_lowest=True)

#Chol  
bins = [0, 200, 300, 400, 500, 600]
# Create the names for the five bins
group_names = ["<200", "200-300", "300-400", "400-500", "500-600"]  
df["Chol Range"] = pd.cut(df["chol"], bins, labels=group_names, include_lowest=True)

#Thalach 
bins = [0, 100, 125, 150, 175, 300]
# Create the names for the five bins
group_names = ["<100", "100-125", "125-150", "150-175", "175-300"]  
df["Thalach Range"] = pd.cut(df["thalach"], bins, labels=group_names, include_lowest=True)

# Removing unnecessary columns age, trestbps, chol, and thalach
df.drop(['age', 'trestbps', 'chol', 'thalach'], axis='columns', inplace=True)
df_dummies = pd.get_dummies(df)
df_dummies.head()

# Splitting and Scaling
# Splitting into target (y) and features (X)
y = df_dummies['target'].values
X = df_dummies.drop(['target'], axis='columns')

# Splitting X and y into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Scaling X sets with StandardScaler
scaler = StandardScaler()
scaler.fit(X_train)

# Transforming X_train and X_test
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Optimization

In [3]:
def create_model(hp):
    nn_model = tf.keras.models.Sequential()

    # Choice of activation functions
    activation = hp.Choice('activation', ['relu', 'tanh'])

    # Kerastuner decides number of neurons in first layer
    nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
        min_value=1, max_value=10, step=2), activation=activation, input_shape=(X_train_scaled.shape[1], )))
    
    # Kerastuner decides number of additional hidden layers and neurons included
    for i in range(hp.Int('num_layers', 1, 6)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i), 
            min_value=1, max_value=20, step=2), activation=activation))
    
    # Output layer
    nn_model.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

    # Compile
    nn_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    return nn_model

In [4]:
# Setting up tuner
tuner = kt.Hyperband(create_model, objective='val_accuracy', max_epochs=20, hyperband_iterations=2)

INFO:tensorflow:Reloading Oracle from existing project .\untitled_project\oracle.json
INFO:tensorflow:Reloading Tuner from .\untitled_project\tuner0.json


In [5]:
# Running tuner to find combination with highest accuracy
tuner.search(X_train_scaled, y_train, epochs=20, validation_data=(X_test_scaled, y_test))

Trial 60 Complete [00h 00m 04s]
val_accuracy: 0.8793774247169495

Best val_accuracy So Far: 0.9377431869506836
Total elapsed time: 00h 01m 20s
INFO:tensorflow:Oracle triggered exit


In [6]:
# Getting best hyperparameters
best_hyper = tuner.get_best_hyperparameters(1)[0]
best_hyper.values

{'activation': 'tanh',
 'first_units': 7,
 'num_layers': 6,
 'units_0': 9,
 'units_1': 17,
 'units_2': 13,
 'units_3': 3,
 'units_4': 11,
 'units_5': 3,
 'tuner/epochs': 20,
 'tuner/initial_epoch': 0,
 'tuner/bracket': 0,
 'tuner/round': 0}

In [7]:
# Making a model with the best hyperparameters
nn = tuner.hypermodel.build(best_hyper)
nn.fit(X_train_scaled, y_train, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x1da44d1ae50>

In [8]:
# Evaluating best model with test data

# Calculating R2
y_true = y_test.reshape(-1, 1)
y_pred = nn.predict(X_test_scaled)
metric = tfa.metrics.r_square.RSquare()
metric.update_state(y_true, y_pred)
result = metric.result()
# Calculating loss and accuracy
model_loss, model_accuracy = nn.evaluate(X_test_scaled, y_test, verbose=0)
print(f'Loss: {model_loss}, Accuracy: {model_accuracy}, R2: {result.numpy()}')

Loss: 0.2458946257829666, Accuracy: 0.9338521361351013, R2: 0.7510954141616821


In [9]:
# Saving the best model to HDF5 file
nn.save('Model/OptimizedModel.h5')