## Import dependencies and read csv

In [58]:
import keras_tuner as kt
import pandas as pd
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
#from tensorflow import keras
#from tensorflow.keras.models import Sequential
#from tensorflow.keras.layers import Dense, Activation

In [28]:
dm_df = pd.read_csv(Path("Resources/diabetes_indicators.csv"), encoding="UTF-8")
dm_df.head()

Unnamed: 0,Diabetes_012,HighBP,HighChol,CholCheck,BMI,Smoker,Stroke,HeartDiseaseorAttack,PhysActivity,Fruits,...,AnyHealthcare,NoDocbcCost,GenHlth,MentHlth,PhysHlth,DiffWalk,Sex,Age,Education,Income
0,0.0,1.0,1.0,1.0,40.0,1.0,0.0,0.0,0.0,0.0,...,1.0,0.0,5.0,18.0,15.0,1.0,0.0,9.0,4.0,3.0
1,0.0,0.0,0.0,0.0,25.0,1.0,0.0,0.0,1.0,0.0,...,0.0,1.0,3.0,0.0,0.0,0.0,0.0,7.0,6.0,1.0
2,0.0,1.0,1.0,1.0,28.0,0.0,0.0,0.0,0.0,1.0,...,1.0,1.0,5.0,30.0,30.0,1.0,0.0,9.0,4.0,8.0
3,0.0,1.0,0.0,1.0,27.0,0.0,0.0,0.0,1.0,1.0,...,1.0,0.0,2.0,0.0,0.0,0.0,0.0,11.0,3.0,6.0
4,0.0,1.0,1.0,1.0,24.0,0.0,0.0,0.0,1.0,1.0,...,1.0,0.0,2.0,3.0,0.0,0.0,0.0,11.0,5.0,4.0


## Cleaning data

In [29]:
# CONVERT DTYPES TO INT FOR EASIER DATA MANIPULATION
dm_df = dm_df.astype(int)

In [33]:
# SIMPLIFYING TARGET COLUMN TO ONLY 0 (no diabetes) AND 1 (diabetes or at-risk)
dm_column = dm_df["Diabetes_012"]
dm_column_new = []
for num in dm_column:
    if num == 2:
        dm_column_new.append(1)
    else:
        dm_column_new.append(num)

In [35]:
# REPLACE TARGET COLUMN WITH NEW DATA
dm_df["Diabetes_012"] = dm_column_new

## Train-test-split and StandardScaler

In [40]:
# SPLITTING DATA INTO FEATURES (X) AND TARGET (y) ARRAYS
X = dm_df.drop(columns=["Diabetes_012"])
y = dm_df["Diabetes_012"]

# SPLIT INTO TESTING AND TRAINING DATA
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [41]:
# CREATING STANDARDSCALER INSTANCE
scaler = StandardScaler()

# FITTING DATA
X_scaler = scaler.fit(X_train)

# SCALING THE DATA
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

## KerasTuner

In [59]:
# KERASTUNER - AUTOTUNER FUNCTION
def create_model(hp):
    nn_model = tf.keras.models.Sequential()
    activation = hp.Choice('activation',['relu','tanh'])
    nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
        min_value=1,
        max_value=30,
        step=5), activation=activation, input_dim=21))

    for i in range(hp.Int('num_layers', 1, 5)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=1,
            max_value=30,
            step=5), activation=activation))
    
    nn_model.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

    nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

    return nn_model

In [60]:
tuner = kt.Hyperband(
    create_model,
    objective="val_accuracy",
    max_epochs=20,
    hyperband_iterations=2
)

In [61]:
# KERASTUNER SEARCHING FOR BEST PARAMETERS
tuner.search(X_train_scaled, y_train, epochs=20, validation_data=(X_test_scaled, y_test))

Trial 60 Complete [00h 04m 01s]
val_accuracy: 0.8520498275756836

Best val_accuracy So Far: 0.8526490330696106
Total elapsed time: 01h 25m 18s


In [67]:
# TOP 2 PARAMETERS
top_hyper = tuner.get_best_hyperparameters(2)
for param in top_hyper:
    print(param.values)

{'activation': 'tanh', 'first_units': 26, 'num_layers': 4, 'units_0': 26, 'units_1': 1, 'units_2': 1, 'units_3': 26, 'units_4': 21, 'tuner/epochs': 20, 'tuner/initial_epoch': 0, 'tuner/bracket': 0, 'tuner/round': 0}
{'activation': 'relu', 'first_units': 21, 'num_layers': 1, 'units_0': 21, 'units_1': 1, 'units_2': 21, 'units_3': 6, 'units_4': 26, 'tuner/epochs': 20, 'tuner/initial_epoch': 7, 'tuner/bracket': 2, 'tuner/round': 2, 'tuner/trial_id': '0042'}
