## Import dependencies and read csv

In [1]:
import keras_tuner as kt
import pandas as pd
from pathlib import Path
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
import tensorflow as tf
from sklearn.ensemble import RandomForestClassifier

import pydotplus
from IPython.display import Image

#from tensorflow import keras
#from tensorflow.keras.models import Sequential
#from tensorflow.keras.layers import Dense, Activation

Using TensorFlow backend


In [2]:
dm_df = pd.read_csv(Path("Resources/diabetes_indicators.csv"), encoding="UTF-8")
dm_df.head()

Unnamed: 0,Diabetes_012,HighBP,HighChol,CholCheck,BMI,Smoker,Stroke,HeartDiseaseorAttack,PhysActivity,Fruits,...,AnyHealthcare,NoDocbcCost,GenHlth,MentHlth,PhysHlth,DiffWalk,Sex,Age,Education,Income
0,0.0,1.0,1.0,1.0,40.0,1.0,0.0,0.0,0.0,0.0,...,1.0,0.0,5.0,18.0,15.0,1.0,0.0,9.0,4.0,3.0
1,0.0,0.0,0.0,0.0,25.0,1.0,0.0,0.0,1.0,0.0,...,0.0,1.0,3.0,0.0,0.0,0.0,0.0,7.0,6.0,1.0
2,0.0,1.0,1.0,1.0,28.0,0.0,0.0,0.0,0.0,1.0,...,1.0,1.0,5.0,30.0,30.0,1.0,0.0,9.0,4.0,8.0
3,0.0,1.0,0.0,1.0,27.0,0.0,0.0,0.0,1.0,1.0,...,1.0,0.0,2.0,0.0,0.0,0.0,0.0,11.0,3.0,6.0
4,0.0,1.0,1.0,1.0,24.0,0.0,0.0,0.0,1.0,1.0,...,1.0,0.0,2.0,3.0,0.0,0.0,0.0,11.0,5.0,4.0


## Cleaning data

In [3]:
# CONVERT DTYPES TO INT FOR EASIER DATA MANIPULATION
dm_df = dm_df.astype(int)

In [4]:
# SIMPLIFYING TARGET COLUMN TO ONLY 0 (no diabetes) AND 1 (diabetes or at-risk)
dm_column = dm_df["Diabetes_012"]
dm_column_new = []
for num in dm_column:
    if num == 2:
        dm_column_new.append(1)
    else:
        dm_column_new.append(num)

In [5]:
# REPLACE TARGET COLUMN WITH NEW DATA
dm_df["Diabetes_012"] = dm_column_new

## Train-test-split and StandardScaler

In [6]:
# SPLITTING DATA INTO FEATURES (X) AND TARGET (y) ARRAYS
X = dm_df.drop(columns=["Diabetes_012"])
y = dm_df["Diabetes_012"]

# SPLIT INTO TESTING AND TRAINING DATA
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [7]:
# CREATING STANDARDSCALER INSTANCE
scaler = StandardScaler()

# FITTING DATA
X_scaler = scaler.fit(X_train)

# SCALING THE DATA
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

## KerasTuner

In [59]:
# KERASTUNER - AUTOTUNER FUNCTION
#def create_model(hp):
#    nn_model = tf.keras.models.Sequential()
#    activation = hp.Choice('activation',['relu','tanh'])
#    nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
#        min_value=1,
#        max_value=30,
#        step=5), activation=activation, input_dim=21))

#    for i in range(hp.Int('num_layers', 1, 5)):
#        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
#            min_value=1,
#            max_value=30,
#            step=5), activation=activation))
    
#    nn_model.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

#    nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

#    return nn_model

In [60]:
#tuner = kt.Hyperband(
#    create_model,
#    objective="val_accuracy",
#    max_epochs=20,
#    hyperband_iterations=2
#)

In [61]:
# KERASTUNER SEARCHING FOR BEST PARAMETERS
#tuner.search(X_train_scaled, y_train, epochs=20, validation_data=(X_test_scaled, y_test))

Trial 60 Complete [00h 04m 01s]
val_accuracy: 0.8520498275756836

Best val_accuracy So Far: 0.8526490330696106
Total elapsed time: 01h 25m 18s


In [67]:
# TOP 2 PARAMETERS
#top_hyper = tuner.get_best_hyperparameters(2)
#for param in top_hyper:
#    print(param.values)

{'activation': 'tanh', 'first_units': 26, 'num_layers': 4, 'units_0': 26, 'units_1': 1, 'units_2': 1, 'units_3': 26, 'units_4': 21, 'tuner/epochs': 20, 'tuner/initial_epoch': 0, 'tuner/bracket': 0, 'tuner/round': 0}
{'activation': 'relu', 'first_units': 21, 'num_layers': 1, 'units_0': 21, 'units_1': 1, 'units_2': 21, 'units_3': 6, 'units_4': 26, 'tuner/epochs': 20, 'tuner/initial_epoch': 7, 'tuner/bracket': 2, 'tuner/round': 2, 'tuner/trial_id': '0042'}


## Testing top model

In [9]:
num_input_features = 21
hidden_nodes_layer1 = 26
hidden_nodes_layer2 = 1
hidden_nodes_layer3 = 1
hidden_nodes_layer4 = 26
hidden_nodes_layer5 = 21

nn = tf.keras.models.Sequential()

nn.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=num_input_features, activation='tanh')
)

nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation='tanh'))
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer3, activation='tanh'))
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer4, activation='tanh'))
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer5, activation='tanh'))

nn.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

nn.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 26)                572       
                                                                 
 dense_1 (Dense)             (None, 1)                 27        
                                                                 
 dense_2 (Dense)             (None, 1)                 2         
                                                                 
 dense_3 (Dense)             (None, 26)                52        
                                                                 
 dense_4 (Dense)             (None, 21)                567       
                                                                 
 dense_5 (Dense)             (None, 1)                 22        
                                                                 
Total params: 1242 (4.85 KB)
Trainable params: 1242 (4.8

In [10]:
nn.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [11]:
fit_model = nn.fit(X_train_scaled, y_train, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [12]:
model_loss, model_accuracy = nn.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

1982/1982 - 2s - loss: 0.3419 - accuracy: 0.8500 - 2s/epoch - 892us/step
Loss: 0.3419131636619568, Accuracy: 0.8499842286109924
