In [1]:
# Set global seed
from numpy.random import seed
seed(42)

In [2]:
# Import dependencies
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sn
import warnings
warnings.simplefilter('ignore')
import os
import numpy as np

In [3]:
# Read the CSV
heart = pd.read_csv("cleaned_cardio.csv")
heart.head()

Unnamed: 0,age,height,weight,ap_hi,ap_lo,cardio,BMI,gender_1,gender_2,cholesterol_1,...,cholesterol_3,gluc_1,gluc_2,gluc_3,smoke_0,smoke_1,alco_0,alco_1,active_0,active_1
0,50.0,66.0,136.0,110,80,0,21.948577,0,1,1,...,0,1,0,0,1,0,1,0,0,1
1,55.0,61.0,187.0,140,90,1,35.329481,1,0,0,...,1,1,0,0,1,0,1,0,0,1
2,52.0,65.0,141.0,130,70,1,23.461065,1,0,0,...,1,1,0,0,1,0,1,0,1,0
3,48.0,67.0,180.0,150,100,1,28.188906,0,1,1,...,0,1,0,0,1,0,1,0,0,1
4,48.0,61.0,123.0,100,60,0,23.238108,1,0,1,...,0,1,0,0,1,0,1,0,1,0


In [4]:
# Assign X (data) and y (target)
X = heart.drop("cardio", axis=1)
y = heart["cardio"].values.reshape(-1, 1)
X2 = X.values
print(X2.shape, y.shape)

(68606, 20) (68606, 1)


In [5]:
# Set up train and test
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X2, y, random_state=1)

In [6]:
print(X_train)

[[ 62.  65. 136. ...   0.   0.   1.]
 [ 52.  64. 174. ...   0.   0.   1.]
 [ 53.  63. 174. ...   0.   0.   1.]
 ...
 [ 54.  67. 158. ...   0.   0.   1.]
 [ 53.  67. 180. ...   0.   1.   0.]
 [ 53.  59. 132. ...   0.   0.   1.]]


In [7]:
print(y_train)

[[1]
 [0]
 [0]
 ...
 [1]
 [0]
 [0]]


In [8]:
# One-hot encoding
from tensorflow.keras.utils import to_categorical

y_train_categorical = to_categorical(y_train)
y_test_categorical = to_categorical(y_test)
y_train_categorical

array([[0., 1.],
       [1., 0.],
       [1., 0.],
       ...,
       [0., 1.],
       [1., 0.],
       [1., 0.]], dtype=float32)

In [9]:
# Define the model
from tensorflow.keras.models import Sequential

model = Sequential()

In [10]:
# Define first layer input shape
X_shape = X_train.shape
X_shape

(51454, 20)

In [11]:
# Add first layer
from tensorflow.keras.layers import Dense
model.add(Dense(units=20, activation='relu', input_shape=X_shape[1:]))
model.add(Dense(units=20, activation='relu'))

In [12]:
# Add Output
model.add(Dense(units=2, activation='sigmoid'))

In [13]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 20)                420       
_________________________________________________________________
dense_1 (Dense)              (None, 20)                420       
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 42        
Total params: 882
Trainable params: 882
Non-trainable params: 0
_________________________________________________________________


In [14]:
# Compile the model 
model.compile(optimizer='Adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [15]:
# Find the best model with checkpoint
from keras.callbacks import ModelCheckpoint

filepath="neuralnetwork.best.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
callbacks_list = [checkpoint]

Using TensorFlow backend.


In [16]:
# Fit (train) the model
best_nn = model.fit(X_train,
                    y_train_categorical,
                    epochs=500,
                    shuffle=True,
                    callbacks=callbacks_list,
                    verbose=2)

Train on 51454 samples
Epoch 1/500
51454/51454 - 3s - loss: 0.6028 - accuracy: 0.6894
Epoch 2/500
51454/51454 - 2s - loss: 0.5686 - accuracy: 0.7165
Epoch 3/500
51454/51454 - 2s - loss: 0.5665 - accuracy: 0.7192
Epoch 4/500
51454/51454 - 2s - loss: 0.5648 - accuracy: 0.7188
Epoch 5/500
51454/51454 - 2s - loss: 0.5638 - accuracy: 0.7198
Epoch 6/500
51454/51454 - 2s - loss: 0.5601 - accuracy: 0.7231
Epoch 7/500
51454/51454 - 2s - loss: 0.5584 - accuracy: 0.7249
Epoch 8/500
51454/51454 - 2s - loss: 0.5554 - accuracy: 0.7267
Epoch 9/500
51454/51454 - 2s - loss: 0.5557 - accuracy: 0.7249
Epoch 10/500
51454/51454 - 2s - loss: 0.5538 - accuracy: 0.7270
Epoch 11/500
51454/51454 - 2s - loss: 0.5532 - accuracy: 0.7272
Epoch 12/500
51454/51454 - 2s - loss: 0.5529 - accuracy: 0.7270
Epoch 13/500
51454/51454 - 2s - loss: 0.5515 - accuracy: 0.7286
Epoch 14/500
51454/51454 - 2s - loss: 0.5515 - accuracy: 0.7272
Epoch 15/500
51454/51454 - 2s - loss: 0.5508 - accuracy: 0.7284
Epoch 16/500
51454/51454 -

In [17]:
# Save model for future use
model.save('nn_model.h5')

In [18]:
# Evaluate the model using the testing data
model_loss, model_accuracy = model.evaluate(
    X_test, y_test_categorical, verbose=2)

17152/1 - 0s - loss: 0.5607 - accuracy: 0.7294


In [19]:
# Test the model
import numpy as np
from keras.models import load_model

new_user_input = np.array([[61,62,205,130,80,37.49,1,0,0,0,1,1,0,0,1,0,1,0,0,1]])

perc_score_nn = 100*model_accuracy
print("Test Score: {:.2f} %".format(perc_score_nn))
predict_nn = model.predict(new_user_input)
predict_nn    

Test Score: 72.94 %


array([[0.18083376, 0.81916624]], dtype=float32)

In [20]:
# Perform Confusion matrix on NN
from sklearn.metrics import confusion_matrix

predictions = model.predict(X_test)
predictions
# cm = confusion_matrix(y_test_categorical, X_test)
# conf_matrix = pd.DataFrame(data = cm, columns = ['Predicted:0','Predicted:1'], index = ['Actual:0','Actual:1'])
# plt.figure(figsize = (8,5))
# sn.heatmap(conf_matrix, annot=True, fmt='d', cmap="YlGnBu")

array([[0.34549636, 0.6545037 ],
       [0.44503912, 0.5549609 ],
       [0.21210966, 0.78789043],
       ...,
       [0.17409253, 0.82590747],
       [0.6582696 , 0.34173042],
       [0.16078097, 0.83921903]], dtype=float32)