## Cardio Neural Net

In [1]:
# import libraries
import tensorflow as tf
#import numpy as np
import keras
import time
import pandas as pd

from keras.models import Sequential
from keras.layers import Dense, Activation
from tensorflow.python.keras.callbacks import TensorBoard
import time
import matplotlib.pyplot as plt
import os
from keras.callbacks import CSVLogger


#reset graph to remove duplicate nodes
tf.reset_default_graph()

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.


## Import data set and prepare for analysis

In [2]:
# initial work with the smaller data set
X_test = pd.read_csv('cardio_x_test.csv', sep = ',', encoding = 'unicode_escape')  
X_train = pd.read_csv('cardio_x_train.csv', sep = ',', encoding = 'unicode_escape')  
y_test = pd.read_csv('cardio_y_test.csv', sep = ',', encoding = 'unicode_escape')  
y_train = pd.read_csv('cardio_y_train.csv', sep = ',', encoding = 'unicode_escape')
# examine the shape of original input data
print('The shape of the X test set is: ', X_test.shape)
print('The shape of the X train set is: ',X_train.shape)
print('The shape of the Y test set is: ',y_test.shape)
print('The shape of the Y train set is: ',y_train.shape)

The shape of the X test set is:  (13750, 24)
The shape of the X train set is:  (54997, 24)
The shape of the Y test set is:  (13750, 2)
The shape of the Y train set is:  (54997, 2)


In [3]:
#drop unnecessary columns for model
X_train = X_train.drop(['Unnamed: 0', 'id', 'age_days', 'LATITUDE',
                 'LONGITUDE','ZIP','FACILITY_NAME','ADDRESS',
                 'CITY','STATE','DESCRIPTION','FACILITY_TYPE' ], axis=1)
X_test = X_test.drop(['Unnamed: 0', 'id', 'age_days', 'LATITUDE',
                 'LONGITUDE','ZIP','FACILITY_NAME','ADDRESS',
                 'CITY','STATE','DESCRIPTION','FACILITY_TYPE' ], axis=1)
y_train = y_train.drop(['Unnamed: 0'], axis=1)
y_test = y_test.drop(['Unnamed: 0'], axis=1)    


In [4]:
#rename the column in the target train and test sets
y_train = y_train.rename(columns={"x": "cardio"})
y_test = y_test.rename(columns={"x": "cardio"})

In [5]:
#total number of NaN values in each column
print('The total number of NaN values in the X test set is:\n', X_test.isnull().sum())
print('The total number of NaN values in the X train set is:\n',X_train.isnull().sum())
print('The total number of NaN values in the Y test set is:\n',y_test.isnull().sum())
print('The total number of NaN values in the Y train set is:\n',y_train.isnull().sum())

The total number of NaN values in the X test set is:
 age_years      0
gender         0
height_cm      0
weight_kg      0
BMI            0
ap_hi          0
ap_lo          0
cholesterol    0
gluc           0
smoke          0
alcohol        0
active         0
dtype: int64
The total number of NaN values in the X train set is:
 age_years      0
gender         0
height_cm      0
weight_kg      0
BMI            0
ap_hi          0
ap_lo          0
cholesterol    0
gluc           0
smoke          0
alcohol        0
active         0
dtype: int64
The total number of NaN values in the Y test set is:
 cardio    0
dtype: int64
The total number of NaN values in the Y train set is:
 cardio    0
dtype: int64


In [6]:
# mapping function to convert text female/male to integer 1/2
convert_to_binary = {'female' : 1, 'male' : 2}

# define binary variable for gender
X_train['gender'] = X_train['gender'].map(convert_to_binary)
X_test['gender'] = X_test['gender'].map(convert_to_binary)

In [7]:
X_train.head()

Unnamed: 0,age_years,gender,height_cm,weight_kg,BMI,ap_hi,ap_lo,cholesterol,gluc,smoke,alcohol,active
0,62,1,160,101.0,39.453125,120,70,1,1,0,0,1
1,41,2,172,78.0,26.365603,140,80,1,1,0,0,1
2,57,1,168,80.0,28.344671,140,90,1,1,0,0,1
3,52,1,161,75.0,28.934069,110,80,2,3,0,0,1
4,39,1,169,64.0,22.408179,100,70,1,1,0,0,1


In [8]:
y_train.head()

Unnamed: 0,cardio
0,1
1,0
2,1
3,0
4,0


## Run 1: Keras Model with 2 layers and 12 and 1 nodes

In [9]:
X_train.shape[1]

12

In [10]:
tf.compat.v1.random.set_random_seed(23)

#https://keras.io/getting-started/sequential-model-guide/
#Instantiate an estimator
model = Sequential()
#layer 1 with 12 nodes
model.add(Dense(12, input_dim=X_train.shape[1]))  #12 nodes
model.add(Activation('relu'))
#layer 2 with 1 node
model.add(Dense(1))                  #1 nodes
model.add(Activation('sigmoid'))

Instructions for updating:
Colocations handled automatically by placer.


In [11]:
#Before training a model, you need to configure 
#the learning process, which is done via the compile method. 

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [12]:
#https://stackoverflow.com/questions/38445982/how-to-log-keras-loss-output-to-a-file
#write model loss and accuracy to csv file
csv_logger = CSVLogger('log.csv', append=True, separator=';')
#model.fit(X_train, Y_train, callbacks=[csv_logger])

t0 = time.time()
# Train the model, iterating on the data in batches of 50 samples
history = model.fit(X_train, y_train, epochs=10, batch_size=50, callbacks=[csv_logger])
# Evaluate the model
score = model.evaluate(X_test, y_test, batch_size=50)

t1 = time.time()
print('Total time to train and evaluate model: {:.3f}'.format(t1-t0))
time_keras1 = t1-t0

Instructions for updating:
Use tf.cast instead.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Total time to train and evaluate model: 12.629


In [13]:
score

[0.569511148929596, 0.7254545482722196]

In [14]:
print(model.metrics_names)

['loss', 'acc']


In [15]:
keras_test_loss, keras_test_accuracy = score
print('Test loss: {}'.format(keras_test_loss))
print('Test accuracy: {}'.format(keras_test_accuracy))

Test loss: 0.569511148929596
Test accuracy: 0.7254545482722196


## Run 2: Keras Model with 2 layers and 12 and 1 nodes

In [16]:
# Clean up the TF session
keras.backend.clear_session()
#reset graph to remove duplicate nodes
tf.reset_default_graph()

tf.compat.v1.random.set_random_seed(123)


model2 = Sequential()
model2.add(Dense(12, input_dim=X_train.shape[1]))  #12 nodes
model2.add(Activation('relu'))
model2.add(Dense(1))                  #1 nodes
model2.add(Activation('sigmoid'))


In [17]:
#Before training a model, you need to configure 
#the learning process, which is done via the compile method. 

model2.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [18]:
#https://stackoverflow.com/questions/38445982/how-to-log-keras-loss-output-to-a-file
#write model loss and accuracy to csv file
csv_logger = CSVLogger('log2.csv', append=True, separator=';')
#model.fit(X_train, Y_train, callbacks=[csv_logger])

t0 = time.time()
# Train the model, iterating on the data in batches of 50 samples
history2 = model2.fit(X_train, y_train, epochs=10, batch_size=50, callbacks=[csv_logger])
# Evaluate the model
score2 = model2.evaluate(X_test, y_test, batch_size=50)

t1 = time.time()
print('Total time to train and evaluate model: {:.3f}'.format(t1-t0))
time_keras2 = t1-t0

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Total time to train and evaluate model: 12.490


In [19]:
keras_test_loss2, keras_test_accuracy2 = score2
print('Test loss: {}'.format(keras_test_loss2))
print('Test accuracy: {}'.format(keras_test_accuracy2))

Test loss: 0.563597826090726
Test accuracy: 0.7281454569643194


## Run 3: Keras Model with 2 layers and 12 and 1 nodes

In [20]:
# Clean up the TF session
keras.backend.clear_session()
#reset graph to remove duplicate nodes
tf.reset_default_graph()

tf.compat.v1.random.set_random_seed(42)


model3 = Sequential()
model3.add(Dense(12, input_dim=X_train.shape[1]))  #12 nodes
model3.add(Activation('relu'))
model3.add(Dense(1))                  #1 nodes
model3.add(Activation('sigmoid'))


In [21]:
#Before training a model, you need to configure 
#the learning process, which is done via the compile method. 

model3.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [22]:
#https://stackoverflow.com/questions/38445982/how-to-log-keras-loss-output-to-a-file
#write model loss and accuracy to csv file
csv_logger = CSVLogger('log3.csv', append=True, separator=';')
#model.fit(X_train, Y_train, callbacks=[csv_logger])

t0 = time.time()
# Train the model, iterating on the data in batches of 50 samples
history3 = model3.fit(X_train, y_train, epochs=10, batch_size=50, callbacks=[csv_logger])
# Evaluate the model
score3 = model3.evaluate(X_test, y_test, batch_size=50)

t1 = time.time()
print('Total time to train and evaluate model: {:.3f}'.format(t1-t0))
time_keras3 = t1-t0

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Total time to train and evaluate model: 12.484


In [23]:
keras_test_loss3, keras_test_accuracy3 = score3
print('Test loss: {}'.format(keras_test_loss3))
print('Test accuracy: {}'.format(keras_test_accuracy3))

Test loss: 0.5733089323477312
Test accuracy: 0.7146181854334744


## Model summary

In [24]:
keras_summary_models = {
    'Processing Time' : [round(time_keras1, 3), round(time_keras2,3), round(time_keras3,3)],
    'Test Loss' : [round(keras_test_loss, 3), round(keras_test_loss2, 3), round(keras_test_loss3, 3)],
    'Test Accruacy' : [round(keras_test_accuracy, 3), round(keras_test_accuracy2, 3), round(keras_test_accuracy3, 3)]
}

keras_summary_models_df = pd.DataFrame(keras_summary_models)
keras_summary_models_df

Unnamed: 0,Processing Time,Test Loss,Test Accruacy
0,12.629,0.57,0.725
1,12.49,0.564,0.728
2,12.484,0.573,0.715
