In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

import warnings
warnings.simplefilter('ignore', FutureWarning)

## Data Pre-Processing

In [None]:
#Testing for CVD, using max reduced csv file
heart_cvd = pd.read_csv('max_reduce.csv')
# Clean to columns needed
heart_cvd = heart_cvd[['SEX','AGE','SYSBP','CIGPDAY','BMI','TOTCHOL','DIABETES','BPMEDS','HEARTRTE','GLUCOSE','educ','CVD']]
heart_cvd.head()

In [None]:
#Encode SEX column
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
heart_cvd['SEX'] = label_encoder.fit_transform(heart_cvd['SEX'])

In [None]:
#Get dummies for educ col, drop first
heart_cvd = pd.get_dummies(heart_cvd,columns=['educ'],drop_first=True)
heart_cvd.head()

In [None]:
X = heart_cvd.drop("CVD", axis=1)
y = heart_cvd["CVD"]
print(X.shape, y.shape)

In [None]:
X.head()

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=1, stratify=y)
X_scaler = StandardScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Create a Deep Learning Model

In [None]:
from keras.models import Sequential
from keras.layers import Dense

model = Sequential()
model.add(Dense(units=11, activation='relu', input_dim=13))
model.add(Dense(units=9, activation='relu'))
model.add(Dense(units=7, activation='relu'))
model.add(Dense(units=5, activation='relu'))
model.add(Dense(units=3, activation='relu'))
model.add(Dense(units=1, activation='sigmoid'))

In [None]:
#https://stackoverflow.com/questions/42081257/keras-binary-crossentropy-vs-categorical-crossentropy-performance
from keras.metrics import categorical_accuracy
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])
history = model.fit(
    X_train_scaled,
    y_train,
    epochs=1000,
    shuffle=True,
    verbose=2
)

## Quantify our Trained Model

In [None]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test, verbose=2)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test, verbose=2)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# loss is sum of error on training set
# error should decrease over epochs, steeper the better
plt.plot(history.history['loss'])
plt.title('Training with Backpropagation over 1000 Iterations')
plt.ylabel('Training Loss')
plt.xlabel('Iterations')
plt.grid(axis='y')

In [None]:
# accuracy should increase over epochs
plt.plot(history.history['acc'])
plt.title('Training Accuracy over 1000 Iterations')
plt.ylabel('Training Accuracy')
plt.xlabel('Iterations')
plt.grid(axis='y')

In [None]:
def pred_count(heart_cvd):
    corr = 0
    wron = 0
    for index, row in heart_cvd.iterrows():
        if row['predicted'] == row['actual']:
            corr = corr + 1
        else:
            wron = wron + 1

    print(f'Correct predictions: {corr}')
    print(f'Incorrect predictions: {wron}')

predictions = model.predict_classes(X_test_scaled)
test_df = pd.DataFrame({'predicted':np.ravel(predictions),'actual':np.ravel(y_test)})
pred_count(test_df)


# Saving a Trained Model
We can save our trained models using the HDF5 binary format with the extension `.h5`

In [None]:
# Save the model
model.save("framingham_cvd.h5")

# Loading a Model

In [1]:
import pandas as pd
# Load the model
from keras.models import load_model
cvd_model = load_model("framingham_cvd.h5")

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## Evaluating the loaded model

In [None]:
model_loss, model_accuracy = cvd_model.evaluate(
    X_test_scaled, y_test, verbose=2)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

In [3]:
#%% create a sample "good" patient, not from data
pt = {'SEX':[0],'AGE':[50],'CIGPDAY':[0],'HEARTRTE':[85],'SYSBP':[120],
     'BPMEDS':[0],'TOTCHOL':[160],'BMI':[25],'GLUCOSE':[70],'DIABETES':[1],
     'educ_2.0':[0],'educ_3.0':[0],'educ_4.0':[1]}
good_patient = pd.DataFrame(pt)

# return probability of response (stroke)
cvd_model.predict_proba(good_patient)

array([[4.1413753e-07]], dtype=float32)

In [4]:
print(f"Predicted class: {cvd_model.predict_classes(good_patient)}")

Predicted class: [[0]]


In [5]:
#%% create a sample "bad" patient, not from data
pt = {'SEX':[1],'AGE':[75],'CIGPDAY':[5],'HEARTRTE':[68],'SYSBP':[180],
     'BPMEDS':[0],'TOTCHOL':[210],'BMI':[25],'GLUCOSE':[70],'DIABETES':[0],
     'educ_2.0':[0],'educ_3.0':[0],'educ_4.0':[0]}
bad_patient = pd.DataFrame(pt)

# return probability of response (stroke)
cvd_model.predict_proba(bad_patient)

array([[0.00057183]], dtype=float32)

In [6]:
print(f"Predicted class: {cvd_model.predict_classes(bad_patient)}")

Predicted class: [[0]]


In [7]:
#%% create a sample "bad" patient, not from data
pt2 = {'SEX':[1],'AGE':[75],'CIGPDAY':[55],'HEARTRTE':[90],'SYSBP':[180],
     'BPMEDS':[1],'TOTCHOL':[210],'BMI':[33],'GLUCOSE':[101],'DIABETES':[0],
     'educ_2.0':[0],'educ_3.0':[0],'educ_4.0':[0]}
bad_patient2 = pd.DataFrame(pt2)

# return probability of response (stroke)
cvd_model.predict_proba(bad_patient2)

array([[0.99988925]], dtype=float32)

In [8]:
print(f"Predicted class: {cvd_model.predict_classes(bad_patient)}")

Predicted class: [[0]]
