In [1]:
import numpy as np
import pandas as pd
import os

import warnings
warnings.simplefilter('ignore', FutureWarning)

## Data Pre-Processing

In [2]:
#Testing for CVD, using max reduced csv file
heart_cvd = pd.read_csv('data/max_reduce.csv')

X = heart_cvd.drop("CVD", axis=1)
y = heart_cvd["CVD"]
print(X.shape, y.shape)

(9482, 14) (9482,)


In [3]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from keras.utils import to_categorical

X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=1, stratify=y)
X_scaler = StandardScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)


# Step 1: Label-encode data set
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

# Step 2: Convert encoded labels to one-hot-encoding
y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)

Using TensorFlow backend.


In [4]:
heart_cvd.head()

Unnamed: 0,RANDID,SEX,AGE,SYSBP,DIABP,CURSMOKE,BMI,DIABETES,BPMEDS,HEARTRTE,GLUCOSE,educ,STROKE,CVD,HYPERTEN
0,2448,1,39,106.0,70.0,0,26.97,0,0,80,77,4,0,1,0
1,6238,2,46,121.0,81.0,0,28.73,0,0,95,76,2,0,0,0
2,6238,2,52,105.0,69.5,0,29.43,0,0,80,86,2,0,0,0
3,6238,2,58,108.0,66.0,0,28.5,0,0,80,71,2,0,0,0
4,9428,1,48,127.5,80.0,1,25.34,0,0,75,70,1,0,0,0


In [7]:
y.head()

0    1
1    0
2    0
3    0
4    0
Name: CVD, dtype: int64

In [8]:
X.head()

Unnamed: 0,RANDID,SEX,AGE,SYSBP,DIABP,CURSMOKE,BMI,DIABETES,BPMEDS,HEARTRTE,GLUCOSE,educ,STROKE,HYPERTEN
0,2448,1,39,106.0,70.0,0,26.97,0,0,80,77,4,0,0
1,6238,2,46,121.0,81.0,0,28.73,0,0,95,76,2,0,0
2,6238,2,52,105.0,69.5,0,29.43,0,0,80,86,2,0,0
3,6238,2,58,108.0,66.0,0,28.5,0,0,80,71,2,0,0
4,9428,1,48,127.5,80.0,1,25.34,0,0,75,70,1,0,0


# Create a Deep Learning Model

In [5]:
from keras.models import Sequential
from keras.layers import Dense

model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim=14))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=2, activation='softmax'))

In [6]:
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=1000,
    shuffle=True,
    verbose=0
)

<keras.callbacks.History at 0x112429518>

## Quantify our Trained Model

In [9]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

Normal Neural Network - Loss: 2.4256852144765886, Accuracy: 0.766343315056938


In [10]:
encoded_predictions = model.predict_classes(X_test_scaled[:50])
prediction_labels = label_encoder.inverse_transform(encoded_predictions)

  if diff:


In [11]:
prediction_labels

array([0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0,
       0, 0, 1, 0, 0, 0])

In [12]:
print(f"Predicted classes: {prediction_labels}")
print(f"Actual Labels: {list(y_test[:50])}")

Predicted classes: [0 0 0 0 0 0 0 1 1 1 0 1 1 0 1 1 1 0 0 1 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 1 0
 1 0 1 0 0 1 0 0 0 1 0 0 0]
Actual Labels: [1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1]


# Saving a Trained Model
We can save our trained models using the HDF5 binary format with the extension `.h5`

In [13]:
# Save the model
model.save("framingham_cvd.h5")

# Loading a Model

In [14]:
# Load the model
from keras.models import load_model
diabetes_model = load_model("framingham_cvd.h5")

## Evaluating the loaded model

In [15]:
model_loss, model_accuracy = diabetes_model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

Normal Neural Network - Loss: 2.4256852144765886, Accuracy: 0.766343315056938


In [16]:
# Testing Data Paths
X_testing_data = os.path.join("data/" "cvd_test.csv")

In [17]:
# Read the testing data
X_testNew_df = pd.read_csv(X_testing_data, delimiter=",", skiprows=1, header=None)
X_testNew = X_testNew_df.values
X_testNew.shape

(21, 14)

In [18]:
# Grab just one data point to test with
test = np.expand_dims(X_testNew[0], axis=0)
test.shape

(1, 14)

In [19]:
# Make a prediction. The result should be 0 - no CVD
print(f"Predicted class: {model.predict_classes(test)}")

Predicted class: [0]


In [20]:
# Grab just one data point to test with
test2 = np.expand_dims(X_testNew[1], axis=0)
test2.shape

(1, 14)

In [21]:
# Make a prediction. The result should be 1 - CVD
print(f"Predicted class: {model.predict_classes(test2)}")

Predicted class: [0]


In [22]:
# Grab just one data point to test with
test3 = np.expand_dims(X_testNew[2], axis=0)
test3.shape

(1, 14)

In [23]:
# Make a prediction. The result should be 1 - CVD
print(f"Predicted class: {model.predict_classes(test3)}")

Predicted class: [0]


In [32]:
# Grab just one data point to test with
test4 = np.expand_dims(X_testNew[14], axis=0)
test4.shape

(1, 14)

In [33]:
# Make a prediction. The result should be 0 - no CVD
print(f"Predicted class: {model.predict_classes(test4)}")

Predicted class: [0]
