In [1]:
import json
import pickle

from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout
import numpy as np
import pandas as pd
pd.options.display.float_format = '{:,.4f}'.format
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.ensemble import AdaBoostClassifier

%config Completer.use_jedi = False

#### Step 1: Read and Prepare Data

In [2]:
# Load the raw Cleveland heart-disease table from the working directory.
data = pd.read_csv('cleveland.csv')

# Binarise the outcome: rows whose diagnosis equals 0 get heart_disease = 0,
# every other row gets heart_disease = 1.
my_filter = data['diagnosis'] == 0
data['heart_disease'] = np.where(my_filter, 0.0, 1.0)

# Drop the original label column and replace spaces in the remaining
# column names with underscores.
data = (
    data
    .drop(columns=['diagnosis'])
    .rename(columns=lambda name: name.replace(' ', '_'))
)

# Coerce every column to numeric (unparseable entries become NaN), then
# fill each column's gaps with that column's median.
data = data.apply(pd.to_numeric, errors='coerce')
data = data.fillna(data.median())

#### Step 2: Perform Feature Engineering to Transform Data Into a Format Suitable for ML. **SAVE THE SCALER PARAMETERS FOR LATER!!!**

In [3]:
# Predictor columns fed to every model, in the order the scaler sees them.
features = [
    'age',
    'sex',
    'chest_pain',
    'blood_pressure',
    'serum_cholestoral',
    'fasting_blood_sugar',
    'electrocardiographic',
    'max_heart_rate',
    'induced_angina',
    'ST_depression',
    'slope',
    'vessels',
    'thal',
]

# Binary label column created during data preparation.
target = 'heart_disease'

# Standardise the feature columns. fit_transform() fits the scaler and returns
# the scaled array in one pass, so no separate fit() call is needed beforehand.
# NOTE(review): the scaler is fitted on every row, i.e. before the train/test
# split performed in Step 3, so held-out rows influence the scaling statistics.
# Confirm this is intended.
scaler = preprocessing.StandardScaler()
scaled_values = scaler.fit_transform(data[features])

# Reuse data's index so the target assignment below aligns row-for-row even if
# data does not carry a default RangeIndex.
scaled_data = pd.DataFrame(scaled_values, columns=features, index=data.index)
scaled_data[target] = data[target]

# Per-feature parameters learned by the scaler, keyed by feature name, so the
# same transformation can be re-applied outside this notebook.
scaler_means = dict(zip(features, scaler.mean_))
scaler_sigmas = dict(zip(features, scaler.scale_))

with open('scaler_means.json', 'w') as fout:
    json.dump(scaler_means, fout)

with open('scaler_sigmas.json', 'w') as fout:
    json.dump(scaler_sigmas, fout)

#### Step 3: Fit and Save Model(s)

In [4]:
# Split the scaled frame into train and test partitions, stratified on the
# target column, with a fixed seed so the split is repeatable.
train, test = train_test_split(
    scaled_data,
    stratify=scaled_data[target],
    random_state=0,
)

# Separate predictors from labels for each partition.
x_train = train[features]
y_train = train[target]
x_test = test[features]
y_test = test[target]

In [5]:
# Hyper-parameters passed to the AdaBoost classifier.
adaboost_params = dict(
    learning_rate=0.01,
    n_estimators=100,
)

# Build the classifier with a fixed seed and fit it on the training partition;
# fit() hands back the estimator, so construction and fitting are chained.
clf = AdaBoostClassifier(random_state=0, **adaboost_params).fit(x_train, y_train)

# Persist the fitted classifier to disk.
with open('adaboost.pkl', 'wb') as f:
    pickle.dump(clf, f)

In [6]:
# Seed TensorFlow's global random generator before the network is built.
tf.random.set_seed(0)

# Feed-forward binary classifier: Dense(9) -> Dropout(0.3) -> Dense(6) ->
# Dropout(0.1) -> a single sigmoid output unit.
model = Sequential([
    Dense(9, activation='relu'),
    Dropout(0.3),
    Dense(6, activation='relu'),
    Dropout(0.1),
    Dense(1, activation='sigmoid'),
])
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train on the training partition as plain arrays, with 20% of it passed to
# Keras as the validation split.
X = train[features].values
y = train[target].values
history = model.fit(
    X,
    y,
    epochs=100,
    batch_size=10,
    verbose=0,
    validation_split=0.2,
)

# Write the trained network to the 'neural' path.
model.save('neural')

Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: neural\assets
