In [122]:
import pandas as pd
import tensorflow as tf
import numpy as np

In [123]:
# Load the heart-disease table and discard the 'row.names' column, which only
# carries the original row numbering and is not a feature.
data = pd.read_csv(r'heart.csv').drop(columns='row.names')

data.head()

Unnamed: 0,sbp,tobacco,ldl,adiposity,famhist,typea,obesity,alcohol,age,chd
0,160,12.0,5.73,23.11,Present,49,25.3,97.2,52,1
1,144,0.01,4.41,28.61,Absent,55,28.87,2.06,63,1
2,118,0.08,3.48,32.28,Present,52,29.14,3.81,46,0
3,170,7.5,6.41,38.03,Present,51,31.99,24.26,58,1
4,134,13.6,3.5,27.78,Present,60,25.99,57.34,49,1


In [124]:
# Recode family history as the strings '1' / '0'. The values are still
# strings after this step, so the column keeps its object dtype.
famhist_codes = {'Present': '1', 'Absent': '0'}
data['famhist'] = data['famhist'].map(famhist_codes)

data.head()

Unnamed: 0,sbp,tobacco,ldl,adiposity,famhist,typea,obesity,alcohol,age,chd
0,160,12.0,5.73,23.11,1,49,25.3,97.2,52,1
1,144,0.01,4.41,28.61,0,55,28.87,2.06,63,1
2,118,0.08,3.48,32.28,1,52,29.14,3.81,46,0
3,170,7.5,6.41,38.03,1,51,31.99,24.26,58,1
4,134,13.6,3.5,27.78,1,60,25.99,57.34,49,1


In [125]:
# Taken from https://www.tensorflow.org/tutorials/load_data/csv
#
# Build one symbolic Keras input per DataFrame column, keyed by column name.
# Object-dtype columns become tf.string inputs; every other column becomes
# tf.float32. Every column in `data` is included, the 'chd' column too.

inputs = {}

for name, column in data.items():
  dtype = tf.string if column.dtype == object else tf.float32
  inputs[name] = tf.keras.Input(shape=(1,), name=name, dtype=dtype)

inputs

{'sbp': <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'sbp')>,
 'tobacco': <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'tobacco')>,
 'ldl': <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'ldl')>,
 'adiposity': <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'adiposity')>,
 'famhist': <KerasTensor: shape=(None, 1) dtype=string (created by layer 'famhist')>,
 'typea': <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'typea')>,
 'obesity': <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'obesity')>,
 'alcohol': <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'alcohol')>,
 'age': <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'age')>,
 'chd': <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'chd')>}

In [126]:
# famhist holds the strings '1' / '0', so it is still picked up as a string
# input. Cast it to float, then rebuild the per-column Keras inputs.

data['famhist'] = data['famhist'].astype(float)

inputs = {}

for name, column in data.items():
  # Object-dtype columns map to tf.string, everything else to tf.float32.
  dtype = tf.string if column.dtype == object else tf.float32
  inputs[name] = tf.keras.Input(shape=(1,), name=name, dtype=dtype)

inputs

{'sbp': <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'sbp')>,
 'tobacco': <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'tobacco')>,
 'ldl': <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'ldl')>,
 'adiposity': <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'adiposity')>,
 'famhist': <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'famhist')>,
 'typea': <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'typea')>,
 'obesity': <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'obesity')>,
 'alcohol': <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'alcohol')>,
 'age': <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'age')>,
 'chd': <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'chd')>}

In [127]:
# Taken from https://stackoverflow.com/questions/43697240/how-can-i-split-a-dataset-from-a-csv-file-for-training-and-testing
#
# 80/20 train/test split. The seed is fixed so that the split (and the CSV
# files written below) is identical on every Restart & Run All; an unseeded
# RandomState would draw a different split on each run.
SPLIT_SEED = 42

train = data.sample(frac=0.8, random_state=SPLIT_SEED)
# Everything that was not sampled into the training set becomes the test set.
test = data.drop(index=train.index)

# The two parts must partition the frame: no row lost, none duplicated.
assert len(train) + len(test) == len(data), "train/test split does not cover data"

# Persist both parts so the exact split can be inspected or reused.
train.to_csv('heart_train.csv', index=False)
test.to_csv('heart_test.csv', index=False)

print("--Get data--")
# 'chd' is the target column; the remaining columns are the features.
y_test = test.chd
x_test = test.drop('chd', axis=1)

y_train = train.chd
x_train = train.drop('chd', axis=1)

print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

--Get data--
(370, 9) (92, 9) (370,) (92,)


In [128]:
print("--Make model--")
# Binary classifier over the 9 feature columns: two 16-unit ReLU hidden layers
# with a small L2 penalty on their kernels, then a single sigmoid output unit.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(
  16,
  input_shape=(9,),
  activation="relu",
  kernel_regularizer=tf.keras.regularizers.l2(0.0001),
))
model.add(tf.keras.layers.Dense(
  16,
  activation="relu",
  kernel_regularizer=tf.keras.regularizers.l2(0.0001),
))
# A third hidden layer is left disabled:
#tf.keras.layers.Dense(32, activation="relu", kernel_regularizer=tf.keras.regularizers.l2(0.001)),
model.add(tf.keras.layers.Dense(1, activation="sigmoid"))

# Adamax driven by a staircase exponential-decay learning-rate schedule.
# NOTE(review): with 370 training rows and batch_size=5 there are 74 steps per
# epoch, so 75 epochs is 5,550 steps -- far fewer than decay_steps=100000.
# With staircase=True the rate therefore appears to stay at 1e-2 for the whole
# run; confirm that this is intended.
initial_learning_rate = 1e-2
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
  initial_learning_rate=initial_learning_rate,
  decay_steps=100000,
  decay_rate=0.9,
  staircase=True,
)

optimizer = tf.keras.optimizers.Adamax(learning_rate=lr_schedule)

model.compile(
  optimizer=optimizer,
  loss='binary_crossentropy',
  metrics=['accuracy'],
)

print("--Fit model--")
model.fit(x_train, y_train, batch_size=5, epochs=75, verbose=2)

print("--Evaluate model--")
# Score the fitted model on both splits so the train/test gap is visible.
model_loss1, model_acc1 = model.evaluate(x_train, y_train, verbose=2)
model_loss2, model_acc2 = model.evaluate(x_test, y_test, verbose=2)
print(f"Train / Test Accuracy: {model_acc1*100:.1f}% / {model_acc2*100:.1f}%")

--Make model--
--Fit model--
Epoch 1/75
74/74 - 1s - loss: 2.0611 - accuracy: 0.5892 - 603ms/epoch - 8ms/step
Epoch 2/75
74/74 - 0s - loss: 1.0972 - accuracy: 0.6081 - 102ms/epoch - 1ms/step
Epoch 3/75
74/74 - 0s - loss: 0.8618 - accuracy: 0.5973 - 113ms/epoch - 2ms/step
Epoch 4/75
74/74 - 0s - loss: 0.8127 - accuracy: 0.6135 - 101ms/epoch - 1ms/step
Epoch 5/75
74/74 - 0s - loss: 0.9359 - accuracy: 0.6027 - 107ms/epoch - 1ms/step
Epoch 6/75
74/74 - 0s - loss: 0.8910 - accuracy: 0.6189 - 105ms/epoch - 1ms/step
Epoch 7/75
74/74 - 0s - loss: 0.6749 - accuracy: 0.6324 - 104ms/epoch - 1ms/step
Epoch 8/75
74/74 - 0s - loss: 0.7786 - accuracy: 0.6027 - 110ms/epoch - 1ms/step
Epoch 9/75
74/74 - 0s - loss: 0.6402 - accuracy: 0.6676 - 109ms/epoch - 1ms/step
Epoch 10/75
74/74 - 0s - loss: 0.6945 - accuracy: 0.6162 - 106ms/epoch - 1ms/step
Epoch 11/75
74/74 - 0s - loss: 0.6663 - accuracy: 0.6189 - 110ms/epoch - 1ms/step
Epoch 12/75
74/74 - 0s - loss: 0.7065 - accuracy: 0.6649 - 114ms/epoch - 2ms/s