# Heart Disease Prediction

In [1]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

In [2]:
# Read the cleaned heart-disease table from disk
data = pd.read_csv('/data/cleaned_dataset.csv')

# Binary target: 1 when 'num' is greater than zero, 0 otherwise
y = data['num'].gt(0).astype(int)

# Feature frame: remove the label source ('num') together with the
# 'id' and 'dataset' columns, which are not used as model inputs
X = data.drop(columns=['num', 'id', 'dataset'])

# Turn the two-valued columns into 0/1 integers.
# Any value absent from a mapping becomes NaN (behaviour of Series.map).
X = X.assign(
    sex=X['sex'].map({'Male': 0, 'Female': 1}),
    fbs=X['fbs'].map({False: 0, True: 1}),
    exang=X['exang'].map({False: 0, True: 1}),
)

# Expand the multi-category columns into indicator (dummy) columns
X = pd.get_dummies(X, columns=['cp', 'restecg', 'slope', 'thal'])

# Standardise the numeric columns
# NOTE(review): the scaler is fit on every row of X here, i.e. before any
# train/test split happens in this cell. If a split follows, test-set
# statistics leak into the scaling; consider fitting on training rows only.
numerical_cols = ['age', 'trestbps', 'chol', 'thalch', 'oldpeak', 'ca']
scaler = StandardScaler()
scaler.fit(X[numerical_cols])
X[numerical_cols] = scaler.transform(X[numerical_cols])


In [3]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# shuffle (and therefore the split) repeatable between runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [4]:
# Build the model: a small fully connected binary classifier.
# The input width is declared with an explicit Input object rather than an
# `input_shape=` argument on the first Dense layer; Keras warns about the
# latter for Sequential models and recommends Input(shape) instead.
model = tf.keras.Sequential([
    # One input feature per column of the training frame
    tf.keras.Input(shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    # Randomly zero 20% of the activations during training (regularisation)
    tf.keras.layers.Dropout(0.2),
    # Single sigmoid unit -> probability of the positive class
    tf.keras.layers.Dense(1, activation='sigmoid')
])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [5]:
# Configure training: Adam optimiser, binary cross-entropy loss, and two
# reported metrics (accuracy first, then AUC). The metric order here fixes
# the positions of the values returned by model.evaluate.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy', tf.keras.metrics.AUC()],
)


In [6]:
# Custom callback that stops training once the training accuracy reaches a
# configurable target (95% by default)
class StopTrainingAtAccuracy(tf.keras.callbacks.Callback):
    """Stop training as soon as the epoch's ``'accuracy'`` log hits a target.

    The callback inspects the ``'accuracy'`` entry of the per-epoch logs (the
    training accuracy, not ``'val_accuracy'``) and sets
    ``self.model.stop_training`` when it is at or above ``target_accuracy``.

    Args:
        target_accuracy: Threshold in the same units as the logged accuracy
            (a fraction, e.g. ``0.95`` for 95%).
    """

    def __init__(self, target_accuracy=0.95):
        super().__init__()
        self.target_accuracy = target_accuracy

    def on_epoch_end(self, epoch, logs=None):
        """Check the logged accuracy at the end of an epoch.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Optional dict of metric values for the epoch.
        """
        # `logs` defaults to None and the 'accuracy' key is only present when
        # that metric was compiled into the model; in either case there is
        # nothing to compare, so do nothing instead of raising.
        accuracy = (logs or {}).get('accuracy')
        if accuracy is None:
            return

        if accuracy >= self.target_accuracy:
            print(f"\nReached {self.target_accuracy * 100:.2f}% accuracy. Stopping training!")
            self.model.stop_training = True


In [7]:
# Fit the network for up to 50 epochs in mini-batches of 32, holding out a
# 0.2 fraction of the training data for validation. The callback may end
# the run early once its accuracy target is met.
accuracy_callback = StopTrainingAtAccuracy(target_accuracy=0.95)
history = model.fit(
    x=X_train,
    y=y_train,
    validation_split=0.2,
    batch_size=32,
    epochs=50,
    callbacks=[accuracy_callback],
)


Epoch 1/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 20ms/step - accuracy: 0.6877 - auc: 0.7957 - loss: 0.6226 - val_accuracy: 0.7959 - val_auc: 0.9111 - val_loss: 0.5223
Epoch 2/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7974 - auc: 0.8900 - loss: 0.5220 - val_accuracy: 0.8027 - val_auc: 0.9221 - val_loss: 0.4359
Epoch 3/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8411 - auc: 0.9087 - loss: 0.4245 - val_accuracy: 0.8231 - val_auc: 0.9275 - val_loss: 0.3865
Epoch 4/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8254 - auc: 0.9036 - loss: 0.3870 - val_accuracy: 0.8435 - val_auc: 0.9356 - val_loss: 0.3595
Epoch 5/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.8689 - auc: 0.9281 - loss: 0.3452 - val_accuracy: 0.8435 - val_auc: 0.9408 - val_loss: 0.3500
Epoch 6/50
[1m19/19[0m [32m━━━━━━━━━

In [8]:
# Score the trained model on the held-out test set.
# `results` is indexed positionally below: index 1 is reported as accuracy
# and index 2 as AUC, matching the order the metrics were given at compile time.
results = model.evaluate(x=X_test, y=y_test)
print(f"Test Accuracy: {results[1]:.2f}")
print(f"Test AUC: {results[2]:.2f}")


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8193 - auc: 0.8806 - loss: 0.5269 
Test Accuracy: 0.83
Test AUC: 0.89
