In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OrdinalEncoder, StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report

In [2]:
data=pd.read_csv('data.csv')

In [3]:
data = data.replace({
    'Yes': 1, 'No': 0, 'Male': 1, 'Female': 0,
    'No, borderline diabetes': '0',
    'Yes (during pregnancy)': '1'
})

  data = data.replace({


In [4]:
data['MentalHealth'].fillna(data['MentalHealth'].mean(),inplace=True)
selected_features = [
    'AgeCategory', 'DiffWalking', 'Diabetic', 'PhysicalHealth', 'Stroke',
    'Smoking','GenHealth',
    'Sex', 'Race', 'BMI', 'SkinCancer', 'Asthma','MentalHealth'
]
X = data[selected_features]
y = data['HeartDisease']
# X = X.drop(columns=['MentalHealth'], axis=1)

X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42, shuffle=True)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['MentalHealth'].fillna(data['MentalHealth'].mean(),inplace=True)


In [5]:
age_order = ['18-24', '25-29', '30-34', '35-39', '40-44', '45-49', '50-54', '55-59', '60-64', '65-69', '70-74', '75-79', '80 or older']
ordinal_encoder = OrdinalEncoder(categories=[age_order])
X_train['AgeCategory'] = ordinal_encoder.fit_transform(X_train[['AgeCategory']])
X_val['AgeCategory'] = ordinal_encoder.transform(X_val[['AgeCategory']])
X_test['AgeCategory'] = ordinal_encoder.transform(X_test[['AgeCategory']])

In [6]:
#One-hot encode 'AgeCategory', 'Race', 'GenHealth' for all datasets
X_train = pd.get_dummies(X_train, columns=['Race', 'GenHealth'], drop_first=True)
X_val = pd.get_dummies(X_val, columns=['Race', 'GenHealth'], drop_first=True)
X_test = pd.get_dummies(X_test, columns=['Race', 'GenHealth'], drop_first=True)

In [7]:
X_train, X_val = X_train.align(X_val, join='left', axis=1, fill_value=0)
X_train, X_test = X_train.align(X_test, join='left', axis=1, fill_value=0)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

In [11]:
model = Sequential([
    Dense(64, input_dim=X_train.shape[1], activation='relu'),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid') 
])

model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=100,
    batch_size=32,
    verbose=1
)

test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {test_accuracy:.2f}")

model.save("heart_disease_ann_model.h5")

Epoch 1/100
[1m1108/1108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.7074 - loss: 0.5638 - val_accuracy: 0.7635 - val_loss: 0.4958
Epoch 2/100
[1m1108/1108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 918us/step - accuracy: 0.7495 - loss: 0.5089 - val_accuracy: 0.7663 - val_loss: 0.4926
Epoch 3/100
[1m1108/1108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 900us/step - accuracy: 0.7524 - loss: 0.5085 - val_accuracy: 0.7671 - val_loss: 0.4928
Epoch 4/100
[1m1108/1108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 904us/step - accuracy: 0.7588 - loss: 0.4989 - val_accuracy: 0.7664 - val_loss: 0.4920
Epoch 5/100
[1m1108/1108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 916us/step - accuracy: 0.7609 - loss: 0.4938 - val_accuracy: 0.7657 - val_loss: 0.4917
Epoch 6/100
[1m1108/1108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 916us/step - accuracy: 0.7605 - loss: 0.4934 - val_accuracy: 0.7682 - val_loss: 0.4922




Test Accuracy: 0.77


In [12]:
y_pred = model.predict(X_test)
y_pred = (y_pred > 0.5)
print(classification_report(y_test, y_pred))

[1m370/370[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 538us/step
              precision    recall  f1-score   support

           0       0.82      0.73      0.77      6407
           1       0.72      0.81      0.76      5407

    accuracy                           0.77     11814
   macro avg       0.77      0.77      0.77     11814
weighted avg       0.77      0.77      0.77     11814



In [13]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
cm

array([[4699, 1708],
       [1051, 4356]])