In [None]:
import pandas as pd 
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

import tensorflow as tf
from tensorflow import keras
layers = keras.layers
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

df = pd.read_csv("Heart_Disease.csv")
df.head()

Unnamed: 0,id,age,sex,dataset,cp,trestbps,chol,fbs,restecg,thalch,exang,oldpeak,slope,ca,thal,num
0,1,63,Male,Cleveland,typical angina,145.0,233.0,True,lv hypertrophy,150.0,False,2.3,downsloping,0.0,fixed defect,0
1,2,67,Male,Cleveland,asymptomatic,160.0,286.0,False,lv hypertrophy,108.0,True,1.5,flat,3.0,normal,2
2,3,67,Male,Cleveland,asymptomatic,120.0,229.0,False,lv hypertrophy,129.0,True,2.6,flat,2.0,reversable defect,1
3,4,37,Male,Cleveland,non-anginal,130.0,250.0,False,normal,187.0,False,3.5,downsloping,0.0,normal,0
4,5,41,Female,Cleveland,atypical angina,130.0,204.0,False,lv hypertrophy,172.0,False,1.4,upsloping,0.0,normal,0


### Binary target: 
- 0 = no disease
- 1 = disease

In [2]:
df["target"] = (df['num'] > 0).astype(int)
df = df.drop(columns=['num','id'])
df.head()

Unnamed: 0,age,sex,dataset,cp,trestbps,chol,fbs,restecg,thalch,exang,oldpeak,slope,ca,thal,target
0,63,Male,Cleveland,typical angina,145.0,233.0,True,lv hypertrophy,150.0,False,2.3,downsloping,0.0,fixed defect,0
1,67,Male,Cleveland,asymptomatic,160.0,286.0,False,lv hypertrophy,108.0,True,1.5,flat,3.0,normal,1
2,67,Male,Cleveland,asymptomatic,120.0,229.0,False,lv hypertrophy,129.0,True,2.6,flat,2.0,reversable defect,1
3,37,Male,Cleveland,non-anginal,130.0,250.0,False,normal,187.0,False,3.5,downsloping,0.0,normal,0
4,41,Female,Cleveland,atypical angina,130.0,204.0,False,lv hypertrophy,172.0,False,1.4,upsloping,0.0,normal,0


### Separate Features and Target

In [3]:
x = df.drop(columns=['target'])
y = df['target']

### identify Numerical and Categorical Columns

In [4]:
numerical_features = x.select_dtypes(include=["int64","float64"]).columns
print("Numerical Features:", numerical_features)
categorical_features = x.select_dtypes(include=["object","bool"]).columns
print("Categorical Features:", categorical_features)

Numerical Features: Index(['age', 'trestbps', 'chol', 'thalch', 'oldpeak', 'ca'], dtype='object')
Categorical Features: Index(['sex', 'dataset', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'thal'], dtype='object')


### Numeric Feature Pipeline
- Missing values = replaced with median
- Features = standardized (mean = 0, std = 1)

In [5]:
numeric_pipeline = Pipeline([
    ("imputer", SimpleImputer(strategy="median")),
    ("scaler", StandardScaler())
])

### Categorical Feature Pipeline
- Missing values = replaced with most frequent value
- Categories = converted to numbers using One-Hot Encoding
- Unknown categories = safely ignored

In [6]:
categorical_pipeline = Pipeline([
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("onehot", OneHotEncoder(handle_unknown="ignore"))
])

### Combine Pipelines Using ColumnTransformer

In [7]:
preprocessor = ColumnTransformer([
    ("num",numeric_pipeline, numerical_features),
    ("cat",categorical_pipeline, categorical_features)
])

### Train-Test Split
- 80% training, 20% testing
- `stratify=y` keeps class distribution balanced
- `random_state=42` ensures reproducibility


In [8]:
x_train_raw, x_test_raw, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42, stratify=y)

### Apply Preprocessing

In [9]:
x_train = preprocessor.fit_transform(x_train_raw)
x_test = preprocessor.transform(x_test_raw)

### Convert Sparse Matrix to Dense

In [10]:
if hasattr(x_train, "toarray"):
    x_train = x_train.toarray()
    x_test = x_test.toarray()

# Convert Data Type
x_train = x_train.astype("float32")
x_test = x_test.astype("float32")
print("Train shape:", x_train.shape)

Train shape: (736, 29)


### Build the Neural Network Model

In [11]:
# Input Dimension
input_dim = x_train.shape[1]
print(input_dim)

model = keras.Sequential([keras.Input(shape=(input_dim,)),
                         layers.Dense(128, activation="relu"),
                         layers.BatchNormalization(),
                         layers.Dropout(0.3),
                          
                         layers.Dense(64, activation="relu"),
                         layers.BatchNormalization(),
                         layers.Dropout(0.3),
                          
                         layers.Dense(1,activation="sigmoid")])

29


### Compile the Model


In [12]:
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss = "binary_crossentropy",
    metrics = ["accuracy"])
model.summary()

### Model Training with Callbacks
#### This section trains the neural network and uses Early Stopping to prevent overfitting and save the best model.

- `monitor="val_loss"` = Watches the validation loss during training.
- `patience=10` =Training will stop if validation loss does not improve for 10 consecutive epochs.
- `restore_best_weights` =True =After stopping, the model restores the weights from the epoch where
- validation loss was minimum (best performance).

In [13]:
callbacks = [keras.callbacks.EarlyStopping(
    monitor ="val_loss",
    patience = 10,
    restore_best_weights=True
)]

### Train the Model

In [14]:
history = model.fit(x_train, y_train,
                   validation_split=0.2,
                   epochs =200,
                   batch_size=32,
                   callbacks = callbacks,
                   verbose = 1)
history

Epoch 1/200
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - accuracy: 0.6718 - loss: 0.6934 - val_accuracy: 0.8108 - val_loss: 0.5169
Epoch 2/200
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7908 - loss: 0.4748 - val_accuracy: 0.8243 - val_loss: 0.4686
Epoch 3/200
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7976 - loss: 0.4525 - val_accuracy: 0.8108 - val_loss: 0.4521
Epoch 4/200
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7823 - loss: 0.4617 - val_accuracy: 0.8176 - val_loss: 0.4380
Epoch 5/200
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.8367 - loss: 0.3620 - val_accuracy: 0.8311 - val_loss: 0.4278
Epoch 6/200
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.8197 - loss: 0.4247 - val_accuracy: 0.8311 - val_loss: 0.4199
Epoch 7/200
[1m19/19[0m [32m━━

<keras.src.callbacks.history.History at 0x1ce8c117230>

### Evaluate Model

In [15]:
y_prob = model.predict(x_test).ravel()
y_pred = (y_prob >= 0.5).astype(int)

print("Test Accuracy:", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
Test Accuracy: 0.8369565217391305

Confusion Matrix:
 [[61 21]
 [ 9 93]]

Classification Report:
               precision    recall  f1-score   support

           0       0.87      0.74      0.80        82
           1       0.82      0.91      0.86       102

    accuracy                           0.84       184
   macro avg       0.84      0.83      0.83       184
weighted avg       0.84      0.84      0.84       184



### Save Model & Preprocessor

In [16]:
import joblib
import os

os.makedirs("artifacts", exist_ok=True)

model.save("artifacts/heart_mlp_tf.keras")
joblib.dump(preprocessor, "artifacts/preprocessor.joblib")

print("Saved model & preprocessing pipeline")

Saved model & preprocessing pipeline
