# Teach a Machine Predict Heart Disease using Data

### Load Dataset

In [8]:
import pandas as pd

dataset = pd.read_csv('datasets/heart.csv')
data = dataset.copy()

### Seperate input Features and output(labels)

In [10]:
X = data.drop(['HeartDisease'], axis=1) # Feature Columns
y = data['HeartDisease']

### Split the dataset into Training and Validation sets

In [12]:
from sklearn.model_selection import train_test_split

X_train,X_val,y_train,y_val = train_test_split(X,y,test_size=0.2,random_state=42)

### Preprocess data (Scale numbers and encode Categorical data)

In [13]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

numeric_features = ["Age", "RestingBP", "Cholesterol", "MaxHR", "Oldpeak"]
categorical_features = ["Sex", "ChestPainType", "FastingBS", "RestingECG", "ExerciseAngina", "ST_Slope"]

numeric_transformer = StandardScaler()
categorical_transformer = OneHotEncoder(handle_unknown='ignore')

preprocessor = ColumnTransformer(
    transformers = [
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features),
    ]
)

X_train_preprocessed = preprocessor.fit_transform(X_train)
X_val_preprocessed = preprocessor.fit_transform(X_val)

### Build The Neural Network

In [14]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input

model = Sequential([
    Input(shape=(X_train_preprocessed.shape[1],)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])

### Compile the Model

In [15]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

### Train The Model

In [16]:
from tensorflow.keras.callbacks import EarlyStopping

early_stop = EarlyStopping(patience=5, restore_best_weights=True)

model.fit(
    X_train_preprocessed, y_train,
    epochs=50,
    batch_size=16,
    validation_data=(X_val_preprocessed, y_val),
    callbacks=[early_stop]
)


Epoch 1/50
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 13ms/step - accuracy: 0.7009 - loss: 0.6025 - val_accuracy: 0.8207 - val_loss: 0.4361
Epoch 2/50
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8344 - loss: 0.3989 - val_accuracy: 0.8315 - val_loss: 0.3729
Epoch 3/50
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8588 - loss: 0.3390 - val_accuracy: 0.8641 - val_loss: 0.3490
Epoch 4/50
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.8723 - loss: 0.2924 - val_accuracy: 0.8533 - val_loss: 0.3560
Epoch 5/50
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8838 - loss: 0.3000 - val_accuracy: 0.8641 - val_loss: 0.3366
Epoch 6/50
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8809 - loss: 0.3058 - val_accuracy: 0.8696 - val_loss: 0.3329
Epoch 7/50
[1m46/46[0m [32m━━━━━━━━

<keras.src.callbacks.history.History at 0x21d2bf2d340>

### Evaluate the Model

In [17]:
loss, accuracy = model.evaluate(X_val_preprocessed, y_val)
print(f"Validation Accuracy: {accuracy:.2f}")

[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 0.8610 - loss: 0.3089
Validation Accuracy: 0.89


### Save the Model and the Preproccessor

In [18]:
import joblib

joblib.dump(preprocessor, "preprocessor.pkl")
model.save("nn_model.keras")

In [6]:
import sklearn
print(sklearn.__version__)

1.6.1
