**Business Problem**

This dataset contains synthetic patient records generated for health risk assessment and predictive modeling. It includes demographic, lifestyle, and biometric health indicators commonly used in cardiovascular and general health research. Each record captures age, cholesterol levels, blood pressure, smoking habits, diabetes status, and heart attack history—key factors influencing cardiovascular diseases.


**Features :**

age: Patient's age (years)

sex: Biological sex (0 = Female, 1 = Male)

total_cholesterol: Total cholesterol level (mg/dL)

ldl: Low-Density Lipoprotein (LDL) cholesterol (mg/dL)

hdl: High-Density Lipoprotein (HDL) cholesterol (mg/dL)

systolic_bp: Systolic blood pressure (mmHg)

diastolic_bp: Diastolic blood pressure (mmHg)

smoking: Smoking status (0 = Non-Smoker, 1 = Smoker)

diabetes: Diabetes status (0 = No, 1 = Yes)

heart_attack: History of heart attack (0 = No, 1 = Yes)

In [195]:
import tensorflow as tf
print(tf.__version__)

2.19.0


In [196]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


In [197]:
#from google.colab import drive
#drive.mount('/content/sdrive')

In [198]:
df=pd.read_csv("Heart_Modelling.csv")

In [199]:
df.head()

Unnamed: 0,age,sex,total_cholesterol,ldl,hdl,systolic_bp,diastolic_bp,smoking,diabetes,heart_attack
0,57,1,229.463642,175.879129,39.225687,124.070127,91.37878,0,0,0
1,58,1,186.46412,128.984916,34.950968,95.492552,64.35504,1,0,0
2,37,1,251.300719,152.347592,45.913288,99.519335,64.953147,0,1,0
3,55,1,192.058908,116.803684,67.208925,122.460002,73.821382,0,0,0
4,53,1,151.203448,107.017396,60.693838,123.022257,81.121946,0,1,0


In [200]:
df["heart_attack"].value_counts()

Unnamed: 0_level_0,count
heart_attack,Unnamed: 1_level_1
0,896
1,104


In [201]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 10 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   age                1000 non-null   int64  
 1   sex                1000 non-null   int64  
 2   total_cholesterol  1000 non-null   float64
 3   ldl                1000 non-null   float64
 4   hdl                1000 non-null   float64
 5   systolic_bp        1000 non-null   float64
 6   diastolic_bp       1000 non-null   float64
 7   smoking            1000 non-null   int64  
 8   diabetes           1000 non-null   int64  
 9   heart_attack       1000 non-null   int64  
dtypes: float64(5), int64(5)
memory usage: 78.3 KB


In [202]:
df.isnull().sum()

Unnamed: 0,0
age,0
sex,0
total_cholesterol,0
ldl,0
hdl,0
systolic_bp,0
diastolic_bp,0
smoking,0
diabetes,0
heart_attack,0


In [203]:
df["sex"].value_counts()

Unnamed: 0_level_0,count
sex,Unnamed: 1_level_1
1,527
0,473


In [204]:
df["smoking"].value_counts()

Unnamed: 0_level_0,count
smoking,Unnamed: 1_level_1
0,798
1,202


In [205]:
df["diabetes"].value_counts()

Unnamed: 0_level_0,count
diabetes,Unnamed: 1_level_1
0,910
1,90


In [206]:
df["heart_attack"].value_counts()

Unnamed: 0_level_0,count
heart_attack,Unnamed: 1_level_1
0,896
1,104


**x&y**

In [207]:
x=df.drop("heart_attack",axis=1)
y=df["heart_attack"]

In [208]:
from imblearn.over_sampling import SMOTE
SMOTE = SMOTE()
x_new,y_new = SMOTE.fit_resample(x,y)

print("After SMOTE dataset shape")
print("-----------------------------")

y_new.value_counts()

After SMOTE dataset shape
-----------------------------


Unnamed: 0_level_0,count
heart_attack,Unnamed: 1_level_1
0,896
1,896


In [209]:
x_new

Unnamed: 0,age,sex,total_cholesterol,ldl,hdl,systolic_bp,diastolic_bp,smoking,diabetes
0,57,1,229.463642,175.879129,39.225687,124.070127,91.378780,0,0
1,58,1,186.464120,128.984916,34.950968,95.492552,64.355040,1,0
2,37,1,251.300719,152.347592,45.913288,99.519335,64.953147,0,1
3,55,1,192.058908,116.803684,67.208925,122.460002,73.821382,0,0
4,53,1,151.203448,107.017396,60.693838,123.022257,81.121946,0,1
...,...,...,...,...,...,...,...,...,...
1787,50,1,161.159522,116.312803,56.857156,134.843575,89.948679,0,0
1788,40,1,261.637683,179.709934,35.059516,153.091340,103.325088,0,0
1789,47,1,230.699895,132.487525,52.390381,122.041155,78.831775,0,0
1790,66,0,267.663967,152.998540,46.847999,131.853273,79.219894,0,0


**train-test-split**

In [210]:
from sklearn.model_selection import train_test_split

random_state_number = np.random.randint(0, 100)
x_new_train,x_new_test,y_new_train,y_new_test = train_test_split(x_new,y_new,test_size=0.2,random_state=random_state_number)
print("Random State Number:", random_state_number)

Random State Number: 68


In [211]:
#feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
x_new_train = sc.fit_transform(x_new_train)
x_new_test = sc.transform(x_new_test)

In [212]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LeakyReLU,PReLU,ELU,ReLU
from tensorflow.keras.layers import Dropout

**Modelling**

In [213]:
classifier = Sequential()

**Initializing the Artificial Neural Network**

from keras.models import Sequential
ann = Sequential()

**Adding the input layer and the first hidden layer**

In [214]:
classifier.add(Dense(units=32,activation='relu',input_dim=9))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


from keras.layers import Dense
ann.add(Dense(input_dim=9,units=6,kernel_initializer="uniform",activation="relu"))


**Adding the second hidden layer**

In [215]:
classifier.add(Dense(units=32,activation="relu"))
classifier.add(Dropout(0.2))

**Adding third hidden layer**

In [216]:
classifier.add(Dense(units=32,activation="relu"))
classifier.add(Dropout(0.2))


**Adding the output layer**

In [217]:
classifier.add(Dense(units=1,activation="sigmoid"))

**Compiling the ANN**

In [218]:
classifier.compile(optimizer="adam",loss="binary_crossentropy",metrics=["accuracy"])

**Training the ANN on the Training set**

In [219]:
import tensorflow as tf
early_stopping=tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    min_delta=0,
    patience=0,
    verbose=0,
    mode="auto",
    baseline=None,
    restore_best_weights=False,
    start_from_epoch=0,
)

In [220]:
classifier.fit(x_new_train,y_new_train,validation_split=0.33,epochs=100,batch_size=32,callbacks=early_stopping)

Epoch 1/100
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 23ms/step - accuracy: 0.5433 - loss: 0.6918 - val_accuracy: 0.6385 - val_loss: 0.6537
Epoch 2/100
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6762 - loss: 0.6393 - val_accuracy: 0.6660 - val_loss: 0.6191
Epoch 3/100
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7016 - loss: 0.5968 - val_accuracy: 0.6829 - val_loss: 0.6018
Epoch 4/100
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6933 - loss: 0.5882 - val_accuracy: 0.6808 - val_loss: 0.5936
Epoch 5/100
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7007 - loss: 0.5628 - val_accuracy: 0.6913 - val_loss: 0.5905
Epoch 6/100
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7131 - loss: 0.5556 - val_accuracy: 0.6998 - val_loss: 0.5842
Epoch 7/100
[1m30/30[0m [32m━━

<keras.src.callbacks.history.History at 0x7d6fc8551d60>

**Predictions & Evaluation the model**

Predictions

In [221]:
y_pred = classifier.predict(x_new_test)
y_pred = (y_pred > 0.5)

[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step


**Evaluation**

In [222]:
from sklearn.metrics import confusion_matrix,accuracy_score

print("Test Accuracy:",accuracy_score(y_new_test,y_pred))
confusion_matrix(y_new_test,y_pred)

Test Accuracy: 0.7298050139275766


array([[114,  54],
       [ 43, 148]])

In [223]:
pip install keras-tuner --upgrade



In [224]:
import keras_tuner
import keras

In [225]:
def build_model(hp):
  model = keras.Sequential()
  model.add(keras.layers.Dense(units=16,activation='relu',input_dim=9))
  model.add(keras.layers.Dense(units=16,activation="relu"))
  model.add(Dropout(0.2))
  #model.add(keras.layers.Dense(units=16,activation="relu"))
  #model.add(Dropout(0.2))
  model.add(keras.layers.Dense(units=1,activation="sigmoid"))
  model.compile(optimizer=hp.Choice("optimizer",values=["adam","sgd","rmsprop","adadelta"]),loss="binary_crossentropy",metrics=["accuracy"])
  return model

In [226]:
tuner = keras_tuner.RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=5)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [227]:
tuner = keras_tuner.RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=5,
    directory="to1dir")

tuner.search(x_new_train, y_new_train, epochs=5, validation_data=(x_new_test, y_new_test))
best_model=tuner.get_best_models()[0]

Reloading Tuner from to1dir/untitled_project/tuner0.json


In [228]:
tuner.get_best_hyperparameters()[0].values

{'optimizer': 'sgd'}

In [229]:
best_model.summary()

In [230]:
best_model.fit(x_new_train, y_new_train, epochs=100,initial_epoch=6, batch_size=32, validation_data=(x_new_test, y_new_test))

Epoch 7/100
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 65ms/step - accuracy: 0.6705 - loss: 0.6302 - val_accuracy: 0.6992 - val_loss: 0.6094
Epoch 8/100
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - accuracy: 0.6752 - loss: 0.6103 - val_accuracy: 0.6936 - val_loss: 0.6014
Epoch 9/100
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.6508 - loss: 0.6216 - val_accuracy: 0.7019 - val_loss: 0.5937
Epoch 10/100
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.6754 - loss: 0.6018 - val_accuracy: 0.7075 - val_loss: 0.5871
Epoch 11/100
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.6823 - loss: 0.5991 - val_accuracy: 0.7075 - val_loss: 0.5811
Epoch 12/100
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.7005 - loss: 0.5939 - val_accuracy: 0.7075 - val_loss: 0.5755
Epoch 13/100
[1m45/45[0m 

<keras.src.callbacks.history.History at 0x7d6fc86e5dc0>

**Evaluation**

In [231]:
y_pred = best_model.predict(x_new_test)
y_pred = (y_pred > 0.5)

[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step


In [232]:
from sklearn.metrics import confusion_matrix,accuracy_score

print("Test Accuracy:",accuracy_score(y_new_test,y_pred))
confusion_matrix(y_new_test,y_pred)

Test Accuracy: 0.7688022284122563


array([[126,  42],
       [ 41, 150]])