## Heart attack risk prediction



In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import numpy as np
import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score
import pandas as pd
import seaborn as sns
import pickle

In [3]:
df = pd.read_csv('/content/heart_attack_prediction_dataset.csv')

In [4]:
df.head()

Unnamed: 0,Heart Rate,Oxygen Level,Body Temperature,Heart Attack Risk
0,101.865749,96.476418,98.234058,Low Risk
1,67.382193,98.229872,99.267701,Low Risk
2,62.29001,100.313704,98.14591,Low Risk
3,76.288138,95.066348,98.305198,Low Risk
4,74.003683,97.772355,98.13616,Low Risk


In [5]:
df = df.dropna()

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20000 entries, 0 to 19999
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Heart Rate         20000 non-null  float64
 1   Oxygen Level       20000 non-null  float64
 2   Body Temperature   20000 non-null  float64
 3   Heart Attack Risk  20000 non-null  object 
dtypes: float64(3), object(1)
memory usage: 625.1+ KB


In [7]:
df.head()

Unnamed: 0,Heart Rate,Oxygen Level,Body Temperature,Heart Attack Risk
0,101.865749,96.476418,98.234058,Low Risk
1,67.382193,98.229872,99.267701,Low Risk
2,62.29001,100.313704,98.14591,Low Risk
3,76.288138,95.066348,98.305198,Low Risk
4,74.003683,97.772355,98.13616,Low Risk


In [8]:
df.describe()

Unnamed: 0,Heart Rate,Oxygen Level,Body Temperature
count,20000.0,20000.0,20000.0
mean,82.55699,92.961272,99.553702
std,15.960827,6.288189,1.23925
min,31.436247,69.724567,96.204609
25%,70.002081,87.97122,98.564898
50%,82.593428,95.123447,99.23066
75%,95.131451,98.084141,100.509554
max,134.262377,106.458123,104.036532


In [9]:
features = ['Heart Rate', 'Body Temperature', 'Oxygen Level']

In [10]:
target_column = 'Heart Attack Risk'
X = df[features]
y = df[target_column]

In [11]:
print(y.value_counts())

Heart Attack Risk
Low Risk     10000
High Risk    10000
Name: count, dtype: int64


In [12]:
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

In [13]:
X_train, X_temp, y_train, y_temp = train_test_split(X, y_encoded, test_size=0.4, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

In [14]:
model = xgb.XGBClassifier(
    n_estimators=100,
    learning_rate=0.01,
    max_depth=3,
    subsample=0.5,
    colsample_bytree=0.5,
    random_state=42
)

model.fit(X_train, y_train, eval_set=[(X_val, y_val)], verbose=True)

[0]	validation_0-logloss:0.68540
[1]	validation_0-logloss:0.67866
[2]	validation_0-logloss:0.67107
[3]	validation_0-logloss:0.66361
[4]	validation_0-logloss:0.65638
[5]	validation_0-logloss:0.64978
[6]	validation_0-logloss:0.64337
[7]	validation_0-logloss:0.63708
[8]	validation_0-logloss:0.63104
[9]	validation_0-logloss:0.62497
[10]	validation_0-logloss:0.61832
[11]	validation_0-logloss:0.61234
[12]	validation_0-logloss:0.60660
[13]	validation_0-logloss:0.60086
[14]	validation_0-logloss:0.59528
[15]	validation_0-logloss:0.58968
[16]	validation_0-logloss:0.58421
[17]	validation_0-logloss:0.57887
[18]	validation_0-logloss:0.57361
[19]	validation_0-logloss:0.56840
[20]	validation_0-logloss:0.56333
[21]	validation_0-logloss:0.55833
[22]	validation_0-logloss:0.55329
[23]	validation_0-logloss:0.54763
[24]	validation_0-logloss:0.54284
[25]	validation_0-logloss:0.53805
[26]	validation_0-logloss:0.53262
[27]	validation_0-logloss:0.52799
[28]	validation_0-logloss:0.52348
[29]	validation_0-loglos

In [15]:
cv_scores = cross_val_score(model, X_train, y_train, cv=10, scoring='accuracy')

In [16]:
print("Cross-validation scores:", cv_scores)
print("\nMean accuracy:", np.mean(cv_scores))
print("\nStandard deviation:", np.std(cv_scores))

Cross-validation scores: [0.98416667 0.985      0.98083333 0.99       0.98666667 0.98666667
 0.98666667 0.98583333 0.9725     0.98916667]

Mean accuracy: 0.98475

Standard deviation: 0.004744148442496753


In [17]:
y_pred = model.predict(X_val)
accuracy = accuracy_score(y_val, y_pred)
print("Validation Accuracy:", accuracy)
print("\nValidation Confusion Matrix:\n", confusion_matrix(y_val, y_pred))
print("\nValidation Classification Report:\n", classification_report(y_val, y_pred))

Validation Accuracy: 0.9845

Validation Confusion Matrix:
 [[1990   46]
 [  16 1948]]

Validation Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.98      0.98      2036
           1       0.98      0.99      0.98      1964

    accuracy                           0.98      4000
   macro avg       0.98      0.98      0.98      4000
weighted avg       0.98      0.98      0.98      4000



In [18]:
y_test_pred = model.predict(X_test)

test_accuracy = accuracy_score(y_test, y_test_pred)
print("Test Accuracy:", test_accuracy)
print("\nTest Classification Report:\n", classification_report(y_test, y_test_pred))
print("\nTest Confusion Matrix:\n", confusion_matrix(y_test, y_test_pred))

Test Accuracy: 0.98575

Test Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.98      0.99      1977
           1       0.98      0.99      0.99      2023

    accuracy                           0.99      4000
   macro avg       0.99      0.99      0.99      4000
weighted avg       0.99      0.99      0.99      4000


Test Confusion Matrix:
 [[1931   46]
 [  11 2012]]


In [19]:
pickle.dump(model, open('heart_attack_prediction_model.pkl', 'wb'))