In [None]:
# Heart Disease Risk Prediction

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the heart-disease table from disk and preview its first five rows.
DATA_PATH = 'Heart_Disease_Dataset.csv'
df = pd.read_csv(DATA_PATH)
df.head(5)

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0


In [None]:
# Column names are kept in variables so later cells can refer to them.
target = 'HeartDisease'
features = ['Age', 'RestingBP', 'Cholesterol', 'FastingBS']

# Split the frame into the predictor columns (X) and the label column (y).
X, y = df[features], df[target]


In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply that
# same fitted transformation to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:
# Candidate classifiers, keyed by the display name used in the printed report.
models = {
    'Logistic Regression': LogisticRegression(),
    'KNN': KNeighborsClassifier(),
    # `use_label_encoder` is intentionally not passed: the installed XGBoost
    # reports it as an unused parameter and emits a warning when it is given.
    'XGBoost': XGBClassifier(eval_metric='logloss')
}

# Fit every model on the scaled training data and report its performance on
# the held-out test split.
for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    print(f"\n{name} Results:")
    print(confusion_matrix(y_test, y_pred))
    print(classification_report(y_test, y_pred))


Logistic Regression Results:
[[43 34]
 [31 76]]
              precision    recall  f1-score   support

           0       0.58      0.56      0.57        77
           1       0.69      0.71      0.70       107

    accuracy                           0.65       184
   macro avg       0.64      0.63      0.63       184
weighted avg       0.64      0.65      0.65       184


KNN Results:
[[47 30]
 [37 70]]
              precision    recall  f1-score   support

           0       0.56      0.61      0.58        77
           1       0.70      0.65      0.68       107

    accuracy                           0.64       184
   macro avg       0.63      0.63      0.63       184
weighted avg       0.64      0.64      0.64       184


XGBoost Results:
[[41 36]
 [39 68]]
              precision    recall  f1-score   support

           0       0.51      0.53      0.52        77
           1       0.65      0.64      0.64       107

    accuracy                           0.59       184
   macro 

Parameters: { "use_label_encoder" } are not used.



In [None]:
def predict_risk(model, age, bp, chol, fastingbs, low_threshold=0.3, high_threshold=0.7):
    """Classify one patient's heart-disease risk as Low, Medium or High.

    The four inputs are scaled with the notebook-level ``scaler`` and passed
    to ``model.predict_proba``; the probability in column 1 of the first row
    is then bucketed by the two thresholds.

    Parameters
    ----------
    model : object
        A fitted classifier exposing ``predict_proba``.
    age, bp, chol, fastingbs : number
        Values for one patient, given in the order Age, RestingBP,
        Cholesterol, FastingBS.
    low_threshold : float, default 0.3
        Probabilities strictly below this value are labelled ``'Low'``.
    high_threshold : float, default 0.7
        Probabilities strictly below this value (and not ``'Low'``) are
        labelled ``'Medium'``; everything else is ``'High'``.

    Returns
    -------
    str
        ``"Risk of Heart Disease: <label> (<probability, 2 decimals>)"``.
    """
    row = [[age, bp, chol, fastingbs]]

    # A scaler that was fitted on a DataFrame records the column names it saw.
    # Handing it a bare ndarray drops that information (scikit-learn warns
    # about missing feature names), so rebuild a one-row frame carrying the
    # same names whenever they are available.
    feature_names = getattr(scaler, 'feature_names_in_', None)
    if feature_names is not None:
        input_data = pd.DataFrame(row, columns=list(feature_names))
    else:
        input_data = np.array(row)

    input_scaled = scaler.transform(input_data)
    # First (only) row, column 1 of the predict_proba output.
    prob = model.predict_proba(input_scaled)[0][1]

    if prob < low_threshold:
        risk = 'Low'
    elif prob < high_threshold:
        risk = 'Medium'
    else:
        risk = 'High'
    return f"Risk of Heart Disease: {risk} ({prob:.2f})"

In [None]:
# Example usage: score a single set of patient values with the XGBoost model.
example_model = models['XGBoost']
patient = {'age': 54, 'bp': 135, 'chol': 240, 'fastingbs': 1}
print(predict_risk(example_model, **patient))


Risk of Heart Disease: Low (0.11)


