# Libraries

In [10]:
import pandas as pd
import matplotlib.pyplot as plt
import warnings
import seaborn as sns
warnings.filterwarnings("ignore")

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score, confusion_matrix, f1_score, recall_score

In [11]:
df  = pd.read_csv('Heart_Attack_Risk_Levels_Dataset.csv')

In [12]:
df.head()

Unnamed: 0,Age,Gender,Heart rate,Systolic blood pressure,Diastolic blood pressure,Blood sugar,CK-MB,Troponin,Result,Risk_Level,Recommendation
0,63,1,66,160,83,160.0,1.8,0.012,negative,Moderate,Monitor closely and consult doctor
1,20,1,94,98,46,296.0,6.75,1.06,positive,High,Immediate medical attention
2,56,1,64,160,77,270.0,1.99,0.003,negative,Moderate,Monitor closely and consult doctor
3,66,1,70,120,55,270.0,13.87,0.122,positive,High,Immediate medical attention
4,54,1,64,112,65,300.0,1.08,0.003,negative,Moderate,Monitor closely and consult doctor


In [13]:
df.shape

(1319, 11)

 Check Duplicates values 🎈

In [14]:
df.duplicated().sum()

0

 Check Null values 🎈

In [15]:
df.isnull().sum()

Age                         0
Gender                      0
Heart rate                  0
Systolic blood pressure     0
Diastolic blood pressure    0
Blood sugar                 0
CK-MB                       0
Troponin                    0
Result                      0
Risk_Level                  0
Recommendation              0
dtype: int64

 Data Summarize 🎈

In [16]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1319 entries, 0 to 1318
Data columns (total 11 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Age                       1319 non-null   int64  
 1   Gender                    1319 non-null   int64  
 2   Heart rate                1319 non-null   int64  
 3   Systolic blood pressure   1319 non-null   int64  
 4   Diastolic blood pressure  1319 non-null   int64  
 5   Blood sugar               1319 non-null   float64
 6   CK-MB                     1319 non-null   float64
 7   Troponin                  1319 non-null   float64
 8   Result                    1319 non-null   object 
 9   Risk_Level                1319 non-null   object 
 10  Recommendation            1319 non-null   object 
dtypes: float64(3), int64(5), object(3)
memory usage: 113.5+ KB


 Check Statistical Summary 🎈

In [17]:
df.describe()

Unnamed: 0,Age,Gender,Heart rate,Systolic blood pressure,Diastolic blood pressure,Blood sugar,CK-MB,Troponin
count,1319.0,1319.0,1319.0,1319.0,1319.0,1319.0,1319.0,1319.0
mean,56.193328,0.659591,78.336619,127.170584,72.269143,146.634344,15.274306,0.360942
std,13.638173,0.474027,51.63027,26.12272,14.033924,74.923045,46.327083,1.154568
min,14.0,0.0,20.0,42.0,38.0,35.0,0.321,0.001
25%,47.0,0.0,64.0,110.0,62.0,98.0,1.655,0.006
50%,58.0,1.0,74.0,124.0,72.0,116.0,2.85,0.014
75%,65.0,1.0,85.0,143.0,81.0,169.5,5.805,0.0855
max,103.0,1.0,1111.0,223.0,154.0,541.0,300.0,10.3


 Check Correlation 🎈

In [18]:
df.corr(numeric_only=True)

TypeError: corr() got an unexpected keyword argument 'numeric_only'

In [None]:
df.columns

# EDA

-  Exploratory Data Analysis (Data Visualization) 🎈

In [None]:
df.head()

 Check Distribution of Recommendation Column 🎈

In [None]:
df['Recommendation'].value_counts().plot(kind='pie', autopct='%.2f', shadow=True, explode=[0,0,0.1],
                                        colors=['hotpink', 'teal', 'steelblue'], figsize=(9,6))
plt.title('Distribution Of Recommendation', fontsize=15)
plt.show()

 Check Distribution of Risk Level Column 🎈

In [None]:
df['Risk_Level'].value_counts().plot(kind='pie', autopct='%.2f', shadow=True, explode=[0,0.1,0],
                                        colors=['gray', 'orange', 'tomato'], figsize=(9,6))
plt.title('Distribution Of Risk Level', fontsize=15)
plt.show()

 Check Distribution of Result Column 🎈

In [None]:
sns.set_style('darkgrid')
plt.figure(figsize=(11,6))
sns.countplot(x=df['Result'], palette='mako')
plt.title('Distribution Of Result', fontsize=15)
plt.xlabel('Result', fontsize=13)
plt.ylabel('Count', fontsize=13)
plt.show()

 Check Distribution of Result Column with Gender🎈

In [None]:

plt.figure(figsize=(11,6))
sns.countplot(x=df['Result'], palette='jet_r', hue=df['Gender'])
plt.title('Result Vs Gender', fontsize=15)
plt.xlabel('Result', fontsize=13)
plt.ylabel('Count', fontsize=13)
plt.show()

 Check Distribution Numerical Columns (histplot) 🎈

In [None]:
columns = ['Age', 'Gender', 'Heart rate', 'Systolic blood pressure',
       'Diastolic blood pressure', 'Blood sugar', 'CK-MB', 'Troponin']

plt.figure(figsize=(14,12))
for i, col in enumerate(columns):
    plt.subplot(4, 2, i + 1)
    sns.histplot(data=df, x=col, kde=True, color='brown')
    plt.title(f'Distribution of {col}', fontsize=15)
    plt.tight_layout(pad=3.0)

plt.show()

 Scatterplot of Age vs Heart Rate with Result 🎈

In [None]:
plt.figure(figsize=(11,6))
sns.scatterplot(x=df['Age'], y=df['Heart rate'], s=120, alpha=0.8, hue=df['Result'], palette='magma')
plt.title('Age Vs Heart Rate With Result', fontsize=15)
plt.xlabel('Age', fontsize=13)
plt.ylabel('Heart Rate', fontsize=13)
plt.show()

 Scatterplot of Systolic blood pressure vs Diastolic blood pressure with Result 🎈

In [None]:
plt.figure(figsize=(11,6))
sns.scatterplot(x=df['Systolic blood pressure'], y=df['Diastolic blood pressure'], s=120, alpha=0.8,
                hue=df['Result'], palette='summer')
plt.title('Systolic blood pressure Vs Diastolic blood pressure With Result', fontsize=15)
plt.xlabel('Systolic blood pressure', fontsize=13)
plt.ylabel('Diastolic blood pressure', fontsize=13)
plt.show()

 Check Correlation of Numerical Columns 🎈

In [None]:
plt.figure(figsize=(11,6))
sns.heatmap(df.corr(numeric_only=True), cmap='jet', annot=True)
plt.title('Correlation of Numerical Features', fontsize=15)
plt.show()

# Encoding Categorical Features

In [None]:
encoder = LabelEncoder()
for c in ['Result', 'Risk_Level', 'Recommendation']:
    df[c] = encoder.fit_transform(df[c]) # Result- 0: Negative, 1: Positive

 Data Split into Training and Testing Sets 🎈

In [None]:
X = df.drop('Result', axis=1)
y = df['Result']

X_train,X_test, y_train,y_test = train_test_split(X,y , test_size=0.2)
print(f"X_train Shape: {X_train.shape}")
print(f"Y_train Shape: {y_train.shape}")
print(f"X_test Shape: {X_test.shape}")
print(f"y_test Shape: {y_test.shape}")

# Models Building

 1- Logistic Regression 🎈    
 2- Random Forest Classifier 🎈       
 3- Gradient Boosting Classifier 🎈    
 4- Naive Bayes 🎈      
 5- Decision Tree Classifier🎈         

In [None]:
models = {
    "Logistic Regression": LogisticRegression(),
    "Random Forest Classifier": RandomForestClassifier(),
    "Gradient Boosting Classifier": GradientBoostingClassifier(),
    "Naive Bayes": GaussianNB(),
    "Decision Tree Classifier": DecisionTreeClassifier()
}

Models Training and Prediction, Calculate accuracy Score, and ------ 🎈         

In [None]:
summary = []
for names, model in models.items():
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    accuracy = accuracy_score(y_test, preds)
    p = precision_score(y_test, preds)
    r = recall_score(y_test, preds, average='macro')
    f = f1_score(y_test, preds,  average='macro')
    print(f"\n⭐Name: {names}")
    print(f"Accuracy: {accuracy:.2f}")
    print(f"Precision Score: {p}")

    plt.figure(figsize=(6, 4))
    cm  = confusion_matrix(y_test,preds)
    sns.heatmap(cm, annot=True, cmap='jet_r')
    plt.title(f"{names}-  Confution Metrics ")
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.tight_layout()
    plt.show()

    summary.append({
        'Model Name': names,
        'Test Accuracy': accuracy,
        'Test Precision': p,
        'Test Recall Score': r,
        'Test F1 Score': f

    })
    summary_df = pd.DataFrame(summary)

# Show all model evaluation metrics in one table
print("\n⭐ Summary:")
summary_df

# Summary

 summary show in graph 🎈         

In [None]:
plt.figure(figsize=(12, 6))
sns.barplot(data=summary_df, x='Model Name', y='Test Accuracy', palette='crest')
plt.title("Model Accuracy Comparison")
plt.ylabel("Accuracy")
plt.xticks(rotation=45)
plt.show()