In [None]:
from IPython.display import display, HTML, Javascript
from IPython.core.interactiveshell import InteractiveShell

# Function to show a spinner below the output
def show_spinner_below():
    display(HTML('''
        <div id="cell-status" style="margin-top: 10px; font-size: 20px; color: blue; text-align: center;">
            🔄 Running...
        </div>
    '''))

# Function to show success tick below the output
def success_indicator_below():
    display(Javascript('''
        var status = document.getElementById("cell-status");
        if (status) {
            status.innerHTML = "✅ Completed";
            status.style.color = "green";
            setTimeout(function() { status.remove(); }, 2000);  // Remove after 2 seconds
        }
    '''))

# Hooks with arguments to avoid TypeError
def pre_run_cell_hook(info):  # Accepts execution info from Jupyter
    show_spinner_below()

def post_run_cell_hook(result):  # Handles execution result
    if result.success:
        success_indicator_below()

# Register the hooks correctly
shell = InteractiveShell.instance()
shell.events.register('pre_run_cell', pre_run_cell_hook)
shell.events.register('post_run_cell', post_run_cell_hook)




In [1]:
import pandas as pd

In [2]:
dataset=pd.read_csv(r"heart_disease_health_indicators_BRFSS2015.csv")

In [3]:
dataset.head()

Unnamed: 0,HeartDiseaseorAttack,HighBP,HighChol,CholCheck,BMI,Smoker,Stroke,Diabetes,PhysActivity,Fruits,...,AnyHealthcare,NoDocbcCost,GenHlth,MentHlth,PhysHlth,DiffWalk,Sex,Age,Education,Income
0,0,1,1,1,40,1,0,0,0,0,...,1,0,5,18,15,1,0,9,4,3
1,0,0,0,0,25,1,0,0,1,0,...,0,1,3,0,0,0,0,7,6,1
2,0,1,1,1,28,0,0,0,0,1,...,1,1,5,30,30,1,0,9,4,8
3,0,1,0,1,27,0,0,0,1,1,...,1,0,2,0,0,0,0,11,3,6
4,0,1,1,1,24,0,0,0,1,1,...,1,0,2,3,0,0,0,11,5,4


In [5]:
df=dataset
df.shape

(5020, 22)

In [6]:
df.isnull().sum()

HeartDiseaseorAttack    0
HighBP                  0
HighChol                0
CholCheck               0
BMI                     0
Smoker                  0
Stroke                  0
Diabetes                0
PhysActivity            0
Fruits                  0
Veggies                 0
HvyAlcoholConsump       0
AnyHealthcare           0
NoDocbcCost             0
GenHlth                 0
MentHlth                0
PhysHlth                0
DiffWalk                0
Sex                     0
Age                     0
Education               0
Income                  0
dtype: int64

In [7]:
df['HeartDiseaseorAttack'].value_counts()  #to check how many 0 or 1 to balance data

HeartDiseaseorAttack
0    4494
1     526
Name: count, dtype: int64

In [8]:
from sklearn.utils import resample

# seperate majority and minority classes
df_majority = df[df['HeartDiseaseorAttack']==0]
df_minority = df[df['HeartDiseaseorAttack']==1]

# upsample minority class
df_minority_upsampled = resample(df_minority,
                                 replace = True,  # sample with replacement
                                 n_samples = len(df_majority), # to match majority class
                                 random_state = 42)  # Reproducible result

# combine majority class with upsamle minority class   
df_balanced =pd.concat([df_majority,df_minority_upsampled])


# Shuffle the dataset
df_balanced = df_balanced.sample(frac=1, random_state=42).reset_index(drop=True)
                                 

In [9]:
df_balanced['HeartDiseaseorAttack'].value_counts()  #to check how many 0 or 1 to balance data

HeartDiseaseorAttack
1    4494
0    4494
Name: count, dtype: int64

In [10]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# seperate features (x) and target variable (y)
x = df_balanced.drop(columns=['HeartDiseaseorAttack'])
y = df_balanced['HeartDiseaseorAttack']

# split the data into training and testing sets(80% train ,20%)
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=42)


In [11]:
# initialize standarScaler
scaler = StandardScaler()

# fit scaler to training data and transform both training and testing data
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled =  scaler.transform(x_test)
x_train_scaled

array([[ 0.75762325,  0.81886448,  0.17472516, ...,  0.47486409,
        -1.70377462, -1.39655739],
       [ 0.75762325, -1.2212033 ,  0.17472516, ...,  0.47486409,
         0.20806924,  1.22050902],
       [-1.31991725,  0.81886448,  0.17472516, ..., -2.01548807,
         0.20806924, -1.83273513],
       ...,
       [ 0.75762325,  0.81886448,  0.17472516, ..., -0.2366651 ,
        -1.70377462, -1.39655739],
       [-1.31991725, -1.2212033 , -5.72327429, ...,  0.47486409,
         1.16399117,  1.22050902],
       [ 0.75762325, -1.2212033 ,  0.17472516, ..., -0.2366651 ,
        -0.74785269, -0.08802419]], shape=(7190, 21))

In [12]:
from sklearn.ensemble import RandomForestClassifier

rf_model = RandomForestClassifier()

rf_model.fit(x_train_scaled,y_train)

y_pred = rf_model.predict(x_test_scaled)


In [13]:
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report

In [14]:
accuracy_score(y_test,y_pred)

0.9838709677419355

In [15]:
confusion_matrix(y_test,y_pred)

array([[892,  29],
       [  0, 877]])

In [16]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       1.00      0.97      0.98       921
           1       0.97      1.00      0.98       877

    accuracy                           0.98      1798
   macro avg       0.98      0.98      0.98      1798
weighted avg       0.98      0.98      0.98      1798



In [18]:
from sklearn.ensemble import RandomForestClassifier , AdaBoostClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier

In [19]:
classifiers = [
    RandomForestClassifier(),
    AdaBoostClassifier(),
    GradientBoostingClassifier(),
    LogisticRegression(),
    SVC(),
    KNeighborsClassifier(),
    DecisionTreeClassifier(),
    GaussianNB(),
    XGBClassifier()
]    

#create a directory to store result
result = {}

# Create an empty DataFrame to store the results
results_df = pd.DataFrame(columns=['Model', 'Accuracy', 'F1-Score', 'Precision', 'Recall'])

# Train and evaluate each classifier
for clf in classifiers:
    clf_name = clf.__class__.__name__
    clf.fit(x_train_scaled, y_train)
    y_pred = clf.predict(x_test_scaled)

    # Calculate accuracy
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred, output_dict=True)
    f1_score = report['weighted avg']['f1-score']
    precision = report['weighted avg']['precision']
    recall = report['weighted avg']['recall']

    # Append results to DataFrame
    results_df = pd.concat([results_df, pd.DataFrame([{
        'Model': clf_name,
        'Accuracy': accuracy,
        'F1-Score': f1_score,
        'Precision': precision,
        'Recall': recall
    }])], ignore_index=True)

    # Print metrics
    print(f"{clf_name} Accuracy: {accuracy}")
    print(f"Classification Report for {clf_name}:")
    print(classification_report(y_test, y_pred))
    print(f"Confusion Matrix for {clf_name}:")
    print(confusion_matrix(y_test, y_pred))
    print("="*50)



  results_df = pd.concat([results_df, pd.DataFrame([{


RandomForestClassifier Accuracy: 0.9827586206896551
Classification Report for RandomForestClassifier:
              precision    recall  f1-score   support

           0       1.00      0.97      0.98       921
           1       0.97      1.00      0.98       877

    accuracy                           0.98      1798
   macro avg       0.98      0.98      0.98      1798
weighted avg       0.98      0.98      0.98      1798

Confusion Matrix for RandomForestClassifier:
[[890  31]
 [  0 877]]
AdaBoostClassifier Accuracy: 0.7385984427141268
Classification Report for AdaBoostClassifier:
              precision    recall  f1-score   support

           0       0.75      0.73      0.74       921
           1       0.73      0.74      0.74       877

    accuracy                           0.74      1798
   macro avg       0.74      0.74      0.74      1798
weighted avg       0.74      0.74      0.74      1798

Confusion Matrix for AdaBoostClassifier:
[[676 245]
 [225 652]]
GradientBoostingCl

In [20]:

# Display the DataFrame
print(results_df)

                        Model  Accuracy  F1-Score  Precision    Recall
0      RandomForestClassifier  0.982759  0.982761   0.983347  0.982759
1          AdaBoostClassifier  0.738598  0.738637   0.738859  0.738598
2  GradientBoostingClassifier  0.797553  0.797202   0.801818  0.797553
3          LogisticRegression  0.760289  0.760271   0.761383  0.760289
4                         SVC  0.847052  0.846491   0.855328  0.847052
5        KNeighborsClassifier  0.867631  0.866314   0.887725  0.867631
6      DecisionTreeClassifier  0.947164  0.947084   0.952328  0.947164
7                  GaussianNB  0.725806  0.724835   0.727294  0.725806
8               XGBClassifier  0.943826  0.943761   0.948139  0.943826


In [21]:
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Initialize results DataFrame
results_df = pd.DataFrame()

# Variable to store the best model's accuracy
best_accuracy = 0
best_model_name = ""

# Train and evaluate each classifier
for clf in classifiers:
    clf_name = clf.__class__.__name__
    clf.fit(x_train_scaled, y_train)
    y_pred = clf.predict(x_test_scaled)

    # Calculate accuracy
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred, output_dict=True)
    f1_score = report['weighted avg']['f1-score']
    precision = report['weighted avg']['precision']
    recall = report['weighted avg']['recall']

    # Append results to DataFrame
    results_df = pd.concat([results_df, pd.DataFrame([{
        'Model': clf_name,
        'Accuracy': accuracy,
        'F1-Score': f1_score,
        'Precision': precision,
        'Recall': recall
    }])], ignore_index=True)

    # Update best model
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_model_name = clf_name

    # Print metrics
    print(f"{clf_name} Accuracy: {accuracy}")
    print(f"Classification Report for {clf_name}:")
    print(classification_report(y_test, y_pred))
    print(f"Confusion Matrix for {clf_name}:")
    print(confusion_matrix(y_test, y_pred))
    print("="*50)

# ✅ Save the best model's accuracy to a file
with open("accuracy.txt", "w") as f:
    f.write(f"Best Model: {best_model_name}\n")
    f.write(f"Model Accuracy: {best_accuracy * 100:.2f}%")


RandomForestClassifier Accuracy: 0.9827586206896551
Classification Report for RandomForestClassifier:
              precision    recall  f1-score   support

           0       1.00      0.97      0.98       921
           1       0.97      1.00      0.98       877

    accuracy                           0.98      1798
   macro avg       0.98      0.98      0.98      1798
weighted avg       0.98      0.98      0.98      1798

Confusion Matrix for RandomForestClassifier:
[[890  31]
 [  0 877]]
AdaBoostClassifier Accuracy: 0.7385984427141268
Classification Report for AdaBoostClassifier:
              precision    recall  f1-score   support

           0       0.75      0.73      0.74       921
           1       0.73      0.74      0.74       877

    accuracy                           0.74      1798
   macro avg       0.74      0.74      0.74      1798
weighted avg       0.74      0.74      0.74      1798

Confusion Matrix for AdaBoostClassifier:
[[676 245]
 [225 652]]
GradientBoostingCl

In [22]:
# Test basic file writing
with open("test_accuracy.txt", "w") as f:
    f.write("This is a test to check file writing.")

# Verify if the file was created
import os
print("File exists:", os.path.isfile("test_accuracy.txt"))


File exists: True


In [23]:
import os

# Absolute file path check
file_path = os.path.abspath("test_accuracy.txt")
print(f"File saved at: {file_path}")


File saved at: C:\Users\ROHIT VERMA\Desktop\heart_desease_app_with_flask\test_accuracy.txt


In [24]:
import pickle
# Save the model and scaler
with open("model.pkl", "wb") as f:
    pickle.dump(rf_model, f)

with open("scaler.pkl", "wb") as f:
    pickle.dump(scaler, f)

print("RandomForest model and scaler saved successfully!")

RandomForest model and scaler saved successfully!
