In [None]:
# import qrcode
# img = qrcode.make('https://colab.research.google.com/drive/15M9A1ScE3xpUZJQeYrZVC6E2qJ0W_Ise?usp=sharing')
# img.save('myqr.png')


In [None]:
# pip install qrcode

In [None]:
# # Project Objective
# Heart Disease Prediction using Machine Learning
# Heart disease prevention is critical, and data-driven prediction systems can significantly aid in early diagnosis and treatment. Machine Learning offers accurate prediction capabilities, enhancing healthcare outcomes.
# In this project, I analyzed a heart disease dataset with appropriate preprocessing. Multiple classification algorithms were implemented in Python using Scikit-learn and Keras to predict the presence of heart disease.

# **Algorithms Used**:
# - Logistic Regression
# - Naive Bayes
# - Support Vector Machine (Linear)
# - K-Nearest Neighbors
# - Decision Tree
# - Random Forest
# - XGBoost
# - Artificial Neural Network (1 Hidden Layer, Keras)

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
import warnings
warnings.filterwarnings('ignore')
print('All Modules Loaded Successfully!')

In [None]:
# Raw CSV of the heart-disease dataset, read straight from GitHub into a DataFrame.
url = 'https://github.com/ankitmisk/Heart_Disease_Prediction_ML_Model/blob/main/heart.csv?raw=true'
df = pd.read_csv(filepath_or_buffer=url)
print('Done')

In [None]:
#  Step 3 Understanding Data using EDA

In [None]:
# 3.1 Data head: first rows, a quick look at the column layout and value formats
df.head()

In [None]:
# 3.2 Data tail: last rows of the frame
df.tail()

In [None]:
# 3.3 Data shape as (rows, columns)

df.shape

In [None]:
# 3.4 Data information: dtypes, non-null counts and memory usage per column
df.info()

In [None]:
# 3.5 Data columns: the column labels
df.columns

###### 3.6 Column Desc
- **age**:			``age``
- **sex**:			``1: male, 0: female``
- **cp**:			``chest pain type, 1: typical angina, 2: atypical angina, 3: non-anginal pain, 4: asymptomatic``
- **trestbps**:			``resting blood pressure``
- **chol**:			``serum cholesterol in mg/dl``
- **fbs**:			``fasting blood sugar > 120 mg/dl``
- **restecg**:			``resting electrocardiographic results (values 0,1,2)``
- **thalach**:			 ``maximum heart rate achieved``
- **exang**:			``exercise induced angina``
- **oldpeak**:			``oldpeak = ST depression induced by exercise relative to rest``
- **slope**:			``the slope of the peak exercise ST segment``
- **ca**:			``number of major vessels (0-3) colored by fluoroscopy``
- **thal**:			``thal: 3 = normal; 6 = fixed defect; 7 = reversible defect``

In [None]:
# Domain Knowledge is required for this level of project

In [None]:
# Count the missing values in every column
df.isna().sum()
# No null values were found.
# If there were any, the options would be to drop the affected rows/columns or to fill (impute) them.

In [None]:
# Summary statistics of the numeric columns, rounded to 2 decimals for readability
df.describe().round(2)

In [None]:
# List the distinct values of each column, one separator-delimited block per
# column, to spot categorical encodings and unexpected values.
for column_name in df.columns:
    distinct_values = df[column_name].unique()
    print(f'Column_{column_name}', distinct_values)
    print('==================', end='\n\n')

In [None]:
# Grid of histograms (with a KDE overlay), one panel per dataframe column.
r, c = 5, 3
plt.figure(figsize=(17, 30))
for position, column_name in enumerate(df.columns, start=1):
    plt.subplot(r, c, position)
    plt.title(f'{column_name} Distribution Analysis', color='Blue')
    sns.histplot(df[column_name], kde=True, color='r')

# Write the whole grid to disk before showing it.
plt.savefig('All Numerical Features Analysis.jpeg', dpi=500)
plt.show()


In [None]:
# The target column is not heavily imbalanced, so we can proceed; if it were, we would need to rebalance it with over- or under-sampling.

In [None]:
# Annotated heatmap of the pairwise correlations between all columns
# (df.corr() rounded to 2 decimals so the cell annotations stay readable).
plt.figure(figsize=(10,12))
sns.heatmap(df.corr().round(2),cmap='mako',annot = True)
plt.show()

In [None]:
# "thalach" refers to the maximum heart rate achieved during exercise, specifically measured in beats per minute (bpm)

In [None]:
# Fasting blood sugar (FBS) levels, when elevated, can indicate an increased risk of heart disease

In [None]:
# "slope" usually refers to the ST/HR slope, which is a measurement derived from an exercise stress test used to assess the severity of coronary artery disease

In [None]:
# 3.12: Target vs Features
# Correlation of every column with 'target', sorted from most positive to most
# negative; reset_index() turns the Series into a two-column table for display.
df.corr()['target'].sort_values(ascending = False).reset_index()


In [None]:
# Mean target rate for each category of the discrete features, split by sex.
r = 3
c = 3
x_col = ['sex','cp','restecg','fbs','exang','slope', 'ca', 'thal']
plt.figure(figsize=(15,14))
# Iterate over the features themselves rather than over all r*c grid slots:
# the grid has 9 slots but there are only 8 features, and the previous bare
# `except:` silently hid the resulting IndexError (and would have hidden any
# real plotting error too), leaving an empty ninth panel.
for i, feature in enumerate(x_col):
    ax = plt.subplot(r, c, i + 1)
    ax.set_title(f'Feature {feature} vs Gender vs Target')
    # hue='sex' produces the gender split that the title and legend describe.
    sns.barplot(data=df, x=feature, y='target', hue='sex',
                palette=sns.color_palette('mako', 2), ax=ax)
    # Relabel seaborn's own legend entries (hue levels 0 and 1) instead of
    # attaching free-floating labels to whichever bars happen to come first.
    legend = ax.get_legend()
    if legend is not None:
        for text, label in zip(legend.get_texts(), ['0-Female', '1-Male']):
            text.set_text(label)


plt.show()


In [None]:
# 4.1 Divide data into Target and Features

# Features: every column except the last one; label: the 'target' column.
# NOTE(review): this assumes 'target' is the last column of df — confirm,
# otherwise the label would leak into X.
X = df.iloc[:,:-1]
y = df['target']


In [None]:
# Sanity check: X and y must have the same number of rows
X.shape, y.shape

In [None]:
# 4.2 Train - test Split

In [None]:
# 4.2 Train - test Split
# Hold out 20% of the rows for testing; random_state fixes the shuffle so the
# same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=42)

In [None]:
# Shapes of the four splits, printed in the order X_train, X_test, y_train, y_test.
for split in (X_train, X_test, y_train, y_test):
    print(split.shape)

In [None]:
from sklearn.linear_model import LogisticRegression

# Logistic-regression baseline with scikit-learn's default hyper-parameters
model_lr = LogisticRegression()

# Learn from the training split only
model_lr.fit(X_train,y_train)

In [None]:
# Chest-pain type against the target, coloured by target
sns.scatterplot(data = df, x = 'cp', y = 'target', hue = 'target')
plt.show()

In [None]:
# Predicted labels for the held-out test rows; the metric cells below read y_pred
y_pred = model_lr.predict(X_test)
# y_pred

In [None]:
# 5.1.2: Import model evaluation metrices
# It will be called only when problem is of Classification
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

In [None]:
# 5.1.3: Import Confusion Matrix to Evaluate classificationModel
from sklearn.metrics import confusion_matrix

In [None]:
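# Error comparison: if two models tie on score, compare the kinds of error they make.
# Type I error = false positive, Type II error = false negative; when screening for disease, a false negative (a missed patient) is usually the more dangerous one.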

In [None]:
# Confusion matrix of the current y_pred against the true test labels
cm = confusion_matrix(y_test,y_pred)

In [None]:
# Heatmap of the matrix: rows are actual classes, columns are predicted classes
plt.title('Actual vs Predicted')
sns.heatmap(cm,annot = True,cmap='mako')
plt.xlabel('Predicted Value')
plt.ylabel('Actual Value')
plt.show()

In [None]:
# For a binary problem ravel() flattens the 2x2 matrix in the order TN, FP, FN, TP
TN,FP,FN,TP = cm.ravel()

In [None]:
# Precision: share of predicted positives that are truly positive
precision_score(y_test,y_pred)

In [None]:
# Accuracy computed by hand: correct predictions divided by all predictions
acs = (TN + TP)/cm.sum()
print(acs)

In [None]:
# Keep the logistic-regression accuracy in lr_score: the final model-comparison
# cell reads that name, and no other cell assigns it.
lr_score = accuracy_score(y_test,y_pred)
print(lr_score)

In [None]:
# Recall: share of actual positives that the model found
rs = recall_score(y_test,y_pred)
print('Recall Score is: ',rs)

In [None]:
# F1: harmonic mean of precision and recall
f1_sc = f1_score(y_test,y_pred)
f1_sc

In [None]:
# Per-class precision / recall / F1 / support in one table
print(classification_report(y_test,y_pred))

In [None]:
#5.1.4: Checking Model Score
# For a classifier, .score() returns the mean accuracy on the given data,
# i.e. the same number as accuracy_score above
model_lr.score(X_test,y_test)

In [None]:
from sklearn.naive_bayes import GaussianNB
# GaussianNB only be use in case of binary classification
# MultinomialNB: only be use in case of Multi class classification


model_nv = GaussianNB()

model_nv.fit(X_train,y_train)  # Train part/learning


In [None]:
print('Accuracy Score: ',accuracy_score(y_test,y_pred))
print('Precision Score: ',precision_score(y_test,y_pred))
print('Recall Score: ',recall_score(y_test,y_pred))
print('F1 Score: ',f1_score(y_test,y_pred))


In [None]:
cm = confusion_matrix(y_test,y_pred)

plt.title('Actual vs Predicted using Naive Bayes ML Model')
sns.heatmap(cm,annot = True,cmap='mako')
plt.xlabel('Predicted Value')
plt.ylabel('Actual Value')
plt.show()

In [None]:
# Model1>Y_pred>Scores>confusionMatrix>classification_report
# Model2>Y_pred>Scores>confusionMatrix>classification_report
# Model3>Y_pred>Scores>confusionMatrix>classification_report
# Model4>Y_pred>Scores>confusionMatrix>classification_report
# Model5>Y_pred>Scores>confusionMatrix>classification_report
# Model6>Y_pred>Scores>confusionMatrix>classification_report
# best Model Save: Error(low), Accuracy Score

# SVM

In [None]:
# SVM works with the support vectors: the data points of the different classes that lie nearest to the boundary
# It separates the classes with a line, a plane or a hyperplane
# It can be used for both regression and classification (binary or multi-class)

In [None]:
from sklearn.svm import SVC

# NOTE(review): SVC() is created with default settings, and scikit-learn documents
# the default kernel as RBF, while the project summary lists
# "Support Vector Machine (Linear)" — confirm which kernel is intended.
model_svc = SVC()

model_svc.fit(X_train,y_train)

In [None]:
# From here on y_pred holds the SVC predictions
y_pred = model_svc.predict(X_test)

In [None]:
print('Accuracy Score: ',accuracy_score(y_test,y_pred))
print('Precision Score: ',precision_score(y_test,y_pred))
print('Recall Score: ',recall_score(y_test,y_pred))
print('F1 Score: ',f1_score(y_test,y_pred))


In [None]:
print(classification_report(y_test,y_pred))

In [None]:
# Confusion matrix of the SVC predictions (rows: actual, columns: predicted)
cm = confusion_matrix(y_test,y_pred)

plt.title('Actual vs Predicted using SVC ML Model')
sns.heatmap(cm,annot = True,cmap='mako')
plt.xlabel('Predicted Value')
plt.ylabel('Actual Value')
plt.show()

In [None]:
# Stored for the final model-comparison chart
svm_score = accuracy_score(y_test,y_pred)

In [None]:
# Mean accuracy on the test split; same value as svm_score
model_svc.score(X_test,y_test)

In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
model_dt = DecisionTreeClassifier()

model_dt.fit(X_train,y_train)

In [None]:
# 5.4.2 Step Predict y_pred

y_pred = model_dt.predict(X_test)

In [None]:
print('Accuracy Score: ',accuracy_score(y_test,y_pred))
print('Precision Score: ',precision_score(y_test,y_pred))
print('Recall Score: ',recall_score(y_test,y_pred))
print('F1 Score: ',f1_score(y_test,y_pred))

In [None]:
from sklearn.tree import plot_tree

In [None]:
plt.figure(figsize=(15, 10)) # Adjust figure size for better readability
plot_tree(model_dt,
          feature_names=X.columns,
          class_names=['No-0','Yes-1'],
          filled=True, # Color nodes based on class
          rounded=True, # Round node corners
          fontsize=5) # Adjust font size

plt.title("Decision Tree Visualization")
plt.show() # Display the plot

# Step 5.5: Ensemble ML Model
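- `Bagging Technique`: Several models are trained independently on different bootstrap samples of the data; each model gives its own prediction, and the final outcome is decided by majority vote (classification) or by averaging (regression).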
- `Boosting Technique`: Models are trained one after another; each new model learns from the errors of the previous ones, so the error of the combined model shrinks step by step and the score improves.

In [None]:
# Random Forest regression problem: Avg of Final value>> 2.2 + 2.4 + 2.5>> 2.36
# Random Forest Classification>> Binary/Multiclass classification >> Majority Voting

In [None]:
from sklearn.ensemble import RandomForestClassifier

# 200 trees instead of the default 100. The forest draws bootstrap samples and
# feature subsets at random, so seed it to make the reported scores
# reproducible; 42 matches the seed used for the train/test split.
model_rf = RandomForestClassifier(n_estimators = 200, random_state = 42)

model_rf.fit(X_train,y_train)  # Learning

In [None]:
# From here on y_pred holds the random-forest predictions
y_pred = model_rf.predict(X_test)

In [None]:
print('Accuracy Score: ',accuracy_score(y_test,y_pred))
print('Precision Score: ',precision_score(y_test,y_pred))
print('Recall Score: ',recall_score(y_test,y_pred))
print('F1 Score: ',f1_score(y_test,y_pred))

In [None]:
# Stored for the final model-comparison chart
rf_score = accuracy_score(y_test,y_pred)
rf_score

In [None]:
print(classification_report(y_test,y_pred))

In [None]:
# Confusion matrix of the random-forest predictions (rows: actual, columns: predicted)
cm = confusion_matrix(y_test,y_pred)

plt.title('Actual vs Predicted Confusion Matrix using Random Forest Model')
sns.heatmap(cm,annot = True,cmap='mako')
plt.xlabel('Predicted Value')
plt.ylabel('Actual Value')
plt.show()

### Ensemble Boosting technique: Adaboost, Gradientboost, XGboost

In [None]:
# AdaBoost: Adaptive Boosting
# GradientBoost: Gradient Boosting Machine (GBM)
# LightGBM: Light Gradient Boosting Machine (LGBM)
# XGBoost: Extreme Gradient Boosting
# CatBoost: gradient boosting aimed at categorical features

In [None]:
# Reference figure:
# https://ars.els-cdn.com/content/image/1-s2.0-S1568494623000844-gr6.jpg

In [None]:
from xgboost import XGBClassifier

# Default hyper-parameters are used here; candidates for fine-tuning would be
# e.g. objective="binary:logistic", random_state=42, learning_rate = 0.001
model_xgb = XGBClassifier() # Hyperparameters: Model Fine tune  objective="binary:logistic", random_state=42, learning_rate = 0.001
model_xgb.fit(X_train,y_train)  # no hyper-parameter tuning applied

In [None]:
# From here on y_pred holds the XGBoost predictions
y_pred = model_xgb.predict(X_test)

In [None]:
# Stored for the final model-comparison chart
xgb_score = accuracy_score(y_test,y_pred)
xgb_score

In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# Each prediction is a vote among the 14 nearest training rows.
# NOTE(review): the notebook does not show how k = 14 was chosen — confirm.
model_knn = KNeighborsClassifier(n_neighbors=14) # k= 14

model_knn.fit(X_train,y_train)


In [None]:
# From here on y_pred holds the KNN predictions
y_pred = model_knn.predict(X_test)

In [None]:
# Stored for the final model-comparison chart
knn_score = accuracy_score(y_test,y_pred)
knn_score

In [None]:
print(classification_report(y_test,y_pred))

In [None]:
# Confusion matrix of the KNN predictions (rows: actual, columns: predicted)
cm = confusion_matrix(y_test,y_pred)

plt.title('Actual vs Predicted Confusion Matrix using KNN Model')
sns.heatmap(cm,annot = True,cmap='mako')
plt.xlabel('Predicted Value')
plt.ylabel('Actual Value')
plt.show()

# Step 5.8: Artificial Neural network using Deep learning

In [None]:
# ML works only on Limited data
# If Input Changes ML Model fail(8 Features col: 8)

In [None]:
# https://editor.analyticsvidhya.com/uploads/94912bosem.png

In [None]:
# https://media.licdn.com/dms/image/v2/D4D12AQH2F3GJ9wen_Q/article-cover_image-shrink_720_1280/article-cover_image-shrink_720_1280/0/1688885174323?e=2147483647&v=beta&t=dY_S6xeNsRCIvpIrjrPFzq8qgHPgmP4e_HLaA15ufPM

In [None]:
from keras.models import Sequential
from keras.layers import Dense

In [None]:
# The input width follows the training data instead of a hard-coded 13, so the
# network still builds if a feature column is added or dropped.
n_features = X_train.shape[1]

model = Sequential()  # Step 1 Sequential model creation
model.add(Dense(11,activation='relu',input_dim=n_features)) # Single hidden layer: 11 ReLU units
model.add(Dense(1,activation='sigmoid')) # One sigmoid output neuron for the binary target

model.compile(loss='binary_crossentropy',  # loss suited to a 0/1 target
              optimizer='adam', # optimizer that updates the weights during training
              metrics=['accuracy'])  # report accuracy after every epoch

print('Done')

In [None]:
# 5.8.3 Model Training
model.fit(X_train,y_train,epochs=300)  # iteration during retraining: epochs=300

In [None]:
# 5.8.4 pred y_value

y_pred = model.predict(X_test)
y_pred  = [round(i[0]) for i in  y_pred]
print(y_pred)

In [None]:
ann_score = accuracy_score(y_test,y_pred)
ann_score

In [None]:
# lr_score, nv_score and dt_score are listed below but the notebook never stored
# them, so this cell failed with a NameError. Derive them here directly from
# the fitted models, which keeps this cell independent of whatever model
# y_pred last belonged to.
lr_score = accuracy_score(y_test, model_lr.predict(X_test))
nv_score = accuracy_score(y_test, model_nv.predict(X_test))
dt_score = accuracy_score(y_test, model_dt.predict(X_test))

# Scores and display names, kept in the same order for the comparison chart
all_model_score  = [lr_score,nv_score,svm_score,dt_score,rf_score,xgb_score,knn_score,ann_score]
model_name = ['Logistic','Naive','SVM','Decision Tree','Random Forest','XGB','Knn','ANN']

print('Done')

In [None]:
# Bar chart comparing the accuracy of every trained model, one bar per model.
plt.figure(figsize=(12, 5))
bars = plt.bar(
    x=model_name,
    height=all_model_score,
    color=sns.color_palette('mako', 8),
)
plt.bar_label(bars)  # print each score on top of its bar
plt.title('All Model Score Comparison')
plt.xlabel('Model Name')
plt.ylabel('Score')
plt.xticks(rotation=45)
plt.show()

In [None]:
# https://github.com/ankitmisk/Heart_Disease_Prediction_Classification_Model/blob/main/ML%20Model%20Heart%20Disease%20Prediction.ipynb

In [None]:
import pickle
# Persist the fitted logistic-regression model to 'heart_disease_pred.pkl'
# so it can be loaded again outside this notebook.
with open('heart_disease_pred.pkl','wb') as f:
    pickle.dump(model_lr,f)
print('Done')

In [None]:
# Streamlit front-end for the pickled heart-disease model.
# NOTE(review): this is app code built on Streamlit widgets; it presumably is
# meant to live in its own .py file and be started with `streamlit run` —
# confirm how it is deployed.
import pickle
import random
import time

import numpy as np
import pandas as pd
import streamlit as st

st.header('Heart Disease Prediction Using Machine Learning')

data = '''Heart Disease Prediction using Machine Learning Heart disease prevention is critical, and data-driven prediction systems can significantly aid in early diagnosis and treatment. Machine Learning offers accurate prediction capabilities, enhancing healthcare outcomes. In this project, I analyzed a heart disease dataset with appropriate preprocessing. Multiple classification algorithms were implemented in Python using Scikit-learn and Keras to predict the presence of heart disease.

Algorithms Used:

**Logistic Regression**

**Naive Bayes**

**Support Vector Machine (Linear)**

**K-Nearest Neighbors**

**Decision Tree**

**Random Forest**

**XGBoost**

**Artificial Neural Network (1 Hidden Layer, Keras)**
'''

st.markdown(data)


st.image('https://i0.wp.com/asianheartinstitute.org/wp-content/uploads/2024/11/Understanding-How-Heart-Disease-Impacts-Your-Body.jpg?fit=1572%2C917&ssl=1')

# SECURITY: pickle.load can execute arbitrary code stored in the file. Only
# load a pickle that this project produced itself; never a user-supplied one.
with open('heart_disease_pred.pkl','rb') as f:
    heart_model = pickle.load(f)

# Load data (used only to derive the slider ranges and the feature names)
url = '''https://github.com/ankitmisk/Heart_Disease_Prediction_ML_Model/blob/main/heart.csv?raw=true'''
df = pd.read_csv(url)


st.sidebar.header('Select Features to Predict Heart Disease')
st.sidebar.image('https://humanbiomedia.org/animations/circulatory-system/cardiac-cycle/heart-beating.gif')

# Seed BEFORE the slider defaults are drawn. Streamlit re-runs the script on
# every interaction; with the seed set only afterwards, each rerun produced
# new random defaults and therefore reset the sliders the user had just moved.
random.seed(132)

# Every column except the last one is a model input (same split as in training).
features = df.iloc[:,:-1]

all_values = []

for column in features.columns:
    min_value, max_value = features[column].agg(['min','max'])

    if pd.api.types.is_float_dtype(features[column]):
        # Keep fractional columns fractional: casting the bounds to int made
        # decimal values impossible to select.
        low, high = float(min_value), float(max_value)
        default = random.uniform(low, high)
    else:
        low, high = int(min_value), int(max_value)
        default = random.randint(low, high)

    var = st.sidebar.slider(f'Select {column} value', low, high, default)

    all_values.append(var)

# One-row frame carrying the training column names, so the model receives the
# features under the same names and in the same order it was fitted with.
final_value = pd.DataFrame([all_values], columns=features.columns)

ans = heart_model.predict(final_value)[0]

progress_bar = st.progress(0)
placeholder = st.empty()
placeholder.subheader('Predicting Heart Disease')

place = st.empty()
place.image('https://i.makeagif.com/media/1-17-2024/dw-jXM.gif',width = 200)

# Purely cosmetic "working" animation (about 5 seconds in total)
for i in range(100):
    time.sleep(0.05)
    progress_bar.progress(i + 1)

# Class 0 means no disease; anything else is reported as a finding.
if ans == 0:
    body = 'No Heart Disease Detected'
    show_result = st.success
else:
    body = 'Heart Disease Found'
    show_result = st.warning

# Remove the temporary "predicting" widgets, show the verdict, and reset the
# existing progress bar (previously a second, new bar was created instead).
placeholder.empty()
place.empty()
show_result(body)
progress_bar.progress(0)



