# Heart Disease Detection & Prediction
#### Using KNN, SVM, Random Forest, Naive Bayes

### Load Modules

In [1]:
import pandas as pd
#import numpy as np

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

# Silence every Python warning for the rest of the session.
# NOTE(review): this blanket filter also hides any warnings raised by the
# scikit-learn estimators fitted below; consider narrowing it to a category.
import warnings
warnings.filterwarnings("ignore")

### Load Dataset

In [2]:
# Load the heart-disease records from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer="heart.csv")

In [3]:
# Display the loaded frame; the rendered output elides the middle rows.
df

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1020,59,1,1,140,221,0,1,164,1,0.0,2,0,2,1
1021,60,1,0,125,258,0,0,141,1,2.8,1,1,3,0
1022,47,1,0,110,275,0,0,118,1,1.0,1,1,2,0
1023,50,0,0,110,254,0,0,159,0,0.0,2,0,2,1


In [4]:
# (rows, columns) of the loaded frame.
df.shape

(1025, 14)

In [5]:
# Count the missing values in each column.
df.isna().sum()

age         0
sex         0
cp          0
trestbps    0
chol        0
fbs         0
restecg     0
thalach     0
exang       0
oldpeak     0
slope       0
ca          0
thal        0
target      0
dtype: int64

### Data Cleaning

In [6]:
# Keep only the rows whose `thal` value is non-zero.
# NOTE(review): thal == 0 is presumably an invalid/missing code in this dataset — confirm.
df = df.loc[df["thal"].ne(0)]

### Separating Features and Target

In [7]:
# Split the frame into the label column (y) and the remaining feature columns (x).
y = df['target']
x = df.drop(columns=['target'])

### Data Normalization

In [8]:
# Min-max normalization: rescale every feature column to the [0, 1] range.
# NOTE(review): the scaler is fitted on all rows before the train/test split, so
# test rows influence the scaling, and the fitted scaler object is not kept.
x = MinMaxScaler().fit(x).transform(x)

### Splitting the Dataset

In [9]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    random_state=4,
    test_size=0.2,
)

### Model Implementation

#### KNN

In [10]:
# Fit a 3-nearest-neighbours classifier on the training split,
# then label the held-out test split with it.
KNNClassifier = KNeighborsClassifier(n_neighbors=3).fit(x_train, y_train)
y_pred_KNN = KNNClassifier.predict(x_test)

In [11]:
# Fraction of test rows the KNN model labelled correctly.
# accuracy_score takes (y_true, y_pred); pass the true labels first so the call
# matches the library signature and the evaluation cell further down.
KNNAcc = accuracy_score(y_test, y_pred_KNN)
# \033[1m switches the terminal to bold for the percentage.
print(f'K-Nearest Neighbour Accuracy:\033[1m {KNNAcc * 100:.2f}%')

K-Nearest Neighbour Accuracy:[1m 97.55%


#### Naive Bayes

In [12]:
# Fit Gaussian Naive Bayes (variance smoothing 0.1) on the training split,
# then label the held-out test split with it.
GNBclassifier = GaussianNB(var_smoothing=0.1).fit(x_train, y_train)
y_pred_GNB = GNBclassifier.predict(x_test)

In [13]:
# Fraction of test rows the Naive Bayes model labelled correctly.
# accuracy_score takes (y_true, y_pred); pass the true labels first so the call
# matches the library signature and the evaluation cell further down.
GNBAcc = accuracy_score(y_test, y_pred_GNB)
# \033[1m switches the terminal to bold for the percentage.
print(f'Gaussian Naive Bayes Accuracy:\033[1m {GNBAcc * 100:.2f}%')

Gaussian Naive Bayes Accuracy:[1m 83.82%


#### Random Forest

In [14]:
# Random forest of 1000 trees; each tree is limited to 20 leaves and only splits
# nodes holding at least 15 samples. The seed makes the forest reproducible.
RFclassifier = RandomForestClassifier(
    n_estimators=1000,
    max_leaf_nodes=20,
    min_samples_split=15,
    random_state=1,
).fit(x_train, y_train)
y_pred_RF = RFclassifier.predict(x_test)

In [15]:
# Fraction of test rows the random forest labelled correctly.
# accuracy_score takes (y_true, y_pred); pass the true labels first so the call
# matches the library signature and the evaluation cell further down.
RFAcc = accuracy_score(y_test, y_pred_RF)
# \033[1m switches the terminal to bold for the percentage.
print(f'Random Forest Accuracy:\033[1m {RFAcc * 100:.2f}%')

Random Forest Accuracy:[1m 94.12%


#### SVM

In [16]:
# Linear-kernel support vector classifier with C=10.
# NOTE(review): max_iter=1000 caps the solver iterations, and probability=True is
# requested although no cell shown here calls predict_proba — confirm both are intended.
SVMclassifier = SVC(
    C=10,
    kernel='linear',
    max_iter=1000,
    probability=True,
).fit(x_train, y_train)
y_pred_SVM = SVMclassifier.predict(x_test)

In [17]:
# Fraction of test rows the SVM labelled correctly.
# accuracy_score takes (y_true, y_pred); pass the true labels first so the call
# matches the library signature and the evaluation cell further down.
SVMAcc = accuracy_score(y_test, y_pred_SVM)
# \033[1m switches the terminal to bold for the percentage.
print(f'Support Vector Machine Accuracy:\033[1m {SVMAcc * 100:.2f}%')

Support Vector Machine Accuracy:[1m 89.71%


KNN achieved the highest test accuracy of the four models (97.55%), so we'll use KNN for prediction.

### Prediction Model

In [18]:
# Label the held-out test split with the selected KNN model.
predictions = KNNClassifier.predict(X=x_test)

In [19]:
# Confusion matrix of true labels (y_test) against the KNN predictions.
result = confusion_matrix(y_test, predictions)
print("Confusion Matrix:\n", result, sep="\n")

# Per-class precision / recall / F1 summary.
result1 = classification_report(y_test, predictions)
print("\n\nClassification Report:\n", result1, sep="\n")

# Overall accuracy; \033[1m switches the terminal to bold for the percentage.
result2 = accuracy_score(y_test, predictions)
print(f"\nPrediction Model Accuracy:\033[1m {result2 * 100:.2f}%")

Confusion Matrix:

[[101   0]
 [  5  98]]


Classification Report:

              precision    recall  f1-score   support

           0       0.95      1.00      0.98       101
           1       1.00      0.95      0.98       103

    accuracy                           0.98       204
   macro avg       0.98      0.98      0.98       204
weighted avg       0.98      0.98      0.98       204


Prediction Model Accuracy:[1m 97.55%


In [20]:
# Persist the fitted KNN classifier to disk with pandas' pickle helper.
# NOTE(review): only the classifier is stored; the min-max scaler applied to the
# training features is not saved alongside it.
pd.to_pickle(obj=KNNClassifier, filepath_or_buffer='cvd_model.pickle')

In [21]:
# Load the classifier back from the pickle file written above.
# NOTE: unpickling can execute code embedded in the file — only load trusted pickles.
model = pd.read_pickle(filepath_or_buffer='cvd_model.pickle')

### Heart Disease Prediction

In [22]:
def _ask_float(prompt):
    """Show `prompt`, read one line from the user and return it as a float.

    Raises:
        ValueError: if the entered text cannot be parsed as a number.
    """
    return float(input(prompt))


## Take input from user (one value per feature column of the dataset)
age = _ask_float("Enter Age: ")
sex = _ask_float("Enter Gender (0,1): ")
cp = _ask_float("Enter Chest pain type (0-3): ")
trestbps = _ask_float("Enter Resting blood pressure: ")
chol = _ask_float("Enter Serum cholesterol in mg/dl: ")
fbs = _ask_float("Enter Fasting blood sugar > 120 mg/dl (0,1): ")
restecg = _ask_float("Enter Resting electrocardiographic results (0-2): ")
thalach = _ask_float("Enter Maximum heart rate: ")
exang = _ask_float("Enter Exercise induced angina (0,1): ")
oldpeak = _ask_float("Enter ST depression: ")
slope = _ask_float("Enter Slope of peak exercise (0-2): ")
ca = _ask_float("Enter Number of major vessels (0-3): ")
# Rows with thal == 0 are removed in the "Data Cleaning" step, so the model has
# never seen that value; the prompt therefore offers 1-3 only.
thal = _ask_float("Enter thalesemia (1-3): ")

'## Take input from user\nage = float(input("Enter Age: "))\nsex = float(input("Enter Gender (0,1): "))\ncp = float(input("Enter Chest pain type (0-3): "))\ntrestbps = float(input("Enter Resting blood pressure: "))\nchol = float(input("Enter Serum cholesterol in mg/dl: "))\nfbs = float(input("Enter Fasting blood sugar > 120 mg/dl (0,1): "))\nrestecg = float(input("Enter Resting electrocardiographic results (0-2): "))\nthalach = float(input("Enter Maximum heart rate: "))\nexang = float(input("Enter Exercise induced angina (0,1): "))\noldpeak = float(input("Enter ST depression: "))\nslope = float(input("Enter Slope of peak exercise (0-2): "))\nca = float(input("Enter Number of major vessels (0-3): "))\nthal = float(input("Enter thalesemia (0-3): "))'

In [23]:
# The classifier was trained on min-max scaled features (see "Data Normalization"),
# so the raw user values must go through the same scaling before prediction;
# feeding them unscaled puts them on a completely different scale from the training data.
# `df` is still the cleaned frame the original scaler was fitted on, so refitting
# here reproduces exactly the same per-column minimum and maximum.
feature_frame = df.drop(['target'], axis=1)
input_scaler = MinMaxScaler().fit(feature_frame)

# One-row frame with the training column names and order (input must be 2D).
user_row = pd.DataFrame(
    [[age, sex, cp, trestbps, chol, fbs, restecg, thalach, exang, oldpeak, slope, ca, thal]],
    columns=feature_frame.columns,
)
result = model.predict(input_scaler.transform(user_row))

# Translate each predicted label into a readable answer (0 = no disease, 1 = disease).
for i in result:
    if i==0:
        print("Heart Disease: No")
    elif i==1:
        print("Heart Disease: Yes")

'result = model.predict([[age, sex, cp, trestbps, chol, fbs, restecg, thalach, exang, oldpeak, slope, ca, thal]])  # input must be 2D array\n\nfor i in result:\n    if i==0:\n        print("Heart Disease: No")\n    elif i==1:\n        print("Heart Disease: Yes")'

In [24]:
# Show the raw predicted label array returned by the model.
print(result)