# Packages Required

In [None]:
import pandas as pd
import numpy as np
#visualisation
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
#EDA
from collections import Counter
# data preprocessing
from sklearn.preprocessing import StandardScaler
# data splitting
from sklearn.model_selection import train_test_split
# data modeling
from sklearn.metrics import confusion_matrix,accuracy_score,roc_curve,classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB

# Loading Data

In [None]:
# Input data files live under the "../input/" directory.
# Walk that directory and print the full path of every file found, so the
# CSV path used below can be checked against what is actually available.
import os

for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# Anything written to the current directory is saved as output.
# Read the heart-attack indicators table into the frame used by every later cell.
Heartattack = pd.read_csv('/kaggle/input/heart-attack/Heart Attack Indicators.csv')

In [None]:
# Preview the first rows to sanity-check that the CSV loaded as expected
Heartattack.head()

# Understanding Data

In [None]:

# Print the frame summary: column names, non-null counts and dtypes
Heartattack.info()

In [None]:
# Show the column labels available in the dataset
Heartattack.columns

In [None]:

# Show descriptive statistics for the dataset's columns
Heartattack.describe()

In [None]:
# Checking for null values: draw the boolean "is missing" mask as a heatmap,
# so any missing cell stands out against the rest of the frame.
missing_mask = Heartattack.isnull()
sns.heatmap(missing_mask, cmap='viridis', cbar=False, yticklabels=False)

In [None]:
# Annotated correlation heatmap of the features.
# DataFrame.corr() raises on non-numeric columns in pandas >= 2.0 (older
# versions silently dropped them), and this notebook label-encodes string
# columns only later on, so restrict the input to numeric/bool columns here.
fig, ax = plt.subplots(figsize=(10, 10))
HA = Heartattack.select_dtypes(include=['number', 'bool']).corr()
sns.heatmap(HA, annot=True, cmap='coolwarm', ax=ax)

# Data Visualization

In [None]:
# Count of each target class, split by sex.
# The column must be passed as x=...: current seaborn releases accept only
# `data` positionally, so the bare 'target' argument fails there.
sns.countplot(x='target', hue='sex', data=Heartattack)

In [None]:
# Histogram of patient age in 30 bins.
# kde must be the boolean False: the string 'False' is truthy and would
# switch the density curve on instead of off.
sns.displot(Heartattack['age'], kde=False, bins=30)

In [None]:
# Bar chart of how many rows fall in each target class for every age value.
age_by_target = pd.crosstab(Heartattack.age, Heartattack.target)
age_by_target.plot(kind="bar")
plt.xlabel("Age")
plt.ylabel("Frequency")
plt.title("Freuency of Heart Disease with Age")
plt.legend(["No Attack", "Heart Attack"], loc="upper right")
# Widen the current figure after plotting so the per-age bars stay readable
fig = plt.gcf()
fig.set_size_inches(22, 10)

In [None]:
# Count of rows per sex, with one bar per target class.
sns.countplot(data=Heartattack, x='sex', hue="target", palette="tab10")
plt.legend(["No Attack", "Heart Attack"], loc="upper right")
# Resize the current figure to a square 8x8 inch canvas
fig = plt.gcf()
fig.set_size_inches(8, 8)

In [None]:
# Histogram of the 'chol' column in 30 bins.
# kde must be the boolean False: the string 'False' is truthy and would
# switch the density curve on instead of off.
sns.displot(Heartattack['chol'], kde=False, bins=30)

In [None]:
# Scatter of age against maximum heart rate (thalach), coloured by outcome.
# x1/y1 hold the target == 1 rows and x2/y2 the target == 0 rows.
x1 = Heartattack.age[Heartattack.target == 1]
x2 = Heartattack.age[Heartattack.target == 0]
y1 = Heartattack.thalach[Heartattack.target == 1]
y2 = Heartattack.thalach[Heartattack.target == 0]
# Attach the label to each series directly. A positional legend list labelled
# the first-drawn series (target == 1) as "No Attack", the opposite of how the
# other plots in this notebook name the classes.
plt.scatter(x=x1, y=y1, c="blue", label="Heart Attack")
plt.scatter(x=x2, y=y2, c="red", label="No Attack")
plt.legend()
plt.xlabel("Age")
plt.ylabel("Maximum Heart Rate")
plt.show()

In [None]:
# Bar chart of target-class counts for each chest pain type (cp).
cp_by_target = pd.crosstab(Heartattack.cp, Heartattack.target)
cp_by_target.plot(kind="bar", figsize=(15, 6), color=["#ff6600", "#9966cc"])
plt.title("Heart Disease Frequency  w.r.t  to Chest Pain Type")
plt.xlabel("Chest Pain Type")
plt.ylabel("Frequency of Disease")
# Keep the category tick labels horizontal
plt.xticks(rotation=0)
plt.legend(["No Attack", "Heart Attack"], loc="upper right")
# The final size is set here, overriding the figsize passed to plot() above
fig = plt.gcf()
fig.set_size_inches(8, 8)

# Model Preparation

In [None]:
# Separate the label column ('target') from the feature columns.
y = Heartattack['target']
X = Heartattack.drop(columns=['target'])

In [None]:
from sklearn import preprocessing

# Label-encode every text column so the estimators below receive numbers only.
# A column is treated as text when its first value is a str; .iloc[0] keeps
# the check positional, so it does not depend on an index label 0 existing,
# and the length guard avoids an IndexError on an empty frame.
encoder = preprocessing.LabelEncoder()
for col in Heartattack.columns:
    if len(Heartattack) and isinstance(Heartattack[col].iloc[0], str):
        # fit_transform refits the shared encoder for each column in turn
        Heartattack[col] = encoder.fit_transform(Heartattack[col])

# X and y were split off before this cell ran, so they still hold the
# un-encoded values; rebuild them from the encoded frame so the encoding
# actually reaches the data used for training.
X = Heartattack.drop(['target'], axis=1)
y = Heartattack['target']
            
            

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=101,
    test_size=0.3,
)
# Which class labels ended up in the test split
print(y_test.unique())
# Checking if the data is split equally: per-class counts in the training labels
Counter(y_train)


# Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression

# Fit a logistic-regression classifier with default settings on the training
# split; fit() returns the estimator itself, so `lm` is the fitted model.
lm = LogisticRegression().fit(X_train, y_train)
lm

In [None]:
# Predict on the held-out split and score the logistic-regression model.
pred = lm.predict(X_test)
print(pred)
# Score `pred`, this model's own output. The name `predict` is only bound
# later by the KNN cell, so using it here raises NameError on a clean run
# (or silently reports KNN's accuracy when cells are run out of order).
LR_acc_score = accuracy_score(y_test, pred)
print("Accuracy of LR model:",LR_acc_score*100,'\n')

In [None]:
# Two-row frame: logistic-regression predictions on top, true labels below.
resultLR = pd.DataFrame([pred, y_test])
# Display it transposed so each test sample is one row (resultLR itself is unchanged)
resultLR.T

In [None]:
from sklearn.metrics import classification_report

# Text report of the classification metrics for the logistic-regression predictions
lr_report = classification_report(y_test, pred)
print(lr_report)

In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix of true labels against logistic-regression predictions
cm = confusion_matrix(y_true=y_test, y_pred=pred)
cm

# KNN

In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# Baseline k-nearest-neighbours model using a single neighbour;
# fit() returns the estimator itself, so `knn` is the fitted model.
knn = KNeighborsClassifier(n_neighbors=1).fit(X_train, y_train)
knn

In [None]:
# Predict test-split labels with the currently fitted KNN model and display them
predic = knn.predict(X_test)
predic

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Print the classification report first, then the confusion matrix,
# both comparing the true test labels with the KNN predictions.
for summarize in (classification_report, confusion_matrix):
    print(summarize(y_test, predic))

In [None]:
# Two-row frame: KNN predictions on top, true labels below.
resultKNN = pd.DataFrame([predic, y_test])
# Display it transposed so each test sample is one row (resultKNN itself is unchanged)
resultKNN.T

In [None]:
# Misclassification rate on the test split for every K from 1 to 39.
# NOTE: the error is measured on X_test / y_test, so a K picked from this
# curve has been tuned against the test data.
error_rate = []

# Will take some time
for k in range(1, 40):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    pred_k = knn.predict(X_test)
    # Fraction of test samples whose prediction differs from the true label
    mismatches = pred_k != y_test
    error_rate.append(np.mean(mismatches))

In [None]:
# Plot the error rate collected for each K as a dashed line with red markers.
k_values = range(1, 40)
plt.figure(figsize=(10, 6))
plt.plot(
    k_values,
    error_rate,
    color='blue',
    linestyle='dashed',
    marker='o',
    markerfacecolor='red',
    markersize=10,
)
plt.title('Error Rate vs. K Value')
plt.xlabel('K')
plt.ylabel('Error Rate')

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Refit KNN with K = 24 and evaluate it on the test split.
knn = KNeighborsClassifier(n_neighbors=24)
knn.fit(X_train, y_train)
predict = knn.predict(X_test)

# Classification report, then confusion matrix, for the K = 24 predictions
print(classification_report(y_test, predict))
print(confusion_matrix(y_test, predict))

# Accuracy is kept in knn_acc_score for the model comparison table
knn_acc_score = accuracy_score(y_test, predict)
print("Accuracy of KNN model:",knn_acc_score*100,'\n')


# Naive Bayes

In [None]:
from sklearn.metrics import confusion_matrix,accuracy_score,roc_curve,classification_report
from sklearn.naive_bayes import GaussianNB

# Gaussian Naive Bayes: fit on the training split, predict the test split.
m2 = 'Naive Bayes'
nb = GaussianNB()
nb.fit(X_train, y_train)
nbpred = nb.predict(X_test)

# Metrics comparing the true test labels with the predictions
nb_acc_score = accuracy_score(y_test, nbpred)
nb_conf_matrix = confusion_matrix(y_test, nbpred)

# Output order: raw predictions, confusion matrix, accuracy (%), full report
print(nbpred)
print("confussion matrix")
print(nb_conf_matrix)
print("\n")
print("Accuracy of Naive Bayes model:",nb_acc_score*100,'\n')
print(classification_report(y_test,nbpred))

# `cm` is rebound to a freshly computed confusion matrix for this model
cm = confusion_matrix(y_test, nbpred)

In [None]:
# Two-row frame: Naive Bayes predictions on top, true labels below.
resultNB = pd.DataFrame([nbpred, y_test])
# Display it transposed so each test sample is one row (resultNB itself is unchanged)
resultNB.T

# Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest of 20 trees, each limited to depth 5, with a fixed seed.
m3 = 'Random Forest Classfier'
rf = RandomForestClassifier(max_depth=5, n_estimators=20, random_state=12)
rf.fit(X_train, y_train)
rf_predicted = rf.predict(X_test)

# Metrics comparing the true test labels with the predictions
rf_acc_score = accuracy_score(y_test, rf_predicted)
rf_conf_matrix = confusion_matrix(y_test, rf_predicted)

# Output order: raw predictions, confusion matrix, accuracy (%), full report
print(rf_predicted)
print("confussion matrix")
print(rf_conf_matrix)
print("\n")
print("Accuracy of Random Forest:",rf_acc_score*100,'\n')
print(classification_report(y_test,rf_predicted))

In [None]:
# Two-row frame: random-forest predictions on top, true labels below.
resultRF = pd.DataFrame([rf_predicted, y_test])
# Display it transposed so each test sample is one row (resultRF itself is unchanged)
resultRF.T

# Decision Tree

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Decision tree split on entropy, limited to depth 6, with a fixed seed.
m6 = 'DecisionTreeClassifier'
dt = DecisionTreeClassifier(max_depth=6, criterion='entropy', random_state=0)
dt.fit(X_train, y_train)
dt_predicted = dt.predict(X_test)

# Metrics comparing the true test labels with the predictions
dt_acc_score = accuracy_score(y_test, dt_predicted)
dt_conf_matrix = confusion_matrix(y_test, dt_predicted)

# Output order: raw predictions, confusion matrix, accuracy (%), full report
print(dt_predicted)
print("confussion matrix")
print(dt_conf_matrix)
print("\n")
print("Accuracy of DecisionTreeClassifier:",dt_acc_score*100,'\n')
print(classification_report(y_test,dt_predicted))

In [None]:
# Two-row frame: decision-tree predictions on top, true labels below.
resultDT = pd.DataFrame([dt_predicted, y_test])
# Display it transposed so each test sample is one row (resultDT itself is unchanged)
resultDT.T

# SVC

In [None]:
from sklearn.svm import SVC

# Support vector classifier with an RBF kernel and C = 2.
m7 = 'Support Vector Classifier'
svc = SVC(C=2, kernel='rbf')
svc.fit(X_train, y_train)
svc_predicted = svc.predict(X_test)

# Metrics comparing the true test labels with the predictions
svc_acc_score = accuracy_score(y_test, svc_predicted)
svc_conf_matrix = confusion_matrix(y_test, svc_predicted)

# Output order: raw predictions, confusion matrix, accuracy (%), full report
print(svc_predicted)
print("confussion matrix")
print(svc_conf_matrix)
print("\n")
print("Accuracy of Support Vector Classifier:",svc_acc_score*100,'\n')
print(classification_report(y_test,svc_predicted))

In [None]:
# Two-row frame: SVC predictions on top, true labels below.
resultSVC = pd.DataFrame([svc_predicted, y_test])
# Display it transposed so each test sample is one row (resultSVC itself is unchanged)
resultSVC.T

# Model Evaluation

In [None]:
# Collect each model's test accuracy under its display name; dict insertion
# order fixes the row order of the comparison table.
accuracy_by_model = {
    'Logistic Regression': LR_acc_score,
    'Naive Bayes': nb_acc_score,
    'Random Forest': rf_acc_score,
    'K-Nearest Neighbour': knn_acc_score,
    'Decision Tree': dt_acc_score,
    'Support Vector Machine': svc_acc_score,
}
# Accuracies are stored as percentages
model_ev = pd.DataFrame({
    'Model': list(accuracy_by_model),
    'Accuracy': [score * 100 for score in accuracy_by_model.values()],
})
model_ev

In [None]:
# Bar chart comparing the accuracy of every model in model_ev.
colors = ['red','green','blue','silver','yellow','orange']
plt.figure(figsize=(12,5))
plt.title("barplot Represent Accuracy of different models")
# plt.bar puts the model names on the x axis and the accuracies on the y axis,
# so the axis labels must follow that orientation.
plt.xlabel("Algorithms")
plt.ylabel("Accuracy %")
plt.bar(model_ev['Model'],model_ev['Accuracy'],color = colors)
plt.show()

Sources : https://www.kaggle.com/nareshbhat/heart-attack-prediction-using-different-ml-models