# Heart attack analysis & prediction

find the patients with higher and lower chances of heart attack using various features.

0 = less chance of heart attack

1 = more chance of heart attack

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.preprocessing import StandardScaler

from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression, SGDClassifier

seed=40

# Averaging (Regression)

In [2]:
df = pd.read_csv('dataset/heart.csv')

In [3]:
df.head(3)

Unnamed: 0,age,sex,cp,trtbps,chol,fbs,restecg,thalachh,exng,oldpeak,slp,caa,thall,output
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1


In [4]:
target = df['output']

#dropping target column(output) from train data
train = df.drop('output', axis=1)

In [5]:
#normalizing the train dataset
scaled_train = StandardScaler().fit_transform(train)

In [33]:
#plt.scatter(train,target)
train.shape  , target.shape

((303, 13), (303,))

In [7]:
#splitting data into train, test datasets
x_train, x_test, y_train, y_test = train_test_split(train, target, test_size=0.2, random_state=seed)

In [42]:
#model building
model_1 = RandomForestClassifier(random_state=seed) 
model_2 = LogisticRegression(random_state=seed, max_iter=1000)
model_3 = SGDClassifier(random_state=seed)

#training
model_1.fit(x_train, y_train)
model_2.fit(x_train, y_train)
model_3.fit(x_train, y_train)

#predictions
pred_1 = model_1.predict(x_test)
pred_2 = model_2.predict(x_test)
pred_3 = model_3.predict(x_test)
pred_4 = model_4.predict(x_test)


#averaging 
pred_final = np.round((pred_1 + pred_2 + pred_3) / 3 )

#evaluation
accuracy = round(accuracy_score(pred_final, y_test)*100,2)
auc = round(roc_auc_score(y_test, pred_final), 3)

print(f'ensemble learning accuracy: {accuracy}%')
print(f'ensemble learning AUC score: {auc}')

ensemble learning accuracy: 85.25%
ensemble learning AUC score: 0.847


In [43]:
model_1_accuracy = round(accuracy_score(pred_1 ,y_test)*100,2)
print(f'model_1 accuracy:{model_1_accuracy}%')

model_2_accuracy = round(accuracy_score(pred_2, y_test)*100,2)
print(f'model_2 accuracy: {model_2_accuracy}%')

model_3_accuracy = round(accuracy_score(pred_3, y_test)*100, 2)
print(f'model_4 accuracy: {model_3_accuracy}%')

model_1 accuracy:83.61%
model_2 accuracy: 90.16%
model_4 accuracy: 52.46%


In [44]:
pred_final_1 = np.round(0.3*pred_1 + 0.6*pred_2 + 0.1*pred_3)

#evaluation
weighted_accuracy = round(accuracy_score(pred_final_1, y_test)*100, 3)
weighted_auc = round(roc_auc_score(pred_final_1, y_test), 3)

print(f'weighted accuracy: {weighted_accuracy}%')
print(f'weighted AUC score: {weighted_auc}')

weighted accuracy: 90.164%
weighted AUC score: 0.913


# Max Voting (Classifier)

In [45]:
from sklearn.ensemble import VotingClassifier
from sklearn.neighbors import KNeighborsClassifier

In [48]:
model_4 = KNeighborsClassifier()

#voting classifier
final_model = VotingClassifier(
                 estimators=[ ("rf",model_1), ("lr",model_2),("knn",model_4) ],
                 voting='hard'
               )


#training
final_model.fit(x_train, y_train)

#prediction
prediction = final_model.predict(x_test)

In [49]:
#evaluation
accuracy = round(accuracy_score(prediction, y_test)*100, 3)
roc = round(roc_auc_score(prediction, y_test),3)

print(f'accuracy: {accuracy}%')
print(f'AUC score: {roc}')

accuracy: 90.164%
AUC score: 0.913
