## Heart Attack Analysis & Prediction Dataset

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the heart-attack CSV from the Kaggle input directory into `df`
# and preview its first rows.
df= pd.read_csv('../input/heart-attack-analysis-prediction-dataset/heart.csv')
df.head()

In [None]:
# (number of rows, number of columns) of the loaded frame.
df.shape

In [None]:
# Print pandas' per-column summary of the frame (dtypes and non-null counts).
df.info()

In [None]:
# Per-column summary: the distinct values, then the relative frequency of
# each value, followed by a separator line.
separator = '-' * 100
for column in df.columns:
    distinct = df[column].unique()
    print(f'Unique Values of {column.upper()} are {distinct}\n')
    shares = df[column].value_counts(normalize=True)
    print(shares)
    print(separator)

In [None]:
# One box plot per listed feature, grouped by the `output` column,
# laid out on a 3x2 grid.
features = ['age', 'trtbps', 'chol', 'thalachh', 'oldpeak']
plt.figure(figsize=(15, 20))

for slot, feature in enumerate(features, start=1):
    plt.subplot(3, 2, slot)
    sns.boxplot(data=df, x='output', y=feature)
    plt.title(f'Boxplot of {feature.upper()}', fontsize=16)

In [None]:
# One histogram (with KDE overlay) per listed feature, coloured by the
# `output` column, laid out on a 3x2 grid.
hist_features = ['age', 'trtbps', 'chol', 'thalachh', 'oldpeak']
grid_slots = range(1, len(hist_features) + 1)

plt.figure(figsize=(15, 20))
for slot, feature in zip(grid_slots, hist_features):
    plt.subplot(3, 2, slot)
    sns.histplot(data=df, x=feature, hue='output', kde=True)
    plt.title(f'Histogram of {feature.upper()}', fontsize=16)

In [None]:
# Pairwise regression plots between the listed features, coloured by
# `output`; the diagonal panels get no special plot (diag_kind=None).
pair_features = ['age', 'trtbps', 'chol', 'thalachh', 'oldpeak']
sns.pairplot(
    df,
    x_vars=pair_features,
    y_vars=pair_features,
    hue='output',
    kind='reg',
    diag_kind=None,
)

In [None]:
# Categorical overview: one pie chart per categorical column on a 4x2 grid.
#
# Wedges are drawn in value_counts() order (most frequent first), which is
# not necessarily the order of the category codes. Legends are therefore
# built from a code -> name lookup that follows the wedge order, instead of a
# fixed list that only lines up when frequency order happens to equal code
# order.


def _draw_pie(position, column, title, legend_names=None, last_explode=0.05):
    """Draw a pie chart of ``df[column]`` in slot ``position`` of the 4x2 grid.

    position     -- 1-based subplot index.
    column       -- name of the column in ``df`` to summarise.
    title        -- title shown above the chart.
    legend_names -- optional dict mapping a category code to its legend
                    text; codes missing from the dict fall back to str(code).
                    No legend is drawn when this is None.
    last_explode -- offset of the last (least frequent) wedge; every other
                    wedge uses 0.05.
    """
    counts = df[column].value_counts()
    # Size the explode list from the data so the call does not fail when a
    # column has more or fewer categories than expected.
    explode = [0.05] * len(counts)
    if explode:
        explode[-1] = last_explode

    plt.subplot(4, 2, position)
    plt.pie(counts, explode=explode, labels=counts.index,
            autopct='%.2f%%', startangle=90, shadow=True)
    plt.title(title, fontsize=16)
    if legend_names is not None:
        # Same order as the wedges, so each handle gets its own category name.
        plt.legend([legend_names.get(code, str(code)) for code in counts.index])


plt.figure(figsize=(15, 20))

# NOTE(review): the code -> name mappings below assume the coding in the
# dataset's published data dictionary (codes numbered from 0 in the order the
# original legend lists were written) -- confirm against the data source.
_draw_pie(1, 'output', 'Output',
          {1: 'More Chance of Heart Attack', 0: 'Less Chance of Heart Attack'})

_draw_pie(2, 'sex', 'SEX')

_draw_pie(3, 'cp', 'Chest Pain Type',
          {0: 'Typical Angina', 1: 'Atypical Angina',
           2: ' Non-Anginal Pain', 3: 'Asymptomatic'})

_draw_pie(4, 'fbs', 'Fasting Blood Sugar',
          {0: 'fbs < 120mg/dl', 1: 'fbs > 120mg/dl'})

_draw_pie(5, 'restecg', 'Resting Electrocardiographic Results',
          {0: 'Normal', 1: ' Having ST-T Wave Abnormality',
           2: ' Probable or Definite Left Ventricular Hypertrophy'})

_draw_pie(6, 'exng', 'Exercise Induced Angina', {0: 'No', 1: 'Yes'})

_draw_pie(7, 'caa', 'Number of Major Vessels', last_explode=0.2)

# `thall` is a categorical column distinct from `thalachh`; the previous title
# ('Maximum Heart Rate Achieved') described the latter.
# NOTE(review): title follows the dataset description of `thall` -- confirm.
_draw_pie(8, 'thall', 'Thallium Stress Test Result', last_explode=0.2)

plt.show()

In [None]:
# Annotated heat map of the pairwise correlations between all columns.
plt.figure(figsize=(15, 7))

correlations = df.corr()
sns.heatmap(correlations, linewidths=2, annot=True)
plt.title('Correlation Matrix', fontsize=16)

plt.show()

In [None]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Seed NumPy's global RNG, then separate the target column from the features
# and make a stratified 50/50 train/test split.
seed = 123
np.random.seed(seed)

y = df['output']
x = df.drop('output', axis=1)

x_train, x_test, y_train, y_test = train_test_split(
    x, y, stratify=y, test_size=0.5
)

In [None]:
# Candidate classifiers, each wrapped in a pipeline that standardises the
# features first. Every entry is (display name, pipeline); the classifier step
# inside the pipeline carries the same name.
classifiers = [
    ('AdaBoost', AdaBoostClassifier()),
    ('RandomForest', RandomForestClassifier()),
    ('SVC', SVC()),
    ('Logistic', LogisticRegression()),
    ('KNeighbors', KNeighborsClassifier()),
    ('DecisionTree', DecisionTreeClassifier()),
]

pipelines = [
    (label, Pipeline([('Scaler', StandardScaler()), (label, classifier)]))
    for label, classifier in classifiers
]
pipelines

In [None]:
# Compare every pipeline with 10-fold cross-validation on the training split.
# Seed the global NumPy RNG for estimators that are built without an explicit
# random_state.
np.random.seed(seed)

# One splitter with a fixed random_state, shared by all models. Without it
# each model was scored on a different random partition of the training
# data, so differences between models were mixed with fold-to-fold variance.
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)

results = []  # one array of per-fold accuracies (in percent) per model
names = []    # model names, in the same order as `results`

for name, model in pipelines:
    cv_results = cross_val_score(estimator=model, X=x_train, y=y_train,
                                 cv=kfold, scoring='accuracy') * 100
    mean = cv_results.mean()
    std = cv_results.std()
    print('{:12} : Average Accuracy {:.2f}% - Std {:.2f}%'.format(name, mean, std))
    results.append(cv_results)
    names.append(name)

In [None]:
# Box plot of the cross-validation accuracies, one box per model.
plt.figure(figsize=(10, 5))
plt.boxplot(x=results, labels=names)
plt.title('Models Comparison', fontsize=16)
plt.show()

In [None]:
# Fit the final SVC on the standardised training split and predict the test split.
np.random.seed(seed)

scaler = StandardScaler()
# Learn the scaling statistics from the training data only ...
x_train_scaler = scaler.fit_transform(x_train)
# ... and apply those same statistics to the test data. Re-fitting on the test
# set (fit_transform) would scale it with different statistics than the ones
# the model was trained on and would leak test-set information into
# preprocessing.
x_test_scaler = scaler.transform(x_test)

model = SVC()
model.fit(x_train_scaler, y_train)
y_pred = model.predict(x_test_scaler)

In [None]:
# Text report for the test-set predictions, followed by the overall accuracy
# as a percentage rounded to two decimals.
report = classification_report(y_test, y_pred)
print('Classification Report\n', report)

accuracy_pct = round(accuracy_score(y_test, y_pred) * 100, 2)
print(f'Accuracy: {accuracy_pct}%\n')

In [None]:
# Two confusion matrices side by side: raw counts on the left, and on the
# right the same matrix normalised over the true labels.
class_names = ['Less Chance', 'More Chance']
raw_counts = confusion_matrix(y_test, y_pred)
row_normalised = confusion_matrix(y_test, y_pred, normalize='true')

plt.figure(figsize=(15, 7))

plt.subplot(1, 2, 1)
sns.heatmap(raw_counts, annot=True, fmt='d',
            xticklabels=class_names, yticklabels=class_names)
plt.title('Confusion Matrix - Normal')

plt.subplot(1, 2, 2)
sns.heatmap(row_normalised, annot=True,
            xticklabels=class_names, yticklabels=class_names)
plt.title('Confusion Matrix - Percent')