### About Dataset

Age : Age of the patient

Sex : Sex of the patient

cp : Chest pain type

Value 0: asymptomatic
Value 1: typical angina
Value 2: atypical angina
Value 3: non-anginal pain
trtbps : resting blood pressure (in mm Hg)

chol : cholesterol in mg/dl fetched via BMI sensor

fbs : (fasting blood sugar > 120 mg/dl) (1 = true; 0 = false)

rest_ecg : resting electrocardiographic results

Value 0: normal
Value 1: having ST-T wave abnormality (T wave inversions and/or ST elevation or depression of > 0.05 mV)
Value 2: showing probable or definite left ventricular hypertrophy by Estes' criteria
thalach : maximum heart rate achieved

exang: exercise induced angina (1 = yes; 0 = no)

oldpeak: ST depression induced by exercise relative to rest

slp: the slope of the peak exercise ST segment (1 = upsloping; 2 = flat; 3 = downsloping)

ca: number of major vessels (0-3)

thal : 3 = normal; 6 = fixed defect; 7 = reversible defect

num (the `output` column in the data) : 0 = low chances; 1 = high chances

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from collections import Counter
import plotly.express as px
from sklearn.preprocessing import StandardScaler,Normalizer
from sklearn.metrics import confusion_matrix,accuracy_score, mean_squared_error, log_loss
from imblearn.combine import SMOTETomek
from sklearn.model_selection import RandomizedSearchCV

In [None]:
data = pd.read_csv('../input/heart-attack-analysis-prediction-dataset/heart.csv')

In [None]:
data.head()

In [None]:
data.info()

In [None]:
df = data.copy()

In [None]:
df.head()

In [None]:
data.isnull().sum()

In [None]:
data.dtypes

In [None]:
df['sex'] = df['sex'].map({1:'Male',0:'Female'})

In [None]:
# Age distribution with a KDE overlay. `distplot` is deprecated in seaborn;
# `histplot` on a density scale draws the equivalent figure.
sns.histplot(df['age'], kde=True, stat='density')

In [None]:
# Number of patients per age, split by outcome, one panel per sex.
# `orient` expects an orientation such as 'v' or 'h', not a boolean; a count
# plot given only `x` already draws vertical bars, so the argument is dropped.
sns.catplot(x='age', hue='output', col='sex', data=df, kind='count')

In [None]:
df.head()

In [None]:
df['output'] = df['output'].map({0:'less chances',1:'More chances'})

In [None]:
# Number of patients per sex, split by outcome.
# `orient` expects an orientation such as 'v' or 'h', not a boolean; a count
# plot given only `x` already draws vertical bars, so the argument is dropped.
sns.catplot(x='sex', hue='output', data=df, kind='count')

In [None]:
sns.catplot(x='sex',y = 'age', hue='output',data=df,kind='bar')

In [None]:
df.groupby(['output'])['output'].count().plot.bar()

In [None]:
# Outcome frequencies as a two-column frame, drawn as a pie chart.
decision = df['output'].value_counts().reset_index()
decision.columns = ['Decision', 'Count']
px.pie(
    decision,
    names='Decision',
    values='Count',
    color_discrete_sequence=px.colors.sequential.Emrld_r,
)

In [None]:
sns.pairplot(data=df,hue='output')

In [None]:
data.head()

In [None]:
X = data.iloc[:,:-1]

In [None]:
y=data.iloc[:,-1]

In [None]:
from imblearn.combine import SMOTETomek
imb = SMOTETomek(random_state=42)
X,y = imb.fit_resample(X,y)

In [None]:
Counter(y)

In [None]:
# Rank the features by impurity-based importance from an extra-trees ensemble.
# Seeded like the resampler and the splits so the ranking is reproducible
# from run to run.
IMP = ExtraTreesClassifier(random_state=42)
IMP.fit(X, y)
ranked_feat = pd.Series(IMP.feature_importances_, index=X.columns)
ranked_feat.nlargest(13).plot(kind='barh')

In [None]:
stand = Normalizer()

In [None]:
x = pd.DataFrame(stand.fit_transform(X),columns=X.columns)

In [None]:
x.head()

In [None]:
from sklearn.feature_selection import mutual_info_classif
# Mutual information between each normalised feature and the target.
# The estimator adds random noise to continuous features, so it is seeded
# to keep the scores stable between runs.
mutual_info = mutual_info_classif(x, y, random_state=42)
mutual_data = pd.Series(mutual_info, index=x.columns)
mutual_data.sort_values(ascending=True)

In [None]:
print(x.shape)
print(y.shape)

In [None]:
XX_train,X_test,yy_train,y_test = train_test_split(x,y,random_state=42,test_size=42)
X_train,X_cv,y_train,y_cv = train_test_split(XX_train,yy_train,test_size=42,random_state=42)

In [None]:
# Randomised hyper-parameter search for the random forest on the tuning split.
# Scored with accuracy (a classification metric) instead of negative MSE, and
# seeded so the sampled candidates and the fitted forests are reproducible.
RF = RandomForestClassifier(random_state=42)
RF_param = {'n_estimators': range(1, 500, 10), 'max_depth': range(1, 50, 2)}
RF_search = RandomizedSearchCV(
    RF,
    RF_param,
    scoring='accuracy',
    cv=10,
    n_jobs=-1,
    random_state=42,
)
RF_search.fit(X_cv, y_cv)

In [None]:
# Best parameter combination, its cross-validated score and the best estimator.
for found in (RF_search.best_params_, RF_search.best_score_, RF_search.best_estimator_):
    print(found)

In [None]:
# Train the final forest with the parameters the search actually selected,
# rather than values hard-coded from an earlier (unseeded) run.
RF_result = RandomForestClassifier(**RF_search.best_params_, random_state=42)
RF_result.fit(X_train, y_train)
pred1 = RF_result.predict(X_test)

In [None]:
# Test-set accuracy of the random forest; kept for the final comparison table.
ACC1 = accuracy_score(y_true=y_test, y_pred=pred1)
print(ACC1)

In [None]:
# Confusion matrix of the random-forest predictions on the test split.
labels = [0, 1]
C = confusion_matrix(y_test, pred1)
# No vmin/vmax: pinning the colour scale to 1..2 saturates every larger count.
# Cell borders use heatmap's own `linewidths`/`linecolor` parameters, and
# fmt='d' prints the counts as plain integers.
sns.heatmap(C, annot=True, fmt='d', xticklabels=labels, yticklabels=labels,
            linewidths=3, linecolor='red')

In [None]:
DT = DecisionTreeClassifier()

In [None]:
param_d = {'criterion':['gini','entropy'],'max_depth':range(1,10)}
Grida=GridSearchCV(DT,param_d,cv=10,n_jobs=-1)
Grida.fit(X_cv,y_cv)

In [None]:
print(Grida.best_params_)
print(Grida.best_score_)
print(Grida.best_estimator_)

In [None]:
# Train the final tree with the parameters the grid search selected instead of
# values hard-coded from an earlier run; seeded for reproducible splits.
DTC = DecisionTreeClassifier(**Grida.best_params_, random_state=42)
DTC.fit(X_train, y_train)
pred2 = DTC.predict(X_test)
# accuracy_score takes (y_true, y_pred); compute it once and reuse the value.
ACC2 = accuracy_score(y_test, pred2)
print(ACC2)

In [None]:
# Confusion matrix of the decision-tree predictions on the test split.
labels = [0, 1]
C = confusion_matrix(y_true=y_test, y_pred=pred2)
sns.heatmap(
    data=C,
    annot=True,
    xticklabels=labels,
    yticklabels=labels,
)

In [None]:
from sklearn.neighbors import KNeighborsClassifier
# Grid search over the neighbourhood size, 10-fold CV on the tuning split.
# Scored with accuracy (a classification metric) instead of negative MSE.
KN = KNeighborsClassifier()
parama = {'n_neighbors': range(1, 30)}
Grd = GridSearchCV(KN, parama, cv=10, scoring='accuracy')
Grd.fit(X_cv, y_cv)

In [None]:
# Best neighbourhood size, the corresponding estimator and its cross-validated score.
for found in (Grd.best_params_, Grd.best_estimator_, Grd.best_score_):
    print(found)

In [None]:
# Train the final k-NN model with the neighbourhood size the grid search
# selected instead of a value hard-coded from an earlier run.
KNN = KNeighborsClassifier(**Grd.best_params_)
KNN.fit(X_train, y_train)
pred3 = KNN.predict(X_test)
# accuracy_score takes (y_true, y_pred); compute it once and reuse the value.
ACC3 = accuracy_score(y_test, pred3)
print(ACC3)

In [None]:
# Confusion matrix of the k-NN predictions on the test split.
labels = [0, 1]
C = confusion_matrix(y_true=y_test, y_pred=pred3)
sns.heatmap(
    data=C,
    annot=True,
    xticklabels=labels,
    yticklabels=labels,
)

In [None]:
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier

In [None]:
# Logistic regression with default settings, trained on the training split.
LR = LogisticRegression()
LR.fit(X_train, y_train)
pred4 = LR.predict(X_test)
# accuracy_score takes (y_true, y_pred), the order the random-forest cell
# uses; compute it once and reuse the value.
ACC4 = accuracy_score(y_test, pred4)
print(ACC4)

In [None]:
# Confusion matrix of the logistic-regression predictions on the test split.
labels = [0, 1]
C = confusion_matrix(y_true=y_test, y_pred=pred4)
sns.heatmap(
    data=C,
    annot=True,
    xticklabels=labels,
    yticklabels=labels,
)

In [None]:
# XGBoost classifier with default settings, trained on the training split.
XGB = XGBClassifier()
XGB.fit(X_train, y_train)
pred5 = XGB.predict(X_test)
# accuracy_score takes (y_true, y_pred), the order the random-forest cell
# uses; compute it once and reuse the value.
ACC5 = accuracy_score(y_test, pred5)
print(ACC5)

In [None]:
# Confusion matrix of the XGBoost predictions on the test split.
labels = [0, 1]
C = confusion_matrix(y_true=y_test, y_pred=pred5)
sns.heatmap(
    data=C,
    annot=True,
    xticklabels=labels,
    yticklabels=labels,
)

In [None]:
# One row per model with its test-set accuracy, for the comparison plot.
algo_names = ['RandomForest', 'DecisionTree', 'KNN', 'Logistic-Regresion', 'XGBoost']
algo_scores = [ACC1, ACC2, ACC3, ACC4, ACC5]
Final_Result = pd.DataFrame({'Algo': algo_names, 'Accuracy': algo_scores})

In [None]:
# Bar chart comparing the test accuracy of every model.
# 'verticle' was a misspelling that only worked because seaborn checks the
# first letter; 'v' is the documented value for vertical bars.
sns.catplot(x='Algo', y='Accuracy', data=Final_Result, kind='bar', orient='v')