## Import

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.compose import make_column_transformer
from sklearn.metrics import accuracy_score

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Read the heart dataset CSV from the ../input directory into `df` and
# preview the first rows to confirm it loaded.
DATA_PATH = "../input/heart-attack-analysis-prediction-dataset/heart.csv"
df = pd.read_csv(DATA_PATH)
df.head()

In [None]:
# Count missing values per column (isnull is pandas' alias of isna).
df.isnull().sum()

In [None]:
# Print a summary of the frame: columns, non-null counts and dtypes.
df.info()

In [None]:
# Summary statistics, transposed so that each dataset column becomes a row.
df.describe().transpose()

In [None]:
# (number of rows, number of columns) of the dataset.
df.shape

## EDA

In [None]:
# List the column names available for the plots below.
df.columns

In [None]:
# Histogram of the `age` column.
sns.histplot(data=df, x="age");

In [None]:
# Number of rows per `sex` code; the two bars (positions 0 and 1) are
# relabelled with readable names.
sex_labels = ["female", "male"]
sns.countplot(data=df, x="sex")
plt.xticks(ticks=[0, 1], labels=sex_labels)
plt.show()

In [None]:
# Histogram of `thalachh` restricted to the rows where output == 1.
df2 = df.loc[df["output"].eq(1)]
sns.histplot(df2["thalachh"], bins=10)
plt.xlabel("MAXIMUM HEART RATE REACHED WHEN OUTPUT 1")
plt.show()

In [None]:
# Histogram of `thalachh` restricted to the rows where output == 0.
df3 = df.loc[df["output"].eq(0)]
sns.histplot(df3["thalachh"], bins=10)
plt.xlabel("MAXIMUM HEART RATE REACHED WHEN OUTPUT 0")
plt.show()

In [None]:
# Pie chart of the share of rows per `sex` code.
#
# value_counts().reset_index() names its columns ("index", "sex") on
# pandas < 2.0 but ("sex", "count") on pandas >= 2.0, which breaks the
# names=/values= lookup in px.pie. Naming both columns explicitly keeps the
# frame layout ("index" = category, "sex" = count) identical on every version.
s = (
    df["sex"]
    .value_counts()
    .rename_axis("index")
    .reset_index(name="sex")
)
px.pie(s, names="index", values="sex", title="%AGE OF MALE AND FEMALE PATIENTS:")

In [None]:
# `trtbps` values per `cp` category, coloured by `output`.
sns.stripplot(data=df, x="cp", y="trtbps", hue="output");

In [None]:
# `oldpeak` values per `fbs` category as a strip plot, coloured by `output`.
sns.catplot(data=df, kind="strip", x="fbs", y="oldpeak", hue="output");

In [None]:
# `thalachh` against `age`, one line per `output` class.
plt.figure(figsize=(10, 6))
sns.lineplot(data=df, x="age", y="thalachh", hue="output")
plt.title("EFFECT OF HEART ATTACK WITH INCREASE IN AGE AND MAXIMUM HEART RATE")
plt.show()

In [None]:
# `chol` against `age`, one line per `output` class.
plt.figure(figsize=(10, 6))
sns.lineplot(data=df, x="age", y="chol", hue="output")
plt.title("EFFECT OF HEART ATTACK WITH INCREASE IN AGE AND CHOLESTROL")
plt.show()

In [None]:
# Row count for every distinct `trtbps` value, on a wide dark-grid figure.
sns.set_style("darkgrid")
figure = plt.figure(figsize=(20, 8))
sns.countplot(data=df, x="trtbps")


In [None]:
# Bar plot of `thall` per `age` value (barplot aggregates y within each x).
sns.set_style("darkgrid")
figure = plt.figure(figsize=(15, 6))
sns.barplot(data=df, x="age", y="thall", palette="rainbow")

In [None]:
# Annotated heatmap of the pairwise correlations between all columns.
figure = plt.figure(figsize=(12, 6))

# The correlation matrix is heatmap's first positional argument (`data`).
# The original call also passed data=df, which supplies `data` twice and
# raises "TypeError: got multiple values for argument 'data'".
sns.heatmap(df.corr(), annot=True, cmap="Greys")

## Model Building

In [None]:
# Feature matrix (every column except the target) and target vector.
X = df.drop("output", axis=1).values
y = df["output"].values

# 75 % / 25 % train/test split.
# random_state pins the shuffle so the accuracies reported by the model cells
# are reproducible between runs; stratify keeps the class ratio of `output`
# the same in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.75, random_state=0, stratify=y
)


#### Decision Tree Classifier

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Depth-3 decision tree with a fixed seed; fit() returns the estimator itself,
# so construction and training are chained.
dt = DecisionTreeClassifier(random_state=1, max_depth=3).fit(X_train, y_train)
y_pred = dt.predict(X_test)

# Fraction of test rows classified correctly (shown as the cell output).
accuracy_score(y_true=y_test, y_pred=y_pred)

#### Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Forest of 400 trees; a float min_samples_leaf is interpreted by
# scikit-learn as a fraction of the training samples required in each leaf.
# random_state is fixed (as the decision-tree and XGBoost cells already do)
# so that rerunning the cell rebuilds the same forest and reports the same
# accuracy instead of a different number each time.
rfc = RandomForestClassifier(
    n_estimators=400,
    min_samples_leaf=0.12,
    random_state=1,
)
rfc.fit(X_train, y_train)
y_pred = rfc.predict(X_test)

# Fraction of test rows classified correctly (shown as the cell output).
accuracy_score(y_test, y_pred)

#### XGBoost


In [None]:
from xgboost import XGBClassifier
from sklearn.metrics import r2_score

# Hyper-parameters gathered in one mapping and unpacked into the constructor.
xgb_params = {
    "colsample_bylevel": 0.9,
    "colsample_bytree": 0.8,
    "gamma": 0.99,
    "max_depth": 5,
    "min_child_weight": 1,
    "n_estimators": 8,
    "nthread": 5,
    "random_state": 0,
}
xgb = XGBClassifier(**xgb_params)

# Train on the training split; the fitted estimator is the cell output.
xgb.fit(X_train, y_train)

In [None]:
# Accuracy (estimator.score) of the fitted XGBoost model on both splits,
# printed with two decimals.
train_accuracy = xgb.score(X_train, y_train)
test_accuracy = xgb.score(X_test, y_test)
print('Accuracy of XGBoost classifier on training set: {:.2f}'.format(train_accuracy))
print('Accuracy of XGBoost classifier on test set: {:.2f}'.format(test_accuracy))

In [None]:
# Predict the test split and report the accuracy as a percentage.
y_pred = xgb.predict(X_test)
xgb_accuracy_pct = accuracy_score(y_test, y_pred) * 100
print("Accuracy of XG Boost model is:", xgb_accuracy_pct)