# Link to the dataset:
https://www.kaggle.com/rashikrahmanpritom/heart-attack-analysis-prediction-dataset

## Importing the libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings 
warnings.filterwarnings('ignore')

## Read the data

In [None]:
# Load the heart-attack dataset (Kaggle input directory) into a DataFrame
df = pd.read_csv(
    filepath_or_buffer='../input/heart-attack-analysis-prediction-dataset/heart.csv'
)

## Explore the data set

In [None]:
# Row count is the frame length; column count is the length of its column index
f'The data has {len(df)} Rows and {len(df.columns)} Columns'

In [None]:
# Preview the first five rows
df.head(n=5)

In [None]:
# Number of missing values in each column
df.isnull().sum()

## Explore some columns

In [None]:
# Bar chart of how many rows fall into each value of the `output` column
sns.countplot(data=df, x='output').set_title('Had heart attack')
plt.show()

In [None]:
# Bar chart of how many rows fall into each value of the `fbs` column
sns.countplot(data=df, x='fbs').set_title(
    'fasting blood sugar > 120 mg/dl (1 = true; 0 = false) column'
)
plt.show()

In [None]:
# Bar chart of how many rows fall into each value of the `sex` column
sns.countplot(data=df, x='sex').set_title('Sex column')
plt.show()

In [None]:
# Distribution of the `age` column. A histogram is used instead of a line
# plot: plotting the values against the row index only shows the order the
# rows happen to be stored in, not how the ages are distributed.
plt.hist(df['age'], bins=20, edgecolor='black')
plt.title('Age column')
plt.xlabel('age')
plt.ylabel('count')
plt.show()

In [None]:
# Chest-pain type codes and the names this notebook assigns to them
cp_names = {
    0: 'typical angina',
    1: 'atypical angina',
    2: 'non-anginal pain',
    3: 'asymptomatic',
}

# Fix the bar order explicitly so the tick labels below line up with the bars.
ax = sns.countplot(x=df['cp'], order=list(cp_names))
# Label each bar directly rather than placing free-floating text at hard-coded
# data coordinates, which depends on the bar heights and on the palette
# assigning one colour per bar.
ax.set_xticklabels([f'{code}\n{name}' for code, name in cp_names.items()])
ax.set_title('Cp (chest pain type) column')

plt.show()

## Define X and y

In [None]:
# Feature matrix: every column except the last one, as a NumPy array
X = df.iloc[:, :-1].to_numpy()
# Target vector: the last column, as a NumPy array
y = df.iloc[:, -1].to_numpy()

## Split X and y into X_train, X_test, y_train, y_test

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing.
# random_state pins the shuffle so the split, and every score computed from
# it, is reproducible between runs; stratify=y keeps the class proportions
# the same in the training and test parts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

## Scale X

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Learn each feature's min/max from the training rows only, then apply the
# same scaling to both the training and the test rows.
sc = MinMaxScaler().fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

## Model

## XGBoost Classifier

In [None]:
from sklearn.metrics import ConfusionMatrixDisplay, accuracy_score
from xgboost import XGBClassifier

# `plot_confusion_matrix` was removed from scikit-learn in 1.2; its
# replacement, `ConfusionMatrixDisplay.from_estimator`, takes the same leading
# arguments (estimator, X, y) and the same `cmap` keyword. The alias keeps any
# cell that still calls the old name working.
plot_confusion_matrix = ConfusionMatrixDisplay.from_estimator

# Fit the model on the training rows
model = XGBClassifier().fit(X_train, y_train)

# Confusion matrix on the held-out test rows
ConfusionMatrixDisplay.from_estimator(model, X_test, y_test, cmap=plt.cm.OrRd_r)
plt.title('Confusion matrix for the XGBClassifier')
plt.show()

# Accuracy score (left as the last expression so the notebook displays it)
y_pred = model.predict(X_test)
accuracy_score(y_test, y_pred)

## Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression
# Imported here so this cell does not depend on `plot_confusion_matrix`,
# which was removed from scikit-learn in 1.2.
from sklearn.metrics import ConfusionMatrixDisplay, accuracy_score

# Fit the model on the training rows
model = LogisticRegression(solver="liblinear", max_iter=120).fit(X_train, y_train)

# Confusion matrix on the held-out test rows
ConfusionMatrixDisplay.from_estimator(model, X_test, y_test, cmap=plt.cm.OrRd_r)
plt.title('Confusion matrix for the LogisticRegression')
plt.show()

# Accuracy score (left as the last expression so the notebook displays it)
y_pred = model.predict(X_test)
accuracy_score(y_test, y_pred)

## Random Forest Classifier

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Imported here so this cell does not depend on `plot_confusion_matrix`,
# which was removed from scikit-learn in 1.2.
from sklearn.metrics import ConfusionMatrixDisplay, accuracy_score

# Define and fit the classifier. random_state pins the bootstrap sampling and
# feature selection so the reported score is reproducible between runs.
model = RandomForestClassifier(
    n_estimators=200, max_depth=10, criterion='gini', random_state=42
).fit(X_train, y_train)

# Show the confusion matrix for the held-out test rows
ConfusionMatrixDisplay.from_estimator(model, X_test, y_test, cmap=plt.cm.OrRd_r)
plt.title('Confusion matrix for the Random Forest')
plt.show()

# Predict the test rows and report the accuracy
y_pred = model.predict(X_test)
print('\nRandom Forest Accuracy score: ', round(accuracy_score(y_test, y_pred),3))

In [None]:
# Imported here so this cell does not depend on `plot_confusion_matrix`,
# which was removed from scikit-learn in 1.2.
from sklearn.metrics import ConfusionMatrixDisplay, accuracy_score
from sklearn.neural_network import MLPClassifier

# Define and fit the classifier (random_state fixes the weight initialisation)
model = MLPClassifier(activation='tanh', random_state=5).fit(X_train, y_train)

# Show the confusion matrix for the held-out test rows
ConfusionMatrixDisplay.from_estimator(model, X_test, y_test, cmap=plt.cm.OrRd_r)
plt.title('Confusion matrix for the MLPClassifier')
plt.show()

# Predict the test rows and report the accuracy
y_pred = model.predict(X_test)
print('\nAccuracy score: ', round(accuracy_score(y_test, y_pred),3))