In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the heart-attack dataset from the Kaggle input directory and preview
# the first rows.
csv_path = "../input/heart-attack-analysis-prediction-dataset/heart.csv"
data = pd.read_csv(csv_path)
data.head()

In [None]:
# Display summary statistics for the columns of `data`.
data.describe()

In [None]:
# Display the dtype pandas assigned to each column.
data.dtypes

In [None]:
# Pairwise correlation between the columns, rendered as a heatmap on an
# explicitly created 15x8 figure.
corr_data = data.corr()
fig, ax = plt.subplots(figsize=(15, 8))
sns.heatmap(data=corr_data, ax=ax)

In [None]:
# Columns used by the per-feature plots: every column except "fbs".
features = data.columns.drop("fbs")

In [None]:
print("Distplot of all the features are shown below:")

# One histogram per selected column, each drawn on its own 12x5 figure.
for feature in features:
    _, hist_ax = plt.subplots(figsize=(12, 5))
    sns.histplot(data[feature], ax=hist_ax)

In [None]:
print("Countplot of all the features are shown below:")

# One count plot per selected column, each drawn on its own 12x5 figure.
for feature in features:
    _, count_ax = plt.subplots(figsize=(12, 5))
    sns.countplot(x=data[feature], ax=count_ax)

In [None]:
print("Boxplot of all the features are shown below:")

# One box plot per selected column, each drawn on its own 12x5 figure.
for feature in features:
    _, box_ax = plt.subplots(figsize=(12, 5))
    sns.boxplot(x=data[feature], ax=box_ax)

In [None]:
# Scatter-plot matrix of every pair of columns.
# sns.pairplot is a figure-level function: it creates and manages its own
# figure, so a preceding plt.figure(figsize=...) call only leaves an extra
# empty figure behind and its size is never applied to the pair plot. Use
# pairplot's own `height` / `aspect` arguments if the panel size needs tuning.
sns.pairplot(data)

The pair plot above is dense; zoom in on the rendered figure to get a better view of the individual panels.

# Model Building

## Feature Engineering

In [None]:
# Count the missing values in each column.
data.isnull().sum()

In [None]:
# Replace extreme values in every non-object column with that column's mean.
#
# The fences are built from the 5th and 95th percentiles (not the quartiles),
# widened by 1.5x the spread between them. The mean is computed once, before
# any replacement, so replaced values do not influence it.
numeric_columns = data.select_dtypes(exclude="O")

for feature in numeric_columns:
    column = data[feature]
    lower_pct = column.quantile(0.05)
    upper_pct = column.quantile(0.95)
    spread = upper_pct - lower_pct
    lower_fence = lower_pct - 1.5 * spread
    upper_fence = upper_pct + 1.5 * spread
    column_mean = column.mean()

    # Vectorised outlier test; replaces the per-value Python loop that called
    # a full-column `replace` once for every outlier it met.
    is_outlier = (column > upper_fence) | (column < lower_fence)

    # Only rewrite the column when something is actually out of range, so
    # columns without outliers are left untouched (including their dtype).
    if is_outlier.any():
        data[feature] = column.mask(is_outlier, column_mean)

In [None]:
# Add binary (0/1) indicator columns flagging low / high values of the
# "age", "trtbps", "thalachh" and "chol" columns.
#
# Each np.where call is already vectorised over the whole column, so the
# assignments run once; looping over the frame's columns would only recompute
# the same eight columns repeatedly.
data["age_min"] = np.where(data["age"] < 40, 1, 0)
data["age_max"] = np.where(data["age"] > 60, 1, 0)
data["resting_BP_min"] = np.where(data["trtbps"] < 120, 1, 0)
data["resting_BP_max"] = np.where(data["trtbps"] > 140, 1, 0)
data["heart_rate_min"] = np.where(data["thalachh"] < 120, 1, 0)
data["heart_rate_max"] = np.where(data["thalachh"] > 170, 1, 0)
data["cholesterol_min"] = np.where(data["chol"] < 200, 1, 0)
data["cholesterol_max"] = np.where(data["chol"] >= 300, 1, 0)

In [None]:
# Preview the first rows (including the newly added indicator columns)
# instead of dumping the whole frame into the cell output.
data.head()

In [None]:
# Per-column flag: True if the column contains at least one missing value.
data.isnull().any()

In [None]:
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

In [None]:
# Baseline comparison: fit three classifiers on a 70/30 hold-out split and
# report each one's accuracy on the held-out 30%.
# Seeds are fixed so a Restart & Run All reproduces the same split and scores.
model1 = XGBClassifier(random_state=42)
model2 = LogisticRegression()
model3 = RandomForestClassifier(random_state=42)

x_train, x_test, y_train, y_test = train_test_split(
    data.drop(columns=["output"]),
    data["output"],
    test_size=0.3,
    random_state=42,
)

model1.fit(x_train, y_train)
model2.fit(x_train, y_train)
model3.fit(x_train, y_train)

score1 = model1.score(x_test, y_test)
score2 = model2.score(x_test, y_test)
# Score the random forest itself; scoring model2 here would report the
# logistic-regression accuracy under the RandomForest label.
score3 = model3.score(x_test, y_test)

print("accuracy of XGBClassifier is ", score1*100)
print("accuracy of LogisticRegression is ", score2*100)
print("accuracy of RandomForestClassifier is ", score3*100)

In [None]:
# List the column names currently present in `data`.
data.columns

In [None]:
import eli5
from eli5.sklearn import PermutationImportance

# Permutation importance of model3, measured on the hold-out split, shown
# with the hold-out frame's column names as feature labels.
importance_estimator = PermutationImportance(model3, random_state=1)
perm = importance_estimator.fit(x_test, y_test)
eli5.show_weights(perm, feature_names=list(x_test.columns))

In [None]:
# Restrict `data` to a hand-picked subset of columns (plus the "output"
# target), split it 80/20, and fit a default random forest on the training part.
features = ["sex", "cp", "caa", "output", "thall", "slp", "restecg", "age_min"]
data = data.loc[:, features]

predictors = data.drop(columns=["output"])
target = data["output"]
train_x, test_x, train_y, test_y = train_test_split(predictors, target, test_size=0.2)

test_model = RandomForestClassifier()
test_model.fit(train_x, train_y)

In [None]:
from sklearn.model_selection import RandomizedSearchCV

# Search space for the random forest.
# `n_jobs` is deliberately not searched: it only controls how many workers
# are used for fitting/predicting and has no effect on the fitted model, so
# sampling it would waste some of the 5 candidate draws.
params = {
    "max_depth": [3, 4, 5, 6, 7, 8],
    "n_estimators": [50, 70, 80, 90, 100, 200, 400, 600],
    # NOTE(review): random_state is a seed rather than a real hyperparameter;
    # selecting on it mostly fits noise. Kept so best_params_ still reports it;
    # consider fixing a single seed instead.
    "random_state": [1, 2, 3, 4, 5, 6],
}

# 5 sampled candidates, each evaluated with 5-fold cross-validation.
# random_state makes the candidate sampling reproducible across re-runs.
random_search = RandomizedSearchCV(
    test_model,
    param_distributions=params,
    n_iter=5,
    cv=5,
    random_state=42,
)

random_search.fit(train_x, train_y)

In [None]:
# Show the estimator the randomized search selected.
random_search.best_estimator_

In [None]:
# Show the parameter combination the randomized search selected.
random_search.best_params_

In [None]:
# Build the final forest from whatever the randomized search actually
# selected, rather than from values copied by hand from an earlier run. This
# also carries over every tuned parameter, including n_estimators.
final_model = RandomForestClassifier(**random_search.best_params_)

final_model.fit(train_x, train_y)

# Accuracy on the 20% hold-out split that was not used for fitting or tuning.
final_score = final_model.score(test_x, test_y)

print("Accuracy of our model is ", final_score*100)