In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [None]:
from cycler import cycler
import matplotlib as mpl

# Colour definitions as 0-255 RGB triples.
# NOTE(review): the values appear to follow Apple's iOS system colour set — confirm.
raw_light_palette = [
    (0, 122, 255), # Blue
    (255, 149, 0), # Orange
    (52, 199, 89), # Green
    (255, 59, 48), # Red
    (175, 82, 222),# Purple
    (255, 45, 85), # Pink
    (88, 86, 214), # Indigo
    (90, 200, 250),# Teal
    (255, 204, 0)  # Yellow
]

raw_dark_palette = [
    (10, 132, 255), # Blue
    (255, 159, 10), # Orange
    (48, 209, 88),  # Green
    (255, 69, 58),  # Red
    (191, 90, 242), # Purple
    (94, 92, 230),  # Indigo
    (255, 55, 95),  # Pink
    (100, 210, 255),# Teal
    (255, 214, 10)  # Yellow
]

raw_gray_light_palette = [
    (142, 142, 147),# Gray
    (174, 174, 178),# Gray (2)
    (199, 199, 204),# Gray (3)
    (209, 209, 214),# Gray (4)
    (229, 229, 234),# Gray (5)
    (242, 242, 247),# Gray (6)
]

raw_gray_dark_palette = [
    (142, 142, 147),# Gray
    (99, 99, 102),  # Gray (2)
    (72, 72, 74),   # Gray (3)
    (58, 58, 60),   # Gray (4)
    (44, 44, 46),   # Gray (5)
    (28, 28, 30),   # Gray (6) -- was (28, 28, 39), a typo that tinted the darkest gray blue
]

# Matplotlib expects RGB components in the 0-1 range, so rescale every palette.
light_palette = np.array(raw_light_palette) / 255
dark_palette = np.array(raw_dark_palette) / 255
gray_light_palette = np.array(raw_gray_light_palette) / 255
gray_dark_palette = np.array(raw_gray_dark_palette) / 255

# Dark theme: line colours cycle through the dark palette and every
# figure/axes surface shares the second-darkest gray as its background.
mpl.rcParams['axes.prop_cycle'] = cycler('color', dark_palette)
for background_key in ('figure.facecolor', 'figure.edgecolor', 'axes.facecolor'):
    mpl.rcParams[background_key] = gray_dark_palette[-2]

# Near-white foreground for text, axis labels, spines and ticks.
white_color = gray_light_palette[-2]
for foreground_key in ('text.color', 'axes.labelcolor', 'axes.edgecolor',
                       'xtick.color', 'ytick.color'):
    mpl.rcParams[foreground_key] = white_color

# High-resolution inline figures.
mpl.rcParams['figure.dpi'] = 200

# Drop the top/right spines for a cleaner look.
mpl.rcParams['axes.spines.top'] = False
mpl.rcParams['axes.spines.right'] = False


In [None]:
# Load the heart-attack dataset from the Kaggle input directory.
data = pd.read_csv(
    r'../input/heart-attack-analysis-prediction-dataset/heart.csv'
)

In [None]:
# Preview the first rows (pandas shows five by default) to sanity-check the load.
data.head()

About this dataset
* Age : Age of the patient

* Sex : Sex of the patient

* exang: exercise induced angina (1 = yes; 0 = no)

* ca: number of major vessels (0-3)

* cp : Chest pain type
      * Value 1: typical angina
      * Value 2: atypical angina
      * Value 3: non-anginal pain
      * Value 4: asymptomatic
* trtbps : resting blood pressure (in mm Hg)

* chol : cholesterol in mg/dl fetched via BMI sensor

* fbs : (fasting blood sugar > 120 mg/dl) (1 = true; 0 = false)

* rest_ecg : resting electrocardiographic results
      * Value 0: normal
      * Value 1: having ST-T wave abnormality (T wave inversions and/or ST elevation or depression of > 0.05 mV)
      * Value 2: showing probable or definite left ventricular hypertrophy by Estes' criteria
* thalachh : maximum heart rate achieved

* output : 0 = less chance of heart attack, 1 = more chance of heart attack

In [None]:
# (number of rows, number of columns) of the loaded frame.
data.shape

In [None]:
# Storage dtype of each column.
data.dtypes

In [None]:
# Count of distinct values in each column.
data.nunique()

In [None]:
# Summary statistics for the numeric columns.
data.describe()

In [None]:
# Missing-value count per column.
data.isnull().sum()

### Data Visualization

In [None]:
# Count of patients per sex, split by the target column so the plot matches
# its title. `hue` is also what the two-colour palette is mapped onto;
# passing `palette` without `hue` is deprecated in recent seaborn releases.
sns.countplot(x='sex', hue='output', data=data,
              palette=["#8000ff", "#da8829"], alpha=.5, linewidth=0)
# Title previously began with a stray, unmatched double quote.
plt.title('Distribution of sex\naccording to target variable')
plt.show()

In [None]:
# Side-by-side density plots of age and resting blood pressure, split by the
# target column. Explicit axes replace the pyplot state machine, and a loop
# replaces the two copy-pasted panels.
fig, axes = plt.subplots(1, 2, figsize=(14, 6))
for ax, column in zip(axes, ['age', 'trtbps']):
    sns.kdeplot(x=column, hue='output', data=data, fill=True,
                palette=["#8000ff", "#da8829"], alpha=.5, linewidth=0, ax=ax)
    # Titles previously began with a stray, unmatched double quote.
    ax.set_title(f'Distribution of {column}\naccording to target variable')
plt.show()

In [None]:
# Side-by-side density plots of cholesterol and oldpeak, split by the target
# column. Explicit axes replace the pyplot state machine, and a loop replaces
# the two copy-pasted panels.
fig, axes = plt.subplots(1, 2, figsize=(14, 7))
for ax, column in zip(axes, ['chol', 'oldpeak']):
    sns.kdeplot(x=column, hue='output', data=data, fill=True,
                palette=["#8000ff", "#da8829"], alpha=.5, linewidth=0, ax=ax)
    # Titles previously began with a stray, unmatched double quote.
    ax.set_title(f'Distribution of {column}\naccording to target variable')
plt.show()

In [None]:
# Swarm plot of maximum heart rate against chest-pain type, coloured by target.
sns.catplot(
    data=data,
    kind='swarm',
    x='cp',
    y='thalachh',
    hue='output',
)
plt.show()

The higher the chest pain type and the higher the maximum heart rate, the more likely a person is to suffer a heart attack.

### Model building

In [None]:
# Separate the label column from the predictor columns.
y = data['output']
X = data.drop(columns='output')

In [None]:
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import accuracy_score

def cross_val(X, y, model, params, folds=5):
    """Run stratified k-fold cross-validation and report accuracy.

    A fresh estimator is built for every fold, fitted on the training part and
    scored on the held-out part. Per-fold accuracy is printed, followed by a
    summary (mean / min / max) over all folds.

    Parameters
    ----------
    X : feature table indexed positionally via ``.iloc``.
    y : labels, indexed positionally via ``.iloc``; also used for stratification.
    model : callable returning an estimator when called as ``model(**params)``.
        The estimator's ``fit`` must accept ``eval_set``,
        ``early_stopping_rounds`` and ``verbose`` keyword arguments.
    params : dict of keyword arguments forwarded to ``model``.
    folds : number of stratified splits (default 5).

    Returns
    -------
    The estimator fitted on the LAST fold only. It has not been trained on the
    full data set.
    """
    skf = StratifiedKFold(n_splits=folds, shuffle=True, random_state=21)
    fold_scores = []

    for fold, (train_idx, test_idx) in enumerate(skf.split(X, y)):
        print(f"Fold: {fold}")
        x_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
        x_test, y_test = X.iloc[test_idx], y.iloc[test_idx]

        alg = model(**params)
        # NOTE(review): the held-out fold is also the early-stopping eval_set,
        # so the stopping point is tuned on the same rows that are scored
        # below and the reported accuracy may be optimistic.
        alg.fit(x_train, y_train,
                eval_set=[(x_test, y_test)],
                early_stopping_rounds=100,
                verbose=400)

        pred = alg.predict(x_test)
        accuracy = accuracy_score(y_test, pred)
        fold_scores.append(accuracy)
        print(f" accuracy_score: {accuracy}")
        print("-"*50)

    # Aggregate so the run yields one headline number instead of only
    # scattered per-fold prints.
    mean_accuracy = sum(fold_scores) / len(fold_scores)
    print(f"Mean accuracy over {len(fold_scores)} folds: {mean_accuracy:.4f} "
          f"(min {min(fold_scores):.4f}, max {max(fold_scores):.4f})")

    return alg

In [None]:
# Keyword arguments passed to the CatBoost estimator constructor.
catboost_params = dict(
    iterations=1600,
    learning_rate=0.033,
    depth=6,
    bagging_temperature=0.3,
    metric_period=200,
    od_type='Iter',
    od_wait=400,
    random_seed=17,
    allow_writing_files=False,
)

In [None]:
from catboost import CatBoostClassifier
# Cross-validate CatBoost with the default 5 folds; cross_val returns the
# estimator fitted on the final fold only.
cat_model = cross_val(X, y, CatBoostClassifier, catboost_params)

#### If you like this notebook, please upvote it.
#### Thank you!