In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# Print the full path of every file found beneath the Kaggle input directory.
input_file_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read the heart-disease CSV into a DataFrame and preview its first five rows
data = pd.read_csv(filepath_or_buffer='/kaggle/input/heart-disease-uci/heart.csv')
data.head(5)

In [None]:
data.shape  # (number of rows, number of columns) of the loaded DataFrame

In [None]:
# Summary statistics per column, transposed so that each feature is one row
data.describe().transpose()

In [None]:
 data.info()   # Print the DataFrame summary (columns, dtypes, non-null counts)

In [None]:
# Count the missing values in every column
data.isna().sum()

# EDA

## Univariate Analysis

In [None]:
# Plot the distribution of every column, one figure per column.
# sns.distplot is deprecated; histplot(kde=True) is its replacement and draws
# raw counts, so the 'Count' y-axis label now matches what is plotted.
for col in data.columns:
    sns.histplot(data[col], kde=True)
    plt.xlabel(col)
    plt.ylabel('Count')
    plt.show()

## Multivariate Analysis

In [None]:
# Pairwise relationships between all columns of the dataset
sns.pairplot(data=data)

In [None]:
# Annotated heatmap of the pairwise correlations between features
correlations = data.corr()
plt.figure(figsize=(12, 10))
sns.heatmap(correlations, annot=True)

In [None]:
# Separate the independent features (X) from the dependent 'target' column (y)
y = data['target']
X = data.drop(columns=['target'])

In [None]:
# Import model-selection helpers and hold out 20% of the rows as a test set
from sklearn.model_selection import train_test_split,cross_val_score,RandomizedSearchCV,GridSearchCV

split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts
X_train

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits so no test information leaks into the fit.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

## Models
- DecisionTreeClassifier
- RandomForestClassifier
- KNeighborsClassifier
- XGBClassifier
- CatBoostClassifier
- GaussianNB
- ExtraTreesClassifier
- AdaBoostClassifier
- LGBMClassifier
- Tuned XGBClassifier
- Artificial Neural Network
- Tuned LGBMClassifier


In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Running list of [model label, score in percent] pairs; later cells append to it
models = []

# Decision tree using entropy as the split criterion
tr = DecisionTreeClassifier(criterion='entropy', random_state=1).fit(X_train, y_train)
tr_pred = tr.predict(X_test)
tr_accuracy = accuracy_score(y_test, tr_pred) * 100
print('DecisionTreeClassifier: ', tr_accuracy)
models.append(['DecisionTreeClassifier', tr_accuracy])


In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest of 50 trees; record its test accuracy in percent
rf = RandomForestClassifier(n_estimators=50, random_state=42).fit(X_train, y_train)
rf_pred = rf.predict(X_test)
rf_accuracy = accuracy_score(y_test, rf_pred) * 100
models.append(['RandomForestClassifier: ', rf_accuracy])

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# k-nearest neighbours with k=6; record its test accuracy in percent
kn = KNeighborsClassifier(n_neighbors=6).fit(X_train, y_train)
kn_pred = kn.predict(X_test)
kn_accuracy = accuracy_score(y_test, kn_pred) * 100
models.append(['KNeighborsClassifier: ', kn_accuracy])

In [None]:
from xgboost import XGBClassifier

# XGBoost with library defaults; record its test accuracy in percent.
# `xgb` is reused later as the base estimator for the randomized search.
xgb = XGBClassifier()
xgb.fit(X_train, y_train)

xgb_pred = xgb.predict(X_test)
xgb_accuracy = accuracy_score(y_test, xgb_pred) * 100
models.append(['XGBClassifier: ', xgb_accuracy])

In [None]:
from catboost import CatBoostClassifier

# Small CatBoost model: 50 boosting iterations, depth-3 trees, learning rate 0.01
cb = CatBoostClassifier(iterations=50, learning_rate=0.01, depth=3)
cb.fit(X_train, y_train)

cb_pred = cb.predict(X_test)
cb_accuracy = accuracy_score(y_test, cb_pred) * 100
models.append(['CatBoostClassifier: ', cb_accuracy])

In [None]:
from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes baseline; record its test accuracy in percent
nb = GaussianNB().fit(X_train, y_train)
nb_pred = nb.predict(X_test)
nb_accuracy = accuracy_score(y_test, nb_pred) * 100
models.append(['GaussianNB: ', nb_accuracy])

In [None]:
#ExtraTreesClassifier

from sklearn.ensemble import ExtraTreesClassifier

# Extra-trees draws random split thresholds, so without a fixed seed the
# reported accuracy changes on every run; seed it like the other models.
et = ExtraTreesClassifier(random_state=42)
et.fit(X_train,y_train)
et_pred = et.predict(X_test)
models.append(['ExtraTreeClassifier: ',accuracy_score(y_test,et_pred)*100])

In [None]:
#AdaboostClassifier
from sklearn.ensemble import AdaBoostClassifier

# Fix the seed so the boosted ensemble (and its reported accuracy) is
# reproducible across "Restart & Run All", consistent with the other models.
ab = AdaBoostClassifier(random_state=42)
ab.fit(X_train,y_train)
ab_pred = ab.predict(X_test)
models.append(['AdaBoostClassifier: ',accuracy_score(y_test,ab_pred)*100])

In [None]:
from lightgbm import LGBMClassifier

# LightGBM with 40 trees and a learning rate of 0.01; record test accuracy in percent
lgbm = LGBMClassifier(learning_rate=0.01, n_estimators=40)
lgbm.fit(X_train, y_train)

lgbm_pred = lgbm.predict(X_test)
lgbm_accuracy = accuracy_score(y_test, lgbm_pred) * 100
models.append(['LGBMClassifier: ', lgbm_accuracy])

In [None]:
# ANN

import keras
from keras.models import Sequential
from keras.layers import Activation,Dropout,Dense,Flatten

import warnings

In [None]:
# Feed-forward binary classifier: two 128-unit ReLU layers, each followed by
# 40% dropout, and a single sigmoid output unit.
# The input width is taken from the training matrix instead of being
# hard-coded, so the network still builds if the feature set changes.
n_features = X_train.shape[1]

model = Sequential()
model.add(Dense(128,activation='relu',input_dim=n_features))
model.add(Dropout(0.4))
model.add(Dense(128,activation='relu'))
model.add(Dropout(0.4))
model.add(Dense(1,activation='sigmoid'))

# Binary cross-entropy matches the sigmoid output; accuracy is tracked as a metric
model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])


In [None]:
# Train for 90 epochs in batches of 50, reporting loss/accuracy on the test split after each epoch
model.fit(
    x=X_train,
    y=y_train,
    batch_size=50,
    epochs=90,
    validation_data=(X_test, y_test),
)

In [None]:
# Measure the trained network on the test split instead of hard-coding a number
# copied from an earlier run. The model was compiled with metrics=['accuracy'],
# so evaluate() returns [loss, accuracy]. Storing a float (not a string) keeps
# the final Scores column numeric.
_, ann_accuracy = model.evaluate(X_test, y_test, verbose=0)
models.append(['ANN_Accuracy: ', ann_accuracy*100])

In [None]:

# Candidate values for hyperparameter tuning of XGBClassifier
params={
    "learning_rate" :[0.05,0.10,0.15,0.20,0.25,0.30],
    "max_depth" : [3,4,5,6,7,8,10,12,15],
    "min_child_weight": [1,3,5,7],
    "gamma": [0.0,0.1,0.2,0.3,0.4,0.5],
    "colsample_bytree" :[0.3,0.4,0.5,0.7,0.9]
}

# Sample 5 random combinations and score each with 5-fold CV on ROC AUC.
# random_state pins which combinations are drawn so that the search (and the
# best estimator it reports) is reproducible across runs.
random_search = RandomizedSearchCV(xgb,param_distributions=params,n_iter=5,scoring='roc_auc',cv=5,verbose=3,n_jobs=-1,random_state=42)

random_search.fit(X,y)

random_search.best_estimator_

In [None]:
# Build the tuned model from the hyperparameters the randomized search actually
# selected, rather than from an estimator repr pasted in from an earlier run
# (which also carried deprecated arguments such as gpu_id).
classifier = XGBClassifier(**random_search.best_params_, random_state=0)

# 5-fold cross-validated score (classifier default scoring) on the full dataset.
# NOTE: the same data was used for the search, so this estimate is optimistic.
score = cross_val_score(classifier,X,y,cv=5)

models.append(['Tunned Xgboost: ', score.mean()*100])

Here we can see that two models have the highest score: CatBoost and LGBMClassifier. So we try hyperparameter tuning with LGBMClassifier.

In [None]:
# Candidate values for hyperparameter tuning of LGBMClassifier
params={
    "learning_rate" :[0.05,0.10,0.15,0.20,0.25,0.30],
    "max_depth" : [3,4,5,6,7,8,10,12,15],
    "min_child_weight": [1,3,5,7],
    "reg_alpha": [0.0,0.1,0.2,0.3,0.4,0.5],
    "reg_lambda": [0.0,0.1,0.2,0.3,0.4,0.5],
    "colsample_bytree" :[0.3,0.4,0.5,0.7,0.9]
}

lgbm_class = LGBMClassifier()
# Sample 5 random combinations and score each with 5-fold CV on ROC AUC.
# random_state pins which combinations are drawn so the search is reproducible.
random_search_lgbm = RandomizedSearchCV(lgbm_class,param_distributions=params,n_iter=5,scoring='roc_auc',cv=5,verbose=3,n_jobs=-1,random_state=42)
random_search_lgbm.fit(X,y)

In [None]:
# Display the best estimator found by the LGBM randomized search above
random_search_lgbm.best_estimator_


In [None]:
# Build the tuned model from the hyperparameters the randomized search actually
# selected, instead of values copied by hand from an earlier run's output.
lgbm_classifiers = LGBMClassifier(**random_search_lgbm.best_params_)

# 5-fold cross-validated score (classifier default scoring) on the full dataset.
# NOTE: the same data was used for the search, so this estimate is optimistic.
score = cross_val_score(lgbm_classifiers,X,y,cv=5)

In [None]:
# Record the mean cross-validated score of the tuned LGBM model, in percent
tuned_lgbm_score = score.mean() * 100
models.append(['Tuned LGBMClassifier', tuned_lgbm_score])


In [None]:
# Collect every recorded [label, score] pair into a two-column comparison table
result_columns = ['Models', 'Scores']
df = pd.DataFrame(data=models, columns=result_columns)
df

Hurray!!!
KNeighborsClassifier won with a test accuracy of 90.1639%.