In [2]:
import pandas as pd
import numpy as np

In [4]:
df = pd.read_csv('heart.csv')
df.head()

Unnamed: 0,age,sex,cp,trtbps,chol,fbs,restecg,thalachh,exng,oldpeak,slp,caa,thall,output
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [7]:
x = df.drop(['output'], axis=1)
y = df.output
y.head()

0    1
1    1
2    1
3    1
4    1
Name: output, dtype: int64

In [8]:
from sklearn.tree          import DecisionTreeClassifier
from sklearn.ensemble      import RandomForestClassifier
from sklearn.ensemble      import ExtraTreesClassifier
from sklearn.ensemble      import AdaBoostClassifier
from sklearn.ensemble      import GradientBoostingClassifier
from sklearn.experimental  import enable_hist_gradient_boosting # Necesary for HistGradientBoostingClassifier
from sklearn.ensemble      import HistGradientBoostingClassifier
from xgboost               import XGBClassifier
from lightgbm              import LGBMClassifier
from catboost              import CatBoostClassifier

In [9]:
tree_classifiers = {
  "Decision Tree": DecisionTreeClassifier(),
  "Extra Trees": ExtraTreesClassifier(n_estimators=100),
  "Random Forest": RandomForestClassifier(n_estimators=100),
  "AdaBoost": AdaBoostClassifier(n_estimators=100),
  "Skl GBM": GradientBoostingClassifier(n_estimators=100),
  "Skl HistGBM": HistGradientBoostingClassifier(max_iter=100),
  "XGBoost": XGBClassifier(n_estimators=100),
  "LightGBM": LGBMClassifier(n_estimators=100),
  "CatBoost":  CatBoostClassifier(n_estimators=100),
}

In [11]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, balanced_accuracy_score
import time

In [12]:
x_train, x_val, y_train, y_val = train_test_split(x,y, test_size=0.2, random_state=0, stratify=y)

results = pd.DataFrame({'Model': [], 'Accuracy': [], 'Bal Acc.': [], 'Time': []})

for model_name, model in tree_classifiers.items():
    start_time = time.time()
    
    # FOR EVERY PIPELINE (PREPRO + MODEL) -> TRAIN WITH TRAIN DATA (x_train)
    model.fit(x_train, y_train)
    
    # GET PREDICTIONS USING x_val
    pred = model.predict(x_val)

    total_time = time.time() - start_time

    results = results.append({"Model":    model_name,
                              "Accuracy": accuracy_score(y_val, pred)*100,
                              "Bal Acc.": balanced_accuracy_score(y_val, pred)*100,
                              "Time":     total_time},
                              ignore_index=True)
                              
                              
results_ord = results.sort_values(by=['Accuracy'], ascending=False, ignore_index=True)
results_ord.index += 1 
results_ord.style.bar(subset=['Accuracy', 'Bal Acc.'], vmin=0, vmax=100, color='#5fba7d')



Learning rate set to 0.046433
0:	learn: 0.6790821	total: 620ms	remaining: 1m 1s
1:	learn: 0.6657594	total: 621ms	remaining: 30.4s
2:	learn: 0.6543576	total: 622ms	remaining: 20.1s
3:	learn: 0.6444219	total: 623ms	remaining: 15s
4:	learn: 0.6329162	total: 624ms	remaining: 11.9s
5:	learn: 0.6221862	total: 625ms	remaining: 9.79s
6:	learn: 0.6107582	total: 626ms	remaining: 8.32s
7:	learn: 0.6009203	total: 627ms	remaining: 7.21s
8:	learn: 0.5915528	total: 628ms	remaining: 6.35s
9:	learn: 0.5844890	total: 629ms	remaining: 5.66s
10:	learn: 0.5774758	total: 630ms	remaining: 5.1s
11:	learn: 0.5699803	total: 632ms	remaining: 4.63s
12:	learn: 0.5634174	total: 633ms	remaining: 4.24s
13:	learn: 0.5535252	total: 634ms	remaining: 3.9s
14:	learn: 0.5457229	total: 635ms	remaining: 3.6s
15:	learn: 0.5388175	total: 636ms	remaining: 3.34s
16:	learn: 0.5319404	total: 637ms	remaining: 3.11s
17:	learn: 0.5259541	total: 638ms	remaining: 2.91s
18:	learn: 0.5191738	total: 639ms	remaining: 2.72s
19:	learn: 0.511

Unnamed: 0,Model,Accuracy,Bal Acc.,Time
1,CatBoost,88.52459,88.041126,4.280207
2,XGBoost,86.885246,86.525974,0.364027
3,Extra Trees,85.245902,85.010823,0.141412
4,Skl GBM,85.245902,85.010823,0.177526
5,Decision Tree,81.967213,81.709957,0.223002
6,Random Forest,81.967213,81.709957,0.171542
7,Skl HistGBM,80.327869,79.924242,0.511632
8,LightGBM,80.327869,79.924242,0.878973
9,AdaBoost,78.688525,78.138528,0.291932
