### `CatBoost` vs `LightGBM` vs `XGBoost` vs `RandomForest` vs `AdaBoost` vs ...

In [None]:
import pandas as pd
import numpy as np
from time import time

from sklearn.model_selection import cross_val_score, RepeatedStratifiedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn import linear_model
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier

In [None]:
# Load the dataset from the Kaggle input directory.
data = pd.read_csv('/kaggle/input/college-data/data.csv')
data.head()

# Split off the target column ('private'); every other column is a feature.
y = data['private']
X = data.drop(columns='private')

# Replace the target labels with integer codes.
encoder = LabelEncoder()
y = encoder.fit_transform(y)

# Result cache filled in by the benchmark loop: label -> (accuracies, speed).
model_accuracies_and_speed = {}

In [None]:
# Classifiers to benchmark, keyed by display label. The labels are padded to
# a common width so the printed result lines line up.
models_under_test = {
    "XGB          " : XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42),
    # `verbose=-1` silences LightGBM's logging; the older `silent=True`
    # constructor argument was deprecated and later removed.
    "LGBM         " : LGBMClassifier(verbose=-1, random_state=42),
    "CatBoost     " : CatBoostClassifier(verbose=False, random_state=42),
    "RandomForest " : RandomForestClassifier(random_state=42),
    "AdaBoost     " : AdaBoostClassifier(random_state=42),
    "sk-GBM       " : GradientBoostingClassifier(random_state=42),
    "KNN          " : KNeighborsClassifier(),
    # Label corrected from the misspelled "SDG" (the model is SGDClassifier).
    "SGD          " : linear_model.SGDClassifier(random_state=42),
}

In [None]:
def print_and_get_accuracy(name, model, data, labels, model_accuracies_and_speed, *, refresh=None):
    """Cross-validate ``model`` (or reuse a cached result) and print a summary.

    Args:
        name: Label for the model; used as the cache key and in the printed line.
        model: Estimator passed to ``cross_val_score``.
        data: Feature data passed to ``cross_val_score``.
        labels: Target values passed to ``cross_val_score``.
        model_accuracies_and_speed: Mapping of ``name`` to a previously
            returned ``(accuracies, speed)`` pair. It is only read here; the
            caller is responsible for storing the returned value.
        refresh: When true, always re-run cross-validation even if ``name``
            is cached. When ``None`` (the default), the module-level
            ``refresh`` flag is used if it exists, otherwise ``False``.

    Returns:
        A ``(accuracies, speed)`` tuple: the per-fold accuracy scores and the
        elapsed wall-clock seconds rounded to two decimals (or the cached
        values when the cache was used).
    """
    if refresh is None:
        # Backward compatibility: existing callers toggle a notebook-level
        # `refresh` variable instead of passing the flag explicitly.
        refresh = globals().get("refresh", False)

    if refresh or name not in model_accuracies_and_speed:
        # Build the splitter only when it is needed, and before the timer
        # starts so that only the cross-validation itself is measured.
        cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=5, random_state=42)
        start = time()
        accuracies = cross_val_score(model, data, labels, cv=cv, scoring='accuracy')
        speed = np.round(time() - start, 2)
    else:
        cached = model_accuracies_and_speed[name]
        accuracies = cached[0]
        speed = cached[1]

    print(f"{name} : Score {np.round(accuracies.mean()*100,2)}, Speed {speed}s")

    return (accuracies, speed)

In [None]:
# When False, models that already have an entry in
# `model_accuracies_and_speed` reuse it instead of being cross-validated again.
refresh = False

# Benchmark every model and store its (accuracies, speed) result by label.
for name in models_under_test:
    model = models_under_test[name]
    model_accuracies_and_speed[name] = print_and_get_accuracy(
        name, model, X, y, model_accuracies_and_speed
    )