In [2]:
from skopt import BayesSearchCV
from datetime import datetime as dt

import numpy as np
import pandas as pd

from preprocessing import Preprocessing
from sklearn.model_selection import StratifiedKFold, GridSearchCV


from sklearn.ensemble import RandomForestClassifier, HistGradientBoostingClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.ensemble import VotingClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier

In [3]:
# Columns removed from the frame immediately after loading.
col_drop = ['CustomerId', 'Surname']

# Explicit dtypes for the two flag columns; everything else is inferred.
types = {'HasCrCard': bool, 'IsActiveMember': bool}

# Load, drop the unused columns, then discard exact duplicate rows.
# Duplicates are detected on the remaining columns only: `id` is the index
# and the `col_drop` columns are already gone at that point.
train = (
    pd.read_csv('data/train.csv', index_col='id', dtype=types)
    .drop(columns=col_drop)
    .drop_duplicates()
)

In [4]:
# Separate the target column from the feature matrix.
y = train['Exited']
X = train.drop(columns=['Exited'])

In [5]:
# Project-local transformer (see preprocessing.py); fitted on the full
# feature matrix here.
# NOTE(review): this happens before the CV split, so whatever statistics
# Preprocessing learns are computed on validation folds too — confirm this
# is intended.
prepro = Preprocessing()
X_scld = (
    prepro
    .fit_transform(X)
    .astype(float)  # cast the transformed output to float
)

In [6]:
# 5-fold stratified splitter shared by the searches below.
# shuffle=False is the library default, spelled out here: folds are taken
# in row order and are therefore deterministic.
skf = StratifiedKFold(n_splits=5, shuffle=False)

In [7]:
# Fixed seed for the forest so that CV scores and the selected
# hyperparameters are repeatable across kernel restarts.
RANDOM_STATE = 42

# Random-forest hyperparameter grid: 5 * 3 * 4 * 3 = 180 candidates,
# i.e. 900 fits with the 5-fold splitter.
# NOTE(review): scikit-learn trees effectively use
# max(min_samples_split, 2 * min_samples_leaf), so combinations where
# min_samples_split <= 2 * min_samples_leaf are duplicates of each other.
# Pruning them would roughly halve the search time — confirm and trim.
params = {
    "n_estimators": [100, 200, 400, 500, 800],
    'max_depth': [8, 16, 32],
    'min_samples_split': [8, 16, 32, 64],
    'min_samples_leaf': [8, 16, 32],
}

# Exhaustive search scored by ROC AUC, parallelised over all cores.
grid = GridSearchCV(
    RandomForestClassifier(random_state=RANDOM_STATE),
    params,
    cv=skf,
    scoring='roc_auc',
    n_jobs=-1,
    verbose=10,
)

In [8]:
# Run the full search (900 forest fits) — this is the expensive cell.
grid.fit(X=X_scld, y=y)

Fitting 5 folds for each of 180 candidates, totalling 900 fits
[CV 3/5; 1/180] START max_depth=8, min_samples_leaf=8, min_samples_split=8, n_estimators=100
[CV 4/5; 1/180] START max_depth=8, min_samples_leaf=8, min_samples_split=8, n_estimators=100
[CV 2/5; 1/180] START max_depth=8, min_samples_leaf=8, min_samples_split=8, n_estimators=100
[CV 4/5; 2/180] START max_depth=8, min_samples_leaf=8, min_samples_split=8, n_estimators=200
[CV 1/5; 3/180] START max_depth=8, min_samples_leaf=8, min_samples_split=8, n_estimators=400
[CV 5/5; 1/180] START max_depth=8, min_samples_leaf=8, min_samples_split=8, n_estimators=100
[CV 1/5; 1/180] START max_depth=8, min_samples_leaf=8, min_samples_split=8, n_estimators=100
[CV 2/5; 3/180] START max_depth=8, min_samples_leaf=8, min_samples_split=8, n_estimators=400
[CV 3/5; 2/180] START max_depth=8, min_samples_leaf=8, min_samples_split=8, n_estimators=200
[CV 5/5; 2/180] START max_depth=8, min_samples_leaf=8, min_samples_split=8, n_estimators=200
[CV 2/5

KeyboardInterrupt: 

In [None]:
# Hyperparameter combination with the best mean CV score.
# NOTE(review): the saved output lists a 'criterion' key that is not in the
# current `params` grid, and the saved fit cell ended in KeyboardInterrupt —
# the displayed result appears to come from an earlier run; re-run to refresh.
grid.best_params_

{'criterion': 'gini',
 'max_depth': 16,
 'min_samples_leaf': 16,
 'min_samples_split': 32,
 'n_estimators': 500}

In [None]:
# Mean cross-validated ROC AUC (scoring='roc_auc') of the best candidate.
grid.best_score_

0.8874574135987047