In [2]:
import sys 
sys.path.append('..')

In [3]:
import numpy as np
import pandas as pd

from preprocessing import Preprocessing
from sklearn.model_selection import StratifiedKFold, GridSearchCV


from sklearn.ensemble import RandomForestClassifier, HistGradientBoostingClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.ensemble import VotingClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier

In [7]:
col_drop = ['CustomerId','Surname']
types = {
    'HasCrCard':bool,
    'IsActiveMember':bool
}
train = pd.read_csv('../data/train.csv', index_col='id', dtype=types).drop(columns=col_drop)
train.drop_duplicates(inplace=True)

In [8]:
X = train.drop(columns='Exited')
y = train.Exited

In [9]:
prepro = Preprocessing()
X_scld = prepro.fit_transform(X).astype(float)

In [10]:
skf = StratifiedKFold(n_splits=4)

In [19]:
weight_target = y.value_counts(True).to_dict()

In [34]:
params = {
    "n_estimators" : [200,250,300],
    'max_depth':[16,24,32],
    'min_samples_split':[16,32,64,96,128],
    'min_samples_leaf':[8,12,16,24,32],
    'class_weight':[weight_target]

}
grid = GridSearchCV(
    RandomForestClassifier(),
    params,
    cv=skf,
    scoring='roc_auc',
    n_jobs = -1,
    verbose=10
)

In [35]:
grid.fit(X_scld,y)

Fitting 4 folds for each of 225 candidates, totalling 900 fits
[CV 1/4; 3/225] START class_weight={0: 0.7883161220294583, 1: 0.2116838779705417}, max_depth=16, min_samples_leaf=8, min_samples_split=16, n_estimators=300
[CV 3/4; 2/225] START class_weight={0: 0.7883161220294583, 1: 0.2116838779705417}, max_depth=16, min_samples_leaf=8, min_samples_split=16, n_estimators=250
[CV 1/4; 2/225] START class_weight={0: 0.7883161220294583, 1: 0.2116838779705417}, max_depth=16, min_samples_leaf=8, min_samples_split=16, n_estimators=250
[CV 2/4; 2/225] START class_weight={0: 0.7883161220294583, 1: 0.2116838779705417}, max_depth=16, min_samples_leaf=8, min_samples_split=16, n_estimators=250
[CV 2/4; 3/225] START class_weight={0: 0.7883161220294583, 1: 0.2116838779705417}, max_depth=16, min_samples_leaf=8, min_samples_split=16, n_estimators=300
[CV 2/4; 1/225] START class_weight={0: 0.7883161220294583, 1: 0.2116838779705417}, max_depth=16, min_samples_leaf=8, min_samples_split=16, n_estimators=200
[



[CV 3/4; 156/225] START class_weight={0: 0.7883161220294583, 1: 0.2116838779705417}, max_depth=32, min_samples_leaf=8, min_samples_split=32, n_estimators=300
[CV 1/4; 153/225] END class_weight={0: 0.7883161220294583, 1: 0.2116838779705417}, max_depth=32, min_samples_leaf=8, min_samples_split=16, n_estimators=300;, score=0.888 total time=  39.7s
[CV 4/4; 156/225] START class_weight={0: 0.7883161220294583, 1: 0.2116838779705417}, max_depth=32, min_samples_leaf=8, min_samples_split=32, n_estimators=300
[CV 3/4; 153/225] END class_weight={0: 0.7883161220294583, 1: 0.2116838779705417}, max_depth=32, min_samples_leaf=8, min_samples_split=16, n_estimators=300;, score=0.885 total time=  39.4s
[CV 1/4; 157/225] START class_weight={0: 0.7883161220294583, 1: 0.2116838779705417}, max_depth=32, min_samples_leaf=8, min_samples_split=64, n_estimators=200
[CV 2/4; 153/225] END class_weight={0: 0.7883161220294583, 1: 0.2116838779705417}, max_depth=32, min_samples_leaf=8, min_samples_split=16, n_estimat

In [36]:
grid.best_params_

{'class_weight': {0: 0.7883161220294583, 1: 0.2116838779705417},
 'max_depth': 16,
 'min_samples_leaf': 16,
 'min_samples_split': 96,
 'n_estimators': 300}

In [37]:
grid.best_score_

0.8876261969735139

In [38]:
pd.DataFrame(grid.cv_results_).sort_values('rank_test_score').head(10)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_class_weight,param_max_depth,param_min_samples_leaf,param_min_samples_split,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,mean_test_score,std_test_score,rank_test_score
41,29.618516,0.120538,1.278353,0.108288,"{0: 0.7883161220294583, 1: 0.2116838779705417}",16,16,96,300,"{'class_weight': {0: 0.7883161220294583, 1: 0....",0.890077,0.887582,0.886851,0.885995,0.887626,0.001522,1
71,28.748201,0.545755,1.341214,0.043963,"{0: 0.7883161220294583, 1: 0.2116838779705417}",16,32,96,300,"{'class_weight': {0: 0.7883161220294583, 1: 0....",0.8902,0.887563,0.886764,0.885888,0.887604,0.001612,2
39,22.314217,3.871946,0.928413,0.105115,"{0: 0.7883161220294583, 1: 0.2116838779705417}",16,16,96,200,"{'class_weight': {0: 0.7883161220294583, 1: 0....",0.890191,0.887669,0.88643,0.886074,0.887591,0.001613,3
53,30.312822,0.374262,1.42253,0.042093,"{0: 0.7883161220294583, 1: 0.2116838779705417}",16,24,64,300,"{'class_weight': {0: 0.7883161220294583, 1: 0....",0.890071,0.887691,0.886715,0.88586,0.887584,0.001575,4
26,30.743776,0.286997,1.284483,0.042156,"{0: 0.7883161220294583, 1: 0.2116838779705417}",16,12,96,300,"{'class_weight': {0: 0.7883161220294583, 1: 0....",0.890095,0.887655,0.886682,0.885881,0.887578,0.001583,5
62,31.318865,0.366837,1.368924,0.077719,"{0: 0.7883161220294583, 1: 0.2116838779705417}",16,32,16,300,"{'class_weight': {0: 0.7883161220294583, 1: 0....",0.890147,0.887446,0.886745,0.885931,0.887567,0.001583,6
23,30.955155,0.407414,1.392975,0.075213,"{0: 0.7883161220294583, 1: 0.2116838779705417}",16,12,64,300,"{'class_weight': {0: 0.7883161220294583, 1: 0....",0.890048,0.887467,0.886901,0.885846,0.887565,0.001547,7
74,27.931416,0.22564,1.214174,0.056346,"{0: 0.7883161220294583, 1: 0.2116838779705417}",16,32,128,300,"{'class_weight': {0: 0.7883161220294583, 1: 0....",0.890042,0.887603,0.886906,0.885704,0.887564,0.001584,8
27,22.859671,4.862737,0.914231,0.142312,"{0: 0.7883161220294583, 1: 0.2116838779705417}",16,12,128,200,"{'class_weight': {0: 0.7883161220294583, 1: 0....",0.889878,0.887625,0.886819,0.885928,0.887563,0.001465,9
59,30.576011,0.627541,1.357052,0.053655,"{0: 0.7883161220294583, 1: 0.2116838779705417}",16,24,128,300,"{'class_weight': {0: 0.7883161220294583, 1: 0....",0.889958,0.887499,0.886841,0.885927,0.887556,0.001495,10
