In [1]:
import sys 
sys.path.append('..')

In [2]:
import numpy as np
import pandas as pd

from preprocessing import Preprocessing
from sklearn.model_selection import StratifiedKFold, GridSearchCV


from sklearn.ensemble import RandomForestClassifier, HistGradientBoostingClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.ensemble import VotingClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier

In [3]:
col_drop = ['CustomerId','Surname']
types = {
    'HasCrCard':bool,
    'IsActiveMember':bool
}
train = pd.read_csv('../data/train.csv', index_col='id', dtype=types).drop(columns=col_drop)
train.drop_duplicates(inplace=True)

In [4]:
X = train.drop(columns='Exited')
y = train.Exited

In [5]:
prepro = Preprocessing()
X_scld = prepro.fit_transform(X).astype(float)

In [6]:
skf = StratifiedKFold(n_splits=4)

In [7]:
weight_target = y.value_counts(True).to_dict()

In [8]:
params = {
    'depth': [6, 8, 10],
    'learning_rate': [0.01, 0.05, 0.1],
    'l2_leaf_reg': [1, 3, 5],
    'iterations': [100, 200, 300],
    'class_weights':[weight_target, None]
}
grid = GridSearchCV(
    CatBoostClassifier(verbose=False),
    params,
    cv=skf,
    scoring='roc_auc',
    n_jobs = -1,
    verbose=10
)

In [9]:
grid.fit(X_scld,y)

Fitting 4 folds for each of 162 candidates, totalling 648 fits
[CV 2/4; 1/162] START class_weights={0: 0.7883161220294583, 1: 0.2116838779705417}, depth=6, iterations=100, l2_leaf_reg=1, learning_rate=0.01
[CV 2/4; 2/162] START class_weights={0: 0.7883161220294583, 1: 0.2116838779705417}, depth=6, iterations=100, l2_leaf_reg=1, learning_rate=0.05
[CV 4/4; 1/162] START class_weights={0: 0.7883161220294583, 1: 0.2116838779705417}, depth=6, iterations=100, l2_leaf_reg=1, learning_rate=0.01
[CV 3/4; 1/162] START class_weights={0: 0.7883161220294583, 1: 0.2116838779705417}, depth=6, iterations=100, l2_leaf_reg=1, learning_rate=0.01
[CV 1/4; 1/162] START class_weights={0: 0.7883161220294583, 1: 0.2116838779705417}, depth=6, iterations=100, l2_leaf_reg=1, learning_rate=0.01
[CV 3/4; 2/162] START class_weights={0: 0.7883161220294583, 1: 0.2116838779705417}, depth=6, iterations=100, l2_leaf_reg=1, learning_rate=0.05
[CV 4/4; 2/162] START class_weights={0: 0.7883161220294583, 1: 0.21168387797054



[CV 1/4; 60/162] END class_weights={0: 0.7883161220294583, 1: 0.2116838779705417}, depth=10, iterations=100, l2_leaf_reg=3, learning_rate=0.1;, score=0.891 total time=   3.6s
[CV 4/4; 59/162] END class_weights={0: 0.7883161220294583, 1: 0.2116838779705417}, depth=10, iterations=100, l2_leaf_reg=3, learning_rate=0.05;, score=0.887 total time=   3.6s
[CV 4/4; 62/162] START class_weights={0: 0.7883161220294583, 1: 0.2116838779705417}, depth=10, iterations=100, l2_leaf_reg=5, learning_rate=0.05
[CV 1/4; 63/162] START class_weights={0: 0.7883161220294583, 1: 0.2116838779705417}, depth=10, iterations=100, l2_leaf_reg=5, learning_rate=0.1
[CV 4/4; 60/162] END class_weights={0: 0.7883161220294583, 1: 0.2116838779705417}, depth=10, iterations=100, l2_leaf_reg=3, learning_rate=0.1;, score=0.888 total time=   3.7s
[CV 2/4; 63/162] START class_weights={0: 0.7883161220294583, 1: 0.2116838779705417}, depth=10, iterations=100, l2_leaf_reg=5, learning_rate=0.1
[CV 2/4; 60/162] END class_weights={0: 0.

324 fits failed out of a total of 648.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
324 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/marc/Library/Mobile Documents/com~apple~CloudDocs/Projects/Playground/playgroundseries_s4e1/.venv/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/marc/Library/Mobile Documents/com~apple~CloudDocs/Projects/Playground/playgroundseries_s4e1/.venv/lib/python3.11/site-packages/catboost/core.py", line 5100, in fit
    self._fit(X, y, cat_features, text_features, embedding_features, None, sample_weight, None, None, None, None, baseline, use_best_model,
  F

In [10]:
grid.best_params_

{'class_weights': {0: 0.7883161220294583, 1: 0.2116838779705417},
 'depth': 6,
 'iterations': 300,
 'l2_leaf_reg': 5,
 'learning_rate': 0.1}

In [11]:
grid.best_score_

0.8894957732349174

In [12]:
pd.DataFrame(grid.cv_results_).sort_values('rank_test_score').head(10)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_class_weights,param_depth,param_iterations,param_l2_leaf_reg,param_learning_rate,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,mean_test_score,std_test_score,rank_test_score
26,11.271042,0.72661,0.060702,0.017993,"{0: 0.7883161220294583, 1: 0.2116838779705417}",6,300,5,0.1,"{'class_weights': {0: 0.7883161220294583, 1: 0...",0.891411,0.888888,0.889035,0.888649,0.889496,0.001114,1
20,10.731468,0.331531,0.066911,0.019382,"{0: 0.7883161220294583, 1: 0.2116838779705417}",6,300,1,0.1,"{'class_weights': {0: 0.7883161220294583, 1: 0...",0.891285,0.889055,0.889157,0.88848,0.889494,0.001066,2
17,7.532519,0.311285,0.038064,0.013295,"{0: 0.7883161220294583, 1: 0.2116838779705417}",6,200,5,0.1,"{'class_weights': {0: 0.7883161220294583, 1: 0...",0.891288,0.888977,0.889048,0.888657,0.889492,0.001047,3
11,7.18749,0.289776,0.044153,0.009796,"{0: 0.7883161220294583, 1: 0.2116838779705417}",6,200,1,0.1,"{'class_weights': {0: 0.7883161220294583, 1: 0...",0.891099,0.889166,0.889089,0.888462,0.889454,0.000988,4
52,12.94109,1.097131,0.043928,0.019233,"{0: 0.7883161220294583, 1: 0.2116838779705417}",8,300,5,0.05,"{'class_weights': {0: 0.7883161220294583, 1: 0...",0.89131,0.888952,0.8892,0.888168,0.889408,0.001163,5
22,10.746091,1.474843,0.032208,0.018264,"{0: 0.7883161220294583, 1: 0.2116838779705417}",6,300,3,0.05,"{'class_weights': {0: 0.7883161220294583, 1: 0...",0.89116,0.889052,0.889016,0.888381,0.889402,0.001049,6
23,10.998767,1.628235,0.05339,0.02277,"{0: 0.7883161220294583, 1: 0.2116838779705417}",6,300,3,0.1,"{'class_weights': {0: 0.7883161220294583, 1: 0...",0.891422,0.888848,0.888932,0.888405,0.889402,0.001183,7
19,10.57918,0.380656,0.042001,0.019105,"{0: 0.7883161220294583, 1: 0.2116838779705417}",6,300,1,0.05,"{'class_weights': {0: 0.7883161220294583, 1: 0...",0.891271,0.888852,0.889045,0.888263,0.889358,0.001142,8
25,9.8187,1.21886,0.055868,0.010291,"{0: 0.7883161220294583, 1: 0.2116838779705417}",6,300,5,0.05,"{'class_weights': {0: 0.7883161220294583, 1: 0...",0.891138,0.888854,0.889129,0.888221,0.889336,0.001092,9
14,7.3159,0.718169,0.033867,0.002643,"{0: 0.7883161220294583, 1: 0.2116838779705417}",6,200,3,0.1,"{'class_weights': {0: 0.7883161220294583, 1: 0...",0.891247,0.888741,0.889004,0.888307,0.889324,0.001137,10
