In [1]:
from stacking import *
import catboost as cb
import xgboost as xgb
import lightgbm as lgb

2024-07-31 07:36:35.794050: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-07-31 07:36:35.802238: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:479] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-31 07:36:35.812949: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:10575] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-31 07:36:35.812969: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1442] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-07-31 07:36:35.821153: I tensorflow/core/platform/cpu_feature_gua

In [2]:
x_train_xgb, x_train_cb, x_valid_xgb, x_valid_cb, y_train, y_valid = load_train_valid_for_all()

In [3]:
train_predictions, valid_predictions = stack_prediction()

## LightGBM

In [29]:
lgbm_params = {
    'eval_metric': 'auc',
    'max_bin': 32767,
    'num_threads': 24,
    'bagging_freq': 5,
    'bagging_fraction': 0.75,
    'n_esimators': 1000,
    'early_stopping_rounds': 200,
    'verbosity': -1,
    'learning_rate': 0.0005,
}

In [30]:
model = lgb.LGBMClassifier(**lgbm_params)

In [31]:
log_callback = lgb.log_evaluation(period=100)
model.fit(train_predictions, y_train, eval_set=[(valid_predictions, y_valid)], eval_metric='auc', callbacks=[log_callback])

[100]	valid_0's auc: 0.88716	valid_0's binary_logloss: 0.356677


## XGBoost

In [9]:
xgb_params = {
    'objective': 'binary:logistic',
    'eval_metric': 'auc',
    'device': 'cuda',
    'max_bin': 32767,
}

In [10]:
model = xgb.XGBRegressor(
    **xgb_params,
    n_estimators=1000, max_depth=5, learning_rate=0.3, random_state=SEED, n_jobs=4)

In [11]:
model.fit(train_predictions, y_train, eval_set=[(valid_predictions, y_valid)], verbose=100)

[0]	validation_0-auc:0.89096
[100]	validation_0-auc:0.88827
[200]	validation_0-auc:0.88809
[300]	validation_0-auc:0.88801
[400]	validation_0-auc:0.88795
[500]	validation_0-auc:0.88789
[600]	validation_0-auc:0.88787
[700]	validation_0-auc:0.88786
[800]	validation_0-auc:0.88783
[900]	validation_0-auc:0.88776
[999]	validation_0-auc:0.88773


## CatBoost

In [11]:
params = {
    **const_params,
    # First round of tuning
    'depth': 6,
    'l2_leaf_reg': 0.03705811087698245, 
    'bagging_temperature': 0.8095495431376838,

    # Second round of tuning
    'learning_rate': 0.00953078367830516, 
    'depth': 2, 
    'l2_leaf_reg': 0.0023690246370781133, 
    'random_strength': 3.924507179051416, 
    'min_data_in_leaf': 743, 
    'border_count': 115,

    # Params for training
    'learning_rate': 0.001,
    'n_estimators': 3000,
    'early_stopping_rounds': 300,
}

In [18]:
dataset = cb.Pool(
    data=train_predictions,
    label=y_train,
)

In [15]:
model = cb.CatBoostClassifier(**params)
model.fit(dataset, verbose=100, eval_set=(valid_predictions, y_valid))

Default metric period is 5 because AUC is/are not implemented for GPU


0:	test: 0.7572985	best: 0.7572985 (0)	total: 39.1ms	remaining: 1m 57s
100:	test: 0.8811986	best: 0.8812006 (97)	total: 3.81s	remaining: 1m 49s
200:	test: 0.8843971	best: 0.8843998 (193)	total: 7.59s	remaining: 1m 45s
300:	test: 0.8863511	best: 0.8863513 (298)	total: 11.3s	remaining: 1m 41s
400:	test: 0.8881707	best: 0.8881734 (373)	total: 15.1s	remaining: 1m 37s
500:	test: 0.8904568	best: 0.8904580 (492)	total: 18.8s	remaining: 1m 33s
600:	test: 0.8904825	best: 0.8904825 (599)	total: 22.5s	remaining: 1m 29s
700:	test: 0.8912564	best: 0.8912565 (697)	total: 26.3s	remaining: 1m 26s
800:	test: 0.8912984	best: 0.8912996 (768)	total: 30s	remaining: 1m 22s
900:	test: 0.8923662	best: 0.8923669 (898)	total: 33.7s	remaining: 1m 18s
1000:	test: 0.8924890	best: 0.8924890 (999)	total: 37.5s	remaining: 1m 14s
1100:	test: 0.8925404	best: 0.8925411 (1092)	total: 41.2s	remaining: 1m 11s
1200:	test: 0.8925586	best: 0.8925586 (1200)	total: 45s	remaining: 1m 7s
1300:	test: 0.8929563	best: 0.8929563 (129

<catboost.core.CatBoostClassifier at 0x7fd7fe969ba0>

In [7]:
import pprint
pprint.pprint(model.best_iteration_)
pprint.pprint(model.best_score_)

229
{'learn': {'Logloss': 0.20830323683107735},
 'validation': {'AUC': 0.8935578167438507, 'Logloss': 0.24694435344902518}}
