<a href="https://www.kaggle.com/rsizem2/tps-10-21-xgboost-optuna-starter-w-pruning?scriptVersionId=84910368" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

# XGBoost Hyperparameter Search

In this notebook we tune an XGBoost model with the Optuna library and one of its [pruners](https://optuna.readthedocs.io/en/stable/reference/pruners.html). For each set of parameters we perform k-fold cross-validation. After each fold, the pruner compares that fold's AUC against the AUCs earlier trials achieved at the same fold and ends unpromising trials early (i.e. when the AUC on a given fold is too low).

In [1]:
# Notebook-wide settings; lower these to test changes to the notebook quickly
RANDOM_SEED = 0     # seed shared by the holdout split, the CV folds and XGBoost
NUM_FOLDS = 3       # cross-validation folds evaluated per Optuna trial
MAX_TREES = 20_000  # upper bound on boosting rounds (passed as n_estimators)
EARLY_STOP = 150    # early-stopping patience, in boosting rounds
NUM_TRIALS = 50     # number of Optuna trials to run

In [2]:
# General imports
import numpy as np
import pandas as pd
import datatable as dt
import time
import gc

# Model and evaluation
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold, train_test_split
from xgboost import XGBClassifier
import xgboost as xgb

# Optuna
import optuna
from optuna.visualization import plot_param_importances, plot_parallel_coordinate
from optuna.pruners import PercentilePruner

# Hide warnings (makes optuna output easier to parse)
import warnings
warnings.filterwarnings('ignore')

# Preparing the Data

1. Load data with `datatable` and convert to `pandas`
2. Reduce memory usage by downcasting datatypes
3. Get holdout set from training data using a stratified scheme

In [3]:
# Helper function for downcasting 
def reduce_memory_usage(df, verbose=True):
    """Downcast the numeric columns of ``df`` to shrink its memory footprint.

    Integer columns are downcast to the smallest integer dtype that holds
    their values, boolean columns are converted to ``int8``, and float columns
    are downcast with ``pd.to_numeric(..., downcast='float')``. Columns of any
    other dtype are left untouched.

    Args:
        df: DataFrame to shrink; its columns are reassigned on this object.
        verbose: When true, print the final size and the percent saved.

    Returns:
        The same ``df`` object that was passed in.
    """
    start_mem = df.memory_usage().sum() / 1024 ** 2
    # ``Series.items`` replaces ``Series.iteritems``, which pandas 2.0 removed.
    for col, dtype in df.dtypes.items():
        if dtype.name.startswith('int'):
            df[col] = pd.to_numeric(df[col], downcast='integer')
        elif dtype.name == 'bool':
            df[col] = df[col].astype('int8')
        elif dtype.name.startswith('float'):
            df[col] = pd.to_numeric(df[col], downcast='float')

    end_mem = df.memory_usage().sum() / 1024 ** 2
    if verbose:
        # Guard the ratio so a frame that reports zero bytes prints 0.0%
        # instead of dividing by zero.
        reduction = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
        print(
            "Mem. usage decreased to {:.2f} Mb ({:.1f}% reduction)".format(
                end_mem, reduction
            )
        )
    return df

In [4]:
%%time

# Read the training data with datatable, convert to pandas, then downcast
train = reduce_memory_usage(
    dt.fread(r'../input/tabular-playground-series-oct-2021/train.csv').to_pandas()
)

# Set aside a stratified half of the rows as a holdout set for scoring models
train, holdout = train_test_split(
    train,
    test_size=0.5,
    shuffle=True,
    stratify=train['target'],
    random_state=RANDOM_SEED,
)

# Both halves get a fresh 0..n-1 index
train = train.reset_index(drop=True)
holdout = holdout.reset_index(drop=True)

# Model inputs: every column except the row id and the label
features = [col for col in train.columns if col not in ('id', 'target')]

Mem. usage decreased to 963.21 Mb (48.7% reduction)
CPU times: user 38.3 s, sys: 43.9 s, total: 1min 22s
Wall time: 1min 32s


# XGBoost

We create a function to train an XGBoost model and return the holdout AUC.

## 1. Default Parameters

In [5]:
# Baseline XGBoost settings merged into EVERY model this notebook trains
default_params = {
    'random_state': RANDOM_SEED,
    # Upper bound on boosting rounds; fits also pass early_stopping_rounds
    'n_estimators': MAX_TREES,
    # Train and predict on the GPU
    'tree_method': 'gpu_hist',
    'predictor': "gpu_predictor",
}

## 2. Scoring Function

* `model_params` - parameters passed to `XGBClassifier`
* `fit_params` - parameters passed to the `fit` method

In [6]:
def score_xgboost(trial=None, model_params=None, fit_params=None):
    """Cross-validate an XGBoost model on ``train`` and score it on ``holdout``.

    One model is fit per stratified fold. Each model's holdout probabilities
    are averaged across folds and the AUC of that average is returned. When
    ``trial`` is given, every fold's validation AUC is reported to Optuna so
    the study's pruner can stop the trial early.

    Args:
        trial: Optional Optuna trial used for reporting and pruning.
        model_params: Extra keyword arguments for ``XGBClassifier``. They are
            unpacked next to ``default_params``, so keys must not overlap.
        fit_params: Extra keyword arguments for ``XGBClassifier.fit``.

    Returns:
        ROC AUC of the fold-averaged predictions on the holdout set.

    Raises:
        optuna.TrialPruned: If the pruner decides to stop ``trial`` after a
            fold has been reported.
    """
    # ``None`` defaults avoid sharing one mutable dict between calls
    model_params = {} if model_params is None else model_params
    fit_params = {} if fit_params is None else fit_params

    # Running average of the per-fold holdout predictions
    holdout_preds = np.zeros((holdout.shape[0],))

    # Stratified k-fold cross-validation
    skf = StratifiedKFold(n_splits=NUM_FOLDS, shuffle=True, random_state=RANDOM_SEED)
    for fold, (train_idx, valid_idx) in enumerate(skf.split(train, train['target'])):

        # Training and validation sets for this fold
        start = time.time()
        X_train, y_train = train[features].iloc[train_idx], train['target'].iloc[train_idx]
        X_valid, y_valid = train[features].iloc[valid_idx], train['target'].iloc[valid_idx]

        # Define model
        model = XGBClassifier(**default_params, **model_params)
        gc.collect()

        model.fit(
            X_train, y_train,
            verbose=False,
            eval_set=[(X_valid, y_valid)],
            eval_metric="logloss",
            early_stopping_rounds=EARLY_STOP,
            **fit_params
        )

        # Validation/holdout predictions (probability of the positive class)
        valid_preds = model.predict_proba(X_valid)[:, 1]
        holdout_preds += model.predict_proba(holdout[features])[:, 1] / NUM_FOLDS
        valid_auc = roc_auc_score(y_valid, valid_preds)
        end = time.time()

        print(f'Fold {fold} AUC: {round(valid_auc, 6)} in {round((end-start) / 60, 2)} minutes.')

        # NOTE(review): short pause kept from the original; presumably it lets
        # the print above appear before Optuna's log lines — confirm.
        time.sleep(0.5)
        if trial is not None:
            # Report the fold AUC as this trial's intermediate value
            trial.report(
                value=valid_auc,
                step=fold
            )
            # Stop early when the pruner judges the fold AUC unpromising
            if trial.should_prune():
                raise optuna.TrialPruned()

    return roc_auc_score(holdout['target'], holdout_preds)

# Hyperparameter Search

To tweak the pruner consider adding/adjusting the following keyword arguments:

* `percentile` - percentage of the best trials to keep at a given step; a trial scoring below that cutoff is pruned
* `n_startup_trials` - number of trials (models trained) before pruning starts
* `n_warmup_steps` - number of iterations before pruning checks
* `interval_steps` - number of iterations between pruning checks
* `n_min_trials` - skip pruning check if too few trials

In [7]:
# Percentile Pruner settings
# This pruner is passed to `optuna.create_study` in `parameter_search`; the
# "steps" it counts are the CV folds reported by `score_xgboost`.
pruner = PercentilePruner(
    percentile = 66,  # keep the top 66% of trials at each step
    n_startup_trials = 5,  # no pruning until 5 trials have finished
    n_warmup_steps = 0,  # pruning checks start at the first step
    interval_steps = 1,  # check after every step
    n_min_trials = 5,  # skip the check if fewer than 5 trials reported at that step
)

In [8]:
def parameter_search(trials):
    """Run an Optuna study over XGBoost hyperparameters and return it.

    Each trial samples one parameter set and scores it with
    ``score_xgboost``; the module-level ``pruner`` may stop it early. Two
    hand-picked parameter sets are queued so they are evaluated first.

    Args:
        trials: Total number of trials to run, the two queued ones included.

    Returns:
        The ``optuna`` study after optimization has finished.
    """

    # Optuna objective function
    def objective(trial):
        # ``suggest_float(log=True)`` and ``suggest_float(step=...)`` replace
        # the deprecated ``suggest_loguniform`` / ``suggest_discrete_uniform``
        # with the same ranges and parameter names.
        model_params = dict(
            # default 6
            max_depth=trial.suggest_int(
                "max_depth", 2, 12
            ),
            # default 0.3
            learning_rate=trial.suggest_float(
                "learning_rate", 0.01, 0.3, log=True
            ),
            # default 0
            gamma=trial.suggest_float(
                "gamma", 1e-10, 100, log=True
            ),
            # default 1
            min_child_weight=trial.suggest_float(
                "min_child_weight", 1e-2, 1e2, log=True
            ),
            # default 1
            subsample=trial.suggest_float(
                "subsample", 0.2, 1.0, step=0.01
            ),
            # default 1
            colsample_bytree=trial.suggest_float(
                "colsample_bytree", 0.2, 1.0, step=0.01
            ),
            # default 1
            colsample_bylevel=trial.suggest_float(
                "colsample_bylevel", 0.2, 1.0, step=0.01
            ),
            # default 1
            reg_lambda=trial.suggest_float(
                "reg_lambda", 1e-10, 100, log=True
            ),
            # default 0
            reg_alpha=trial.suggest_float(
                "reg_alpha", 1e-10, 100, log=True
            ),
        )

        return score_xgboost(trial, model_params)

    optuna.logging.set_verbosity(optuna.logging.DEBUG)
    study = optuna.create_study(pruner=pruner, direction="maximize")

    # (nearly) defaults; gamma and reg_alpha sit at the lower bound of their
    # log-scaled ranges because those ranges cannot contain the default of 0
    study.enqueue_trial({
        "max_depth": 6,
        'learning_rate': 0.3,
        'gamma': 1e-10,
        'min_child_weight': 1.0,
        'subsample': 1.0,
        'colsample_bytree': 1.0,
        'colsample_bylevel': 1.0,
        'reg_alpha': 1e-10,
        'reg_lambda': 1.0,
    })
    # high auc from previous run
    study.enqueue_trial({
        'max_depth': 4,
        'learning_rate': 0.010283092300598066,
        'gamma': 0.03506917176837801,
        'min_child_weight': 0.3878531236460043,
        'subsample': 0.8900000000000001,
        'colsample_bytree': 0.69,
        'colsample_bylevel': 0.24000000000000002,
        'reg_lambda': 5.051637651463356e-07,
        'reg_alpha': 30.170712609605435
    })
    study.optimize(objective, n_trials=trials)
    return study

In [9]:
# Hide output
# Run the search for NUM_TRIALS trials; the evaluation and submission cells
# read their results from the returned study.
study = parameter_search(NUM_TRIALS)

[32m[I 2022-01-10 18:38:02,215][0m A new study created in memory with name: no-name-0f27a5f9-d1b6-4c49-b207-a7c8d98cd786[0m
[37m[D 2022-01-10 18:38:02,218][0m Trial 0 popped from the trial queue.[0m


Fold 0 AUC: 0.848514 in 0.3 minutes.
Fold 1 AUC: 0.84905 in 0.26 minutes.
Fold 2 AUC: 0.845638 in 0.24 minutes.


[32m[I 2022-01-10 18:38:52,038][0m Trial 0 finished with value: 0.8532821673538695 and parameters: {'max_depth': 6, 'learning_rate': 0.3, 'gamma': 1e-10, 'min_child_weight': 1.0, 'subsample': 1.0, 'colsample_bytree': 1.0, 'colsample_bylevel': 1.0, 'reg_lambda': 1.0, 'reg_alpha': 1e-10}. Best is trial 0 with value: 0.8532821673538695.[0m
[37m[D 2022-01-10 18:38:52,039][0m Trial 1 popped from the trial queue.[0m


Fold 0 AUC: 0.856565 in 4.1 minutes.
Fold 1 AUC: 0.85745 in 3.86 minutes.
Fold 2 AUC: 0.853458 in 3.88 minutes.


[32m[I 2022-01-10 18:50:43,770][0m Trial 1 finished with value: 0.857205048013703 and parameters: {'max_depth': 4, 'learning_rate': 0.010283092300598066, 'gamma': 0.03506917176837801, 'min_child_weight': 0.3878531236460043, 'subsample': 0.8900000000000001, 'colsample_bytree': 0.69, 'colsample_bylevel': 0.24000000000000002, 'reg_lambda': 5.051637651463356e-07, 'reg_alpha': 30.170712609605435}. Best is trial 1 with value: 0.857205048013703.[0m


Fold 0 AUC: 0.849412 in 0.58 minutes.
Fold 1 AUC: 0.849818 in 0.58 minutes.
Fold 2 AUC: 0.845579 in 0.56 minutes.


[32m[I 2022-01-10 18:52:28,515][0m Trial 2 finished with value: 0.8534373165232481 and parameters: {'max_depth': 9, 'learning_rate': 0.06879244476132373, 'gamma': 0.00030460284104585043, 'min_child_weight': 5.343929934611298, 'subsample': 0.52, 'colsample_bytree': 0.98, 'colsample_bylevel': 0.78, 'reg_lambda': 2.78317700571948e-07, 'reg_alpha': 1.317070785125409e-09}. Best is trial 1 with value: 0.857205048013703.[0m


Fold 0 AUC: 0.856229 in 0.94 minutes.
Fold 1 AUC: 0.856968 in 1.02 minutes.
Fold 2 AUC: 0.852968 in 0.96 minutes.


[32m[I 2022-01-10 18:55:25,318][0m Trial 3 finished with value: 0.8569371944267177 and parameters: {'max_depth': 3, 'learning_rate': 0.03302465630525234, 'gamma': 0.018229667936082494, 'min_child_weight': 3.5955222712929418, 'subsample': 0.43000000000000005, 'colsample_bytree': 0.75, 'colsample_bylevel': 0.76, 'reg_lambda': 15.386496229172867, 'reg_alpha': 2.5871451195815325e-08}. Best is trial 1 with value: 0.857205048013703.[0m


Fold 0 AUC: 0.853466 in 0.4 minutes.
Fold 1 AUC: 0.854363 in 0.39 minutes.
Fold 2 AUC: 0.850541 in 0.4 minutes.


[32m[I 2022-01-10 18:56:39,071][0m Trial 4 finished with value: 0.8557396801766181 and parameters: {'max_depth': 6, 'learning_rate': 0.08903845049380787, 'gamma': 0.09761372676485479, 'min_child_weight': 7.426560206873671, 'subsample': 0.8, 'colsample_bytree': 0.63, 'colsample_bylevel': 0.8800000000000001, 'reg_lambda': 0.007270932544075969, 'reg_alpha': 9.6761897057865e-07}. Best is trial 1 with value: 0.857205048013703.[0m


Fold 0 AUC: 0.856123 in 1.9 minutes.
Fold 1 AUC: 0.856752 in 1.91 minutes.
Fold 2 AUC: 0.852702 in 1.74 minutes.


[32m[I 2022-01-10 19:02:13,351][0m Trial 5 finished with value: 0.8568052146870495 and parameters: {'max_depth': 5, 'learning_rate': 0.011272274741194972, 'gamma': 2.544912527202475e-10, 'min_child_weight': 0.01670996695711299, 'subsample': 0.28, 'colsample_bytree': 0.64, 'colsample_bylevel': 0.9099999999999999, 'reg_lambda': 5.553983499419484e-10, 'reg_alpha': 6.762234559380432e-08}. Best is trial 1 with value: 0.857205048013703.[0m


Fold 0 AUC: 0.855627 in 1.45 minutes.
Fold 1 AUC: 0.856106 in 1.53 minutes.
Fold 2 AUC: 0.85241 in 1.45 minutes.


[32m[I 2022-01-10 19:06:40,775][0m Trial 6 finished with value: 0.8566457469696245 and parameters: {'max_depth': 8, 'learning_rate': 0.020592480615479185, 'gamma': 4.827706372267413e-08, 'min_child_weight': 97.70350149435765, 'subsample': 0.71, 'colsample_bytree': 0.8400000000000001, 'colsample_bylevel': 0.5, 'reg_lambda': 1.7892990845649068e-07, 'reg_alpha': 1.0748082185655805}. Best is trial 1 with value: 0.857205048013703.[0m


Fold 0 AUC: 0.842438 in 1.13 minutes.


[32m[I 2022-01-10 19:07:49,372][0m Trial 7 pruned. [0m


Fold 0 AUC: 0.851654 in 0.26 minutes.


[32m[I 2022-01-10 19:08:05,249][0m Trial 8 pruned. [0m


Fold 0 AUC: 0.84828 in 0.52 minutes.


[32m[I 2022-01-10 19:08:36,997][0m Trial 9 pruned. [0m


Fold 0 AUC: 0.85218 in 3.98 minutes.


[32m[I 2022-01-10 19:12:36,430][0m Trial 10 pruned. [0m


Fold 0 AUC: 0.856167 in 1.34 minutes.
Fold 1 AUC: 0.857025 in 1.18 minutes.
Fold 2 AUC: 0.85305 in 1.27 minutes.


[32m[I 2022-01-10 19:16:25,679][0m Trial 11 finished with value: 0.8567542995113406 and parameters: {'max_depth': 2, 'learning_rate': 0.03272449273065805, 'gamma': 0.0580428083388551, 'min_child_weight': 1.2337428364074827, 'subsample': 0.41000000000000003, 'colsample_bytree': 0.5, 'colsample_bylevel': 0.6799999999999999, 'reg_lambda': 44.97595245383759, 'reg_alpha': 0.00025541826576470125}. Best is trial 1 with value: 0.857205048013703.[0m


Fold 0 AUC: 0.856132 in 1.01 minutes.
Fold 1 AUC: 0.856936 in 1.09 minutes.
Fold 2 AUC: 0.853006 in 1.07 minutes.


[32m[I 2022-01-10 19:19:37,132][0m Trial 12 finished with value: 0.8567574913223315 and parameters: {'max_depth': 3, 'learning_rate': 0.03133091864812565, 'gamma': 0.006104869121146163, 'min_child_weight': 0.2774030918195262, 'subsample': 0.43000000000000005, 'colsample_bytree': 0.5, 'colsample_bylevel': 0.2, 'reg_lambda': 3.293709769451695e-05, 'reg_alpha': 1.96360702832881e-06}. Best is trial 1 with value: 0.857205048013703.[0m


Fold 0 AUC: 0.855895 in 1.11 minutes.
Fold 1 AUC: 0.856828 in 1.07 minutes.
Fold 2 AUC: 0.852591 in 1.05 minutes.


[32m[I 2022-01-10 19:22:52,308][0m Trial 13 finished with value: 0.856628689313641 and parameters: {'max_depth': 4, 'learning_rate': 0.02022715431803866, 'gamma': 5.446971019126705e-06, 'min_child_weight': 7.103679531958222, 'subsample': 0.22, 'colsample_bytree': 0.8, 'colsample_bylevel': 0.63, 'reg_lambda': 2.655114003083757e-05, 'reg_alpha': 0.01274791203828496}. Best is trial 1 with value: 0.857205048013703.[0m


Fold 0 AUC: 0.855738 in 1.38 minutes.
Fold 1 AUC: 0.856602 in 1.37 minutes.
Fold 2 AUC: 0.852707 in 1.8 minutes.


[32m[I 2022-01-10 19:27:27,395][0m Trial 14 finished with value: 0.8563989865784039 and parameters: {'max_depth': 4, 'learning_rate': 0.04265556876461471, 'gamma': 18.86580019000732, 'min_child_weight': 1.821512367037229, 'subsample': 0.5700000000000001, 'colsample_bytree': 0.49, 'colsample_bylevel': 0.74, 'reg_lambda': 0.002039343416426235, 'reg_alpha': 5.630725485512221e-08}. Best is trial 1 with value: 0.857205048013703.[0m


Fold 0 AUC: 0.856027 in 3.62 minutes.
Fold 1 AUC: 0.856783 in 3.31 minutes.
Fold 2 AUC: 0.852746 in 3.3 minutes.


[32m[I 2022-01-10 19:37:42,873][0m Trial 15 finished with value: 0.8564355200206368 and parameters: {'max_depth': 3, 'learning_rate': 0.01843745924152656, 'gamma': 8.262213920653986e-06, 'min_child_weight': 23.38493662314214, 'subsample': 0.39, 'colsample_bytree': 0.24000000000000002, 'colsample_bylevel': 0.31, 'reg_lambda': 1.1993974963950672e-08, 'reg_alpha': 89.18092727415969}. Best is trial 1 with value: 0.857205048013703.[0m


Fold 0 AUC: 0.855576 in 4.89 minutes.


[32m[I 2022-01-10 19:42:36,983][0m Trial 16 pruned. [0m


Fold 0 AUC: 0.854671 in 1.0 minutes.


[32m[I 2022-01-10 19:43:37,759][0m Trial 17 pruned. [0m


Fold 0 AUC: 0.855725 in 0.65 minutes.
Fold 1 AUC: 0.856598 in 0.6 minutes.
Fold 2 AUC: 0.85269 in 0.57 minutes.


[32m[I 2022-01-10 19:45:28,690][0m Trial 18 finished with value: 0.8567985407687959 and parameters: {'max_depth': 4, 'learning_rate': 0.05152652467521038, 'gamma': 0.008100595822439183, 'min_child_weight': 2.7096353120688743, 'subsample': 0.54, 'colsample_bytree': 0.56, 'colsample_bylevel': 0.32, 'reg_lambda': 5.193281518947832e-06, 'reg_alpha': 8.816281714216103e-09}. Best is trial 1 with value: 0.857205048013703.[0m


Fold 0 AUC: 0.855395 in 0.47 minutes.


[32m[I 2022-01-10 19:45:57,578][0m Trial 19 pruned. [0m


Fold 0 AUC: 0.855993 in 1.95 minutes.
Fold 1 AUC: 0.856997 in 2.08 minutes.
Fold 2 AUC: 0.852736 in 1.85 minutes.


[32m[I 2022-01-10 19:51:52,011][0m Trial 20 finished with value: 0.8569441877692707 and parameters: {'max_depth': 6, 'learning_rate': 0.011026575621996224, 'gamma': 0.7207161568371295, 'min_child_weight': 20.286540594915333, 'subsample': 0.30000000000000004, 'colsample_bytree': 0.41000000000000003, 'colsample_bylevel': 0.74, 'reg_lambda': 2.4571407041421995e-06, 'reg_alpha': 1.556106121819934e-05}. Best is trial 1 with value: 0.857205048013703.[0m


Fold 0 AUC: 0.856239 in 2.12 minutes.
Fold 1 AUC: 0.857008 in 2.07 minutes.
Fold 2 AUC: 0.853027 in 2.15 minutes.


[32m[I 2022-01-10 19:58:14,087][0m Trial 21 finished with value: 0.8569713557867276 and parameters: {'max_depth': 6, 'learning_rate': 0.010349200239541771, 'gamma': 0.6224700868957806, 'min_child_weight': 24.30123023371385, 'subsample': 0.33, 'colsample_bytree': 0.38, 'colsample_bylevel': 0.72, 'reg_lambda': 1.915413140734007e-06, 'reg_alpha': 8.170449559034687e-06}. Best is trial 1 with value: 0.857205048013703.[0m


Fold 0 AUC: 0.855897 in 2.0 minutes.
Fold 1 AUC: 0.856767 in 2.11 minutes.
Fold 2 AUC: 0.852668 in 2.19 minutes.


[32m[I 2022-01-10 20:04:33,900][0m Trial 22 finished with value: 0.8568230559597672 and parameters: {'max_depth': 7, 'learning_rate': 0.010362328216842211, 'gamma': 0.7019038096495663, 'min_child_weight': 19.457347103597996, 'subsample': 0.31, 'colsample_bytree': 0.4, 'colsample_bylevel': 0.67, 'reg_lambda': 2.0261419206067123e-06, 'reg_alpha': 5.039038032148258e-05}. Best is trial 1 with value: 0.857205048013703.[0m


Fold 0 AUC: 0.85556 in 1.51 minutes.


[32m[I 2022-01-10 20:06:05,233][0m Trial 23 pruned. [0m


Fold 0 AUC: 0.85505 in 1.97 minutes.


[32m[I 2022-01-10 20:08:04,157][0m Trial 24 pruned. [0m


Fold 0 AUC: 0.855723 in 0.98 minutes.


[32m[I 2022-01-10 20:09:03,477][0m Trial 25 pruned. [0m


Fold 0 AUC: 0.855343 in 2.74 minutes.


[32m[I 2022-01-10 20:11:48,268][0m Trial 26 pruned. [0m


Fold 0 AUC: 0.855913 in 1.37 minutes.
Fold 1 AUC: 0.856589 in 1.45 minutes.


[32m[I 2022-01-10 20:14:38,847][0m Trial 27 pruned. [0m


Fold 0 AUC: 0.848254 in 1.11 minutes.


[32m[I 2022-01-10 20:15:45,889][0m Trial 28 pruned. [0m


Fold 0 AUC: 0.856209 in 1.82 minutes.
Fold 1 AUC: 0.856948 in 2.09 minutes.
Fold 2 AUC: 0.852862 in 1.83 minutes.


[32m[I 2022-01-10 20:21:32,219][0m Trial 29 finished with value: 0.8569981893078715 and parameters: {'max_depth': 6, 'learning_rate': 0.017473290461865663, 'gamma': 0.4495422516432708, 'min_child_weight': 40.89059946487913, 'subsample': 1.0, 'colsample_bytree': 0.28, 'colsample_bylevel': 0.8600000000000001, 'reg_lambda': 3.3054786614803134e-08, 'reg_alpha': 0.18667768589557016}. Best is trial 1 with value: 0.857205048013703.[0m


Fold 0 AUC: 0.856392 in 2.71 minutes.
Fold 1 AUC: 0.857084 in 2.76 minutes.
Fold 2 AUC: 0.853279 in 2.82 minutes.


[32m[I 2022-01-10 20:29:51,471][0m Trial 30 finished with value: 0.8571265700121671 and parameters: {'max_depth': 7, 'learning_rate': 0.01696319622840795, 'gamma': 0.09241758601351309, 'min_child_weight': 44.84116293736015, 'subsample': 1.0, 'colsample_bytree': 0.29000000000000004, 'colsample_bylevel': 0.99, 'reg_lambda': 4.7951960511519966e-08, 'reg_alpha': 72.8170606569486}. Best is trial 1 with value: 0.857205048013703.[0m


Fold 0 AUC: 0.856392 in 3.14 minutes.
Fold 1 AUC: 0.8571 in 2.96 minutes.
Fold 2 AUC: 0.853181 in 3.12 minutes.


[32m[I 2022-01-10 20:39:06,668][0m Trial 31 finished with value: 0.8571600891315753 and parameters: {'max_depth': 7, 'learning_rate': 0.01652258524059627, 'gamma': 0.07036725804471833, 'min_child_weight': 45.01624964009418, 'subsample': 0.99, 'colsample_bytree': 0.28, 'colsample_bylevel': 0.98, 'reg_lambda': 2.6779582456290387e-09, 'reg_alpha': 68.17697136701538}. Best is trial 1 with value: 0.857205048013703.[0m


Fold 0 AUC: 0.856136 in 1.28 minutes.
Fold 1 AUC: 0.856903 in 1.17 minutes.
Fold 2 AUC: 0.852963 in 1.16 minutes.


[32m[I 2022-01-10 20:42:45,158][0m Trial 32 finished with value: 0.8570303251859833 and parameters: {'max_depth': 7, 'learning_rate': 0.04535962850977234, 'gamma': 0.002082607338842231, 'min_child_weight': 47.469706137931745, 'subsample': 1.0, 'colsample_bytree': 0.26, 'colsample_bylevel': 0.95, 'reg_lambda': 1.5263653184449147e-09, 'reg_alpha': 64.20126822539743}. Best is trial 1 with value: 0.857205048013703.[0m


Fold 0 AUC: 0.856042 in 1.37 minutes.
Fold 1 AUC: 0.856541 in 1.44 minutes.


[32m[I 2022-01-10 20:45:34,589][0m Trial 33 pruned. [0m


Fold 0 AUC: 0.85579 in 2.09 minutes.


[32m[I 2022-01-10 20:47:40,618][0m Trial 34 pruned. [0m


Fold 0 AUC: 0.853892 in 0.56 minutes.


[32m[I 2022-01-10 20:48:14,633][0m Trial 35 pruned. [0m


Fold 0 AUC: 0.854014 in 0.93 minutes.


[32m[I 2022-01-10 20:49:11,238][0m Trial 36 pruned. [0m


Fold 0 AUC: 0.854644 in 1.02 minutes.


[32m[I 2022-01-10 20:50:13,315][0m Trial 37 pruned. [0m


Fold 0 AUC: 0.855821 in 3.18 minutes.


[32m[I 2022-01-10 20:53:24,600][0m Trial 38 pruned. [0m


Fold 0 AUC: 0.855138 in 2.52 minutes.


[32m[I 2022-01-10 20:55:56,597][0m Trial 39 pruned. [0m


Fold 0 AUC: 0.855284 in 0.61 minutes.


[32m[I 2022-01-10 20:56:33,811][0m Trial 40 pruned. [0m


Fold 0 AUC: 0.855861 in 1.93 minutes.


[32m[I 2022-01-10 20:58:29,950][0m Trial 41 pruned. [0m


Fold 0 AUC: 0.856548 in 1.74 minutes.
Fold 1 AUC: 0.857267 in 1.77 minutes.
Fold 2 AUC: 0.853234 in 1.52 minutes.


[32m[I 2022-01-10 21:03:33,354][0m Trial 42 finished with value: 0.8572034951722479 and parameters: {'max_depth': 5, 'learning_rate': 0.021818876859233612, 'gamma': 3.830103934984407, 'min_child_weight': 54.23045451392181, 'subsample': 0.9000000000000001, 'colsample_bytree': 0.29000000000000004, 'colsample_bylevel': 0.8800000000000001, 'reg_lambda': 9.448901525244554e-10, 'reg_alpha': 18.750798356618432}. Best is trial 1 with value: 0.857205048013703.[0m


Fold 0 AUC: 0.856469 in 1.53 minutes.
Fold 1 AUC: 0.857331 in 1.49 minutes.
Fold 2 AUC: 0.853274 in 1.49 minutes.


[32m[I 2022-01-10 21:08:05,066][0m Trial 43 finished with value: 0.8572401149505615 and parameters: {'max_depth': 5, 'learning_rate': 0.026880720997682954, 'gamma': 2.5089080090119036, 'min_child_weight': 58.94694029769476, 'subsample': 0.9000000000000001, 'colsample_bytree': 0.23, 'colsample_bylevel': 0.9000000000000001, 'reg_lambda': 6.403194895991562e-10, 'reg_alpha': 25.092145873684277}. Best is trial 43 with value: 0.8572401149505615.[0m


Fold 0 AUC: 0.85662 in 2.8 minutes.
Fold 1 AUC: 0.857485 in 2.96 minutes.
Fold 2 AUC: 0.853523 in 2.92 minutes.


[32m[I 2022-01-10 21:16:47,482][0m Trial 44 finished with value: 0.857277651529734 and parameters: {'max_depth': 5, 'learning_rate': 0.012821160367713622, 'gamma': 3.048558277783227, 'min_child_weight': 67.70483838842425, 'subsample': 0.8900000000000001, 'colsample_bytree': 0.22, 'colsample_bylevel': 1.0, 'reg_lambda': 4.5259275709072366e-10, 'reg_alpha': 12.776367176386925}. Best is trial 44 with value: 0.857277651529734.[0m


Fold 0 AUC: 0.856469 in 1.63 minutes.
Fold 1 AUC: 0.857346 in 1.51 minutes.
Fold 2 AUC: 0.853361 in 1.51 minutes.


[32m[I 2022-01-10 21:21:28,654][0m Trial 45 finished with value: 0.8572309358699605 and parameters: {'max_depth': 4, 'learning_rate': 0.027156621923696878, 'gamma': 3.583586520346341, 'min_child_weight': 12.419697816704288, 'subsample': 0.8900000000000001, 'colsample_bytree': 0.22, 'colsample_bylevel': 0.9000000000000001, 'reg_lambda': 5.856561116773302e-10, 'reg_alpha': 16.79439576286433}. Best is trial 44 with value: 0.857277651529734.[0m


Fold 0 AUC: 0.856707 in 2.87 minutes.
Fold 1 AUC: 0.857562 in 2.95 minutes.
Fold 2 AUC: 0.853479 in 2.9 minutes.


[32m[I 2022-01-10 21:30:13,248][0m Trial 46 finished with value: 0.8572806188445146 and parameters: {'max_depth': 4, 'learning_rate': 0.012582311035292853, 'gamma': 4.423895023756934, 'min_child_weight': 10.986162188494852, 'subsample': 0.81, 'colsample_bytree': 0.22, 'colsample_bylevel': 0.9000000000000001, 'reg_lambda': 5.201700045225557e-10, 'reg_alpha': 7.9633143027509234}. Best is trial 46 with value: 0.8572806188445146.[0m


Fold 0 AUC: 0.856059 in 3.55 minutes.
Fold 1 AUC: 0.856951 in 3.8 minutes.
Fold 2 AUC: 0.852961 in 3.66 minutes.


[32m[I 2022-01-10 21:41:15,661][0m Trial 47 finished with value: 0.8565180568339759 and parameters: {'max_depth': 3, 'learning_rate': 0.0130615321624877, 'gamma': 13.818471527077508, 'min_child_weight': 12.868426142115025, 'subsample': 0.8300000000000001, 'colsample_bytree': 0.23, 'colsample_bylevel': 0.77, 'reg_lambda': 2.612328713182936e-10, 'reg_alpha': 1.3489202347655405}. Best is trial 46 with value: 0.8572806188445146.[0m


Fold 0 AUC: 0.853698 in 0.93 minutes.


[32m[I 2022-01-10 21:42:12,043][0m Trial 48 pruned. [0m


Fold 0 AUC: 0.856497 in 3.02 minutes.
Fold 1 AUC: 0.857411 in 3.26 minutes.
Fold 2 AUC: 0.853364 in 3.09 minutes.


[32m[I 2022-01-10 21:51:35,998][0m Trial 49 finished with value: 0.8571271277486897 and parameters: {'max_depth': 3, 'learning_rate': 0.01292012515933388, 'gamma': 2.951348738683981, 'min_child_weight': 4.344782548584235, 'subsample': 0.78, 'colsample_bytree': 0.36, 'colsample_bylevel': 0.42000000000000004, 'reg_lambda': 6.411832902042032e-09, 'reg_alpha': 0.0505835622207027}. Best is trial 46 with value: 0.8572806188445146.[0m


# Evaluation

## 1. Best Parameters

In [10]:
# Parameters of the trial with the highest holdout AUC (the study maximizes)
print("Best Parameters:", study.best_params)

Best Parameters: {'max_depth': 4, 'learning_rate': 0.012582311035292853, 'gamma': 4.423895023756934, 'min_child_weight': 10.986162188494852, 'subsample': 0.81, 'colsample_bytree': 0.22, 'colsample_bylevel': 0.9000000000000001, 'reg_lambda': 5.201700045225557e-10, 'reg_alpha': 7.9633143027509234}


## 2. Parameter Importances

In [11]:
# Optuna's estimate of how much each hyperparameter influenced the objective
plot_param_importances(study)

## 3. Parallel Coordinate Plot

Click on the vertical axes to see how certain parameter ranges affected the scores

In [12]:
# Likely broken on GitHub, view on Kaggle for interactive version
# One vertical axis per hyperparameter plus the objective value
plot_parallel_coordinate(study)

# Make Submission

In [13]:
%%time
# Reload the complete training set: `train` currently holds only the half
# left over from the holdout split, and the final models use every row.
train = dt.fread(r'../input/tabular-playground-series-oct-2021/train.csv').to_pandas()
test = dt.fread(r'../input/tabular-playground-series-oct-2021/test.csv').to_pandas()
# The sample submission provides the frame whose `target` column is filled in later
submission = dt.fread(r'../input/tabular-playground-series-oct-2021/sample_submission.csv').to_pandas()

# Downcast both frames to reduce memory before training
train = reduce_memory_usage(train)
test = reduce_memory_usage(test)
# Last expression of the cell, so its return value is shown as the cell output
gc.collect()

Mem. usage decreased to 963.21 Mb (48.7% reduction)
Mem. usage decreased to 481.13 Mb (48.8% reduction)
CPU times: user 57.5 s, sys: 1min 2s, total: 2min
Wall time: 1min 59s


37

In [14]:
# Similar to scoring function but trains on full data and predicts on test
def train_xgboost(folds, model_params=None, fit_params=None):
    """Fit one XGBoost model per fold of ``train`` and average test predictions.

    Each fold's held-out part serves as the early-stopping validation set,
    and its AUC is printed. Every fold model predicts on ``test``; the
    predictions are averaged with equal weight.

    Args:
        folds: Number of stratified folds, and so the number of models fit.
        model_params: Extra keyword arguments for ``XGBClassifier``. They are
            unpacked next to ``default_params``, so keys must not overlap.
        fit_params: Extra keyword arguments for ``XGBClassifier.fit``.

    Returns:
        1-D array with one averaged positive-class probability per test row.
    """
    # ``None`` defaults avoid sharing one mutable dict between calls
    model_params = {} if model_params is None else model_params
    fit_params = {} if fit_params is None else fit_params

    # Running average of the per-fold test predictions
    test_preds = np.zeros((test.shape[0],))
    print('')

    # Stratified k-fold cross-validation
    skf = StratifiedKFold(n_splits=folds, shuffle=True, random_state=RANDOM_SEED)
    for fold, (train_idx, valid_idx) in enumerate(skf.split(train, train['target'])):

        # Training and validation sets for this fold
        start = time.time()
        X_train, y_train = train[features].iloc[train_idx], train['target'].iloc[train_idx]
        X_valid, y_valid = train[features].iloc[valid_idx], train['target'].iloc[valid_idx]

        # Define model
        model = XGBClassifier(**default_params, **model_params)
        gc.collect()

        model.fit(
            X_train, y_train,
            verbose=False,
            eval_set=[(X_valid, y_valid)],
            eval_metric="logloss",
            early_stopping_rounds=EARLY_STOP,
            **fit_params
        )

        # Validation and test predictions (probability of the positive class)
        valid_preds = model.predict_proba(X_valid)[:, 1]
        test_preds += model.predict_proba(test[features])[:, 1] / folds

        # Fold AUC score
        fold_auc = roc_auc_score(y_valid, valid_preds)
        end = time.time()
        print(f'Fold {fold} AUC: {round(fold_auc, 6)} in {round((end-start) / 60, 2)} minutes.')

    return test_preds

In [15]:
# Make submission
# Retrain with the best parameters found by the study using 6 folds; the
# fold-averaged test probabilities become the submitted `target` column.
submission['target'] = train_xgboost(6, model_params = study.best_params)
submission.to_csv('xgboost_submission.csv', index=False)


Fold 0 AUC: 0.857229 in 8.04 minutes.
Fold 1 AUC: 0.857097 in 7.72 minutes.
Fold 2 AUC: 0.856949 in 7.73 minutes.
Fold 3 AUC: 0.857537 in 7.87 minutes.
Fold 4 AUC: 0.857272 in 7.81 minutes.
Fold 5 AUC: 0.856688 in 7.4 minutes.


I hope you found this notebook useful. Feel free to fork it and adapt it to your own uses.