Hyperparameter tuning for XGBoost with Optuna, following the approach from Abhishek Thakur's videos.
The folds were created in this notebook: https://www.kaggle.com/rishirajacharya/tps-sep-2021-kfold
The resulting dataset with folds is here: https://www.kaggle.com/rishirajacharya/tpssep21-folds

Please show some love if this was helpful.

In [None]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.metrics import roc_auc_score
from xgboost import XGBClassifier
import optuna

In [None]:
# Load the training rows (which carry a `kfold` column), the test rows and the
# submission template.
df = pd.read_csv("../input/tpssep21-folds/train_folds.csv")
df_test = pd.read_csv("../input/tabular-playground-series-sep-2021/test.csv")
sample_submission = pd.read_csv(
    "../input/tabular-playground-series-sep-2021/sample_solution.csv"
)

# Every column except the row id, the target and the fold label is a model input.
useful_features = [
    column for column in df.columns if column not in {"id", "claim", "kfold"}
]
# Restrict the test frame to the model inputs, in the same column order as training.
df_test = df_test.loc[:, useful_features]

In [None]:
from sklearn.impute import SimpleImputer

# Imputation
# Fit a single imputer on the training feature columns and reuse it for the
# test set, so both are filled with the same (training) statistics. Fitting a
# second imputer on the test data would fill train and test differently.
# NOTE(review): the fill values are still computed across all folds, so the
# validation fold contributes to them; move this into the objective if that
# matters for the comparison.
my_imputer = SimpleImputer()

# Only the feature columns are imputed; `id`, `claim` and `kfold` keep their
# original values and dtypes, and the column names/index are preserved.
imputed_df = df.copy()
imputed_df[useful_features] = my_imputer.fit_transform(df[useful_features])

imputed_df_test = pd.DataFrame(
    my_imputer.transform(df_test[useful_features]),
    columns=useful_features,
    index=df_test.index,
)

df = imputed_df
df_test = imputed_df_test

In [None]:
def run(trial, *, fold=0):
    """Optuna objective: fit XGBoost on all folds but one, score the held-out fold.

    Args:
        trial: Optuna trial used to sample the hyperparameters.
        fold: Value of the ``kfold`` column held out for validation. Defaults
            to 0, so ``study.optimize(run, ...)`` behaves as before.

    Returns:
        ROC AUC of the predicted positive-class probabilities on the
        held-out fold.
    """
    # suggest_float(..., log=True) replaces the deprecated suggest_loguniform
    # and matches how learning_rate is sampled.
    learning_rate = trial.suggest_float("learning_rate", 1e-2, 0.25, log=True)
    reg_lambda = trial.suggest_float("reg_lambda", 1e-8, 100.0, log=True)
    reg_alpha = trial.suggest_float("reg_alpha", 1e-8, 100.0, log=True)
    subsample = trial.suggest_float("subsample", 0.1, 1.0)
    colsample_bytree = trial.suggest_float("colsample_bytree", 0.1, 1.0)
    max_depth = trial.suggest_int("max_depth", 1, 7)

    # Split on the precomputed fold label.
    xtrain = df[df.kfold != fold].reset_index(drop=True)
    xvalid = df[df.kfold == fold].reset_index(drop=True)

    ytrain = xtrain.claim
    yvalid = xvalid.claim

    xtrain = xtrain[useful_features]
    xvalid = xvalid[useful_features]

    model = XGBClassifier(
        random_state=42,
        tree_method="gpu_hist",
        gpu_id=0,
        predictor="gpu_predictor",
        # Upper bound only; early stopping below decides the actual tree count.
        n_estimators=7000,
        learning_rate=learning_rate,
        reg_lambda=reg_lambda,
        reg_alpha=reg_alpha,
        subsample=subsample,
        colsample_bytree=colsample_bytree,
        max_depth=max_depth,
    )
    model.fit(
        xtrain,
        ytrain,
        early_stopping_rounds=300,
        eval_set=[(xvalid, yvalid)],
        verbose=1000,
    )

    # AUC is a ranking metric: score it on the positive-class probability.
    # Hard labels from predict() reduce the ROC curve to a single point.
    preds_valid = model.predict_proba(xvalid)[:, 1]
    return roc_auc_score(yvalid, preds_valid)

In [None]:
# The objective returns an AUC, so higher is better.
study = optuna.create_study(direction="maximize")
# Evaluate five sampled hyperparameter configurations with the `run` objective.
study.optimize(func=run, n_trials=5)

In [None]:
# Hyperparameters of the best-scoring trial (shown as the cell output).
study.best_params