<a href="https://www.kaggle.com/code/mmellinger66/s3e10-pulsar-models?scriptVersionId=122589216" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

 <div style="background-color:rgba(177, 156, 217, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Playground Season 3: Episode 10 - Pulsar Models</h1>
</div>

## Problem Type

Binary Classification

## Evaluation Metric

LogLoss

$$
LogLoss = \frac{1}{n} \sum_{i=1}^n [y_i log(\hat{y}_i) + (1 - y_i)log(1-\hat{y}_i)]
$$
```python
```

<div style="background-color:rgba(177, 156, 217, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Import Libraries</h1>
</div>

In [1]:
from typing import List, Set, Dict, Tuple, Optional

import os
import time
from pathlib import Path
import glob
import gc

import pandas as pd
import numpy as np

from sklearn import cluster
from sklearn import datasets
from sklearn import decomposition
from sklearn import ensemble
from sklearn import impute
from sklearn import linear_model
from sklearn import metrics
from sklearn import model_selection
from sklearn import preprocessing
from sklearn import svm


import xgboost as xgb
import catboost as cb
import lightgbm as lgb

import optuna
from optuna.visualization import plot_optimization_history, plot_param_importances

from scipy.special import boxcox1p
from scipy.stats import boxcox_normmax

# Visualization Libraries
import matplotlib as mpl
import matplotlib.pylab as plt
import seaborn as sns
import missingno as msno
from folium import Map
from folium.plugins import HeatMap
from IPython.display import display_html, display_markdown, display_latex
from colorama import Fore, Style

import warnings
warnings.filterwarnings('ignore')

pd.set_option("display.max_rows", 999)
pd.set_option("display.precision", 5)

<div style="background-color:rgba(177, 156, 217, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Configuration</h1>
</div>

In [2]:
TARGET="Class"
ID="id"

# Optuna
objective_direction = "minimize" # log_loss , auc , minimize, maximize

In [3]:
class Config:
    ensemble_models:bool = True
    optimize:bool = True
    N_ESTIMATORS:int = 500  # 100, 300, 1000, 2000, 5000, 15_000, 20_000 GBDT

    path:str = "../input/playground-series-s3e10/"
    load_original_data:bool = False # Some Competitions use synthetic data, based on real data
    original_data_path:str = "../input/pulsar-classification-for-class-prediction/Pulsar.csv"

    gpu:bool = False
    n_optuna_trials:int = 50 # 5, 10, 30, 50, 100
    fast_render:bool = False
    calc_probability:bool = True
    debug:bool = False
    seed:int = 42
    GPU_N_ESTIMATORS:int = 2000 # Want models to run fast during dev
    N_FOLDS:int = 20
        

In [4]:
class clr:
    S = Style.BRIGHT + Fore.LIGHTRED_EX
    E = Style.RESET_ALL

<div style="background-color:rgba(177, 156, 217, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Library</h1>
</div>

In [5]:
def read_data(path: str, analyze:bool=True) -> (pd.DataFrame, pd.DataFrame, pd.DataFrame):
    data_dir = Path(path)

    train = pd.read_csv(data_dir / "train.csv")
    test = pd.read_csv(data_dir / "test.csv")
    submission_df = pd.read_csv(data_dir / "sample_submission.csv")

    if analyze:
        print(clr.S + "=== Shape of Data ==="+clr.E)
        print(f" train data: Rows={train.shape[0]}, Columns={train.shape[1]}")
        print(f" test data : Rows={test.shape[0]}, Columns={test.shape[1]}")

        print(clr.S + "\n=== Train Data: First 5 Rows ===\n"+clr.E)
        display(train.head())
        print(f"\n{clr.S}=== Train Column Names ==={clr.E}\n")
        display(train.columns)
        print(f"\n{clr.S}=== Features/Explanatory Variables ==={clr.E}\n")
        eval_features(train)
        print(f"\n{clr.S}=== Skewness ==={clr.E}\n")
        check_skew(train)
    return train, test, submission_df

def create_submission(model_name: str, target, preds, seed:int=42, nfolds:int=5) -> pd.DataFrame:
    sample_submission[target] = preds #.astype(int)

    if len(model_name) > 0:
        fname = f"submission_{model_name}_k{nfolds}_s{seed}.csv"
    else:
        fname = "submission.csv"

    sample_submission.to_csv(fname, index=False)

    return sample_submission

def show_classification_scores(ground_truth:List[int], yhat:List[int]) -> None:
    accuracy = metrics.accuracy_score(ground_truth, yhat)
    precision = metrics.precision_score(ground_truth, yhat)
    recall = metrics.recall_score(ground_truth, yhat)
    roc = metrics.roc_auc_score(ground_truth, yhat)
    f1 = metrics.f1_score(ground_truth, yhat)

    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"ROC: {roc:.4f}")
    print(f"f1: {f1:.4f}")
    

def label_encoder(train:pd.DataFrame, test:pd.DataFrame, columns:List[str]) -> (pd.DataFrame, pd.DataFrame) :
    for col in columns:
        train[col] = train[col].astype(str)
        test[col] = test[col].astype(str)
        train[col] = preprocessing.LabelEncoder().fit_transform(train[col])
        test[col] = preprocessing.LabelEncoder().fit_transform(test[col])
    return train, test   

def create_strat_folds(df:pd.DataFrame, TARGET, n_folds:int=5, seed:int=42) -> pd.DataFrame:
    print(f"TARGET={TARGET}, n_folds={n_folds}, seed={seed}")
    df["fold"] = -1

    kf = model_selection.StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=seed)
    # kf = GroupKFold(n_splits=Config.N_FOLDS)
    for fold, (train_idx, valid_idx) in enumerate(kf.split(df, df[TARGET])):
        df.loc[valid_idx, "fold"] = fold

    # df.to_csv(f"train_fold{num_folds}.csv", index=False)
    return df


def create_folds(df:pd.DataFrame, n_folds:int=5, seed:int=42) -> pd.DataFrame:
    print(f"n_folds={n_folds}, seed={seed}")
    df["fold"] = -1

    kf = model_selection.KFold(n_splits=n_folds, shuffle=True, random_state=seed)

    for fold, (train_idx, valid_idx) in enumerate(kf.split(df)):
        df.loc[valid_idx, "fold"] = fold

    # df.to_csv(f"train_fold{num_folds}.csv", index=False)
    return df

def show_fold_scores(scores: List[float]) -> (float, float):
    cv_score = np.mean(scores)  # Used in filename
    std_dev = np.std(scores)
    print(
        f"Scores -> Adjusted: {np.mean(scores) - np.std(scores):.8f} , mean: {np.mean(scores):.8f}, std: {np.std(scores):.8f}"
    )
    return cv_score, std_dev


def feature_distribution_types(df:pd.DataFrame, display:bool=True) -> (List[str], List[str]):
    continuous_features = list(df.select_dtypes(include=['int64', 'float64', 'uint8']).columns)
    categorical_features = list(df.select_dtypes(include=['object', 'bool']).columns)
    if display:
        print(f"{clr.S}Continuous Features={continuous_features}{clr.E}\n")
        print(f"{clr.S}Categorical Features={categorical_features}{clr.E}")
    return continuous_features, categorical_features   

def show_cardinality(df:pd.DataFrame, features:List[str]) -> None:
    print("=== Cardinality ===")
    print(df[features].nunique())

## === Model Support ===    

from scipy.stats import mode


def merge_test_predictions(final_test_predictions:List[float], calc_probability:bool=True) -> List[float]:

    if calc_probability:
        print("Mean")
        result = np.mean(np.column_stack(final_test_predictions), axis=1)
    else:
        print("Mode")
        mode_result = mode(np.column_stack(final_test_predictions), axis=1)
        result = mode_result[0].ravel()

    return result

def summary_statistics(X:pd.DataFrame, enhanced=True) -> None:
    desc = X.describe()
    if enhanced:
        desc.loc["var"] = X.var(numeric_only=True).tolist()
        desc.loc["skew"] = X.skew(numeric_only=True).tolist()
        desc.loc["kurt"] = X.kurtosis(numeric_only=True).tolist()

    with pd.option_context("display.precision", 2):
        style = desc.transpose().style.background_gradient(
            cmap="coolwarm"
        )  # .set_precision(4)
    display(style)
    
def show_missing_features(df:pd.DataFrame) -> None:
    missing_vals = df.isna().sum().sort_values(ascending=False)
    print(missing_vals[missing_vals > 0])


def show_duplicate_records(df:pd.DataFrame) -> None:
    dups = df.duplicated()
    print(dups.sum())


def eval_features(df:pd.DataFrame) -> (List[str], List[str], List[str]):
    ## Separate Categorical and Numerical Features
    categorical_features = list(
        df.select_dtypes(include=["category", "object"]).columns
    )
    continuous_features = list(df.select_dtypes(include=["number"]).columns)

    print(f"{clr.S}Continuous features:{clr.E} {continuous_features}")
    print(f"{clr.S}Categorical features:{clr.E} {categorical_features}")
    print("\n --- Cardinality of Categorical Features ---\n")

    for feature in categorical_features:
        cardinality = df[feature].nunique()
        if cardinality < 10:
            print(f"{clr.S}{feature}{clr.E}: cardinality={cardinality}, {df[feature].unique()}")
        else:
            print(f"{clr.S}{feature}{clr.E}: cardinality={cardinality}")
    all_features = categorical_features + continuous_features
    return all_features, categorical_features, continuous_features


def show_feature_importance(feature_importance_lst:List[str]) -> None:
    fis_df = pd.concat(feature_importance_lst, axis=1)

    fis_df.sort_values("0_importance", ascending=True).head(40).plot(
        kind="barh", figsize=(12, 12), title="Feature Importance Across Folds"
    )
    plt.show()


def show_feature_target_crosstab(df:pd.DataFrame, feature_lst:List[str], target:str) -> None:
    for feature in feature_lst:
        print(f"\n=== {feature} vs {target} ===\n")
        display(
            pd.crosstab(df[feature], df[target], margins=True)
        )  # display keeps bold formatting


def show_cardinality(df:pd.DataFrame, features:List[str]) -> None:
    print(f"{clr.S}=== Cardinality ==={clr.E}")
    print(df[features].nunique())


def show_unique_features(df:pd.DataFrame, features:List[str]) -> None:
    for col in features:
        print(col, sorted(df[col].dropna().unique()))


def feature_distribution_types(df:pd.DataFrame, display:bool=True) -> (List[str], List[str]):
    continuous_features = list(
        df.select_dtypes(include=["int64", "float64", "uint8"]).columns
    )
    categorical_features = list(df.select_dtypes(include=["object", "bool"]).columns)
    if display:
        print(f"{clr.S}Continuous Features={clr.E}{continuous_features}\n")
        print(f"{clr.S}Categorical Features={clr.E}{categorical_features}")
    return continuous_features, categorical_features


def describe(X:pd.DataFrame) -> None:
    """Deprecated: Use summary_statistics()"""
    desc = X.describe()
    desc.loc['var'] = X.var(numeric_only=True).tolist()
    desc.loc['skew'] = X.skew(numeric_only=True).tolist()
    desc.loc['kurt'] = X.kurtosis(numeric_only=True).tolist()

    with pd.option_context('display.precision', 2):
        style = desc.transpose().style.background_gradient(cmap='coolwarm') #.set_precision(4)
    display(style)
  

def check_skew(df:pd.DataFrame) -> None:
    skew = df.skew(skipna=True,numeric_only=True).sort_values(ascending=False)
    print(skew)
    
def gpu_ify_lgbm(lgbm_dict):
    if Config.gpu:
        lgbm_dict["device"] = "gpu"
        lgbm_dict["boosting_type"] = "gbdt"
        lgbm_dict["gpu_platform_id"] = 0
        lgbm_dict["gpu_device_id"] = 0
    return lgbm_dict

def gpu_ify_cb(params):
    if Config.gpu:
        params["task_type"] = "GPU"
    return params    


<div style="background-color:rgba(177, 156, 217, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Optuna Hyperparameter Optimization Library</h1>
</div>

In [6]:
def objective_xgb(trial, X_train, X_valid, y_train, y_valid):

    xgb_params = {
        #         "objective": trial.suggest_categorical("objective", ["multi:softmax"]),
        #         "eval_metric": "mlogloss",
        #         "objective": "multi:softmax",
#         "objective": trial.suggest_categorical("objective", ["mae", "rmse"]),

        "eval_metric": "rmse",  # auc, rmse, mae, logloss
        "objective": "reg:squarederror", # Normal Distribution
#         "objective": "reg:gamma", # Gamma Distribution

        #         "enable_categorical": trial.suggest_categorical("use_label_encoder", [True]),
        "use_label_encoder": trial.suggest_categorical("use_label_encoder", [False]),
        "n_estimators": trial.suggest_int("n_estimators", 1000, 5000, 100),
        "learning_rate": trial.suggest_loguniform("learning_rate", 1e-2, 0.25),
        "subsample": trial.suggest_float("subsample", 0.1, 1, step=0.01),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.05, 1, step=0.01),
        "max_depth": trial.suggest_int("max_depth", 1, 20),  # 10
        "gamma": trial.suggest_float("gamma", 0, 100, step=0.1),
        "booster": trial.suggest_categorical("booster", ["gbtree"]),
        "tree_method": trial.suggest_categorical(
            "tree_method", ["hist"]
        ),  # hist, gpu_hist
#         "predictor": "gpu_predictor",
        "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 100),
        "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 100),
        "random_state": trial.suggest_categorical("random_state", [42]),
        "n_jobs": trial.suggest_categorical("n_jobs", [4]),
        "min_child_weight": trial.suggest_loguniform("min_child_weight", 1e-1, 1e3),
        # "min_child_weight": trial.suggest_categorical("min_child_weight", [256]),
    }

    # Model loading and training
    model = xgb.XGBRegressor(**xgb_params)
    model.fit(
        X_train,
        y_train,
        eval_set=[(X_train, y_train), (X_valid, y_valid)],
        early_stopping_rounds=5000,
        verbose=0,
    )

    print(f"Number of boosting rounds: {model.best_iteration}")
    #     oof = model.predict_proba(X_valid)[:, 1] # Probability
    oof = model.predict(X_valid)  # Classification: 0,1

    return metrics.mean_squared_error(y_valid, oof, squared=False)

def objective_clf_xgb(trial, X_train, X_valid, y_train, y_valid):

    xgb_params = {
        #         "objective": trial.suggest_categorical("objective", ["multi:softmax"]),
        #         "eval_metric": "mlogloss",
        #         "objective": "multi:softmax",
        "eval_metric": "logloss",  # auc, rmse, mae, logloss
        "objective": "binary:logistic",
        #         "enable_categorical": trial.suggest_categorical("use_label_encoder", [True]),
        "use_label_encoder": trial.suggest_categorical("use_label_encoder", [False]),
        "n_estimators": trial.suggest_int("n_estimators", 1000, 5000, 100),
        "learning_rate": trial.suggest_loguniform("learning_rate", 1e-2, 0.25),
        "subsample": trial.suggest_float("subsample", 0.1, 1, step=0.01),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.05, 1, step=0.01),
        "max_depth": trial.suggest_int("max_depth", 1, 20),  # 10
        "gamma": trial.suggest_float("gamma", 0, 100, step=0.1),
        "booster": trial.suggest_categorical("booster", ["gbtree"]),
        "tree_method": trial.suggest_categorical(
            "tree_method", ["hist"]
        ),  # hist, gpu_hist
        #         "predictor": "gpu_predictor",
        "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 100),
        "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 100),
        "random_state": trial.suggest_categorical("random_state", [42]),
        "n_jobs": trial.suggest_categorical("n_jobs", [4]),
        "min_child_weight": trial.suggest_loguniform("min_child_weight", 1e-1, 1e3),
        # "min_child_weight": trial.suggest_categorical("min_child_weight", [256]),
    }

    # Model loading and training
    model = xgb.XGBClassifier(**xgb_params)
    model.fit(
        X_train,
        y_train,
        eval_set=[(X_train, y_train), (X_valid, y_valid)],
        early_stopping_rounds=5000,
        verbose=0,
    )

    print(f"Number of boosting rounds: {model.best_iteration}")
    #     oof = model.predict_proba(X_valid)[:, 1] # Probability
    oof = model.predict(X_valid)  # Classification: 0,1
#     validation_error = metrics.roc_auc_score(y_valid, oof)
      
    validation_error = metrics.log_loss(y_valid, oof)
    
    return validation_error


def objective_lgbm(trial, X_train, X_valid, y_train, y_valid):

    lgbm_params = {
        "objective": trial.suggest_categorical("objective", ["mae", "rmse"]),
        #         "n_estimators": trial.suggest_categorical("n_estimators", [1_000]),
        #         "n_estimators": trial.suggest_categorical("n_estimators", [5000]),
        "n_estimators": trial.suggest_int("n_estimators", 700, 5000),
        "importance_type": "gain",
        "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
        "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.05, 1, step=0.01),
        "num_leaves": trial.suggest_int("num_leaves", 2, 1000),
        "feature_fraction": trial.suggest_uniform("feature_fraction", 0.1, 1.0),
        "bagging_fraction": trial.suggest_uniform("bagging_fraction", 0.1, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 0, 15),
        "min_child_samples": trial.suggest_int("min_child_samples", 1, 300),
        "subsample": trial.suggest_float("subsample", 0.1, 1, step=0.01),
        "learning_rate": trial.suggest_loguniform("learning_rate", 1e-2, 0.25),
        "max_depth": trial.suggest_int("max_depth", 1, 100),
        "random_state": trial.suggest_categorical("random_state", [42]),
        "n_jobs": trial.suggest_categorical("n_jobs", [4]),
        #         'min_child_weight': trial.suggest_loguniform('min_child_weight', 1e-1, 1e3),
        # "min_child_weight": trial.suggest_categorical("min_child_weight", [256]),
    }

    # Model loading and training
    model = lgb.LGBMRegressor(**lgbm_params)
    model.fit(
        X_train,
        y_train,
        eval_set=[(X_train, y_train), (X_valid, y_valid)],
        eval_metric="mae",
        callbacks=[
            lgb.log_evaluation(500),
            lgb.early_stopping(500, False, True),
        ],
    )

    #     print(f"Number of boosting rounds: {model.best_iteration}")
    oof = model.predict(X_valid)

    return metrics.mean_squared_error(y_valid, oof, squared=False)
#     return metrics.mean_absolute_error(y_valid, oof)


def objective_clf_lgbm(trial, X_train, X_valid, y_train, y_valid):

    params = {
        "boosting_type": "gbdt",
        # "objective": trial.suggest_categorical("objective", ["mae", "rmse"]),
        #         "objective": trial.suggest_categorical("objective", ["multi:softprob"]),
        #         "n_estimators": trial.suggest_categorical("n_estimators", [1_000]),
        #         "n_estimators": trial.suggest_categorical("n_estimators", [5000]),
        "n_estimators": trial.suggest_int("n_estimators", 700, 1000),
        "importance_type": "gain",
        "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
        "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.05, 1, step=0.01),
        "num_leaves": trial.suggest_int("num_leaves", 2, 1000),
        "feature_fraction": trial.suggest_uniform("feature_fraction", 0.1, 1.0),
        "bagging_fraction": trial.suggest_uniform("bagging_fraction", 0.1, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 0, 15),
        "min_child_samples": trial.suggest_int("min_child_samples", 1, 300),
        "subsample": trial.suggest_float("subsample", 0.1, 1, step=0.01),
        "learning_rate": trial.suggest_loguniform("learning_rate", 1e-2, 0.25),
        "max_depth": trial.suggest_int("max_depth", 1, 100),
        "random_state": trial.suggest_categorical("random_state", [42]),
        "n_jobs": trial.suggest_categorical("n_jobs", [4]),
        #         'min_child_weight': trial.suggest_loguniform('min_child_weight', 1e-1, 1e3),
        # "min_child_weight": trial.suggest_categorical("min_child_weight", [256]),
    }
    if Config.gpu:
        params["device_type"] = "gpu"

    # Model loading and training
    model = lgb.LGBMClassifier(**params)
    model.fit(
        X_train,
        y_train,
        eval_set=[(X_train, y_train), (X_valid, y_valid)],
        # eval_metric="mae",
        callbacks=[
            lgb.log_evaluation(500),
            lgb.early_stopping(500, False, True),
        ],
    )

    #     print(f"Number of boosting rounds: {model.best_iteration}")
    oof = model.predict(X_valid)

    #     return accuracy_score(y_valid, oof)
    return metrics.roc_auc_score(y_valid, oof)


def objective_cb(trial, X_train, X_valid, y_train, y_valid):

    cb_params = {
        "iterations": 100,
        "learning_rate": trial.suggest_loguniform("learning_rate", 0.1, 1.0),
        "l2_leaf_reg": trial.suggest_loguniform("l2_leaf_reg", 1, 100),
        "bagging_temperature": trial.suggest_loguniform(
            "bagging_temperature", 0.1, 20.0
        ),
        "random_strength": trial.suggest_float("random_strength", 1.0, 2.0),
        "depth": trial.suggest_int("depth", 1, 10),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 300),
          "use_best_model": True,
#         "task_type": "GPU",
        "random_seed": 42,
    }

    # Model loading and training
    #  model = CatBoostClassifier(**cb_params)
    model = cb.CatBoostRegressor(**cb_params)

    model.fit(
        X_train,
        y_train,
        eval_set=[(X_train, y_train), (X_valid, y_valid)],
        # eval_metric="accuracy",
        early_stopping_rounds=500,
        verbose=False,
    )

#     print(f"Number of boosting rounds: {model.best_iteration}")
    # oof = model.predict_proba(X_valid)[:, 1]
    oof = model.predict(X_valid)  # Classification
    return metrics.mean_squared_error(y_valid, oof, squared=False)
#     return metrics.mean_absolute_error(y_valid, oof)
# 
#     return accuracy_score(y_valid, oof)

def objective_clf_cb(trial, X_train, X_valid, y_train, y_valid):

    cb_params = {
        "iterations": 10,  # 1000
        "learning_rate": trial.suggest_loguniform("learning_rate", 0.1, 1.0),
        "l2_leaf_reg": trial.suggest_loguniform("l2_leaf_reg", 1, 100),
        "bagging_temperature": trial.suggest_loguniform(
            "bagging_temperature", 0.1, 20.0
        ),
        "random_strength": trial.suggest_float("random_strength", 1.0, 2.0),
        "depth": trial.suggest_int("depth", 1, 10),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 300),
        "use_best_model": True,
#             "task_type": "GPU",
        "random_seed": 42,
    }

    # Model loading and training
    model = cb.CatBoostClassifier(**cb_params)
    model.fit(
        X_train,
        y_train,
        eval_set=[(X_train, y_train), (X_valid, y_valid)],
        # eval_metric="accuracy",
        early_stopping_rounds=500,
        verbose=False,
    )

    # print(f"Number of boosting rounds: {model.best_iteration}")
    # oof = model.predict_proba(X_valid)[:, 1]
    oof = model.predict(X_valid)  # Classification
    return metrics.roc_auc_score(y_valid, oof)

#     return metrics.accuracy_score(y_valid, oof)

<div style="background-color:rgba(177, 156, 217, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Load Train/Test Data and Analyze</h1>
</div>

## Load the following files

 - train.csv - Data used to build our machine learning model
 - test.csv - Data used to build our machine learning model. Does not contain the target variable
 - sample_submission.csv - A file in the proper format to submit test predictions

In [7]:
%%time
train, test, sample_submission = read_data(Config.path, analyze=True)                                

[1m[91m=== Shape of Data ===[0m
 train data: Rows=117564, Columns=10
 test data : Rows=78377, Columns=9
[1m[91m
=== Train Data: First 5 Rows ===
[0m


Unnamed: 0,id,Mean_Integrated,SD,EK,Skewness,Mean_DMSNR_Curve,SD_DMSNR_Curve,EK_DMSNR_Curve,Skewness_DMSNR_Curve,Class
0,0,133.17188,59.71608,0.04313,-0.70338,54.91722,70.08444,0.7498,-0.64951,0
1,1,87.09375,36.25797,0.43547,2.26606,3.41722,21.86507,7.03933,52.68625,0
2,2,112.64062,39.81839,0.37964,0.92231,2.73077,15.68969,8.19347,85.64978,0
3,3,120.67969,45.91845,-0.09849,0.01178,2.69649,20.95466,8.18387,70.3329,0
4,4,134.07031,57.72011,-0.10777,-0.57334,1.10786,11.25505,16.10775,308.75377,0



[1m[91m=== Train Column Names ===[0m



Index(['id', 'Mean_Integrated', 'SD', 'EK', 'Skewness', 'Mean_DMSNR_Curve',
       'SD_DMSNR_Curve', 'EK_DMSNR_Curve', 'Skewness_DMSNR_Curve', 'Class'],
      dtype='object')


[1m[91m=== Features/Explanatory Variables ===[0m

[1m[91mContinuous features:[0m ['id', 'Mean_Integrated', 'SD', 'EK', 'Skewness', 'Mean_DMSNR_Curve', 'SD_DMSNR_Curve', 'EK_DMSNR_Curve', 'Skewness_DMSNR_Curve', 'Class']
[1m[91mCategorical features:[0m []

 --- Cardinality of Categorical Features ---


[1m[91m=== Skewness ===[0m

Skewness                4.39758
EK                      3.43500
Mean_DMSNR_Curve        3.42471
Class                   2.79694
Skewness_DMSNR_Curve    2.37403
SD_DMSNR_Curve          2.01034
id                      0.00000
EK_DMSNR_Curve         -0.04169
SD                     -0.52388
Mean_Integrated        -1.84135
dtype: float64
CPU times: user 288 ms, sys: 71.4 ms, total: 359 ms
Wall time: 672 ms


In [8]:
train.head()

Unnamed: 0,id,Mean_Integrated,SD,EK,Skewness,Mean_DMSNR_Curve,SD_DMSNR_Curve,EK_DMSNR_Curve,Skewness_DMSNR_Curve,Class
0,0,133.17188,59.71608,0.04313,-0.70338,54.91722,70.08444,0.7498,-0.64951,0
1,1,87.09375,36.25797,0.43547,2.26606,3.41722,21.86507,7.03933,52.68625,0
2,2,112.64062,39.81839,0.37964,0.92231,2.73077,15.68969,8.19347,85.64978,0
3,3,120.67969,45.91845,-0.09849,0.01178,2.69649,20.95466,8.18387,70.3329,0
4,4,134.07031,57.72011,-0.10777,-0.57334,1.10786,11.25505,16.10775,308.75377,0


In [9]:
def load_original_data(path:str) -> pd.DataFrame:
#     original = pd.read_csv(path, index_col=[0])
    original = pd.read_csv(path)

    original = original.reset_index()
    original['id'] = original['index'] + 100000
    original = original.drop(columns = ['index'])
    original = original.rename(columns = {'CementComponent ':'CementComponent'})
    original.set_index('id', inplace=True)
#     original = original[-original.depth.isna()]
    print(f"Shape={original.shape}")
    return original
#     original.head()

if Config.load_original_data:    
    original = load_original_data(Config.original_data_path)
    display(original.head())

In [10]:
if Config.load_original_data:
    train['is_original']    = 0
    test['is_original']     = 0
    original['is_original'] = 1
#     combined = pd.concat([train, original], ignore_index=True) #.drop_duplicates()
    combined = pd.concat([train, original])

    train = combined
#     combined.head()
    print(f"Shape={combined.shape}")

In [11]:
summary_statistics(train.drop(columns=[ID], axis=1), enhanced=True)

Unnamed: 0,count,mean,std,min,25%,50%,75%,max,var,skew,kurt
Mean_Integrated,117564.0,111.25,24.91,6.05,104.55,116.66,126.3,189.37,620.33,-1.84,3.9
SD,117564.0,46.71,6.1,24.78,43.44,47.48,50.86,93.6,37.25,-0.52,0.76
EK,117564.0,0.5,1.13,-1.73,0.05,0.19,0.4,7.88,1.27,3.43,11.75
Skewness,117564.0,1.89,6.52,-1.79,-0.19,0.09,0.69,65.39,42.45,4.4,20.58
Mean_DMSNR_Curve,117564.0,11.96,26.72,0.21,2.09,2.81,4.12,217.37,713.96,3.42,12.15
SD_DMSNR_Curve,117564.0,26.19,20.04,7.37,14.96,18.16,24.73,109.89,401.68,2.01,2.96
EK_DMSNR_Curve,117564.0,8.04,3.84,-2.6,6.74,8.44,10.0,34.54,14.75,-0.04,1.37
Skewness_DMSNR_Curve,117564.0,93.88,79.96,-1.98,49.41,83.42,122.09,1191.0,6393.94,2.37,11.35
Class,117564.0,0.09,0.29,0.0,0.0,0.0,0.0,1.0,0.08,2.8,5.82


## Outlier Detection

In [12]:
# https://www.kaggle.com/code/lyasdemir/best-algorithm-for-prediction-xgboost
    
def iqr(data:pd.DataFrame, var:str):# outliers detecion .
    q1 = np.quantile(data[var], 0.25)
    q3 = np.quantile(data[var], 0.75)
    diff = q3 - q1
    lower_t = q1 - (1.5 * diff)
    upper_t = q3 + (1.5 * diff)
    return data[(data[var] < lower_t) | (data[var] > upper_t)]

# iqr(train, "squareMeters")

In [13]:
# https://www.kaggle.com/code/sujithmandala/playground-s3-e8-ensemble-model-98-accuracy

def detect_outliers(data:pd.DataFrame) -> pd.DataFrame:
    outlier_percents = {}
    for column in data.columns:
        if data[column].dtype != object:
            q1 = np.quantile(data[column], 0.25)
            q3 = np.quantile(data[column], 0.75)
            iqr = q3 - q1
            upper_bound = q3 + (1.5 * iqr)
            lower_bound = q1 - (1.5 * iqr)
            outliers = data[(data[column] > upper_bound) | (data[column] < lower_bound)][column]
            outlier_percentage = len(outliers) / len(data[column]) * 100
            outlier_percents[column] = outlier_percentage
            outlier_dataframe = pd.DataFrame(data = outlier_percents.values() ,index=outlier_percents.keys() ,columns=['Outlier_percentage'])
    
    return outlier_dataframe.sort_values(by = 'Outlier_percentage', ascending = False)

detect_outliers(train)


Unnamed: 0,Outlier_percentage
Mean_DMSNR_Curve,17.64486
SD_DMSNR_Curve,15.87646
EK_DMSNR_Curve,13.55007
Skewness,11.22112
EK,9.40084
Class,9.32854
Mean_Integrated,7.76768
Skewness_DMSNR_Curve,4.80844
SD,3.19656
id,0.0


In [14]:
# https://www.kaggle.com/code/sujithmandala/playground-s3-e8-ensemble-model-98-accuracy
    
def detect_outliers(data:pd.DataFrame) -> pd.DataFrame:
    outlier_percents = {}
    for column in data.columns:
        if data[column].dtype != object:
            q1 = np.quantile(data[column], 0.25)
            q3 = np.quantile(data[column], 0.75)
            iqr = q3 - q1
            upper_bound = q3 + (1.5 * iqr)
            lower_bound = q1 - (1.5 * iqr)
            outliers = data[(data[column] > upper_bound) | (data[column] < lower_bound)][column]
            outlier_percentage = len(outliers) / len(data[column]) * 100
            outlier_percents[column] = outlier_percentage
            outlier_dataframe = pd.DataFrame(data = outlier_percents.values() ,index=outlier_percents.keys() ,columns=['Outlier_percentage'])
    
    return outlier_dataframe.sort_values(by = 'Outlier_percentage', ascending = False)

detect_outliers(test)


Unnamed: 0,Outlier_percentage
Mean_DMSNR_Curve,17.51662
SD_DMSNR_Curve,15.72655
EK_DMSNR_Curve,13.73234
Skewness,11.11168
EK,9.19275
Mean_Integrated,7.62979
Skewness_DMSNR_Curve,4.75395
SD,3.19609
id,0.0


## Identify Outliers in this Dataset

- https://www.kaggle.com/competitions/playground-series-s3e10/discussion/393093
- https://www.kaggle.com/code/sujaykapadnis/s3e10-eda

In [15]:
def remove_pulsar_outliers(train):
    outliers = train[
        (train['Class'] == 1) &
        (train['Mean_Integrated'] > 115) &
        (train['SD'] > 45) &
        (train['EK'] < 0.03) &
        (train['Skewness'] < 1) &
        (train['Mean_DMSNR_Curve'] < 20)
    ].index
    train.drop(outliers,inplace=True)
    train.reset_index()
    return train

In [16]:
train = remove_pulsar_outliers(train)

<div style="background-color:rgba(177, 156, 217, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Feature Engineering</h1>
</div>

## Categorical/Numerical Variables

In [17]:
# train.drop(['cityCode'], axis=1, inplace=True)
# test.drop(['cityCode'], axis=1, inplace=True)


## Handle Outliers
- https://www.kaggle.com/code/lyasdemir/best-algorithm-for-prediction-xgboost
- https://www.kaggle.com/code/mnokno/paris-housing-price-prediction-using-xgboost

In [18]:
# features_with_outliers = ['attic', 'garage', 'made', 'basement', 'floors', 'cityCode', 'squareMeters']
# features_with_outliers = ['attic', 'garage', 'made', 'basement', 'floors',  'squareMeters']

In [19]:
# https://www.kaggle.com/code/mnokno/paris-housing-price-prediction-using-xgboost

def remove_outliers(df:pd.DataFrame) -> pd.DataFrame:
    for c in features_with_outliers:
        if c == 'garage':
            first_percentile = df[c].quantile(0.001)
            df = df[df[c] > first_percentile]

        ninety_ninth_percentile = df[c].quantile(0.999)
        df = df[df[c] < ninety_ninth_percentile]
        #df_t = df_t[(df_t[c] > first_percentile) & (df_t[c] < ninety_ninth_percentile)]
    return df


In [20]:
# print(f'Before: {len(train)}')
# train = remove_outliers(train)
# print(f'After: {len(train)}')

In [21]:
train.head(10)

Unnamed: 0,id,Mean_Integrated,SD,EK,Skewness,Mean_DMSNR_Curve,SD_DMSNR_Curve,EK_DMSNR_Curve,Skewness_DMSNR_Curve,Class
0,0,133.17188,59.71608,0.04313,-0.70338,54.91722,70.08444,0.7498,-0.64951,0
1,1,87.09375,36.25797,0.43547,2.26606,3.41722,21.86507,7.03933,52.68625,0
2,2,112.64062,39.81839,0.37964,0.92231,2.73077,15.68969,8.19347,85.64978,0
3,3,120.67969,45.91845,-0.09849,0.01178,2.69649,20.95466,8.18387,70.3329,0
4,4,134.07031,57.72011,-0.10777,-0.57334,1.10786,11.25505,16.10775,308.75377,0
5,5,131.63281,52.56321,-0.07525,-0.49583,2.19482,15.53743,9.03344,97.03241,0
6,6,110.9375,41.55695,0.31284,0.55902,1.96572,17.19147,10.39677,118.72427,0
7,7,120.20312,49.9279,-0.08999,-0.32137,3.2801,18.37684,8.19056,77.91724,0
8,8,112.41406,46.93987,0.28255,0.15178,3.33696,21.92953,7.69333,65.18628,0
9,9,99.85938,48.08919,0.69371,0.28166,3.41472,24.18191,7.95868,65.08458,0


In [22]:
train = train.reset_index(drop=True).copy()
train.head(10)

Unnamed: 0,id,Mean_Integrated,SD,EK,Skewness,Mean_DMSNR_Curve,SD_DMSNR_Curve,EK_DMSNR_Curve,Skewness_DMSNR_Curve,Class
0,0,133.17188,59.71608,0.04313,-0.70338,54.91722,70.08444,0.7498,-0.64951,0
1,1,87.09375,36.25797,0.43547,2.26606,3.41722,21.86507,7.03933,52.68625,0
2,2,112.64062,39.81839,0.37964,0.92231,2.73077,15.68969,8.19347,85.64978,0
3,3,120.67969,45.91845,-0.09849,0.01178,2.69649,20.95466,8.18387,70.3329,0
4,4,134.07031,57.72011,-0.10777,-0.57334,1.10786,11.25505,16.10775,308.75377,0
5,5,131.63281,52.56321,-0.07525,-0.49583,2.19482,15.53743,9.03344,97.03241,0
6,6,110.9375,41.55695,0.31284,0.55902,1.96572,17.19147,10.39677,118.72427,0
7,7,120.20312,49.9279,-0.08999,-0.32137,3.2801,18.37684,8.19056,77.91724,0
8,8,112.41406,46.93987,0.28255,0.15178,3.33696,21.92953,7.69333,65.18628,0
9,9,99.85938,48.08919,0.69371,0.28166,3.41472,24.18191,7.95868,65.08458,0


In [23]:
excluded_features = [TARGET, ID, "fold", "is_original"]

In [24]:
cont_features, cat_features = feature_distribution_types(train, display=True)
show_cardinality(train, cat_features)

cont_features = [feature for feature in cont_features if feature not in excluded_features]
cat_features = [feature for feature in cat_features if feature not in excluded_features]

FEATURES = cont_features + cat_features
FEATURES

[1m[91mContinuous Features=[0m['id', 'Mean_Integrated', 'SD', 'EK', 'Skewness', 'Mean_DMSNR_Curve', 'SD_DMSNR_Curve', 'EK_DMSNR_Curve', 'Skewness_DMSNR_Curve', 'Class']

[1m[91mCategorical Features=[0m[]
[1m[91m=== Cardinality ===[0m
Series([], dtype: float64)


['Mean_Integrated',
 'SD',
 'EK',
 'Skewness',
 'Mean_DMSNR_Curve',
 'SD_DMSNR_Curve',
 'EK_DMSNR_Curve',
 'Skewness_DMSNR_Curve']

In [25]:
train, test = label_encoder(train, test, cat_features)
# train = pd.get_dummies(train,columns=['cut','color','clarity']) # Will remove original feature names
# test = pd.get_dummies(test,columns=['cut','color','clarity'])

In [26]:
train.head()

Unnamed: 0,id,Mean_Integrated,SD,EK,Skewness,Mean_DMSNR_Curve,SD_DMSNR_Curve,EK_DMSNR_Curve,Skewness_DMSNR_Curve,Class
0,0,133.17188,59.71608,0.04313,-0.70338,54.91722,70.08444,0.7498,-0.64951,0
1,1,87.09375,36.25797,0.43547,2.26606,3.41722,21.86507,7.03933,52.68625,0
2,2,112.64062,39.81839,0.37964,0.92231,2.73077,15.68969,8.19347,85.64978,0
3,3,120.67969,45.91845,-0.09849,0.01178,2.69649,20.95466,8.18387,70.3329,0
4,4,134.07031,57.72011,-0.10777,-0.57334,1.10786,11.25505,16.10775,308.75377,0


## PCA

In [27]:
def extract_pca(df:pd.DataFrame, TARGET:str, n_pca_components:int = 3) -> pd.DataFrame:

    pca = decomposition.PCA(
        n_components=n_pca_components,
        svd_solver='full',
    )
    if len(TARGET) > 0:
        df0 = df.drop(columns=[TARGET])
    else:
        df0 = df
    pca_transform = pca.fit_transform(df0)
    
    
    pca_cols = [f'pca_{i}' for i in list(range(1, n_pca_components + 1))]  
    
    pca_df = pd.DataFrame(
        data=pca_transform,
        columns=pca_cols,
    )
    display(pca_df)
    df = df.join(pca_df)
    display(df.head())
    return df
    
train = extract_pca(train, TARGET, 3)    
test = extract_pca(test, "", 3)    

Unnamed: 0,pca_1,pca_2,pca_3
0,-58781.14074,-104.02370,9.96848
1,-58780.14081,-40.29253,-2.44142
2,-58779.14102,-4.86700,-12.49372
3,-58778.14100,-19.72698,-19.23299
4,-58777.14183,214.42156,23.76944
...,...,...,...
117555,58777.85904,-44.53022,-29.81473
117556,58778.85879,51.13643,-1.37680
117557,58779.85905,-32.88273,-22.35276
117558,58780.85896,7.94947,-6.22873


Unnamed: 0,id,Mean_Integrated,SD,EK,Skewness,Mean_DMSNR_Curve,SD_DMSNR_Curve,EK_DMSNR_Curve,Skewness_DMSNR_Curve,Class,pca_1,pca_2,pca_3
0,0,133.17188,59.71608,0.04313,-0.70338,54.91722,70.08444,0.7498,-0.64951,0,-58781.14074,-104.0237,9.96848
1,1,87.09375,36.25797,0.43547,2.26606,3.41722,21.86507,7.03933,52.68625,0,-58780.14081,-40.29253,-2.44142
2,2,112.64062,39.81839,0.37964,0.92231,2.73077,15.68969,8.19347,85.64978,0,-58779.14102,-4.867,-12.49372
3,3,120.67969,45.91845,-0.09849,0.01178,2.69649,20.95466,8.18387,70.3329,0,-58778.141,-19.72698,-19.23299
4,4,134.07031,57.72011,-0.10777,-0.57334,1.10786,11.25505,16.10775,308.75377,0,-58777.14183,214.42156,23.76944


Unnamed: 0,pca_1,pca_2,pca_3
0,39187.99989,29.55181,-22.30818
1,39186.99921,90.00711,9.66664
2,39186.00035,-29.36010,-29.42598
3,39185.00052,-64.14201,-18.32798
4,39183.99967,24.36826,4.85341
...,...,...,...
78372,-39184.00098,120.03285,11.06661
78373,-39185.00034,46.10375,-9.80298
78374,-39186.00040,57.17396,-9.95505
78375,-39187.00060,47.96830,18.85757


Unnamed: 0,id,Mean_Integrated,SD,EK,Skewness,Mean_DMSNR_Curve,SD_DMSNR_Curve,EK_DMSNR_Curve,Skewness_DMSNR_Curve,pca_1,pca_2,pca_3
0,117564,140.04688,54.5078,0.05886,-0.56726,2.33779,14.86833,9.59176,117.98878,39187.99989,29.55181,-22.30818
1,117565,107.82812,51.57897,0.28437,-0.33843,1.57441,12.50144,11.69497,182.70482,39186.99921,90.00711,9.66664
2,117566,135.0625,49.81234,-0.08778,-0.09434,3.57692,21.24334,7.25239,59.0215,39186.00035,-29.3601,-29.42598
3,117567,112.8125,41.92665,0.51992,1.28776,6.66973,29.01315,5.09766,27.10524,39185.00052,-64.14201,-18.32798
4,117568,96.21094,35.32262,0.48129,2.44308,2.21823,17.04106,9.76601,117.13178,39183.99967,24.36826,4.85341


## KMeans

- https://www.kaggle.com/code/xaviernogueira/baseline-pulsars-w-catboost-xgboost

In [28]:
# make an array of weights where 0->0.1 and 1->1
def swap(v) -> pd.Series:
    if v == 0:
        return 0.1
    else: return 1
weights = train.Class.apply(swap)

In [29]:
%%time
# make 2 and 5 class Kmeans clusters
k_means_obj2 = cluster.KMeans(
    n_clusters=2,
).fit(
    train.drop(columns=[TARGET]),
    sample_weight=weights,
)
k_means_obj5 = cluster.KMeans(
    n_clusters=5,
).fit(
    train.drop(columns=[TARGET]),
    sample_weight=weights,
)

CPU times: user 12.3 s, sys: 2.93 s, total: 15.3 s
Wall time: 3.99 s


In [30]:
train['k2'] = k_means_obj2.predict(train.drop(columns=[TARGET]))
train['k5'] = k_means_obj5.predict(train.drop(columns=[TARGET, 'k2']))

test['k2'] = k_means_obj2.predict(test)
test['k5'] = k_means_obj5.predict(test.drop(columns=['k2']))


In [31]:
train.head()

Unnamed: 0,id,Mean_Integrated,SD,EK,Skewness,Mean_DMSNR_Curve,SD_DMSNR_Curve,EK_DMSNR_Curve,Skewness_DMSNR_Curve,Class,pca_1,pca_2,pca_3,k2,k5
0,0,133.17188,59.71608,0.04313,-0.70338,54.91722,70.08444,0.7498,-0.64951,0,-58781.14074,-104.0237,9.96848,1,3
1,1,87.09375,36.25797,0.43547,2.26606,3.41722,21.86507,7.03933,52.68625,0,-58780.14081,-40.29253,-2.44142,1,3
2,2,112.64062,39.81839,0.37964,0.92231,2.73077,15.68969,8.19347,85.64978,0,-58779.14102,-4.867,-12.49372,1,3
3,3,120.67969,45.91845,-0.09849,0.01178,2.69649,20.95466,8.18387,70.3329,0,-58778.141,-19.72698,-19.23299,1,3
4,4,134.07031,57.72011,-0.10777,-0.57334,1.10786,11.25505,16.10775,308.75377,0,-58777.14183,214.42156,23.76944,1,3


In [32]:
cont_features, cat_features = feature_distribution_types(train, display=True)
show_cardinality(train, cat_features)

cont_features = [feature for feature in cont_features if feature not in excluded_features]
cat_features = [feature for feature in cat_features if feature not in excluded_features]

FEATURES = cont_features + cat_features
FEATURES

[1m[91mContinuous Features=[0m['id', 'Mean_Integrated', 'SD', 'EK', 'Skewness', 'Mean_DMSNR_Curve', 'SD_DMSNR_Curve', 'EK_DMSNR_Curve', 'Skewness_DMSNR_Curve', 'Class', 'pca_1', 'pca_2', 'pca_3']

[1m[91mCategorical Features=[0m[]
[1m[91m=== Cardinality ===[0m
Series([], dtype: float64)


['Mean_Integrated',
 'SD',
 'EK',
 'Skewness',
 'Mean_DMSNR_Curve',
 'SD_DMSNR_Curve',
 'EK_DMSNR_Curve',
 'Skewness_DMSNR_Curve',
 'pca_1',
 'pca_2',
 'pca_3']

<div style="background-color:rgba(177, 156, 217, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Optuna Hyperparameter Optimization</h1>
</div>

In [33]:
%%time

study_name=objective_direction # Need better name

if Config.optimize:
    y = train[TARGET]
    X = train[FEATURES].copy()

    X_test = test[FEATURES].copy()
    X_train, X_valid, y_train, y_valid = model_selection.train_test_split(
        X, y, test_size=0.2, random_state=Config.seed
    )

# === XGB ===

time_limit = 3600 * 3
best_xgb_params = {}

if Config.optimize:
    study = optuna.create_study(direction=objective_direction, study_name=f"xgboost {objective_direction}")
    study.optimize(
        lambda trial: objective_clf_xgb(trial, X_train, X_valid, y_train, y_valid),
#         lambda trial: objective_xgb(trial, X_train, X_valid, y_train, y_valid),        
        n_trials=Config.n_optuna_trials,
        # timeout=time_limit,  # this or n_trials
    )

if Config.optimize:
    print("Number of finished trials:", len(study.trials))
    print("Best XGB trial parameters:", study.best_trial.params)
    print("Best score:", study.best_value)
    best_xgb_params = study.best_trial.params

## === LGBM ===

time_limit = 3600 * 3
best_lgbm_params = {}

if Config.optimize:
    study = optuna.create_study(direction=objective_direction,study_name=f"LGBM {objective_direction}")
    study.optimize(
        lambda trial: objective_clf_lgbm(trial, X_train, X_valid, y_train, y_valid),
#         lambda trial: objective_lgbm(trial, X_train, X_valid, y_train, y_valid),        
        n_trials=Config.n_optuna_trials,
        # timeout=time_limit,  # this or n_trials
    )

if Config.optimize:
    print("Number of finished trials:", len(study.trials))
    print("Best LGBM trial parameters:", study.best_trial.params)
    print("Best score:", study.best_value)
    best_lgbm_params = study.best_trial.params

## === CatBoost

time_limit = 3600 * 3
# best_cb_params = {}
best_cb_params = {'learning_rate': 0.45743264601999495,
                  'l2_leaf_reg': 41.338946049390074,
                  'bagging_temperature': 0.3472567739474319,
                  'random_strength': 1.7332249677756242, 
                  'depth': 1,
                  'min_data_in_leaf': 6}

if Config.optimize:
    study = optuna.create_study(direction=objective_direction,study_name=f"CatBoost {objective_direction}")
    study.optimize(
        lambda trial: objective_clf_cb(trial, X_train, X_valid, y_train, y_valid),
#         lambda trial: objective_cb(trial, X_train, X_valid, y_train, y_valid),
        n_trials=Config.n_optuna_trials,
        # timeout=time_limit,  # this or n_trials
    )

if Config.optimize:
    print("Number of finished trials:", len(study.trials))
    print("Best Cat trial parameters:", study.best_trial.params)
    print("Best score:", study.best_value)
    best_cb_params = study.best_trial.params

[32m[I 2023-03-18 18:19:44,486][0m A new study created in memory with name: xgboost minimize[0m
[32m[I 2023-03-18 18:20:50,019][0m Trial 0 finished with value: 0.31583433543352685 and parameters: {'use_label_encoder': False, 'n_estimators': 4400, 'learning_rate': 0.013760320097376755, 'subsample': 0.58, 'colsample_bytree': 0.6900000000000001, 'max_depth': 20, 'gamma': 52.900000000000006, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 0.0013547260281083237, 'reg_alpha': 5.779451040284329, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 0.23432831860572692}. Best is trial 0 with value: 0.31583433543352685.[0m


Number of boosting rounds: 4265


[32m[I 2023-03-18 18:21:34,433][0m Trial 1 finished with value: 0.3114273462883176 and parameters: {'use_label_encoder': False, 'n_estimators': 3000, 'learning_rate': 0.025119349865121026, 'subsample': 0.29000000000000004, 'colsample_bytree': 0.42, 'max_depth': 19, 'gamma': 11.0, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 5.636777409234949, 'reg_alpha': 0.046566631740032324, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 0.6125447599393494}. Best is trial 1 with value: 0.3114273462883176.[0m


Number of boosting rounds: 2999


[32m[I 2023-03-18 18:22:24,674][0m Trial 2 finished with value: 0.37753197941811734 and parameters: {'use_label_encoder': False, 'n_estimators': 3300, 'learning_rate': 0.01289471026880874, 'subsample': 0.83, 'colsample_bytree': 0.12000000000000001, 'max_depth': 10, 'gamma': 0.7000000000000001, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 4.771306469109571e-05, 'reg_alpha': 0.12039202548494876, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 476.06710588585116}. Best is trial 1 with value: 0.3114273462883176.[0m


Number of boosting rounds: 3280


[32m[I 2023-03-18 18:22:52,915][0m Trial 3 finished with value: 0.31289643335820533 and parameters: {'use_label_encoder': False, 'n_estimators': 1900, 'learning_rate': 0.19100974008135435, 'subsample': 0.35, 'colsample_bytree': 0.9800000000000001, 'max_depth': 16, 'gamma': 41.5, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 0.13453193962984764, 'reg_alpha': 2.9740673096362475e-08, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 0.9230415546839236}. Best is trial 1 with value: 0.3114273462883176.[0m


Number of boosting rounds: 1349


[32m[I 2023-03-18 18:23:42,828][0m Trial 4 finished with value: 0.35255889356035175 and parameters: {'use_label_encoder': False, 'n_estimators': 3500, 'learning_rate': 0.21427196689528963, 'subsample': 0.23, 'colsample_bytree': 0.35, 'max_depth': 14, 'gamma': 83.9, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 1.3879172343971989e-06, 'reg_alpha': 0.05917840970129065, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 0.35378531727206114}. Best is trial 1 with value: 0.3114273462883176.[0m


Number of boosting rounds: 1342


[32m[I 2023-03-18 18:24:10,640][0m Trial 5 finished with value: 0.3437449832860465 and parameters: {'use_label_encoder': False, 'n_estimators': 1800, 'learning_rate': 0.01444374868288307, 'subsample': 0.31, 'colsample_bytree': 0.11, 'max_depth': 8, 'gamma': 31.3, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 2.3349535754009794, 'reg_alpha': 5.63240464028171e-06, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 15.382117047086735}. Best is trial 1 with value: 0.3114273462883176.[0m


Number of boosting rounds: 1793


[32m[I 2023-03-18 18:25:09,915][0m Trial 6 finished with value: 0.3319931709364197 and parameters: {'use_label_encoder': False, 'n_estimators': 4100, 'learning_rate': 0.17994558550443965, 'subsample': 0.31, 'colsample_bytree': 0.51, 'max_depth': 15, 'gamma': 69.9, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 13.43610981227196, 'reg_alpha': 0.0015427641242901431, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 32.45580686868824}. Best is trial 1 with value: 0.3114273462883176.[0m


Number of boosting rounds: 633


[32m[I 2023-03-18 18:25:29,204][0m Trial 7 finished with value: 0.31436504431529866 and parameters: {'use_label_encoder': False, 'n_estimators': 1300, 'learning_rate': 0.1882495365498091, 'subsample': 0.53, 'colsample_bytree': 0.67, 'max_depth': 18, 'gamma': 99.60000000000001, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 0.0013197657801969943, 'reg_alpha': 7.002950357737251e-07, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 210.18993746222674}. Best is trial 1 with value: 0.3114273462883176.[0m


Number of boosting rounds: 527


[32m[I 2023-03-18 18:26:32,700][0m Trial 8 finished with value: 0.3217104456566807 and parameters: {'use_label_encoder': False, 'n_estimators': 4300, 'learning_rate': 0.011704529080720772, 'subsample': 0.5, 'colsample_bytree': 0.8, 'max_depth': 9, 'gamma': 37.800000000000004, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 2.5150907501488033, 'reg_alpha': 4.079175344463022e-06, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 31.034032516211347}. Best is trial 1 with value: 0.3114273462883176.[0m


Number of boosting rounds: 4265


[32m[I 2023-03-18 18:27:17,175][0m Trial 9 finished with value: 0.32464817769171855 and parameters: {'use_label_encoder': False, 'n_estimators': 3000, 'learning_rate': 0.010675523016887758, 'subsample': 0.45999999999999996, 'colsample_bytree': 0.46, 'max_depth': 3, 'gamma': 73.3, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 0.06921510594337121, 'reg_alpha': 7.95667378853694e-08, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 2.8855932355682725}. Best is trial 1 with value: 0.3114273462883176.[0m


Number of boosting rounds: 2997


[32m[I 2023-03-18 18:27:51,399][0m Trial 10 finished with value: 0.3319931709364198 and parameters: {'use_label_encoder': False, 'n_estimators': 2500, 'learning_rate': 0.03607362375302318, 'subsample': 0.1, 'colsample_bytree': 0.28, 'max_depth': 1, 'gamma': 0.6000000000000001, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 94.21550035657454, 'reg_alpha': 37.72470470078153, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 0.10636503966039805}. Best is trial 1 with value: 0.3114273462883176.[0m


Number of boosting rounds: 2482


[32m[I 2023-03-18 18:28:23,197][0m Trial 11 finished with value: 0.30995832723454325 and parameters: {'use_label_encoder': False, 'n_estimators': 2200, 'learning_rate': 0.08699788592607745, 'subsample': 0.73, 'colsample_bytree': 1.0, 'max_depth': 16, 'gamma': 22.8, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 0.05121424978105106, 'reg_alpha': 2.0202668740588463e-08, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 1.122514647794157}. Best is trial 11 with value: 0.30995832723454325.[0m


Number of boosting rounds: 1836


[32m[I 2023-03-18 18:28:56,252][0m Trial 12 finished with value: 0.3040822170113894 and parameters: {'use_label_encoder': False, 'n_estimators': 2300, 'learning_rate': 0.07744552772159326, 'subsample': 0.77, 'colsample_bytree': 0.9900000000000001, 'max_depth': 20, 'gamma': 17.400000000000002, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 2.6722222246923843e-08, 'reg_alpha': 3.451588064817989e-05, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 1.1987597721497363}. Best is trial 12 with value: 0.3040822170113894.[0m


Number of boosting rounds: 1836


[32m[I 2023-03-18 18:29:27,815][0m Trial 13 finished with value: 0.3084892061565987 and parameters: {'use_label_encoder': False, 'n_estimators': 2200, 'learning_rate': 0.0719444640701711, 'subsample': 0.8099999999999999, 'colsample_bytree': 1.0, 'max_depth': 13, 'gamma': 21.6, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 1.2715171322314134e-07, 'reg_alpha': 6.891788980354823e-05, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 3.5153156577204854}. Best is trial 12 with value: 0.3040822170113894.[0m


Number of boosting rounds: 1836


[32m[I 2023-03-18 18:29:44,298][0m Trial 14 finished with value: 0.3026131979576151 and parameters: {'use_label_encoder': False, 'n_estimators': 1100, 'learning_rate': 0.06637424549779447, 'subsample': 0.99, 'colsample_bytree': 0.8600000000000001, 'max_depth': 12, 'gamma': 21.3, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 7.244206643433305e-08, 'reg_alpha': 0.0001897819892003827, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 3.8781813676561137}. Best is trial 14 with value: 0.3026131979576151.[0m


Number of boosting rounds: 1064


[32m[I 2023-03-18 18:29:58,271][0m Trial 15 finished with value: 0.32171027561639703 and parameters: {'use_label_encoder': False, 'n_estimators': 1000, 'learning_rate': 0.07069759623710617, 'subsample': 0.95, 'colsample_bytree': 0.8400000000000001, 'max_depth': 6, 'gamma': 53.300000000000004, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 1.4586439949599222e-08, 'reg_alpha': 0.000257983816388431, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 3.477606324011225}. Best is trial 14 with value: 0.3026131979576151.[0m


Number of boosting rounds: 390


[32m[I 2023-03-18 18:30:20,773][0m Trial 16 finished with value: 0.3011442469199543 and parameters: {'use_label_encoder': False, 'n_estimators': 1500, 'learning_rate': 0.09906826852794229, 'subsample': 0.98, 'colsample_bytree': 0.8300000000000001, 'max_depth': 12, 'gamma': 16.400000000000002, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 1.1056260595632869e-08, 'reg_alpha': 3.90861259301175e-05, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 8.37602988688603}. Best is trial 16 with value: 0.3011442469199543.[0m


Number of boosting rounds: 1419


[32m[I 2023-03-18 18:30:40,885][0m Trial 17 finished with value: 0.29673718975863145 and parameters: {'use_label_encoder': False, 'n_estimators': 1400, 'learning_rate': 0.11524131265506392, 'subsample': 1.0, 'colsample_bytree': 0.66, 'max_depth': 12, 'gamma': 9.1, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 7.507978566099247e-07, 'reg_alpha': 0.001744666040605678, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 7.345598030690941}. Best is trial 17 with value: 0.29673718975863145.[0m


Number of boosting rounds: 1368


[32m[I 2023-03-18 18:31:04,142][0m Trial 18 finished with value: 0.30995839525065677 and parameters: {'use_label_encoder': False, 'n_estimators': 1700, 'learning_rate': 0.1021294620174943, 'subsample': 0.91, 'colsample_bytree': 0.63, 'max_depth': 11, 'gamma': 10.200000000000001, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 4.960375910077639e-06, 'reg_alpha': 0.0026560165413308696, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 10.233718511985515}. Best is trial 17 with value: 0.29673718975863145.[0m


Number of boosting rounds: 1572


[32m[I 2023-03-18 18:31:25,826][0m Trial 19 finished with value: 0.31730318444701744 and parameters: {'use_label_encoder': False, 'n_estimators': 1500, 'learning_rate': 0.12873656816328471, 'subsample': 0.6799999999999999, 'colsample_bytree': 0.5900000000000001, 'max_depth': 6, 'gamma': 31.3, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 2.9853014483989617e-07, 'reg_alpha': 0.003585985845685547, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 63.66044218601318}. Best is trial 17 with value: 0.29673718975863145.[0m


Number of boosting rounds: 1377


[32m[I 2023-03-18 18:32:38,412][0m Trial 20 finished with value: 0.3011442129118976 and parameters: {'use_label_encoder': False, 'n_estimators': 5000, 'learning_rate': 0.12493627005591088, 'subsample': 0.9, 'colsample_bytree': 0.7500000000000001, 'max_depth': 7, 'gamma': 8.4, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 1.1992761328416944e-06, 'reg_alpha': 5.6480765398118225e-06, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 7.421726591221978}. Best is trial 17 with value: 0.29673718975863145.[0m


Number of boosting rounds: 4910


[32m[I 2023-03-18 18:33:43,559][0m Trial 21 finished with value: 0.3055512700732204 and parameters: {'use_label_encoder': False, 'n_estimators': 4700, 'learning_rate': 0.11588412900487678, 'subsample': 0.88, 'colsample_bytree': 0.7500000000000001, 'max_depth': 7, 'gamma': 9.5, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 1.15956888089998e-06, 'reg_alpha': 9.163010578993331e-06, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 8.276056247909521}. Best is trial 17 with value: 0.29673718975863145.[0m


Number of boosting rounds: 3894


[32m[I 2023-03-18 18:34:54,058][0m Trial 22 finished with value: 0.29820614079629226 and parameters: {'use_label_encoder': False, 'n_estimators': 5000, 'learning_rate': 0.1409071808062699, 'subsample': 1.0, 'colsample_bytree': 0.8800000000000001, 'max_depth': 4, 'gamma': 7.4, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 1.1855704417647767e-08, 'reg_alpha': 4.882921861343949e-07, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 9.56632387304644}. Best is trial 17 with value: 0.29673718975863145.[0m


Number of boosting rounds: 2449


[32m[I 2023-03-18 18:36:05,203][0m Trial 23 finished with value: 0.2996754319145205 and parameters: {'use_label_encoder': False, 'n_estimators': 5000, 'learning_rate': 0.14040117134457625, 'subsample': 0.88, 'colsample_bytree': 0.9, 'max_depth': 4, 'gamma': 3.9000000000000004, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 1.4806145545856997e-05, 'reg_alpha': 2.3147281432204877e-07, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 20.961070068461616}. Best is trial 17 with value: 0.29673718975863145.[0m


Number of boosting rounds: 1828


[32m[I 2023-03-18 18:37:02,316][0m Trial 24 finished with value: 0.3026134020059556 and parameters: {'use_label_encoder': False, 'n_estimators': 3800, 'learning_rate': 0.1600928416884624, 'subsample': 0.65, 'colsample_bytree': 0.89, 'max_depth': 4, 'gamma': 3.0, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 1.8073125017748515e-05, 'reg_alpha': 6.206336335280333e-07, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 21.205578107423392}. Best is trial 17 with value: 0.29673718975863145.[0m


Number of boosting rounds: 253


[32m[I 2023-03-18 18:38:13,244][0m Trial 25 finished with value: 0.3114272442641473 and parameters: {'use_label_encoder': False, 'n_estimators': 5000, 'learning_rate': 0.2468445845088547, 'subsample': 0.85, 'colsample_bytree': 0.91, 'max_depth': 4, 'gamma': 29.400000000000002, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 1.8835261084421938e-07, 'reg_alpha': 2.3747455694289977e-07, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 99.86233545374587}. Best is trial 17 with value: 0.29673718975863145.[0m


Number of boosting rounds: 2712


[32m[I 2023-03-18 18:39:18,098][0m Trial 26 finished with value: 0.31289626331792153 and parameters: {'use_label_encoder': False, 'n_estimators': 4600, 'learning_rate': 0.14434533120372237, 'subsample': 1.0, 'colsample_bytree': 0.5700000000000001, 'max_depth': 1, 'gamma': 13.0, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 6.01315920301967e-05, 'reg_alpha': 1.1940809584572206e-08, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 48.13352152508061}. Best is trial 17 with value: 0.29673718975863145.[0m


Number of boosting rounds: 1817


[32m[I 2023-03-18 18:40:13,929][0m Trial 27 finished with value: 0.3187721354846782 and parameters: {'use_label_encoder': False, 'n_estimators': 3900, 'learning_rate': 0.15669732124580762, 'subsample': 0.92, 'colsample_bytree': 0.76, 'max_depth': 3, 'gamma': 42.400000000000006, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 5.228103575275825e-08, 'reg_alpha': 1.1507393682951413e-07, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 19.603728586808238}. Best is trial 17 with value: 0.29673718975863145.[0m


Number of boosting rounds: 3894


[32m[I 2023-03-18 18:40:51,319][0m Trial 28 finished with value: 0.3158343694415836 and parameters: {'use_label_encoder': False, 'n_estimators': 2600, 'learning_rate': 0.055635462193344545, 'subsample': 0.73, 'colsample_bytree': 0.92, 'max_depth': 5, 'gamma': 26.1, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 3.019043591988946e-07, 'reg_alpha': 1.050011096856751e-06, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 93.04253476645526}. Best is trial 17 with value: 0.29673718975863145.[0m


Number of boosting rounds: 1836


[32m[I 2023-03-18 18:41:57,608][0m Trial 29 finished with value: 0.31877230552496194 and parameters: {'use_label_encoder': False, 'n_estimators': 4700, 'learning_rate': 0.10403366501987524, 'subsample': 0.62, 'colsample_bytree': 0.7400000000000001, 'max_depth': 9, 'gamma': 56.900000000000006, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 0.00018339513265977527, 'reg_alpha': 6.74756511918289e-08, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 14.162984424557044}. Best is trial 17 with value: 0.29673718975863145.[0m


Number of boosting rounds: 4262


[32m[I 2023-03-18 18:42:58,326][0m Trial 30 finished with value: 0.2982062428204625 and parameters: {'use_label_encoder': False, 'n_estimators': 4400, 'learning_rate': 0.2404073626058315, 'subsample': 0.79, 'colsample_bytree': 0.67, 'max_depth': 2, 'gamma': 5.1000000000000005, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 7.989941420784963e-06, 'reg_alpha': 2.775978783018568e-07, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 1.9938684374796538}. Best is trial 17 with value: 0.29673718975863145.[0m


Number of boosting rounds: 1947


[32m[I 2023-03-18 18:44:00,731][0m Trial 31 finished with value: 0.29673718975863145 and parameters: {'use_label_encoder': False, 'n_estimators': 4400, 'learning_rate': 0.24932671790174488, 'subsample': 0.82, 'colsample_bytree': 0.66, 'max_depth': 2, 'gamma': 5.300000000000001, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 6.6749568528911695e-06, 'reg_alpha': 4.3913343342799236e-07, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 2.637260191241658}. Best is trial 17 with value: 0.29673718975863145.[0m


Number of boosting rounds: 4388


[32m[I 2023-03-18 18:45:02,934][0m Trial 32 finished with value: 0.3011441789038408 and parameters: {'use_label_encoder': False, 'n_estimators': 4300, 'learning_rate': 0.24433109542053905, 'subsample': 0.8, 'colsample_bytree': 0.6200000000000001, 'max_depth': 2, 'gamma': 6.9, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 2.5996008509712562e-06, 'reg_alpha': 7.334931300650689e-07, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 2.1450698947603652}. Best is trial 17 with value: 0.29673718975863145.[0m


Number of boosting rounds: 4187


[32m[I 2023-03-18 18:46:07,883][0m Trial 33 finished with value: 0.3070202891269947 and parameters: {'use_label_encoder': False, 'n_estimators': 4500, 'learning_rate': 0.20924463271930496, 'subsample': 0.75, 'colsample_bytree': 0.6900000000000001, 'max_depth': 2, 'gamma': 15.3, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 4.2308218542090786e-07, 'reg_alpha': 2.5157274546697246e-06, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 1.6967563966765125}. Best is trial 17 with value: 0.29673718975863145.[0m


Number of boosting rounds: 4418


[32m[I 2023-03-18 18:46:56,332][0m Trial 34 finished with value: 0.296737121742518 and parameters: {'use_label_encoder': False, 'n_estimators': 3400, 'learning_rate': 0.16191797407674197, 'subsample': 0.95, 'colsample_bytree': 0.51, 'max_depth': 1, 'gamma': 1.2000000000000002, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 5.894498382619684e-06, 'reg_alpha': 4.0916562955780636e-08, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 0.6863094192383999}. Best is trial 34 with value: 0.296737121742518.[0m


Number of boosting rounds: 1371


[32m[I 2023-03-18 18:47:44,706][0m Trial 35 finished with value: 0.31142731228026077 and parameters: {'use_label_encoder': False, 'n_estimators': 3400, 'learning_rate': 0.17501305553535548, 'subsample': 0.95, 'colsample_bytree': 0.5, 'max_depth': 3, 'gamma': 15.3, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 5.459520598832689e-08, 'reg_alpha': 1.0714719772044192e-08, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 0.6901028249755882}. Best is trial 34 with value: 0.296737121742518.[0m


Number of boosting rounds: 2794


[32m[I 2023-03-18 18:48:36,361][0m Trial 36 finished with value: 0.296737121742518 and parameters: {'use_label_encoder': False, 'n_estimators': 3600, 'learning_rate': 0.16069031016832594, 'subsample': 0.85, 'colsample_bytree': 0.33999999999999997, 'max_depth': 1, 'gamma': 0.6000000000000001, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 7.040974151345294e-07, 'reg_alpha': 5.021048215403799e-08, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 0.5682660183795146}. Best is trial 34 with value: 0.296737121742518.[0m


Number of boosting rounds: 1491


[32m[I 2023-03-18 18:49:31,226][0m Trial 37 finished with value: 0.29526810268874365 and parameters: {'use_label_encoder': False, 'n_estimators': 3700, 'learning_rate': 0.19043535191389377, 'subsample': 0.86, 'colsample_bytree': 0.22999999999999998, 'max_depth': 1, 'gamma': 0.6000000000000001, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 2.2638123270635874e-06, 'reg_alpha': 7.922548355441824e-08, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 0.3888427139138523}. Best is trial 37 with value: 0.29526810268874365.[0m


Number of boosting rounds: 1665


[32m[I 2023-03-18 18:50:22,930][0m Trial 38 finished with value: 0.32611712872937937 and parameters: {'use_label_encoder': False, 'n_estimators': 3600, 'learning_rate': 0.19336132540612327, 'subsample': 0.85, 'colsample_bytree': 0.22000000000000003, 'max_depth': 1, 'gamma': 62.0, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 9.895424749092587e-07, 'reg_alpha': 4.776643062825142e-08, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 0.385875777892307}. Best is trial 37 with value: 0.29526810268874365.[0m


Number of boosting rounds: 2712


[32m[I 2023-03-18 18:51:22,439][0m Trial 39 finished with value: 0.29820603877212204 and parameters: {'use_label_encoder': False, 'n_estimators': 3100, 'learning_rate': 0.16826448644769543, 'subsample': 0.94, 'colsample_bytree': 0.38, 'max_depth': 14, 'gamma': 0.0, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 0.0002475580966512761, 'reg_alpha': 3.9751403442370344e-08, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 0.3049665077850506}. Best is trial 37 with value: 0.29526810268874365.[0m


Number of boosting rounds: 35


[32m[I 2023-03-18 18:52:03,236][0m Trial 40 finished with value: 0.32611699269715244 and parameters: {'use_label_encoder': False, 'n_estimators': 2800, 'learning_rate': 0.11689939252260594, 'subsample': 0.37, 'colsample_bytree': 0.16999999999999998, 'max_depth': 10, 'gamma': 35.4, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 2.976910877173137e-06, 'reg_alpha': 0.009740231680881012, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 0.573370907436321}. Best is trial 37 with value: 0.29526810268874365.[0m


Number of boosting rounds: 2521


[32m[I 2023-03-18 18:52:50,582][0m Trial 41 finished with value: 0.29233013259730867 and parameters: {'use_label_encoder': False, 'n_estimators': 3200, 'learning_rate': 0.2061163297603283, 'subsample': 0.84, 'colsample_bytree': 0.31, 'max_depth': 1, 'gamma': 0.30000000000000004, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 8.765191504372664e-06, 'reg_alpha': 1.1302447998256317e-07, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 0.6150849036119327}. Best is trial 41 with value: 0.29233013259730867.[0m


Number of boosting rounds: 1509


[32m[I 2023-03-18 18:53:38,800][0m Trial 42 finished with value: 0.2967371557505747 and parameters: {'use_label_encoder': False, 'n_estimators': 3300, 'learning_rate': 0.19783568608485733, 'subsample': 0.86, 'colsample_bytree': 0.31, 'max_depth': 1, 'gamma': 0.30000000000000004, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 7.280589675760533e-07, 'reg_alpha': 2.460342858800108e-08, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 0.192865951708374}. Best is trial 41 with value: 0.29233013259730867.[0m


Number of boosting rounds: 1464


[32m[I 2023-03-18 18:54:26,154][0m Trial 43 finished with value: 0.2967371557505747 and parameters: {'use_label_encoder': False, 'n_estimators': 3200, 'learning_rate': 0.2000153904771624, 'subsample': 0.7, 'colsample_bytree': 0.05, 'max_depth': 1, 'gamma': 0.0, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 3.103398623621341e-05, 'reg_alpha': 9.844850473309257e-08, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 0.16574819124485674}. Best is trial 41 with value: 0.29233013259730867.[0m


Number of boosting rounds: 2459


[32m[I 2023-03-18 18:55:16,493][0m Trial 44 finished with value: 0.3349310390036846 and parameters: {'use_label_encoder': False, 'n_estimators': 3600, 'learning_rate': 0.1758829186604678, 'subsample': 0.85, 'colsample_bytree': 0.32, 'max_depth': 3, 'gamma': 92.7, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 4.349266313629217e-06, 'reg_alpha': 2.7405536425067525e-08, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 0.24310160483635238}. Best is trial 41 with value: 0.29233013259730867.[0m


Number of boosting rounds: 750


[32m[I 2023-03-18 18:56:12,881][0m Trial 45 finished with value: 0.30995819120231627 and parameters: {'use_label_encoder': False, 'n_estimators': 3900, 'learning_rate': 0.20995291468284244, 'subsample': 0.87, 'colsample_bytree': 0.26, 'max_depth': 1, 'gamma': 12.200000000000001, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 2.224779372764346e-06, 'reg_alpha': 2.479185252627233e-08, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 0.49891151938859624}. Best is trial 41 with value: 0.29233013259730867.[0m


Number of boosting rounds: 1947


[32m[I 2023-03-18 18:56:54,694][0m Trial 46 finished with value: 0.309958225210373 and parameters: {'use_label_encoder': False, 'n_estimators': 2900, 'learning_rate': 0.15161997133961821, 'subsample': 0.76, 'colsample_bytree': 0.42, 'max_depth': 2, 'gamma': 3.1, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 5.259918653666244e-07, 'reg_alpha': 1.3183279455665068e-07, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 0.14600652356082908}. Best is trial 41 with value: 0.29233013259730867.[0m


Number of boosting rounds: 609


[32m[I 2023-03-18 18:57:43,152][0m Trial 47 finished with value: 0.3217102076002835 and parameters: {'use_label_encoder': False, 'n_estimators': 3300, 'learning_rate': 0.17640379709049728, 'subsample': 0.6, 'colsample_bytree': 0.33, 'max_depth': 1, 'gamma': 19.400000000000002, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 1.3202715301714452e-07, 'reg_alpha': 1.0203970452472873e-08, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 0.7635417475262821}. Best is trial 41 with value: 0.29233013259730867.[0m


Number of boosting rounds: 3267


[32m[I 2023-03-18 18:58:36,655][0m Trial 48 finished with value: 0.34962095747697347 and parameters: {'use_label_encoder': False, 'n_estimators': 3700, 'learning_rate': 0.20764445865217518, 'subsample': 0.45000000000000007, 'colsample_bytree': 0.2, 'max_depth': 5, 'gamma': 76.9, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 1.2706521225089424e-05, 'reg_alpha': 5.0778443023134156e-08, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 0.38594271663840335}. Best is trial 41 with value: 0.29233013259730867.[0m


Number of boosting rounds: 3545


[32m[I 2023-03-18 18:59:31,563][0m Trial 49 finished with value: 0.2996751938581233 and parameters: {'use_label_encoder': False, 'n_estimators': 3400, 'learning_rate': 0.1330302483292728, 'subsample': 0.56, 'colsample_bytree': 0.39, 'max_depth': 3, 'gamma': 0.0, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 4.455972698561717e-05, 'reg_alpha': 1.78090110574376e-06, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 0.2163028547747177}. Best is trial 41 with value: 0.29233013259730867.[0m
[32m[I 2023-03-18 18:59:31,582][0m A new study created in memory with name: LGBM minimize[0m


Number of boosting rounds: 163
Number of finished trials: 50
Best XGB trial parameters: {'use_label_encoder': False, 'n_estimators': 3200, 'learning_rate': 0.2061163297603283, 'subsample': 0.84, 'colsample_bytree': 0.31, 'max_depth': 1, 'gamma': 0.30000000000000004, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 8.765191504372664e-06, 'reg_alpha': 1.1302447998256317e-07, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 0.6150849036119327}
Best score: 0.29233013259730867
Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.0130388	valid_1's binary_logloss: 0.0332659
Early stopping, best iteration is:
[237]	training's binary_logloss: 0.02092	valid_1's binary_logloss: 0.0316433


[32m[I 2023-03-18 18:59:46,296][0m Trial 0 finished with value: 0.9648099477692607 and parameters: {'n_estimators': 804, 'reg_alpha': 0.8305406214360278, 'reg_lambda': 0.003946424510354089, 'colsample_bytree': 0.18, 'num_leaves': 879, 'feature_fraction': 0.15243578987284312, 'bagging_fraction': 0.4950120034498947, 'bagging_freq': 13, 'min_child_samples': 209, 'subsample': 0.65, 'learning_rate': 0.06430330662880626, 'max_depth': 31, 'random_state': 42, 'n_jobs': 4}. Best is trial 0 with value: 0.9648099477692607.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.00975568	valid_1's binary_logloss: 0.0338591
Early stopping, best iteration is:
[273]	training's binary_logloss: 0.0190743	valid_1's binary_logloss: 0.0306528


[32m[I 2023-03-18 19:00:18,104][0m Trial 1 finished with value: 0.9657511242398489 and parameters: {'n_estimators': 968, 'reg_alpha': 4.918133678460707e-07, 'reg_lambda': 3.0515069936763744e-08, 'colsample_bytree': 0.9900000000000001, 'num_leaves': 852, 'feature_fraction': 0.40150261852448854, 'bagging_fraction': 0.3998363980725649, 'bagging_freq': 7, 'min_child_samples': 80, 'subsample': 0.26, 'learning_rate': 0.02322041768296521, 'max_depth': 33, 'random_state': 42, 'n_jobs': 4}. Best is trial 0 with value: 0.9648099477692607.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.00794753	valid_1's binary_logloss: 0.0325162
Early stopping, best iteration is:
[193]	training's binary_logloss: 0.0218099	valid_1's binary_logloss: 0.0296831


[32m[I 2023-03-18 19:00:41,888][0m Trial 2 finished with value: 0.9668340800761321 and parameters: {'n_estimators': 887, 'reg_alpha': 3.036578716446601e-07, 'reg_lambda': 2.6838404522816997, 'colsample_bytree': 0.5700000000000001, 'num_leaves': 221, 'feature_fraction': 0.7755951551184177, 'bagging_fraction': 0.8461623460668595, 'bagging_freq': 3, 'min_child_samples': 178, 'subsample': 0.35, 'learning_rate': 0.03437945265370172, 'max_depth': 63, 'random_state': 42, 'n_jobs': 4}. Best is trial 0 with value: 0.9648099477692607.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.0273284	valid_1's binary_logloss: 0.0309922
Did not meet early stopping. Best iteration is:
[756]	training's binary_logloss: 0.0246516	valid_1's binary_logloss: 0.0305303


[32m[I 2023-03-18 19:00:52,044][0m Trial 3 finished with value: 0.9655158301222018 and parameters: {'n_estimators': 756, 'reg_alpha': 3.4428171732307504, 'reg_lambda': 0.0002053566613049041, 'colsample_bytree': 0.22999999999999998, 'num_leaves': 953, 'feature_fraction': 0.13811172729653698, 'bagging_fraction': 0.5324014119163393, 'bagging_freq': 15, 'min_child_samples': 63, 'subsample': 0.37, 'learning_rate': 0.036188830192647695, 'max_depth': 86, 'random_state': 42, 'n_jobs': 4}. Best is trial 0 with value: 0.9648099477692607.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.00043967	valid_1's binary_logloss: 0.048195


[32m[I 2023-03-18 19:01:14,196][0m Trial 4 finished with value: 0.9658695249175557 and parameters: {'n_estimators': 890, 'reg_alpha': 0.06703911477250944, 'reg_lambda': 0.00048150598130618997, 'colsample_bytree': 0.93, 'num_leaves': 940, 'feature_fraction': 0.5451192632257392, 'bagging_fraction': 0.8502076176223422, 'bagging_freq': 11, 'min_child_samples': 158, 'subsample': 0.14, 'learning_rate': 0.07326111775614524, 'max_depth': 57, 'random_state': 42, 'n_jobs': 4}. Best is trial 0 with value: 0.9648099477692607.[0m


Early stopping, best iteration is:
[82]	training's binary_logloss: 0.0180044	valid_1's binary_logloss: 0.0303005
Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.0260743	valid_1's binary_logloss: 0.0301686
Did not meet early stopping. Best iteration is:
[893]	training's binary_logloss: 0.0208487	valid_1's binary_logloss: 0.0310684


[32m[I 2023-03-18 19:01:26,946][0m Trial 5 finished with value: 0.9665067784442997 and parameters: {'n_estimators': 894, 'reg_alpha': 1.1925490198781947e-06, 'reg_lambda': 1.1160746310951128e-07, 'colsample_bytree': 0.38, 'num_leaves': 838, 'feature_fraction': 0.6873156472300587, 'bagging_fraction': 0.1535960669355493, 'bagging_freq': 8, 'min_child_samples': 289, 'subsample': 0.41000000000000003, 'learning_rate': 0.019640924268705652, 'max_depth': 21, 'random_state': 42, 'n_jobs': 4}. Best is trial 0 with value: 0.9648099477692607.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.0290623	valid_1's binary_logloss: 0.0297865
Did not meet early stopping. Best iteration is:
[848]	training's binary_logloss: 0.0267128	valid_1's binary_logloss: 0.029723


[32m[I 2023-03-18 19:01:35,380][0m Trial 6 finished with value: 0.9672126607972408 and parameters: {'n_estimators': 848, 'reg_alpha': 2.3874088470640955, 'reg_lambda': 2.5769144282852295e-06, 'colsample_bytree': 0.33999999999999997, 'num_leaves': 782, 'feature_fraction': 0.4173996933356826, 'bagging_fraction': 0.14713776436089915, 'bagging_freq': 9, 'min_child_samples': 233, 'subsample': 0.65, 'learning_rate': 0.026139338695122763, 'max_depth': 67, 'random_state': 42, 'n_jobs': 4}. Best is trial 0 with value: 0.9648099477692607.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.000878848	valid_1's binary_logloss: 0.0506543


[32m[I 2023-03-18 19:01:58,052][0m Trial 7 finished with value: 0.9659879255952626 and parameters: {'n_estimators': 917, 'reg_alpha': 0.0008328844723412691, 'reg_lambda': 7.708048063235953e-05, 'colsample_bytree': 0.25, 'num_leaves': 644, 'feature_fraction': 0.778185588925279, 'bagging_fraction': 0.7113418229836075, 'bagging_freq': 11, 'min_child_samples': 298, 'subsample': 0.7, 'learning_rate': 0.06979673975145949, 'max_depth': 92, 'random_state': 42, 'n_jobs': 4}. Best is trial 0 with value: 0.9648099477692607.[0m


Early stopping, best iteration is:
[91]	training's binary_logloss: 0.0221047	valid_1's binary_logloss: 0.0297093
Training until validation scores don't improve for 500 rounds


[32m[I 2023-03-18 19:02:09,009][0m Trial 8 finished with value: 0.9622465984008977 and parameters: {'n_estimators': 838, 'reg_alpha': 2.4074435535826917e-06, 'reg_lambda': 0.0015252493670680537, 'colsample_bytree': 0.52, 'num_leaves': 612, 'feature_fraction': 0.4913361164982629, 'bagging_fraction': 0.3543718772335924, 'bagging_freq': 12, 'min_child_samples': 21, 'subsample': 0.64, 'learning_rate': 0.208558193748459, 'max_depth': 54, 'random_state': 42, 'n_jobs': 4}. Best is trial 8 with value: 0.9622465984008977.[0m


[500]	training's binary_logloss: 0.408003	valid_1's binary_logloss: 0.368986
Early stopping, best iteration is:
[22]	training's binary_logloss: 0.0237029	valid_1's binary_logloss: 0.0345948
Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.237759	valid_1's binary_logloss: 0.311312


[32m[I 2023-03-18 19:02:25,813][0m Trial 9 finished with value: 0.9663634918408378 and parameters: {'n_estimators': 874, 'reg_alpha': 2.8930271458223527e-06, 'reg_lambda': 1.1680025406878992e-07, 'colsample_bytree': 0.6900000000000001, 'num_leaves': 270, 'feature_fraction': 0.7570193400655486, 'bagging_fraction': 0.5322714108604055, 'bagging_freq': 13, 'min_child_samples': 177, 'subsample': 0.26, 'learning_rate': 0.14828739419657955, 'max_depth': 50, 'random_state': 42, 'n_jobs': 4}. Best is trial 8 with value: 0.9622465984008977.[0m


Early stopping, best iteration is:
[33]	training's binary_logloss: 0.0265593	valid_1's binary_logloss: 0.0304072
Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 6.4499e-06	valid_1's binary_logloss: 0.0739203


[32m[I 2023-03-18 19:02:47,388][0m Trial 10 finished with value: 0.9661048190352027 and parameters: {'n_estimators': 749, 'reg_alpha': 2.1118248281554526e-08, 'reg_lambda': 0.05947316786042752, 'colsample_bytree': 0.7100000000000001, 'num_leaves': 439, 'feature_fraction': 0.8937262843237757, 'bagging_fraction': 0.9979345046922834, 'bagging_freq': 1, 'min_child_samples': 24, 'subsample': 0.99, 'learning_rate': 0.24263940152561878, 'max_depth': 13, 'random_state': 42, 'n_jobs': 4}. Best is trial 8 with value: 0.9622465984008977.[0m


Early stopping, best iteration is:
[20]	training's binary_logloss: 0.00953458	valid_1's binary_logloss: 0.0328392
Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.0427079	valid_1's binary_logloss: 0.0430362
Did not meet early stopping. Best iteration is:
[798]	training's binary_logloss: 0.0344852	valid_1's binary_logloss: 0.0381009


[32m[I 2023-03-18 19:03:22,885][0m Trial 11 finished with value: 0.9548808374521081 and parameters: {'n_estimators': 798, 'reg_alpha': 0.00010053094309084846, 'reg_lambda': 0.009444459928438837, 'colsample_bytree': 0.07, 'num_leaves': 573, 'feature_fraction': 0.10685950870898792, 'bagging_fraction': 0.3586119348119363, 'bagging_freq': 15, 'min_child_samples': 92, 'subsample': 0.75, 'learning_rate': 0.010848334364235412, 'max_depth': 34, 'random_state': 42, 'n_jobs': 4}. Best is trial 11 with value: 0.9548808374521081.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.0249532	valid_1's binary_logloss: 0.0306357
Did not meet early stopping. Best iteration is:
[824]	training's binary_logloss: 0.0162556	valid_1's binary_logloss: 0.0305394


[32m[I 2023-03-18 19:03:44,466][0m Trial 12 finished with value: 0.9659162822935318 and parameters: {'n_estimators': 824, 'reg_alpha': 2.9263625607329874e-05, 'reg_lambda': 0.018602870679466768, 'colsample_bytree': 0.11, 'num_leaves': 507, 'feature_fraction': 0.2531446005360004, 'bagging_fraction': 0.3341117295877568, 'bagging_freq': 15, 'min_child_samples': 88, 'subsample': 0.87, 'learning_rate': 0.01087096040736058, 'max_depth': 42, 'random_state': 42, 'n_jobs': 4}. Best is trial 11 with value: 0.9548808374521081.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.0375512	valid_1's binary_logloss: 0.034906
Did not meet early stopping. Best iteration is:
[707]	training's binary_logloss: 0.0351434	valid_1's binary_logloss: 0.0329462


[32m[I 2023-03-18 19:03:48,819][0m Trial 13 finished with value: 0.9614223153702497 and parameters: {'n_estimators': 707, 'reg_alpha': 0.0002867922555589, 'reg_lambda': 0.48644941025019905, 'colsample_bytree': 0.48, 'num_leaves': 619, 'feature_fraction': 0.34894100856948795, 'bagging_fraction': 0.3003934456925522, 'bagging_freq': 5, 'min_child_samples': 6, 'subsample': 0.8, 'learning_rate': 0.011400982804059896, 'max_depth': 2, 'random_state': 42, 'n_jobs': 4}. Best is trial 11 with value: 0.9548808374521081.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.0403159	valid_1's binary_logloss: 0.0370059


[32m[I 2023-03-18 19:03:51,713][0m Trial 14 finished with value: 0.9622933557768737 and parameters: {'n_estimators': 702, 'reg_alpha': 0.0007221552071814783, 'reg_lambda': 0.4910589006508737, 'colsample_bytree': 0.08, 'num_leaves': 50, 'feature_fraction': 0.2841442750549987, 'bagging_fraction': 0.27521458275456295, 'bagging_freq': 5, 'min_child_samples': 115, 'subsample': 0.89, 'learning_rate': 0.010775727213762907, 'max_depth': 1, 'random_state': 42, 'n_jobs': 4}. Best is trial 11 with value: 0.9548808374521081.[0m


Did not meet early stopping. Best iteration is:
[702]	training's binary_logloss: 0.037759	valid_1's binary_logloss: 0.0344031
Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.0360929	valid_1's binary_logloss: 0.0332016
Did not meet early stopping. Best iteration is:
[704]	training's binary_logloss: 0.0344569	valid_1's binary_logloss: 0.0319126


[32m[I 2023-03-18 19:03:55,274][0m Trial 15 finished with value: 0.9630693741937791 and parameters: {'n_estimators': 704, 'reg_alpha': 0.00012772887173110366, 'reg_lambda': 7.913432951756149, 'colsample_bytree': 0.45, 'num_leaves': 673, 'feature_fraction': 0.2731963235422886, 'bagging_fraction': 0.2380797629184362, 'bagging_freq': 5, 'min_child_samples': 2, 'subsample': 0.75, 'learning_rate': 0.015232213884635118, 'max_depth': 2, 'random_state': 42, 'n_jobs': 4}. Best is trial 11 with value: 0.9548808374521081.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.0379855	valid_1's binary_logloss: 0.0394873
Did not meet early stopping. Best iteration is:
[777]	training's binary_logloss: 0.0320094	valid_1's binary_logloss: 0.0370156


[32m[I 2023-03-18 19:04:24,806][0m Trial 16 finished with value: 0.9564811388996615 and parameters: {'n_estimators': 777, 'reg_alpha': 0.002729495860975144, 'reg_lambda': 0.07784902406270457, 'colsample_bytree': 0.77, 'num_leaves': 426, 'feature_fraction': 0.13228544703201875, 'bagging_fraction': 0.41964766785404745, 'bagging_freq': 5, 'min_child_samples': 123, 'subsample': 0.52, 'learning_rate': 0.014116435738807933, 'max_depth': 16, 'random_state': 42, 'n_jobs': 4}. Best is trial 11 with value: 0.9548808374521081.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.0355533	valid_1's binary_logloss: 0.038344
Did not meet early stopping. Best iteration is:
[784]	training's binary_logloss: 0.0298651	valid_1's binary_logloss: 0.0371142


[32m[I 2023-03-18 19:04:58,943][0m Trial 17 finished with value: 0.9561757087180501 and parameters: {'n_estimators': 784, 'reg_alpha': 0.006434366660671602, 'reg_lambda': 0.04005623340869719, 'colsample_bytree': 0.8600000000000001, 'num_leaves': 360, 'feature_fraction': 0.10816749125810318, 'bagging_fraction': 0.4326982300702274, 'bagging_freq': 1, 'min_child_samples': 128, 'subsample': 0.5, 'learning_rate': 0.01625201634008682, 'max_depth': 28, 'random_state': 42, 'n_jobs': 4}. Best is trial 11 with value: 0.9548808374521081.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.0138602	valid_1's binary_logloss: 0.0342545
Did not meet early stopping. Best iteration is:
[791]	training's binary_logloss: 0.00685229	valid_1's binary_logloss: 0.0366443


[32m[I 2023-03-18 19:05:47,030][0m Trial 18 finished with value: 0.9611621353268478 and parameters: {'n_estimators': 791, 'reg_alpha': 0.011242007835553604, 'reg_lambda': 0.008489923511701815, 'colsample_bytree': 0.8500000000000001, 'num_leaves': 274, 'feature_fraction': 0.20966379776620325, 'bagging_fraction': 0.6204928842433639, 'bagging_freq': 0, 'min_child_samples': 123, 'subsample': 0.52, 'learning_rate': 0.017366285170893888, 'max_depth': 31, 'random_state': 42, 'n_jobs': 4}. Best is trial 11 with value: 0.9548808374521081.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.0371009	valid_1's binary_logloss: 0.0390837
Did not meet early stopping. Best iteration is:
[745]	training's binary_logloss: 0.0315892	valid_1's binary_logloss: 0.0371967


[32m[I 2023-03-18 19:06:13,965][0m Trial 19 finished with value: 0.9567164330173086 and parameters: {'n_estimators': 745, 'reg_alpha': 0.02872066632063517, 'reg_lambda': 6.479693130020139e-05, 'colsample_bytree': 0.5800000000000001, 'num_leaves': 116, 'feature_fraction': 0.10276477151277441, 'bagging_fraction': 0.4469072146170615, 'bagging_freq': 2, 'min_child_samples': 60, 'subsample': 0.54, 'learning_rate': 0.014878384651004054, 'max_depth': 74, 'random_state': 42, 'n_jobs': 4}. Best is trial 11 with value: 0.9548808374521081.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.0213081	valid_1's binary_logloss: 0.0327965
Did not meet early stopping. Best iteration is:
[815]	training's binary_logloss: 0.0144903	valid_1's binary_logloss: 0.0342094


[32m[I 2023-03-18 19:06:33,491][0m Trial 20 finished with value: 0.9647631903932845 and parameters: {'n_estimators': 815, 'reg_alpha': 3.0301894109908155e-05, 'reg_lambda': 0.0018995632689702564, 'colsample_bytree': 0.8400000000000001, 'num_leaves': 502, 'feature_fraction': 0.2102508263745062, 'bagging_fraction': 0.22205517664294278, 'bagging_freq': 3, 'min_child_samples': 101, 'subsample': 0.45999999999999996, 'learning_rate': 0.022152508297231574, 'max_depth': 42, 'random_state': 42, 'n_jobs': 4}. Best is trial 11 with value: 0.9548808374521081.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.0448556	valid_1's binary_logloss: 0.044509
Did not meet early stopping. Best iteration is:
[776]	training's binary_logloss: 0.0365589	valid_1's binary_logloss: 0.038616


[32m[I 2023-03-18 19:07:02,591][0m Trial 21 finished with value: 0.954198333787155 and parameters: {'n_estimators': 776, 'reg_alpha': 0.00267006626029069, 'reg_lambda': 0.07780500169863878, 'colsample_bytree': 0.7100000000000001, 'num_leaves': 377, 'feature_fraction': 0.12270301761718339, 'bagging_fraction': 0.4152464820545878, 'bagging_freq': 7, 'min_child_samples': 136, 'subsample': 0.55, 'learning_rate': 0.010026076674082804, 'max_depth': 18, 'random_state': 42, 'n_jobs': 4}. Best is trial 21 with value: 0.954198333787155.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.0440752	valid_1's binary_logloss: 0.0436192
Did not meet early stopping. Best iteration is:
[776]	training's binary_logloss: 0.0362112	valid_1's binary_logloss: 0.0381218


[32m[I 2023-03-18 19:07:28,957][0m Trial 22 finished with value: 0.9546923007104371 and parameters: {'n_estimators': 776, 'reg_alpha': 0.0046376791297535625, 'reg_lambda': 0.043958070861213235, 'colsample_bytree': 0.66, 'num_leaves': 379, 'feature_fraction': 0.1114270071521573, 'bagging_fraction': 0.38045284119728, 'bagging_freq': 9, 'min_child_samples': 140, 'subsample': 0.62, 'learning_rate': 0.010485538905334359, 'max_depth': 23, 'random_state': 42, 'n_jobs': 4}. Best is trial 21 with value: 0.954198333787155.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.0346581	valid_1's binary_logloss: 0.0358056
Did not meet early stopping. Best iteration is:
[764]	training's binary_logloss: 0.0274392	valid_1's binary_logloss: 0.0325878


[32m[I 2023-03-18 19:07:46,113][0m Trial 23 finished with value: 0.96205655442146 and parameters: {'n_estimators': 764, 'reg_alpha': 0.12206063397516732, 'reg_lambda': 0.2405583111965491, 'colsample_bytree': 0.6200000000000001, 'num_leaves': 353, 'feature_fraction': 0.21961095790511173, 'bagging_fraction': 0.3435637745905047, 'bagging_freq': 9, 'min_child_samples': 148, 'subsample': 0.58, 'learning_rate': 0.010375170443994951, 'max_depth': 20, 'random_state': 42, 'n_jobs': 4}. Best is trial 21 with value: 0.954198333787155.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.0255065	valid_1's binary_logloss: 0.0302442
Did not meet early stopping. Best iteration is:
[734]	training's binary_logloss: 0.0193238	valid_1's binary_logloss: 0.0298795


[32m[I 2023-03-18 19:08:02,680][0m Trial 24 finished with value: 0.9653521793062856 and parameters: {'n_estimators': 734, 'reg_alpha': 0.00218344174599274, 'reg_lambda': 0.01393529757801075, 'colsample_bytree': 0.7000000000000001, 'num_leaves': 545, 'feature_fraction': 0.3087290013274031, 'bagging_fraction': 0.38900085126685124, 'bagging_freq': 7, 'min_child_samples': 198, 'subsample': 0.77, 'learning_rate': 0.013129229096822374, 'max_depth': 42, 'random_state': 42, 'n_jobs': 4}. Best is trial 21 with value: 0.954198333787155.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.0336369	valid_1's binary_logloss: 0.0356354
Did not meet early stopping. Best iteration is:
[803]	training's binary_logloss: 0.0260334	valid_1's binary_logloss: 0.0323522


[32m[I 2023-03-18 19:08:19,292][0m Trial 25 finished with value: 0.9632330250096954 and parameters: {'n_estimators': 803, 'reg_alpha': 0.012434503539600815, 'reg_lambda': 0.19394853258409298, 'colsample_bytree': 0.66, 'num_leaves': 736, 'feature_fraction': 0.19778665685941035, 'bagging_fraction': 0.4812728623146083, 'bagging_freq': 10, 'min_child_samples': 150, 'subsample': 0.6, 'learning_rate': 0.010321265321781174, 'max_depth': 10, 'random_state': 42, 'n_jobs': 4}. Best is trial 21 with value: 0.954198333787155.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.0315379	valid_1's binary_logloss: 0.0339364
Did not meet early stopping. Best iteration is:
[724]	training's binary_logloss: 0.0265857	valid_1's binary_logloss: 0.0320265


[32m[I 2023-03-18 19:08:33,737][0m Trial 26 finished with value: 0.9627390580864131 and parameters: {'n_estimators': 724, 'reg_alpha': 0.00011994839319163008, 'reg_lambda': 1.287977525934619, 'colsample_bytree': 0.78, 'num_leaves': 187, 'feature_fraction': 0.17701349294871077, 'bagging_fraction': 0.22868242471085856, 'bagging_freq': 14, 'min_child_samples': 53, 'subsample': 0.88, 'learning_rate': 0.013825490007022686, 'max_depth': 21, 'random_state': 42, 'n_jobs': 4}. Best is trial 21 with value: 0.954198333787155.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.0169109	valid_1's binary_logloss: 0.0305331
Did not meet early stopping. Best iteration is:
[778]	training's binary_logloss: 0.0106581	valid_1's binary_logloss: 0.0325962


[32m[I 2023-03-18 19:08:55,623][0m Trial 27 finished with value: 0.9659162822935318 and parameters: {'n_estimators': 778, 'reg_alpha': 0.001840346337329256, 'reg_lambda': 0.05379889486065072, 'colsample_bytree': 0.4, 'num_leaves': 390, 'feature_fraction': 0.3285466650772219, 'bagging_fraction': 0.5991401158586653, 'bagging_freq': 6, 'min_child_samples': 253, 'subsample': 0.7, 'learning_rate': 0.018756431127344715, 'max_depth': 39, 'random_state': 42, 'n_jobs': 4}. Best is trial 21 with value: 0.954198333787155.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.0412562	valid_1's binary_logloss: 0.0404207
Did not meet early stopping. Best iteration is:
[829]	training's binary_logloss: 0.0345272	valid_1's binary_logloss: 0.036199


[32m[I 2023-03-18 19:09:12,380][0m Trial 28 finished with value: 0.9577511242398489 and parameters: {'n_estimators': 829, 'reg_alpha': 0.15006296106941783, 'reg_lambda': 0.0060679340101045375, 'colsample_bytree': 0.33, 'num_leaves': 568, 'feature_fraction': 0.10131810839747236, 'bagging_fraction': 0.309867791226024, 'bagging_freq': 9, 'min_child_samples': 97, 'subsample': 0.82, 'learning_rate': 0.01264702059708043, 'max_depth': 9, 'random_state': 42, 'n_jobs': 4}. Best is trial 21 with value: 0.954198333787155.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.0370729	valid_1's binary_logloss: 0.036522
Did not meet early stopping. Best iteration is:
[858]	training's binary_logloss: 0.0289906	valid_1's binary_logloss: 0.0325256


[32m[I 2023-03-18 19:09:27,689][0m Trial 29 finished with value: 0.9627156793984251 and parameters: {'n_estimators': 858, 'reg_alpha': 0.8811720465549717, 'reg_lambda': 0.0027981214041948622, 'colsample_bytree': 0.77, 'num_leaves': 321, 'feature_fraction': 0.17539545274521456, 'bagging_fraction': 0.388295878092471, 'bagging_freq': 13, 'min_child_samples': 178, 'subsample': 0.69, 'learning_rate': 0.01001211936203349, 'max_depth': 25, 'random_state': 42, 'n_jobs': 4}. Best is trial 21 with value: 0.954198333787155.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.0230404	valid_1's binary_logloss: 0.0297705
Did not meet early stopping. Best iteration is:
[806]	training's binary_logloss: 0.0160416	valid_1's binary_logloss: 0.030019


[32m[I 2023-03-18 19:09:45,345][0m Trial 30 finished with value: 0.9660580616592266 and parameters: {'n_estimators': 806, 'reg_alpha': 0.3351206745892885, 'reg_lambda': 0.0128240702689618, 'colsample_bytree': 0.16, 'num_leaves': 451, 'feature_fraction': 0.23677146891425904, 'bagging_fraction': 0.47765143618824835, 'bagging_freq': 11, 'min_child_samples': 221, 'subsample': 0.96, 'learning_rate': 0.016078045254453426, 'max_depth': 36, 'random_state': 42, 'n_jobs': 4}. Best is trial 21 with value: 0.954198333787155.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.0384625	valid_1's binary_logloss: 0.0399107
Did not meet early stopping. Best iteration is:
[782]	training's binary_logloss: 0.0319963	valid_1's binary_logloss: 0.0372869


[32m[I 2023-03-18 19:10:18,682][0m Trial 31 finished with value: 0.9560339293523553 and parameters: {'n_estimators': 782, 'reg_alpha': 0.004879844208007229, 'reg_lambda': 0.04786393301103769, 'colsample_bytree': 0.89, 'num_leaves': 353, 'feature_fraction': 0.10640756166115845, 'bagging_fraction': 0.4485575192192344, 'bagging_freq': 3, 'min_child_samples': 135, 'subsample': 0.45999999999999996, 'learning_rate': 0.013564802212055473, 'max_depth': 28, 'random_state': 42, 'n_jobs': 4}. Best is trial 21 with value: 0.954198333787155.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.0285117	valid_1's binary_logloss: 0.0342751
Did not meet early stopping. Best iteration is:
[799]	training's binary_logloss: 0.0195166	valid_1's binary_logloss: 0.0327257


[32m[I 2023-03-18 19:10:42,062][0m Trial 32 finished with value: 0.9625739000327301 and parameters: {'n_estimators': 799, 'reg_alpha': 0.03496908112084477, 'reg_lambda': 0.15169961902885123, 'colsample_bytree': 1.0, 'num_leaves': 306, 'feature_fraction': 0.16100143611870954, 'bagging_fraction': 0.46987471333923303, 'bagging_freq': 3, 'min_child_samples': 137, 'subsample': 0.45000000000000007, 'learning_rate': 0.012312560659687525, 'max_depth': 27, 'random_state': 42, 'n_jobs': 4}. Best is trial 21 with value: 0.954198333787155.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.0294407	valid_1's binary_logloss: 0.0339668
Did not meet early stopping. Best iteration is:
[767]	training's binary_logloss: 0.0217731	valid_1's binary_logloss: 0.0323376


[32m[I 2023-03-18 19:11:04,008][0m Trial 33 finished with value: 0.9627858154623892 and parameters: {'n_estimators': 767, 'reg_alpha': 0.003935801368244999, 'reg_lambda': 0.03688417649766689, 'colsample_bytree': 0.92, 'num_leaves': 185, 'feature_fraction': 0.1652459974491297, 'bagging_fraction': 0.3705039522650686, 'bagging_freq': 4, 'min_child_samples': 166, 'subsample': 0.31, 'learning_rate': 0.013041664877373642, 'max_depth': 33, 'random_state': 42, 'n_jobs': 4}. Best is trial 21 with value: 0.954198333787155.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.0348263	valid_1's binary_logloss: 0.0366593
Did not meet early stopping. Best iteration is:
[960]	training's binary_logloss: 0.0284049	valid_1's binary_logloss: 0.0357998


[32m[I 2023-03-18 19:11:33,850][0m Trial 34 finished with value: 0.9578227675415798 and parameters: {'n_estimators': 960, 'reg_alpha': 0.015790862552368924, 'reg_lambda': 1.0164293671891116, 'colsample_bytree': 0.5900000000000001, 'num_leaves': 399, 'feature_fraction': 0.10102373926941603, 'bagging_fraction': 0.4100460519286179, 'bagging_freq': 8, 'min_child_samples': 78, 'subsample': 0.61, 'learning_rate': 0.020151702539021186, 'max_depth': 17, 'random_state': 42, 'n_jobs': 4}. Best is trial 21 with value: 0.954198333787155.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.010148	valid_1's binary_logloss: 0.031621
Did not meet early stopping. Best iteration is:
[723]	training's binary_logloss: 0.00618949	valid_1's binary_logloss: 0.0342134


[32m[I 2023-03-18 19:11:58,174][0m Trial 35 finished with value: 0.9647631903932845 and parameters: {'n_estimators': 723, 'reg_alpha': 0.0007185195725213335, 'reg_lambda': 0.1475181271031238, 'colsample_bytree': 0.92, 'num_leaves': 231, 'feature_fraction': 0.2615233282608617, 'bagging_fraction': 0.524765011980009, 'bagging_freq': 6, 'min_child_samples': 108, 'subsample': 0.13, 'learning_rate': 0.02402877348508536, 'max_depth': 25, 'random_state': 42, 'n_jobs': 4}. Best is trial 21 with value: 0.954198333787155.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.0241896	valid_1's binary_logloss: 0.0328611
[1000]	training's binary_logloss: 0.0135917	valid_1's binary_logloss: 0.0339353
Did not meet early stopping. Best iteration is:
[1000]	training's binary_logloss: 0.0135917	valid_1's binary_logloss: 0.0339353


[32m[I 2023-03-18 19:12:25,491][0m Trial 36 finished with value: 0.9630678669560123 and parameters: {'n_estimators': 1000, 'reg_alpha': 0.006290465346339599, 'reg_lambda': 0.005271690675302556, 'colsample_bytree': 0.52, 'num_leaves': 460, 'feature_fraction': 0.16971436399177114, 'bagging_fraction': 0.4347248024834367, 'bagging_freq': 14, 'min_child_samples': 194, 'subsample': 0.39, 'learning_rate': 0.017562441256050085, 'max_depth': 50, 'random_state': 42, 'n_jobs': 4}. Best is trial 21 with value: 0.954198333787155.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.0291214	valid_1's binary_logloss: 0.0298725
Did not meet early stopping. Best iteration is:
[765]	training's binary_logloss: 0.0271074	valid_1's binary_logloss: 0.0298069


[32m[I 2023-03-18 19:12:32,928][0m Trial 37 finished with value: 0.9655640947359445 and parameters: {'n_estimators': 765, 'reg_alpha': 0.056006449207369184, 'reg_lambda': 2.6398889240413843, 'colsample_bytree': 0.64, 'num_leaves': 562, 'feature_fraction': 0.3574198564486943, 'bagging_fraction': 0.10021928483239972, 'bagging_freq': 7, 'min_child_samples': 138, 'subsample': 0.47, 'learning_rate': 0.028721041532931833, 'max_depth': 46, 'random_state': 42, 'n_jobs': 4}. Best is trial 21 with value: 0.954198333787155.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.0257055	valid_1's binary_logloss: 0.0338918
Did not meet early stopping. Best iteration is:
[741]	training's binary_logloss: 0.0181132	valid_1's binary_logloss: 0.0324573


[32m[I 2023-03-18 19:12:57,289][0m Trial 38 finished with value: 0.9625972787207182 and parameters: {'n_estimators': 741, 'reg_alpha': 0.001472813920732536, 'reg_lambda': 0.022838050723982286, 'colsample_bytree': 0.7400000000000001, 'num_leaves': 111, 'feature_fraction': 0.16265395285937798, 'bagging_fraction': 0.5058568923028571, 'bagging_freq': 10, 'min_child_samples': 77, 'subsample': 0.33999999999999997, 'learning_rate': 0.012507187499538027, 'max_depth': 35, 'random_state': 42, 'n_jobs': 4}. Best is trial 21 with value: 0.954198333787155.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.0195885	valid_1's binary_logloss: 0.0295019
Did not meet early stopping. Best iteration is:
[818]	training's binary_logloss: 0.0139548	valid_1's binary_logloss: 0.0309234


[32m[I 2023-03-18 19:13:12,978][0m Trial 39 finished with value: 0.9657058741016395 and parameters: {'n_estimators': 818, 'reg_alpha': 0.00033955567717785514, 'reg_lambda': 0.0015141441865456897, 'colsample_bytree': 0.8200000000000001, 'num_leaves': 685, 'feature_fraction': 0.409753356475209, 'bagging_fraction': 0.35691628235852546, 'bagging_freq': 4, 'min_child_samples': 157, 'subsample': 0.66, 'learning_rate': 0.021766838537950636, 'max_depth': 9, 'random_state': 42, 'n_jobs': 4}. Best is trial 21 with value: 0.954198333787155.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.00743522	valid_1's binary_logloss: 0.0375599
Early stopping, best iteration is:
[206]	training's binary_logloss: 0.0195657	valid_1's binary_logloss: 0.0327371


[32m[I 2023-03-18 19:13:41,634][0m Trial 40 finished with value: 0.9632096463217072 and parameters: {'n_estimators': 852, 'reg_alpha': 0.02370808470084727, 'reg_lambda': 0.00040763423451957095, 'colsample_bytree': 0.9500000000000001, 'num_leaves': 494, 'feature_fraction': 0.13701048347179517, 'bagging_fraction': 0.5814911652774795, 'bagging_freq': 12, 'min_child_samples': 166, 'subsample': 0.58, 'learning_rate': 0.04556244606698851, 'max_depth': 22, 'random_state': 42, 'n_jobs': 4}. Best is trial 21 with value: 0.954198333787155.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.0354124	valid_1's binary_logloss: 0.0384901
Did not meet early stopping. Best iteration is:
[784]	training's binary_logloss: 0.0294743	valid_1's binary_logloss: 0.0375327


[32m[I 2023-03-18 19:14:22,836][0m Trial 41 finished with value: 0.9559170359124152 and parameters: {'n_estimators': 784, 'reg_alpha': 0.005357971189123415, 'reg_lambda': 0.04193579687926157, 'colsample_bytree': 0.8200000000000001, 'num_leaves': 339, 'feature_fraction': 0.12186002859900684, 'bagging_fraction': 0.4317788560867747, 'bagging_freq': 0, 'min_child_samples': 127, 'subsample': 0.5, 'learning_rate': 0.01617409410808378, 'max_depth': 27, 'random_state': 42, 'n_jobs': 4}. Best is trial 21 with value: 0.954198333787155.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.0109956	valid_1's binary_logloss: 0.0309948
Did not meet early stopping. Best iteration is:
[784]	training's binary_logloss: 0.00504148	valid_1's binary_logloss: 0.0335335


[32m[I 2023-03-18 19:15:00,140][0m Trial 42 finished with value: 0.9637036132449894 and parameters: {'n_estimators': 784, 'reg_alpha': 0.005293190285000351, 'reg_lambda': 0.10398932997004628, 'colsample_bytree': 0.89, 'num_leaves': 354, 'feature_fraction': 0.2399062179561619, 'bagging_fraction': 0.41545195956395203, 'bagging_freq': 0, 'min_child_samples': 108, 'subsample': 0.44000000000000006, 'learning_rate': 0.015039099927524724, 'max_depth': 31, 'random_state': 42, 'n_jobs': 4}. Best is trial 21 with value: 0.954198333787155.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.0286506	valid_1's binary_logloss: 0.0343227
Did not meet early stopping. Best iteration is:
[753]	training's binary_logloss: 0.0209087	valid_1's binary_logloss: 0.0324909


[32m[I 2023-03-18 19:15:26,520][0m Trial 43 finished with value: 0.9626440360966942 and parameters: {'n_estimators': 753, 'reg_alpha': 0.00967129259385173, 'reg_lambda': 0.028920567752329627, 'colsample_bytree': 0.81, 'num_leaves': 287, 'feature_fraction': 0.13868992803487784, 'bagging_fraction': 0.4732936432400193, 'bagging_freq': 2, 'min_child_samples': 137, 'subsample': 0.56, 'learning_rate': 0.012016120007053837, 'max_depth': 15, 'random_state': 42, 'n_jobs': 4}. Best is trial 21 with value: 0.954198333787155.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.0127218	valid_1's binary_logloss: 0.0356348
Did not meet early stopping. Best iteration is:
[792]	training's binary_logloss: 0.00650614	valid_1's binary_logloss: 0.0389221


[32m[I 2023-03-18 19:16:04,727][0m Trial 44 finished with value: 0.9591628889457312 and parameters: {'n_estimators': 792, 'reg_alpha': 0.0023465302866509767, 'reg_lambda': 0.007926880249141233, 'colsample_bytree': 0.7300000000000001, 'num_leaves': 394, 'feature_fraction': 0.20302299602597423, 'bagging_fraction': 0.3748640084805917, 'bagging_freq': 1, 'min_child_samples': 39, 'subsample': 0.64, 'learning_rate': 0.018643720146969498, 'max_depth': 57, 'random_state': 42, 'n_jobs': 4}. Best is trial 21 with value: 0.954198333787155.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.0265804	valid_1's binary_logloss: 0.0305405
Did not meet early stopping. Best iteration is:
[768]	training's binary_logloss: 0.0201423	valid_1's binary_logloss: 0.0298423


[32m[I 2023-03-18 19:16:20,685][0m Trial 45 finished with value: 0.9654924514342138 and parameters: {'n_estimators': 768, 'reg_alpha': 0.0008344096117361985, 'reg_lambda': 0.3836165151342877, 'colsample_bytree': 0.27, 'num_leaves': 235, 'feature_fraction': 0.2933846287569618, 'bagging_fraction': 0.31106308737480903, 'bagging_freq': 2, 'min_child_samples': 97, 'subsample': 0.74, 'learning_rate': 0.011568626536310994, 'max_depth': 29, 'random_state': 42, 'n_jobs': 4}. Best is trial 21 with value: 0.954198333787155.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.0403731	valid_1's binary_logloss: 0.0382095
Did not meet early stopping. Best iteration is:
[835]	training's binary_logloss: 0.035034	valid_1's binary_logloss: 0.033658


[32m[I 2023-03-18 19:16:30,230][0m Trial 46 finished with value: 0.9613974294444948 and parameters: {'n_estimators': 835, 'reg_alpha': 6.761002131724677, 'reg_lambda': 0.0687569877970152, 'colsample_bytree': 0.6900000000000001, 'num_leaves': 916, 'feature_fraction': 0.1488314774901163, 'bagging_fraction': 0.5542718658037978, 'bagging_freq': 4, 'min_child_samples': 84, 'subsample': 0.30000000000000004, 'learning_rate': 0.01006560665451728, 'max_depth': 38, 'random_state': 42, 'n_jobs': 4}. Best is trial 21 with value: 0.954198333787155.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.0283468	valid_1's binary_logloss: 0.0333606
Did not meet early stopping. Best iteration is:
[814]	training's binary_logloss: 0.0204468	valid_1's binary_logloss: 0.0324339


[32m[I 2023-03-18 19:16:50,055][0m Trial 47 finished with value: 0.9641508227922954 and parameters: {'n_estimators': 814, 'reg_alpha': 0.062058462906783365, 'reg_lambda': 0.01722884319424159, 'colsample_bytree': 0.9500000000000001, 'num_leaves': 329, 'feature_fraction': 0.22284835285906895, 'bagging_fraction': 0.3419761740387608, 'bagging_freq': 12, 'min_child_samples': 190, 'subsample': 0.19, 'learning_rate': 0.015507786982726297, 'max_depth': 24, 'random_state': 42, 'n_jobs': 4}. Best is trial 21 with value: 0.954198333787155.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.0259932	valid_1's binary_logloss: 0.0298708
Did not meet early stopping. Best iteration is:
[869]	training's binary_logloss: 0.0189658	valid_1's binary_logloss: 0.0298792


[32m[I 2023-03-18 19:17:06,892][0m Trial 48 finished with value: 0.9659396609815197 and parameters: {'n_estimators': 869, 'reg_alpha': 0.0038793372852479026, 'reg_lambda': 0.5570625597274207, 'colsample_bytree': 0.54, 'num_leaves': 461, 'feature_fraction': 0.2550819546853047, 'bagging_fraction': 0.28201234056383023, 'bagging_freq': 6, 'min_child_samples': 119, 'subsample': 0.4, 'learning_rate': 0.013987576176840091, 'max_depth': 97, 'random_state': 42, 'n_jobs': 4}. Best is trial 21 with value: 0.954198333787155.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's binary_logloss: 0.0293155	valid_1's binary_logloss: 0.0371016
Did not meet early stopping. Best iteration is:
[757]	training's binary_logloss: 0.025393	valid_1's binary_logloss: 0.0384685


[32m[I 2023-03-18 19:17:36,621][0m Trial 49 finished with value: 0.9570467491246744 and parameters: {'n_estimators': 757, 'reg_alpha': 0.02110340911252225, 'reg_lambda': 0.07907173186199823, 'colsample_bytree': 0.8800000000000001, 'num_leaves': 527, 'feature_fraction': 0.1335284578547758, 'bagging_fraction': 0.44983664416739955, 'bagging_freq': 8, 'min_child_samples': 132, 'subsample': 0.64, 'learning_rate': 0.027947042153573538, 'max_depth': 19, 'random_state': 42, 'n_jobs': 4}. Best is trial 21 with value: 0.954198333787155.[0m
[32m[I 2023-03-18 19:17:36,638][0m A new study created in memory with name: CatBoost minimize[0m


Number of finished trials: 50
Best LGBM trial parameters: {'n_estimators': 776, 'reg_alpha': 0.00267006626029069, 'reg_lambda': 0.07780500169863878, 'colsample_bytree': 0.7100000000000001, 'num_leaves': 377, 'feature_fraction': 0.12270301761718339, 'bagging_fraction': 0.4152464820545878, 'bagging_freq': 7, 'min_child_samples': 136, 'subsample': 0.55, 'learning_rate': 0.010026076674082804, 'max_depth': 18, 'random_state': 42, 'n_jobs': 4}
Best score: 0.954198333787155


[32m[I 2023-03-18 19:17:37,358][0m Trial 0 finished with value: 0.9631862676337193 and parameters: {'learning_rate': 0.423427542689462, 'l2_leaf_reg': 47.17370094963475, 'bagging_temperature': 0.250253556969293, 'random_strength': 1.5096641742938475, 'depth': 10, 'min_data_in_leaf': 27}. Best is trial 0 with value: 0.9631862676337193.[0m
[32m[I 2023-03-18 19:17:37,648][0m Trial 1 finished with value: 0.9658461462295678 and parameters: {'learning_rate': 0.8267369415861313, 'l2_leaf_reg': 23.274370086940188, 'bagging_temperature': 19.760166407824617, 'random_strength': 1.0905274915412388, 'depth': 6, 'min_data_in_leaf': 33}. Best is trial 0 with value: 0.9631862676337193.[0m
[32m[I 2023-03-18 19:17:37,906][0m Trial 2 finished with value: 0.9604781244241279 and parameters: {'learning_rate': 0.1694373517468646, 'l2_leaf_reg': 6.1611832166983085, 'bagging_temperature': 4.3324267867484, 'random_strength': 1.1353103896919776, 'depth': 3, 'min_data_in_leaf': 236}. Best is trial 2 with 

Number of finished trials: 50
Best Cat trial parameters: {'learning_rate': 0.10130029259586143, 'l2_leaf_reg': 9.722676393707983, 'bagging_temperature': 2.0866209159838576, 'random_strength': 1.843195277758638, 'depth': 1, 'min_data_in_leaf': 298}
Best score: 0.9523597237464211
CPU times: user 3h 8min 11s, sys: 8min 7s, total: 3h 16min 19s
Wall time: 58min 7s


<div style="background-color:rgba(177, 156, 217, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Train Models with Cross Validation</h1>
</div>

In [34]:
# train = create_folds(train, Config.N_FOLDS)
train = create_strat_folds(train, TARGET, Config.N_FOLDS)

TARGET=Class, n_folds=20, seed=42


In [35]:
all_cv_scores = pd.DataFrame(
    {
        "Model": pd.Series(dtype="str"),
        "Score": pd.Series(dtype="float"),
        "StdDev": pd.Series(dtype="float"),
        "RunTime": pd.Series(dtype="float"),
    }
)

oof = train[[ID, TARGET, "fold"]].copy().reset_index(drop=True).copy()
oof.set_index(ID, inplace=True)
oof.head()

Unnamed: 0_level_0,Class,fold
id,Unnamed: 1_level_1,Unnamed: 2_level_1
0,0,16
1,0,16
2,0,1
3,0,19
4,0,17


In [36]:
def show_tree_model_fi(model, features:List[str]) -> None:
    print("\n=== Model Feature Importance ===")
    for i in model.feature_importances_.argsort()[::-1]:
        print(features[i], model.feature_importances_[i]/model.feature_importances_.sum())

def save_oof_predictions(model_name:str, final_valid_predictions, oof:pd.DataFrame) -> pd.DataFrame:
    final_valid_predictions_df = process_valid_predictions(
        final_valid_predictions, ID, model_name
    )
    display(final_valid_predictions_df.head())
    oof[f"pred_{model_name}"] = final_valid_predictions_df[f"pred_{model_name}"]

    return oof

def save_test_predictions(model_name:str, final_test_predictions, submission_df:pd.DataFrame, result_field:str=TARGET) -> None:
    result = merge_test_predictions(final_test_predictions, Config.calc_probability)
    # result[:20]
    submission_df[f"target_{model_name}"] = result #.astype(int)
    #     submission_df.head(10)
    ss = submission_df[[ID, f"target_{model_name}"]].copy().reset_index(drop=True)
    ss.rename(columns={f"target_{model_name}": result_field}, inplace=True)
    ss.to_csv(
        f"submission_{model_name}.csv", index=False
    )  # Can submit the individual model
    print("=== Target Value Counts ===")
#     display(ss[TARGET].value_counts())
    ss.head(10)

def process_valid_predictions(final_valid_predictions, train_id, model_name:str) -> pd.DataFrame:
    model = f"pred_{model_name}"
    final_valid_predictions_df = pd.DataFrame.from_dict(
        final_valid_predictions, orient="index"
    ).reset_index()
    final_valid_predictions_df.columns = [train_id, model]
    final_valid_predictions_df.set_index(train_id, inplace=True)
    final_valid_predictions_df.sort_index(inplace=True)
    final_valid_predictions_df.to_csv(f"train_pred_{model_name}.csv", index=True)

    return final_valid_predictions_df

def add_score(score_df:pd.DataFrame, model_name:str, score:float, std:float):
    dict1 = {"Model": model_name, "Score": cv_score, "StdDev": std_dev}
    score_df = score_df.append(dict1, ignore_index=True)
    return score_df

In [37]:
def train_cv_model(
    df:pd.DataFrame,
    test:pd.DataFrame,
    get_model_fn,
    FEATURES:List[str],
    TARGET:str,
    calc_probability:bool,
    rowid,
    params,
    n_folds:int=5,
    seed:int=42,
):

    final_test_predictions = []
    final_valid_predictions = {}
    fold_scores = []  # Scores of Validation Set
    feature_importance_lst = []

    test = test[FEATURES].copy()

    for fold in range(n_folds):
        print(10 * "=", f"Fold {fold+1}/{n_folds}", 10 * "=")

        start_time = time.time()

        xtrain = df[df.fold != fold].reset_index(
            drop=True
        )  # Everything not in validation fold
        xvalid = df[df.fold == fold].reset_index(drop=True)
        xtest = test.copy()

        valid_ids = xvalid.id.values.tolist()  # Id's of everything in validation fold

        ytrain = xtrain[TARGET]
        yvalid = xvalid[TARGET]

        xtrain = xtrain[FEATURES]
        xvalid = xvalid[FEATURES]

        scaler = preprocessing.StandardScaler()
#         scaler = preprocessing.MinMaxScaler()
        xtrain = scaler.fit(xtrain).transform(xtrain)
        xvalid = scaler.transform(xvalid)
        xtest = scaler.transform(xtest)

        model = get_model_fn # ()

        model.fit(
            xtrain,
            ytrain,
        )
        if calc_probability:
            preds_valid = model.predict_proba(xvalid)[:, 1]
            test_preds = model.predict_proba(xtest)[:, 1]
        else:
            preds_valid = model.predict(xvalid)
            test_preds = model.predict(xtest)

        preds_valid_class = model.predict(xvalid)
        
        final_test_predictions.append(test_preds)
        final_valid_predictions.update(dict(zip(valid_ids, preds_valid)))

#         fold_score = metrics.accuracy_score(yvalid, preds_valid_class)  # Validation Set Score
        fold_score = metrics.log_loss(yvalid, preds_valid)
#         fold_score = metrics.mean_absolute_error(
#             yvalid, preds_valid
#         )
#         fold_score = metrics.roc_auc_score(yvalid.values, preds_valid)  # Validation Set Score
#         show_classification_scores(yvalid.values, preds_valid_class)

#         fold_score = metrics.roc_auc_score(yvalid, preds_valid)  # Validation Set Score
#         fold_score = metrics.mean_squared_error(yvalid, preds_valid, squared=False)
        fold_scores.append(fold_score)
        #         importance_list.append(model.coef_.ravel())

        fi = []
        # Feature importance
#         fi = pd.DataFrame(
#             index=FEATURES,
#             data=model.coef_.ravel(),
#             columns=[f"{fold}_importance"],
#         )
        
        feature_importance_lst.append(fi)

        run_time = time.time() - start_time

        print(f"fold: {fold+1}, Score: {fold_score}, Run Time: {run_time:.2f}")

    return (
        model,
        feature_importance_lst,
        fold_scores,
        final_valid_predictions,
        final_test_predictions,
    )


def train_xgb_model(
    df:pd.DataFrame,
    test:pd.DataFrame,
    get_model_fn,
    FEATURES:List[str],
    TARGET:str,
    calc_probability:bool,
    rowid:str,
    params,
    n_folds:int=5,
    seed:int=42,
):

    print(params)
    final_test_predictions = []
    final_valid_predictions = {}
    fold_scores = []  # Scores of Validation Set
    feature_importance_lst = []

    test = test[FEATURES].copy()

    for fold in range(n_folds):
        print(10 * "=", f"Fold {fold+1}/{n_folds}", 10 * "=")

        start_time = time.time()

        xtrain = df[df.fold != fold].reset_index(
            drop=True
        )  # Everything not in validation fold
        xvalid = df[df.fold == fold].reset_index(drop=True)
        xtest = test.copy()

        valid_ids = xvalid.id.values.tolist()  # Id's of everything in validation fold

        ytrain = xtrain[TARGET]
        yvalid = xvalid[TARGET]

        xtrain = xtrain[FEATURES]
        xvalid = xvalid[FEATURES]

        model = get_model_fn # (params)

        model.fit(
            xtrain,
            ytrain,
            eval_set=[(xvalid, yvalid)],
            #             eval_metric="acc",  # auc
            verbose=0,
            #             early_stopping_rounds=3000,
            #             callbacks=[
            #                 xgb.log_evaluation(0),
            #                 xgb.early_stopping(500, False, True),
            #             ],
        )

        if calc_probability:
            preds_valid = model.predict_proba(xvalid)[:, 1]
            test_preds = model.predict_proba(xtest)[:, 1]
        else:
            preds_valid = model.predict(xvalid)
            test_preds = model.predict(xtest)

        preds_valid_class = model.predict(xvalid)
        
        final_test_predictions.append(test_preds)
        if Config.debug:
            print(f"GT Type: {type(yvalid.values)}")
            print(f"Preds Type: {type(preds_valid_class)}")
            print(f"         GT:{yvalid.values[:20]}")
            print(f"Preds Class:{preds_valid_class[:20]}")
            print(f"Preds Prob:{preds_valid[:20]}")
        final_valid_predictions.update(dict(zip(valid_ids, preds_valid_class)))

#         fold_score = metrics.cohen_kappa_score(yvalid,  preds_valid_class, weights = "quadratic")
        fold_score = metrics.log_loss(yvalid.values, preds_valid)  # Validation Set Score
#         fold_score = metrics.roc_auc_score(yvalid.values, preds_valid)  # Validation Set Score

#         show_classification_scores(yvalid.values, preds_valid_class)
#         fold_score = metrics.mean_absolute_error(
#             yvalid, preds_valid
#         )  # Validation Set Score
#         fold_score = metrics.mean_squared_error(yvalid, preds_valid, squared=False)
        fold_scores.append(fold_score)

        # Feature importance
        fi = pd.DataFrame(
            index=FEATURES,
            data=model.feature_importances_,
            columns=[f"{fold}_importance"],
        )
        feature_importance_lst.append(fi)

        run_time = time.time() - start_time

        print(f"fold: {fold+1}, Score: {fold_score}, Run Time: {run_time:.2f}")

    return (
        model,
        feature_importance_lst,
        fold_scores,
        final_valid_predictions,
        final_test_predictions,
    )        

In [38]:
def run_linear_model(model_dict, model_name:str, features:List[str], oof:pd.DataFrame) -> (float, float, pd.DataFrame):
    (
        model,
        feature_importance_lst,
        fold_scores,
        final_valid_predictions,
        final_test_predictions,
    ) = train_cv_model(
        train,
        test,
        model_dict[model_name],
        features,
        TARGET,
        False, #Config.calc_probability,
        ID,
        {},
        Config.N_FOLDS,
        Config.seed,
    )

    cv_score, std_dev = show_fold_scores(fold_scores)

    oof = save_oof_predictions(model_name, final_valid_predictions, oof)
    oof.head()
    save_test_predictions(model_name, final_test_predictions, sample_submission, TARGET)

    return cv_score, std_dev, oof


def run_tree_model(model_dict, model_name:str, features:List[str], params, oof:pd.DataFrame) -> (float, float, pd.DataFrame):
    (
        model,
        feature_importance_lst,
        fold_scores,
        final_valid_predictions,
        final_test_predictions,
    ) = train_xgb_model(
        train,
        test,
        model_dict[model_name],
        features,
        TARGET,
        Config.calc_probability,
        ID,
        params,
        Config.N_FOLDS,
        Config.seed,
    )

    cv_score, std_dev = show_fold_scores(fold_scores)
    show_tree_model_fi(model, features)

    oof = save_oof_predictions(model_name, final_valid_predictions, oof)
    oof.head()
    save_test_predictions(model_name, final_test_predictions, sample_submission, TARGET)

    return cv_score, std_dev, oof

In [39]:
%%time

def run_models4features(model_dict, model_lst:List[str], target:str, feature_lst:List[str], all_cv_scores:pd.DataFrame, linear_models:bool=True) -> pd.DataFrame:

    oof = train[[ID, target, "fold"]].copy().reset_index(drop=True).copy()
    oof.set_index(ID, inplace=True)

    for idx, m in enumerate(model_lst):
        model = model_lst[idx]
        start_time = time.time()

        print(f"Model={model}")

        params = {}
        if linear_models:
                cv_score, std_dev, oof = run_linear_model(model_dict, model, feature_lst, oof)

        else:
            cv_score, std_dev, oof = run_tree_model(model_dict, model, feature_lst, params, oof)

        run_time = time.time() - start_time

        score_dict = {"Model": model, "Score": cv_score, "StdDev": std_dev, "RunTime": run_time}
        all_cv_scores = all_cv_scores.append(score_dict, ignore_index=True)
        print(f"Model Run Time: {run_time:.2f}")

    return all_cv_scores

CPU times: user 14 µs, sys: 0 ns, total: 14 µs
Wall time: 19.6 µs


In [40]:
lgbm_params = {'n_estimators': Config.N_ESTIMATORS,
                 'objective': 'binary',
                'metric': 'binary_logloss', #'auc',
                 'num_rounds': 404,
                 'learning_rate': 0.19,
                 'num_leaves': 17,
                 'max_depth': 8,
                 'min_data_in_leaf': 36,
                 'lambda_l1': 0.96,
                 'lambda_l2': 0.01,
                 'min_gain_to_split': 11.32,
                 'bagging_fraction': 0.6,
                 'feature_fraction': 0.9}


lgbm_params3 = {
    "n_estimators": Config.N_ESTIMATORS,
      'objective': 'binary',
#     'objective': 'regression',
      'metric': 'binary_logloss', #'auc',
    'max_depth': 9,
    'learning_rate': 0.01,
    'min_data_in_leaf': 36, 
    'num_leaves': 100, 
    'feature_fraction': 0.8, 
    'bagging_fraction': 0.89, 
    'bagging_freq': 5, 
    'lambda_l2': 28,
    
    'seed': Config.seed,

#     'boosting_type': 'gbdt',
#     'device': 'gpu', 
#     'gpu_platform_id': 0,
#     'gpu_device_id': 0,
    'n_jobs': -1,
    'metric': 'rmse',
    'verbose': -1
}
    
lgbm_params = gpu_ify_lgbm(lgbm_params)

In [41]:
xgb_params = {
    "n_estimators": Config.N_ESTIMATORS,  # 10_000,
    "max_depth": 10,  # 10
#     "objective": "reg:squarederror",
       'eval_metric'     : 'logloss', #'auc',
       'objective'       : 'binary:logistic',    
    #     "enable_categorical": True,  # Only works with gpu_hist
    #     "eval_metric": "mae",
    #     "metric": "mae",
    #     "enable_categorical": True,
    "n_jobs": 8,  # 4
    "seed": Config.seed,
    "tree_method": "hist",
    #         "gpu_id": 0,
    "subsample": 0.9,  # 0.7
    "colsample_bytree": 0.7,
    "use_label_encoder": False,
    "learning_rate": 0.05,  # 0.01
}

xgb_params3 = {
    'n_estimators': Config.N_ESTIMATORS,
       'eval_metric'     : 'logloss', #'auc',
       'objective'       : 'binary:logistic',
    'learning_rate': 0.05,
    'max_depth': 10,
    "seed": Config.seed,    
    'subsample': 0.8,
    'colsample_bytree': 0.8,
#     'objective': 'reg:squarederror'
}

xgb_params_logloss = {
    'n_estimators': Config.N_ESTIMATORS,
   'eval_metric'     : 'logloss', #'auc',
   'objective'       : 'binary:logistic',
    "seed": Config.seed,    
    'max_depth': 4,
    'learning_rate': 0.06,
    'colsample_bytree': 0.67,
    'n_jobs': -1,
    'objective': 'binary:logistic',
    'early_stopping_rounds': 150,
    'verbosity': 0,
#     'eval_metric': 'logloss'
}


if Config.gpu:
    xgb_params["tree_method"] = "gpu_hist"
else:
    xgb_params["tree_method"] = "hist"

In [42]:
cb_params = {
    #     "learning_rate": 0.3277295792305584,
    "learning_rate": 0.05,
    "l2_leaf_reg": 3.1572972266001518,
    "bagging_temperature": 0.6799604234141348,
    "random_strength": 1.99590400593318,
    "depth": 10,
    "min_data_in_leaf": 93,
    # "iterations": 100,  # 10000
    "n_estimators": Config.N_ESTIMATORS,  # 10000
    "use_best_model": True,
    #     "task_type": "GPU",
    "random_seed": Config.seed,
}

cb_params = gpu_ify_cb(cb_params)

In [43]:
model_estimator_dict = {
    "xgb2": xgb.XGBRegressor(**xgb_params),
    "xgb_best_params": xgb.XGBRegressor(**best_xgb_params),
    "xgb3": xgb.XGBRegressor(**xgb_params3),
    
    "lgbm1": lgb.LGBMRegressor(**lgbm_params),

    "cat1": cb.CatBoostRegressor(),
    "cat2": cb.CatBoostRegressor(**cb_params),
    "cat_best_params": cb.CatBoostRegressor(**best_cb_params),

    "xgb1": xgb.XGBRegressor(),
    "lgbm0": lgb.LGBMRegressor(),
    "lgbm3": lgb.LGBMRegressor(lgbm_params3),
    "lgbm2": lgb.LGBMRegressor(
        learning_rate=0.05,
        max_depth=15,
        num_leaves=11,
        feature_fraction=0.3,
        subsample=0.1,
        n_jobs=-1,
    ),
    "lgbm3": lgb.LGBMRegressor(**lgbm_params),
    "lgbm_best_params": lgb.LGBMRegressor(**best_lgbm_params),


    "lin_reg": linear_model.LinearRegression(),
    "lasso": linear_model.Lasso(),
    "ridge": linear_model.Ridge(max_iter=7000),
    "ridge_25": linear_model.Ridge(fit_intercept=True, solver='auto', alpha=0.25, max_iter=7000),
    "ridge_50": linear_model.Ridge(fit_intercept=True, solver='auto', alpha=0.5, max_iter=7000),
}

model_estimator_dict = {
    "xgb1": xgb.XGBClassifier(**xgb_params),
    "xgb_best_params": xgb.XGBClassifier(**best_xgb_params),
    "xgb3": xgb.XGBClassifier(**xgb_params3),
    "xgb_params_logloss": xgb.XGBClassifier(**xgb_params_logloss),

    "lgbm1": lgb.LGBMClassifier(**lgbm_params),
    "lgbm_best_params": lgb.LGBMClassifier(**best_lgbm_params),
    "lgbm2": lgb.LGBMClassifier(
        learning_rate=0.05,
        max_depth=15,
        num_leaves=11,
        feature_fraction=0.3,
        subsample=0.1,
        n_jobs=-1,
    ),

    #     "lgbm2": lgb.LGBMClassifier(**lgb_params_best_bsmith),
    #     "lgbm3": lgb.LGBMClassifier(**lgbm_params03),
#     "cat1": cb.CatBoostClassifier(**cb_params),
    "cat1": cb.CatBoostClassifier(),
    "cat2": cb.CatBoostClassifier(**cb_params),
    "cat_best_params": cb.CatBoostClassifier(**best_cb_params),

    #     "cat2": cb.CatBoostClassifier(**cb_params2),
    #     "cat3": cb.CatBoostClassifier(**cb_params3),
}

## Tree Models

In [44]:
%%time

# model_lst = ["xgb3","xgb_best_params", "lgbm_best_params", "cat_best_params", "xgb1", "xgb2", "lgbm1", "lgbm2", "cat1", "cat2"]
model_lst = ["xgb_params_logloss","xgb_best_params", "lgbm_best_params", "cat_best_params","xgb3", "xgb1", "lgbm1", "lgbm2", "cat1", "cat2"]
# model_lst = ["xgb1", "xgb_best_params"]
all_cv_scores = run_models4features(model_estimator_dict, model_lst, TARGET, FEATURES, all_cv_scores, linear_models=False)    

all_cv_scores.sort_values(by=["Score"], ascending=False)

Model=xgb_params_logloss
{}
fold: 1, Score: 0.027825908449409145, Run Time: 60.14
fold: 2, Score: 0.023524611097681482, Run Time: 57.15
fold: 3, Score: 0.02465302442261081, Run Time: 44.60
fold: 4, Score: 0.033821109774929865, Run Time: 60.79
fold: 5, Score: 0.03015612027433193, Run Time: 59.83
fold: 6, Score: 0.027907857871322196, Run Time: 59.66
fold: 7, Score: 0.0353773273739031, Run Time: 56.87
fold: 8, Score: 0.03351301675119297, Run Time: 60.36
fold: 9, Score: 0.03248165170669, Run Time: 51.16
fold: 10, Score: 0.026431469698222503, Run Time: 60.11
fold: 11, Score: 0.029087000718092584, Run Time: 46.42
fold: 12, Score: 0.033534725400321236, Run Time: 60.21
fold: 13, Score: 0.036174859171950124, Run Time: 60.38
fold: 14, Score: 0.029931180453277143, Run Time: 51.81
fold: 15, Score: 0.031079865768824967, Run Time: 60.23
fold: 16, Score: 0.025185399118825058, Run Time: 60.40
fold: 17, Score: 0.03390002073661282, Run Time: 60.34
fold: 18, Score: 0.035996606160497734, Run Time: 50.59
f

Unnamed: 0_level_0,pred_xgb_params_logloss
id,Unnamed: 1_level_1
0,0
1,0
2,0
3,0
4,0


Mean
=== Target Value Counts ===
Model Run Time: 1141.70
Model=xgb_best_params
{}
fold: 1, Score: 0.028522722857826876, Run Time: 24.33
fold: 2, Score: 0.02519895365506145, Run Time: 25.22
fold: 3, Score: 0.02726018603070474, Run Time: 25.37
fold: 4, Score: 0.0351817830788693, Run Time: 25.45
fold: 5, Score: 0.03321048604886033, Run Time: 25.48
fold: 6, Score: 0.029701348160457372, Run Time: 24.35
fold: 7, Score: 0.03785677838383442, Run Time: 25.37
fold: 8, Score: 0.03501344013042066, Run Time: 25.38
fold: 9, Score: 0.03399688488214029, Run Time: 25.28
fold: 10, Score: 0.026824639363017988, Run Time: 24.55
fold: 11, Score: 0.03129382217715321, Run Time: 25.02
fold: 12, Score: 0.03656778947804233, Run Time: 25.44
fold: 13, Score: 0.03934693519212958, Run Time: 25.37
fold: 14, Score: 0.03306841663165155, Run Time: 25.32
fold: 15, Score: 0.033271158212304926, Run Time: 24.44
fold: 16, Score: 0.026908368957446305, Run Time: 25.36
fold: 17, Score: 0.03392872978296958, Run Time: 25.40
fold:

Unnamed: 0_level_0,pred_xgb_best_params
id,Unnamed: 1_level_1
0,0
1,0
2,0
3,0
4,0


Mean
=== Target Value Counts ===
Model Run Time: 502.70
Model=lgbm_best_params
{}
fold: 1, Score: 0.039141912929445646, Run Time: 34.12
fold: 2, Score: 0.034621836812203946, Run Time: 32.68
fold: 3, Score: 0.034908869448247046, Run Time: 32.95
fold: 4, Score: 0.04245974296783997, Run Time: 32.92
fold: 5, Score: 0.04102197797339125, Run Time: 33.15
fold: 6, Score: 0.03990525239753057, Run Time: 33.08
fold: 7, Score: 0.04118791893770301, Run Time: 27.12
fold: 8, Score: 0.04349033841879987, Run Time: 33.69
fold: 9, Score: 0.044175707465326695, Run Time: 33.23
fold: 10, Score: 0.0362985446409957, Run Time: 33.52
fold: 11, Score: 0.03784975114110614, Run Time: 33.38
fold: 12, Score: 0.043998554030036024, Run Time: 33.41
fold: 13, Score: 0.046282724400848664, Run Time: 33.62
fold: 14, Score: 0.04128023554941556, Run Time: 33.30
fold: 15, Score: 0.04331714907254595, Run Time: 33.91
fold: 16, Score: 0.03659649865292226, Run Time: 33.46
fold: 17, Score: 0.045604428997859, Run Time: 33.61
fold: 

Unnamed: 0_level_0,pred_lgbm_best_params
id,Unnamed: 1_level_1
0,0
1,0
2,0
3,0
4,0


Mean
=== Target Value Counts ===
Model Run Time: 662.93
Model=cat_best_params
{}
fold: 1, Score: 0.030959698384354317, Run Time: 10.00
fold: 2, Score: 0.02542965872154459, Run Time: 9.86
fold: 3, Score: 0.02672592180211778, Run Time: 9.92
fold: 4, Score: 0.03588212867511601, Run Time: 9.98
fold: 5, Score: 0.033430833871000744, Run Time: 10.42
fold: 6, Score: 0.030558898856886924, Run Time: 9.87
fold: 7, Score: 0.03671022998178769, Run Time: 9.95
fold: 8, Score: 0.035407576557919485, Run Time: 10.39
fold: 9, Score: 0.0348788785807433, Run Time: 9.75
fold: 10, Score: 0.027960207460245433, Run Time: 9.87
fold: 11, Score: 0.030183061369909434, Run Time: 10.21
fold: 12, Score: 0.0369429998212061, Run Time: 9.85
fold: 13, Score: 0.0398557747157921, Run Time: 9.72
fold: 14, Score: 0.03324048708078285, Run Time: 10.34
fold: 15, Score: 0.03363218173146281, Run Time: 9.87
fold: 16, Score: 0.02699772548745893, Run Time: 9.94
fold: 17, Score: 0.03554596339501986, Run Time: 10.39
fold: 18, Score: 0

Unnamed: 0_level_0,pred_cat_best_params
id,Unnamed: 1_level_1
0,0
1,0
2,0
3,0
4,0


Mean
=== Target Value Counts ===
Model Run Time: 200.53
Model=xgb3
{}
fold: 1, Score: 0.031513960197277924, Run Time: 154.40
fold: 2, Score: 0.025349501751639712, Run Time: 155.07
fold: 3, Score: 0.02703720636919883, Run Time: 154.48
fold: 4, Score: 0.03847118217338511, Run Time: 154.48
fold: 5, Score: 0.0340060722480201, Run Time: 154.37
fold: 6, Score: 0.032754498630487225, Run Time: 152.94
fold: 7, Score: 0.0423065126982033, Run Time: 154.00
fold: 8, Score: 0.03880636587395736, Run Time: 154.05
fold: 9, Score: 0.03732258461938002, Run Time: 154.70
fold: 10, Score: 0.029459085487990935, Run Time: 154.13
fold: 11, Score: 0.03143752776218568, Run Time: 154.00
fold: 12, Score: 0.04048047606269599, Run Time: 154.31
fold: 13, Score: 0.041411567949006745, Run Time: 154.41
fold: 14, Score: 0.032171541422966146, Run Time: 154.54
fold: 15, Score: 0.03583140137705035, Run Time: 154.08
fold: 16, Score: 0.026165418078968623, Run Time: 154.46
fold: 17, Score: 0.03900176418427555, Run Time: 153.65

Unnamed: 0_level_0,pred_xgb3
id,Unnamed: 1_level_1
0,0
1,0
2,0
3,0
4,0


Mean
=== Target Value Counts ===
Model Run Time: 3081.83
Model=xgb1
{}
fold: 1, Score: 0.03143775922447396, Run Time: 14.92
fold: 2, Score: 0.025446552125429126, Run Time: 15.07
fold: 3, Score: 0.02734649106398051, Run Time: 14.94
fold: 4, Score: 0.03967540812066759, Run Time: 14.86
fold: 5, Score: 0.03573418224932937, Run Time: 14.93
fold: 6, Score: 0.03391379722414033, Run Time: 14.80
fold: 7, Score: 0.04320943520717456, Run Time: 14.91
fold: 8, Score: 0.0403520454475879, Run Time: 15.18
fold: 9, Score: 0.036608162060110916, Run Time: 15.00
fold: 10, Score: 0.029788102266686187, Run Time: 14.98
fold: 11, Score: 0.03274836602561104, Run Time: 15.12
fold: 12, Score: 0.041417897194243956, Run Time: 14.93
fold: 13, Score: 0.04218296029331991, Run Time: 14.94
fold: 14, Score: 0.032087105784606544, Run Time: 14.94
fold: 15, Score: 0.03560434662262791, Run Time: 14.92
fold: 16, Score: 0.02714884533009593, Run Time: 15.18
fold: 17, Score: 0.038814331211491214, Run Time: 15.12
fold: 18, Score

Unnamed: 0_level_0,pred_xgb1
id,Unnamed: 1_level_1
0,0
1,0
2,0
3,0
4,0


Mean
=== Target Value Counts ===
Model Run Time: 300.43
Model=lgbm1
{}
fold: 1, Score: 0.029437250295212773, Run Time: 1.47
fold: 2, Score: 0.025578188889183423, Run Time: 1.53
fold: 3, Score: 0.02700735947735329, Run Time: 1.47
fold: 4, Score: 0.03616908417805216, Run Time: 1.46
fold: 5, Score: 0.03176431870137264, Run Time: 1.47
fold: 6, Score: 0.029873813852306662, Run Time: 1.47
fold: 7, Score: 0.03668590020093641, Run Time: 1.47
fold: 8, Score: 0.03604788582232644, Run Time: 1.48
fold: 9, Score: 0.03409192317087498, Run Time: 1.46
fold: 10, Score: 0.027980929961326305, Run Time: 1.52
fold: 11, Score: 0.030123285928263566, Run Time: 1.47
fold: 12, Score: 0.036544464072707644, Run Time: 2.33
fold: 13, Score: 0.038483341141767054, Run Time: 1.48
fold: 14, Score: 0.032447398750152345, Run Time: 1.46
fold: 15, Score: 0.03312419287981429, Run Time: 1.47
fold: 16, Score: 0.02741695185704189, Run Time: 1.46
fold: 17, Score: 0.03746423463180864, Run Time: 1.53
fold: 18, Score: 0.0374136257

Unnamed: 0_level_0,pred_lgbm1
id,Unnamed: 1_level_1
0,0
1,0
2,0
3,0
4,0


Mean
=== Target Value Counts ===
Model Run Time: 30.83
Model=lgbm2
{}
fold: 1, Score: 0.0317121771224417, Run Time: 0.80
fold: 2, Score: 0.02794211803766968, Run Time: 0.80
fold: 3, Score: 0.028556700997545127, Run Time: 0.80
fold: 4, Score: 0.037456512314689085, Run Time: 0.86
fold: 5, Score: 0.03429831829474381, Run Time: 0.80
fold: 6, Score: 0.03271954099167806, Run Time: 0.80
fold: 7, Score: 0.03798507116160362, Run Time: 0.85
fold: 8, Score: 0.03786665787948778, Run Time: 0.81
fold: 9, Score: 0.03634026783945061, Run Time: 0.81
fold: 10, Score: 0.03096196425894194, Run Time: 0.80
fold: 11, Score: 0.031756675087252384, Run Time: 0.81
fold: 12, Score: 0.038053163866395526, Run Time: 0.79
fold: 13, Score: 0.039990173297249944, Run Time: 0.80
fold: 14, Score: 0.03445490868564296, Run Time: 0.80
fold: 15, Score: 0.03626724286506697, Run Time: 0.87
fold: 16, Score: 0.03028038907522509, Run Time: 0.80
fold: 17, Score: 0.03850792270052427, Run Time: 0.81
fold: 18, Score: 0.040424266126683

Unnamed: 0_level_0,pred_lgbm2
id,Unnamed: 1_level_1
0,0
1,0
2,0
3,0
4,0


Mean
=== Target Value Counts ===
Model Run Time: 16.84
Model=cat1
{}
fold: 1, Score: 0.027742173777016675, Run Time: 17.85
fold: 2, Score: 0.023281133735012848, Run Time: 18.08
fold: 3, Score: 0.024898893903100686, Run Time: 17.56
fold: 4, Score: 0.03456589176229821, Run Time: 18.08
fold: 5, Score: 0.03195864376287025, Run Time: 17.82
fold: 6, Score: 0.028493127404201624, Run Time: 18.12
fold: 7, Score: 0.03492396721629019, Run Time: 17.76
fold: 8, Score: 0.03454559006784704, Run Time: 18.09
fold: 9, Score: 0.03219458609983587, Run Time: 18.13
fold: 10, Score: 0.02679978085573292, Run Time: 17.69
fold: 11, Score: 0.028885630670507274, Run Time: 18.10
fold: 12, Score: 0.03448781284033092, Run Time: 17.66
fold: 13, Score: 0.03735939041740206, Run Time: 18.10
fold: 14, Score: 0.02850329821830618, Run Time: 17.74
fold: 15, Score: 0.029864568951485634, Run Time: 18.15
fold: 16, Score: 0.025253071677059213, Run Time: 17.80
fold: 17, Score: 0.033689245938213035, Run Time: 18.22
fold: 18, Scor

Unnamed: 0_level_0,pred_cat1
id,Unnamed: 1_level_1
0,0
1,0
2,0
3,0
4,0


Mean
=== Target Value Counts ===
Model Run Time: 359.34
Model=cat2
{}
fold: 1, Score: 0.028019458676343575, Run Time: 28.57
fold: 2, Score: 0.022906535468748254, Run Time: 28.44
fold: 3, Score: 0.025106360777106318, Run Time: 28.07
fold: 4, Score: 0.035350219650003394, Run Time: 28.46
fold: 5, Score: 0.03184250933799785, Run Time: 28.54
fold: 6, Score: 0.02928888738598948, Run Time: 28.62
fold: 7, Score: 0.035061264922142316, Run Time: 28.37
fold: 8, Score: 0.035136965498251974, Run Time: 28.47
fold: 9, Score: 0.03183031896839953, Run Time: 28.67
fold: 10, Score: 0.026692486279446478, Run Time: 28.46
fold: 11, Score: 0.02926592232738069, Run Time: 28.39
fold: 12, Score: 0.035226356655800206, Run Time: 28.14
fold: 13, Score: 0.03816813311115488, Run Time: 28.27
fold: 14, Score: 0.02927803295676287, Run Time: 28.33
fold: 15, Score: 0.03059345528719015, Run Time: 28.48
fold: 16, Score: 0.025844338852599045, Run Time: 28.42
fold: 17, Score: 0.03460014129636894, Run Time: 28.39
fold: 18, Sc

Unnamed: 0_level_0,pred_cat2
id,Unnamed: 1_level_1
0,0
1,0
2,0
3,0
4,0


Mean
=== Target Value Counts ===
Model Run Time: 568.51
CPU times: user 3h 16min 29s, sys: 8min 51s, total: 3h 25min 21s
Wall time: 1h 54min 25s


Unnamed: 0,Model,Score,StdDev,RunTime
2,lgbm_best_params,0.04105,0.00359,662.93337
5,xgb1,0.03533,0.00558,300.43333
7,lgbm2,0.03474,0.00368,16.83702
4,xgb3,0.03468,0.00539,3081.83072
3,cat_best_params,0.03294,0.00403,200.53155
6,lgbm1,0.03264,0.00396,30.82605
1,xgb_best_params,0.0325,0.00403,502.70059
9,cat2,0.03114,0.00426,568.50863
8,cat1,0.0308,0.00418,359.34299
0,xgb_params_logloss,0.03057,0.00392,1141.70029


## Linear Models

In [45]:
# model_lst = ["lin_reg", "lasso", "ridge", "ridge_25", "ridge_50"]
# model_lst = ["lasso", "ridge",  "ridge_50"]
model_lst = []
# all_cv_scores = run_models4features(model_lst, TARGET, FEATURES, all_cv_scores, linear_models=True)    
all_cv_scores = run_models4features(model_estimator_dict, model_lst, TARGET, FEATURES, all_cv_scores, linear_models=True)    

all_cv_scores.head()

Unnamed: 0,Model,Score,StdDev,RunTime
0,xgb_params_logloss,0.03057,0.00392,1141.70029
1,xgb_best_params,0.0325,0.00403,502.70059
2,lgbm_best_params,0.04105,0.00359,662.93337
3,cat_best_params,0.03294,0.00403,200.53155
4,xgb3,0.03468,0.00539,3081.83072


In [46]:
sample_submission.head(20)

Unnamed: 0,id,Class,target_xgb_params_logloss,target_xgb_best_params,target_lgbm_best_params,target_cat_best_params,target_xgb3,target_xgb1,target_lgbm1,target_lgbm2,target_cat1,target_cat2
0,117564,0.5,9e-05,2e-05,0.00032,0.00029,6.90523e-06,4.02599e-06,0.0006,0.00127,7e-05,0.0001
1,117565,0.5,0.00126,0.00246,0.00339,0.00197,0.00045471,0.000528287,0.00167,0.00262,0.00154,0.00133
2,117566,0.5,7e-05,2e-05,0.00107,0.00052,3.28126e-05,2.05074e-05,0.00063,0.00148,0.00013,0.0002
3,117567,0.5,0.0863,0.03325,0.05458,0.04101,0.0274453,0.0192685,0.03305,0.03869,0.09322,0.07256
4,117568,0.5,0.00165,0.00093,0.01111,0.00278,0.000246295,0.000220552,0.00395,0.00822,0.0024,0.00252
5,117569,0.5,0.99567,0.99653,0.96273,0.99023,0.999009,0.998968,0.98589,0.98078,0.9953,0.99594
6,117570,0.5,0.0001,1e-05,0.00084,0.00036,1.9514e-05,1.36638e-05,0.00061,0.00138,8e-05,0.00012
7,117571,0.5,0.00675,0.00868,0.00826,0.01478,0.00133968,0.00137148,0.00871,0.00706,0.01375,0.01914
8,117572,0.5,0.00019,9e-05,0.00947,0.00448,2.49897e-05,1.98285e-05,0.00203,0.00448,0.00116,0.00066
9,117573,0.5,0.00048,0.00054,0.00197,0.00059,3.12593e-05,3.3017e-05,0.00078,0.00138,0.00033,0.00032


<div style="background-color:rgba(177, 156, 217, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Blend Models</h1>
</div>

In [47]:
all_blend_scores = pd.DataFrame(
    {
        "Model": pd.Series(dtype="str"),
        "Score": pd.Series(dtype="float"),
        "StdDev": pd.Series(dtype="float"),
    }
)

In [48]:
def equal_wt_model(model_lst:List[str], fname:str) -> None:
    target_names = [f"target_{model}" for model in model_lst]
# target_names
    sample_submission[TARGET] = sample_submission[target_names].sum(axis=1) / len(model_lst)
    sample_submission[[ID, TARGET]].to_csv(fname, index=False)
    sample_submission[[ID, TARGET]].tail(8)

In [49]:
def wt_avg_model() -> None:
    sample_submission[TARGET] = (
    #     (sample_submission["target_xgb_bp"] * 2 )
    #     + (sample_submission["target_lgbm_bp"]  )
        (sample_submission["target_xgb1"] * 3 )
        + (sample_submission["target_lgbm1"])
    #     + (sample_submission["target_lgbm2"])    
    #     + (sample_submission["target_lgbm2"])
        + (sample_submission["target_cat1"] )
        + (sample_submission["target_cat2"] )    
    #     + (sample_submission["target_cat_bp"] )
    #     + (sample_submission["target_svc"] )
    #     + (sample_submission["target_log_reg3"] )
    #     + (sample_submission["target_cat2"] )
    )/6

    # sample_submission[TARGET] = sample_submission[TARGET].astype(int)
    sample_submission[[ID, TARGET]].to_csv("submission_wt_avg.csv", index=False)
    sample_submission[[ID, TARGET]].tail(8)



In [50]:
if Config.ensemble_models:
    wt_avg_model()
    model_lst = ["xgb1", "cat1", "lgbm1"]
    equal_wt_model(model_lst, "submission_models_wt_avg.csv")

In [51]:
all_cv_scores.sort_values(by=["Score"], ascending=False)

Unnamed: 0,Model,Score,StdDev,RunTime
2,lgbm_best_params,0.04105,0.00359,662.93337
5,xgb1,0.03533,0.00558,300.43333
7,lgbm2,0.03474,0.00368,16.83702
4,xgb3,0.03468,0.00539,3081.83072
3,cat_best_params,0.03294,0.00403,200.53155
6,lgbm1,0.03264,0.00396,30.82605
1,xgb_best_params,0.0325,0.00403,502.70059
9,cat2,0.03114,0.00426,568.50863
8,cat1,0.0308,0.00418,359.34299
0,xgb_params_logloss,0.03057,0.00392,1141.70029


<div style="background-color:rgba(177, 156, 217, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Level 1 Stack Models</h1>
</div>

In [52]:
## TODO: Generate these dictionaries from model names

train_oof_dict = {
    "train_pred_cat1": "train_pred_cat1.csv",
    "train_pred_cat2": "train_pred_cat2.csv",
    "train_pred_lgbm1": "train_pred_lgbm1.csv",    
    "train_pred_lgbm2": "train_pred_lgbm2.csv",    
    "train_pred_xgb1": "train_pred_xgb1.csv"
}

test_pred_dict = {
    "submission_cat1": "submission_cat1.csv",
    "submission_cat2": "submission_cat2.csv",
    "submission_lgbm1": "submission_lgbm1.csv",
    "submission_lgbm2": "submission_lgbm2.csv",
    "submission_xgb1": "submission_xgb1.csv",
}

In [53]:
def blend_results(train_oof_dict, test_pred_dict):
    oof_df = pd.DataFrame()
    test_preds_df = pd.DataFrame()

    for name, train_oof_fname in train_oof_dict.items():
        fname = "../working/" + train_oof_fname
        print(f"Processing {name}, {train_oof_fname}")
        df = pd.read_csv(fname)
        print(df.head())
#         print(df.iloc[:,1])
        preds = pd.Series(df.iloc[:,1], name=name)
#         print(preds[:5])
        oof_df = pd.concat([oof_df, preds], axis=1)
    #     oof_df = pd.concat([oof_df, pd.Series(np.load(TRAIN_PATH / train_oof), name=name)], axis=1)

    for name, test_pred_fname in test_pred_dict.items():
        fname = "../working/" + test_pred_fname
        print(f"{name}, {test_pred_fname}")
        df = pd.read_csv(fname)
        print(df.head())
        preds = pd.Series(df.iloc[:,1], name=name)
        test_preds_df = pd.concat([test_preds_df, preds], axis=1)

    print("=== oof ===")
    print(oof_df.head())
    print("=== test_preds ===")
    print(test_preds_df.head())
    return oof_df, test_preds_df
    
# (oof_df, preds_df) = blend_results(train_oof_dict, test_pred_dict)    

In [54]:
def load_oof_results(train_oof_dict, test_pred_dict):
    oof_df = pd.DataFrame()
    test_preds_df = pd.DataFrame()

    for name, train_oof_fname in train_oof_dict.items():
        fname = "../working/" + train_oof_fname
        print(f"Processing {name}, {train_oof_fname}")
        df = pd.read_csv(fname)
        print(df.head())
#         print(df.iloc[:,1])
        preds = pd.Series(df.iloc[:,1], name=name)
#         print(preds[:5])
        oof_df = pd.concat([oof_df, preds], axis=1)
    #     oof_df = pd.concat([oof_df, pd.Series(np.load(TRAIN_PATH / train_oof), name=name)], axis=1)

    for name, test_pred_fname in test_pred_dict.items():
        fname = "../working/" + test_pred_fname
        print(f"{name}, {test_pred_fname}")
        df = pd.read_csv(fname)
        print(df.head())
        preds = pd.Series(df.iloc[:,1], name=name)
        test_preds_df = pd.concat([test_preds_df, preds], axis=1)

    print("=== oof ===")
    print(oof_df.head())
    print("=== test_preds ===")
    print(test_preds_df.head())
    return oof_df, test_preds_df
    


In [55]:
if Config.ensemble_models:
    (oof_df, preds_df) = load_oof_results(train_oof_dict, test_pred_dict)
    display(oof_df.head())
    display(preds_df.head())

Processing train_pred_cat1, train_pred_cat1.csv
   id  pred_cat1
0   0          0
1   1          0
2   2          0
3   3          0
4   4          0
Processing train_pred_cat2, train_pred_cat2.csv
   id  pred_cat2
0   0          0
1   1          0
2   2          0
3   3          0
4   4          0
Processing train_pred_lgbm1, train_pred_lgbm1.csv
   id  pred_lgbm1
0   0           0
1   1           0
2   2           0
3   3           0
4   4           0
Processing train_pred_lgbm2, train_pred_lgbm2.csv
   id  pred_lgbm2
0   0           0
1   1           0
2   2           0
3   3           0
4   4           0
Processing train_pred_xgb1, train_pred_xgb1.csv
   id  pred_xgb1
0   0          0
1   1          0
2   2          0
3   3          0
4   4          0
submission_cat1, submission_cat1.csv
       id    Class
0  117564  0.00007
1  117565  0.00154
2  117566  0.00013
3  117567  0.09322
4  117568  0.00240
submission_cat2, submission_cat2.csv
       id    Class
0  117564  0.00010
1  11756

Unnamed: 0,train_pred_cat1,train_pred_cat2,train_pred_lgbm1,train_pred_lgbm2,train_pred_xgb1
0,0,0,0,0,0
1,0,0,0,0,0
2,0,0,0,0,0
3,0,0,0,0,0
4,0,0,0,0,0


Unnamed: 0,submission_cat1,submission_cat2,submission_lgbm1,submission_lgbm2,submission_xgb1
0,7e-05,0.0001,0.0006,0.00127,4.02599e-06
1,0.00154,0.00133,0.00167,0.00262,0.000528287
2,0.00013,0.0002,0.00063,0.00148,2.05074e-05
3,0.09322,0.07256,0.03305,0.03869,0.0192685
4,0.0024,0.00252,0.00395,0.00822,0.000220551


In [56]:
# type(preds_df)

In [57]:
def run_lr(useful_features:List[str], TARGET:str, train_df:pd.DataFrame, test_df:pd.DataFrame) -> (List[float],List[float]):
    final_predictions = []
    scores = []

    kfold = model_selection.KFold(n_splits=Config.N_FOLDS, shuffle=True, random_state=Config.seed)

    for fold, (train_idx, valid_idx) in enumerate(kfold.split(train_df)):
        xtrain = train_df.iloc[train_idx].reset_index(drop=True)
        xvalid = train_df.iloc[valid_idx].reset_index(drop=True)

        xtest = test_df[useful_features].copy()

        ytrain = xtrain[TARGET]
        yvalid = xvalid[TARGET]

        xtrain = xtrain[useful_features]
        xvalid = xvalid[useful_features]

#         model = LogisticRegression()
        model = linear_model.LinearRegression()
        # Smaller C means more regularization; default=1.0
        # 2947.0517025518097
#         model = LogisticRegression(max_iter=500, C=2947.0517025518097, penalty='l2',solver='newton-cg')
#         model = LogisticRegression(C = 2947.0517025518097,
#                         max_iter = 500,
#                         penalty = 'l2',
#                         solver = 'liblinear')
        model.fit(xtrain, ytrain)

        preds_valid = model.predict_proba(xvalid)[:,-1]
        test_preds = model.predict_proba(xtest)[:,-1]

        final_predictions.append(test_preds)
#         score = metrics.roc_auc_score(yvalid, preds_valid)
        score = metrics.mean_squared_error(yvalid, preds_valid, squared=False)
        print(f"Fold={fold}, Score={score}")
        scores.append(score)
    return scores, final_predictions


In [58]:
# useful_features = ["pred_lda", "pred_gbc","pred_gbc2", "pred_cat_bp", "pred_cat1", "pred_lgbm1", "pred_lgbm2", "pred_lgbm_bp", "pred_xgb1", "pred_xgb_bp"]
useful_features = [ "train_pred_cat1", "train_pred_cat2", "train_pred_lgbm1", "train_pred_lgbm2", "train_pred_xgb1"]

In [59]:
# oof_df[useful_features].head()

In [60]:
# preds_df[useful_features].head()

In [61]:
# fold_scores, final_predictions = run_lr(useful_features, TARGET, oof_df, preds_df)
# test_preds = np.mean(np.column_stack(final_predictions), axis=1)
# cv_score, std_dev = show_fold_scores(fold_scores)
# create_submission("level1_lr", TARGET, test_preds)

In [62]:
pd.options.display.max_colwidth = 100
pd.set_option("display.max_rows", 999)
pd.set_option("display.precision", 5)
pd.options.display.float_format = '{:.5f}'.format
pd.options.display.max_colwidth

100

In [63]:
all_cv_scores.sort_values(by=["Score"], ascending=False)

Unnamed: 0,Model,Score,StdDev,RunTime
2,lgbm_best_params,0.04105,0.00359,662.93337
5,xgb1,0.03533,0.00558,300.43333
7,lgbm2,0.03474,0.00368,16.83702
4,xgb3,0.03468,0.00539,3081.83072
3,cat_best_params,0.03294,0.00403,200.53155
6,lgbm1,0.03264,0.00396,30.82605
1,xgb_best_params,0.0325,0.00403,502.70059
9,cat2,0.03114,0.00426,568.50863
8,cat1,0.0308,0.00418,359.34299
0,xgb_params_logloss,0.03057,0.00392,1141.70029
