<a href="https://www.kaggle.com/code/mmellinger66/s3e9-concrete-strength-models?scriptVersionId=121599062" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

 <div style="background-color:rgba(177, 156, 217, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Playground Season 3: Episode 9 - Concrete Strength Models</h1>
</div>

## Problem Type

Regression

## Evaluation Metric

$$RMSE = \sqrt{\frac{1}{N} \sum_{i=1}^N (y_i - \hat{y_i})^2}$$

```python
score = metrics.mean_squared_error(yvalid, preds_valid, squared=False)
```

<div style="background-color:rgba(177, 156, 217, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Import Libraries</h1>
</div>

In [1]:
from typing import List, Set, Dict, Tuple, Optional

import os
import time
from pathlib import Path
import glob
import gc

import pandas as pd
import numpy as np

from sklearn import impute
from sklearn import metrics
from sklearn import preprocessing
from sklearn import linear_model
from sklearn import svm
from sklearn import cluster
from sklearn import model_selection
from sklearn import ensemble
from sklearn import datasets

import xgboost as xgb
import catboost as cb
import lightgbm as lgb

import optuna
from optuna.visualization import plot_optimization_history, plot_param_importances

from scipy.special import boxcox1p
from scipy.stats import boxcox_normmax

# Visualization Libraries
import matplotlib as mpl
import matplotlib.pylab as plt
import seaborn as sns
import missingno as msno
from folium import Map
from folium.plugins import HeatMap
from IPython.display import display_html, display_markdown, display_latex
from colorama import Fore, Style

import warnings
warnings.filterwarnings('ignore')

pd.set_option("display.max_rows", 999)
pd.set_option("display.precision", 5)

<div style="background-color:rgba(177, 156, 217, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Configuration</h1>
</div>

In [2]:
TARGET="Strength"
ID="id"

# Optuna
objective_direction = "minimize"  # minimize, maximize

In [3]:
class Config:
    path:str = "../input/playground-series-s3e9/"
    load_original_data:bool = True # Some Competitions use synthetic data, based on real data
    original_data_path:str = "../input/predict-concrete-strength/ConcreteStrengthData.csv"
    gpu:bool = False
    optimize:bool = True
    n_optuna_trials:int = 50 # 5, 10, 30
    fast_render:bool = False
    calc_probability:bool = False
    debug:bool = False
    seed:int = 42
    N_ESTIMATORS:int = 100  # 100, 300, 1000, 2000, 5000, 15_000, 20_000 GBDT
    GPU_N_ESTIMATORS:int = 2000 # Want models to run fast during dev
    N_FOLDS:int = 5
        

In [4]:
class clr:
    S = Style.BRIGHT + Fore.LIGHTRED_EX
    E = Style.RESET_ALL

<div style="background-color:rgba(177, 156, 217, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Library</h1>
</div>

In [5]:
def read_data(path: str, analyze:bool=True) -> (pd.DataFrame, pd.DataFrame, pd.DataFrame):
    data_dir = Path(path)

    train = pd.read_csv(data_dir / "train.csv")
    test = pd.read_csv(data_dir / "test.csv")
    submission_df = pd.read_csv(data_dir / "sample_submission.csv")

    if analyze:
        print(clr.S + "=== Shape of Data ==="+clr.E)
        print(f" train data: Rows={train.shape[0]}, Columns={train.shape[1]}")
        print(f" test data : Rows={test.shape[0]}, Columns={test.shape[1]}")

        print(clr.S + "\n=== Train Data: First 5 Rows ===\n"+clr.E)
        display(train.head())
        print(f"\n{clr.S}=== Train Column Names ==={clr.E}\n")
        display(train.columns)
        print(f"\n{clr.S}=== Features/Explanatory Variables ==={clr.E}\n")
        eval_features(train)
        print(f"\n{clr.S}=== Skewness ==={clr.E}\n")
        check_skew(train)
    return train, test, submission_df

def create_submission(model_name: str, target, preds, seed:int=42, nfolds:int=5) -> pd.DataFrame:
    sample_submission[target] = preds #.astype(int)

    if len(model_name) > 0:
        fname = f"submission_{model_name}_k{nfolds}_s{seed}.csv"
    else:
        fname = "submission.csv"

    sample_submission.to_csv(fname, index=False)

    return sample_submission

def show_classification_scores(ground_truth:List[int], yhat:List[int]) -> None:
    accuracy = metrics.accuracy_score(ground_truth, yhat)
    precision = metrics.precision_score(ground_truth, yhat)
    recall = metrics.recall_score(ground_truth, yhat)
    roc = metrics.roc_auc_score(ground_truth, yhat)
    f1 = metrics.f1_score(ground_truth, yhat)

    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"ROC: {roc:.4f}")
    print(f"f1: {f1:.4f}")
    

def label_encoder(train:pd.DataFrame, test:pd.DataFrame, columns:List[str]) -> (pd.DataFrame, pd.DataFrame) :
    for col in columns:
        train[col] = train[col].astype(str)
        test[col] = test[col].astype(str)
        train[col] = preprocessing.LabelEncoder().fit_transform(train[col])
        test[col] = preprocessing.LabelEncoder().fit_transform(test[col])
    return train, test   

def create_strat_folds(df:pd.DataFrame, TARGET, n_folds:int=5, seed:int=42) -> pd.DataFrame:
    print(f"TARGET={TARGET}, n_folds={n_folds}, seed={seed}")
    df["fold"] = -1

    kf = model_selection.StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=seed)
    # kf = GroupKFold(n_splits=Config.N_FOLDS)
    for fold, (train_idx, valid_idx) in enumerate(kf.split(df, df[TARGET])):
        df.loc[valid_idx, "fold"] = fold

    # df.to_csv(f"train_fold{num_folds}.csv", index=False)
    return df


def create_folds(df:pd.DataFrame, n_folds:int=5, seed:int=42) -> pd.DataFrame:
    print(f"n_folds={n_folds}, seed={seed}")
    df["fold"] = -1

    kf = model_selection.KFold(n_splits=n_folds, shuffle=True, random_state=seed)

    for fold, (train_idx, valid_idx) in enumerate(kf.split(df)):
        df.loc[valid_idx, "fold"] = fold

    # df.to_csv(f"train_fold{num_folds}.csv", index=False)
    return df

def show_fold_scores(scores: List[float]) -> (float, float):
    cv_score = np.mean(scores)  # Used in filename
    std_dev = np.std(scores)
    print(
        f"Scores -> Adjusted: {np.mean(scores) - np.std(scores):.8f} , mean: {np.mean(scores):.8f}, std: {np.std(scores):.8f}"
    )
    return cv_score, std_dev


def feature_distribution_types(df:pd.DataFrame, display:bool=True) -> (List[str], List[str]):
    continuous_features = list(df.select_dtypes(include=['int64', 'float64', 'uint8']).columns)
    categorical_features = list(df.select_dtypes(include=['object', 'bool']).columns)
    if display:
        print(f"{clr.S}Continuous Features={continuous_features}{clr.E}\n")
        print(f"{clr.S}Categorical Features={categorical_features}{clr.E}")
    return continuous_features, categorical_features   

def show_cardinality(df:pd.DataFrame, features:List[str]) -> None:
    print("=== Cardinality ===")
    print(df[features].nunique())

## === Model Support ===    

from scipy.stats import mode


def merge_test_predictions(final_test_predictions:List[float], calc_probability:bool=True) -> List[float]:

    if calc_probability:
        print("Mean")
        result = np.mean(np.column_stack(final_test_predictions), axis=1)
    else:
        print("Mode")
        mode_result = mode(np.column_stack(final_test_predictions), axis=1)
        result = mode_result[0].ravel()

    return result

def summary_statistics(X:pd.DataFrame, enhanced=True) -> None:
    desc = X.describe()
    if enhanced:
        desc.loc["var"] = X.var(numeric_only=True).tolist()
        desc.loc["skew"] = X.skew(numeric_only=True).tolist()
        desc.loc["kurt"] = X.kurtosis(numeric_only=True).tolist()

    with pd.option_context("display.precision", 2):
        style = desc.transpose().style.background_gradient(
            cmap="coolwarm"
        )  # .set_precision(4)
    display(style)
    
def show_missing_features(df:pd.DataFrame) -> None:
    missing_vals = df.isna().sum().sort_values(ascending=False)
    print(missing_vals[missing_vals > 0])


def show_duplicate_records(df:pd.DataFrame) -> None:
    dups = df.duplicated()
    print(dups.sum())


def eval_features(df:pd.DataFrame) -> (List[str], List[str], List[str]):
    ## Separate Categorical and Numerical Features
    categorical_features = list(
        df.select_dtypes(include=["category", "object"]).columns
    )
    continuous_features = list(df.select_dtypes(include=["number"]).columns)

    print(f"{clr.S}Continuous features:{clr.E} {continuous_features}")
    print(f"{clr.S}Categorical features:{clr.E} {categorical_features}")
    print("\n --- Cardinality of Categorical Features ---\n")

    for feature in categorical_features:
        cardinality = df[feature].nunique()
        if cardinality < 10:
            print(f"{clr.S}{feature}{clr.E}: cardinality={cardinality}, {df[feature].unique()}")
        else:
            print(f"{clr.S}{feature}{clr.E}: cardinality={cardinality}")
    all_features = categorical_features + continuous_features
    return all_features, categorical_features, continuous_features


def show_feature_importance(feature_importance_lst:List[str]) -> None:
    fis_df = pd.concat(feature_importance_lst, axis=1)

    fis_df.sort_values("0_importance", ascending=True).head(40).plot(
        kind="barh", figsize=(12, 12), title="Feature Importance Across Folds"
    )
    plt.show()


def show_feature_target_crosstab(df:pd.DataFrame, feature_lst:List[str], target:str) -> None:
    for feature in feature_lst:
        print(f"\n=== {feature} vs {target} ===\n")
        display(
            pd.crosstab(df[feature], df[target], margins=True)
        )  # display keeps bold formatting


def show_cardinality(df:pd.DataFrame, features:List[str]) -> None:
    print(f"{clr.S}=== Cardinality ==={clr.E}")
    print(df[features].nunique())


def show_unique_features(df:pd.DataFrame, features:List[str]) -> None:
    for col in features:
        print(col, sorted(df[col].dropna().unique()))


def feature_distribution_types(df:pd.DataFrame, display:bool=True) -> (List[str], List[str]):
    continuous_features = list(
        df.select_dtypes(include=["int64", "float64", "uint8"]).columns
    )
    categorical_features = list(df.select_dtypes(include=["object", "bool"]).columns)
    if display:
        print(f"{clr.S}Continuous Features={clr.E}{continuous_features}\n")
        print(f"{clr.S}Categorical Features={clr.E}{categorical_features}")
    return continuous_features, categorical_features


def describe(X:pd.DataFrame) -> None:
    """Deprecated: Use summary_statistics()"""
    desc = X.describe()
    desc.loc['var'] = X.var(numeric_only=True).tolist()
    desc.loc['skew'] = X.skew(numeric_only=True).tolist()
    desc.loc['kurt'] = X.kurtosis(numeric_only=True).tolist()

    with pd.option_context('display.precision', 2):
        style = desc.transpose().style.background_gradient(cmap='coolwarm') #.set_precision(4)
    display(style)
  

def check_skew(df:pd.DataFrame) -> None:
    skew = df.skew(skipna=True,numeric_only=True).sort_values(ascending=False)
    print(skew)
    
def gpu_ify_lgbm(lgbm_dict):
    if Config.gpu:
        lgbm_dict["device"] = "gpu"
        lgbm_dict["boosting_type"] = "gbdt"
        lgbm_dict["gpu_platform_id"] = 0
        lgbm_dict["gpu_device_id"] = 0
    return lgbm_dict

def gpu_ify_cb(params):
    if Config.gpu:
        params["task_type"] = "GPU"
    return params    


<div style="background-color:rgba(177, 156, 217, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Optuna Hyperparameter Optimization Library</h1>
</div>

In [6]:
def objective_xgb(trial, X_train, X_valid, y_train, y_valid):

    xgb_params = {
        #         "objective": trial.suggest_categorical("objective", ["multi:softmax"]),
        #         "eval_metric": "mlogloss",
        #         "objective": "multi:softmax",
#         "objective": trial.suggest_categorical("objective", ["mae", "rmse"]),

        "eval_metric": "rmse",  # auc, rmse, mae
        "objective": "reg:squarederror", # Normal Distribution
#         "objective": "reg:gamma", # Gamma Distribution

        #         "enable_categorical": trial.suggest_categorical("use_label_encoder", [True]),
        "use_label_encoder": trial.suggest_categorical("use_label_encoder", [False]),
        "n_estimators": trial.suggest_int("n_estimators", 1000, 5000, 100),
        "learning_rate": trial.suggest_loguniform("learning_rate", 1e-2, 0.25),
        "subsample": trial.suggest_float("subsample", 0.1, 1, step=0.01),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.05, 1, step=0.01),
        "max_depth": trial.suggest_int("max_depth", 1, 20),  # 10
        "gamma": trial.suggest_float("gamma", 0, 100, step=0.1),
        "booster": trial.suggest_categorical("booster", ["gbtree"]),
        "tree_method": trial.suggest_categorical(
            "tree_method", ["hist"]
        ),  # hist, gpu_hist
#         "predictor": "gpu_predictor",
        "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 100),
        "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 100),
        "random_state": trial.suggest_categorical("random_state", [42]),
        "n_jobs": trial.suggest_categorical("n_jobs", [4]),
        "min_child_weight": trial.suggest_loguniform("min_child_weight", 1e-1, 1e3),
        # "min_child_weight": trial.suggest_categorical("min_child_weight", [256]),
    }

    # Model loading and training
    model = xgb.XGBRegressor(**xgb_params)
    model.fit(
        X_train,
        y_train,
        eval_set=[(X_train, y_train), (X_valid, y_valid)],
        early_stopping_rounds=5000,
        verbose=0,
    )

    print(f"Number of boosting rounds: {model.best_iteration}")
    #     oof = model.predict_proba(X_valid)[:, 1] # Probability
    oof = model.predict(X_valid)  # Classification: 0,1

    return metrics.mean_squared_error(y_valid, oof, squared=False)


def objective_lgbm(trial, X_train, X_valid, y_train, y_valid):

    lgbm_params = {
        "objective": trial.suggest_categorical("objective", ["mae", "rmse"]),
        #         "n_estimators": trial.suggest_categorical("n_estimators", [1_000]),
        #         "n_estimators": trial.suggest_categorical("n_estimators", [5000]),
        "n_estimators": trial.suggest_int("n_estimators", 700, 5000),
        "importance_type": "gain",
        "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
        "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.05, 1, step=0.01),
        "num_leaves": trial.suggest_int("num_leaves", 2, 1000),
        "feature_fraction": trial.suggest_uniform("feature_fraction", 0.1, 1.0),
        "bagging_fraction": trial.suggest_uniform("bagging_fraction", 0.1, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 0, 15),
        "min_child_samples": trial.suggest_int("min_child_samples", 1, 300),
        "subsample": trial.suggest_float("subsample", 0.1, 1, step=0.01),
        "learning_rate": trial.suggest_loguniform("learning_rate", 1e-2, 0.25),
        "max_depth": trial.suggest_int("max_depth", 1, 100),
        "random_state": trial.suggest_categorical("random_state", [42]),
        "n_jobs": trial.suggest_categorical("n_jobs", [4]),
        #         'min_child_weight': trial.suggest_loguniform('min_child_weight', 1e-1, 1e3),
        # "min_child_weight": trial.suggest_categorical("min_child_weight", [256]),
    }

    # Model loading and training
    model = lgb.LGBMRegressor(**lgbm_params)
    model.fit(
        X_train,
        y_train,
        eval_set=[(X_train, y_train), (X_valid, y_valid)],
        eval_metric="mae",
        callbacks=[
            lgb.log_evaluation(500),
            lgb.early_stopping(500, False, True),
        ],
    )

    #     print(f"Number of boosting rounds: {model.best_iteration}")
    oof = model.predict(X_valid)

    return metrics.mean_squared_error(y_valid, oof, squared=False)
#     return metrics.mean_absolute_error(y_valid, oof)


def objective_clf_lgbm(trial, X_train, X_valid, y_train, y_valid):

    params = {
        "boosting_type": "gbdt",
        # "objective": trial.suggest_categorical("objective", ["mae", "rmse"]),
        #         "objective": trial.suggest_categorical("objective", ["multi:softprob"]),
        #         "n_estimators": trial.suggest_categorical("n_estimators", [1_000]),
        #         "n_estimators": trial.suggest_categorical("n_estimators", [5000]),
        "n_estimators": trial.suggest_int("n_estimators", 700, 1000),
        "importance_type": "gain",
        "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
        "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.05, 1, step=0.01),
        "num_leaves": trial.suggest_int("num_leaves", 2, 1000),
        "feature_fraction": trial.suggest_uniform("feature_fraction", 0.1, 1.0),
        "bagging_fraction": trial.suggest_uniform("bagging_fraction", 0.1, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 0, 15),
        "min_child_samples": trial.suggest_int("min_child_samples", 1, 300),
        "subsample": trial.suggest_float("subsample", 0.1, 1, step=0.01),
        "learning_rate": trial.suggest_loguniform("learning_rate", 1e-2, 0.25),
        "max_depth": trial.suggest_int("max_depth", 1, 100),
        "random_state": trial.suggest_categorical("random_state", [42]),
        "n_jobs": trial.suggest_categorical("n_jobs", [4]),
        #         'min_child_weight': trial.suggest_loguniform('min_child_weight', 1e-1, 1e3),
        # "min_child_weight": trial.suggest_categorical("min_child_weight", [256]),
    }
    if Config.gpu:
        params["device_type"] = "gpu"

    # Model loading and training
    model = lgb.LGBMClassifier(**params)
    model.fit(
        X_train,
        y_train,
        eval_set=[(X_train, y_train), (X_valid, y_valid)],
        # eval_metric="mae",
        callbacks=[
            lgb.log_evaluation(500),
            lgb.early_stopping(500, False, True),
        ],
    )

    #     print(f"Number of boosting rounds: {model.best_iteration}")
    oof = model.predict(X_valid)

    #     return accuracy_score(y_valid, oof)
    return metrics.roc_auc_score(y_valid, oof)


def objective_cb(trial, X_train, X_valid, y_train, y_valid):

    cb_params = {
        "iterations": 100,
        "learning_rate": trial.suggest_loguniform("learning_rate", 0.1, 1.0),
        "l2_leaf_reg": trial.suggest_loguniform("l2_leaf_reg", 1, 100),
        "bagging_temperature": trial.suggest_loguniform(
            "bagging_temperature", 0.1, 20.0
        ),
        "random_strength": trial.suggest_float("random_strength", 1.0, 2.0),
        "depth": trial.suggest_int("depth", 1, 10),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 300),
          "use_best_model": True,
#         "task_type": "GPU",
        "random_seed": 42,
    }

    # Model loading and training
    #  model = CatBoostClassifier(**cb_params)
    model = cb.CatBoostRegressor(**cb_params)

    model.fit(
        X_train,
        y_train,
        eval_set=[(X_train, y_train), (X_valid, y_valid)],
        # eval_metric="accuracy",
        early_stopping_rounds=500,
        verbose=False,
    )

#     print(f"Number of boosting rounds: {model.best_iteration}")
    # oof = model.predict_proba(X_valid)[:, 1]
    oof = model.predict(X_valid)  # Classification
    return metrics.mean_squared_error(y_valid, oof, squared=False)
#     return metrics.mean_absolute_error(y_valid, oof)
# 
#     return accuracy_score(y_valid, oof)

def objective_clf_cb(trial, X_train, X_valid, y_train, y_valid):

    cb_params = {
        "iterations": 10,  # 1000
        "learning_rate": trial.suggest_loguniform("learning_rate", 0.1, 1.0),
        "l2_leaf_reg": trial.suggest_loguniform("l2_leaf_reg", 1, 100),
        "bagging_temperature": trial.suggest_loguniform(
            "bagging_temperature", 0.1, 20.0
        ),
        "random_strength": trial.suggest_float("random_strength", 1.0, 2.0),
        "depth": trial.suggest_int("depth", 1, 10),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 300),
        "use_best_model": True,
#             "task_type": "GPU",
        "random_seed": 42,
    }

    # Model loading and training
    model = cb.CatBoostClassifier(**cb_params)
    model.fit(
        X_train,
        y_train,
        eval_set=[(X_train, y_train), (X_valid, y_valid)],
        # eval_metric="accuracy",
        early_stopping_rounds=500,
        verbose=False,
    )

    # print(f"Number of boosting rounds: {model.best_iteration}")
    # oof = model.predict_proba(X_valid)[:, 1]
    oof = model.predict(X_valid)  # Classification

    return metrics.accuracy_score(y_valid, oof)

<div style="background-color:rgba(177, 156, 217, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Load Train/Test Data and Analyze</h1>
</div>

## Load the following files

 - train.csv - Data used to build our machine learning model
 - test.csv - Data used to build our machine learning model. Does not contain the target variable
 - sample_submission.csv - A file in the proper format to submit test predictions

In [7]:
%%time
train, test, sample_submission = read_data(Config.path, analyze=True)                                

[1m[91m=== Shape of Data ===[0m
 train data: Rows=5407, Columns=10
 test data : Rows=3605, Columns=9
[1m[91m
=== Train Data: First 5 Rows ===
[0m


Unnamed: 0,id,CementComponent,BlastFurnaceSlag,FlyAshComponent,WaterComponent,SuperplasticizerComponent,CoarseAggregateComponent,FineAggregateComponent,AgeInDays,Strength
0,0,525.0,0.0,0.0,186.0,0.0,1125.0,613.0,3,10.38
1,1,143.0,169.0,143.0,191.0,8.0,967.0,643.0,28,23.52
2,2,289.0,134.7,0.0,185.7,0.0,1075.0,795.3,28,36.96
3,3,304.0,76.0,0.0,228.0,0.0,932.0,670.0,365,39.05
4,4,157.0,236.0,0.0,192.0,0.0,935.4,781.2,90,74.19



[1m[91m=== Train Column Names ===[0m



Index(['id', 'CementComponent', 'BlastFurnaceSlag', 'FlyAshComponent',
       'WaterComponent', 'SuperplasticizerComponent',
       'CoarseAggregateComponent', 'FineAggregateComponent', 'AgeInDays',
       'Strength'],
      dtype='object')


[1m[91m=== Features/Explanatory Variables ===[0m

[1m[91mContinuous features:[0m ['id', 'CementComponent', 'BlastFurnaceSlag', 'FlyAshComponent', 'WaterComponent', 'SuperplasticizerComponent', 'CoarseAggregateComponent', 'FineAggregateComponent', 'AgeInDays', 'Strength']
[1m[91mCategorical features:[0m []

 --- Cardinality of Categorical Features ---


[1m[91m=== Skewness ===[0m

AgeInDays                    2.74687
SuperplasticizerComponent    1.41169
FlyAshComponent              1.30469
BlastFurnaceSlag             1.12120
Strength                     0.38073
CementComponent              0.34128
id                           0.00000
CoarseAggregateComponent    -0.08145
WaterComponent              -0.21528
FineAggregateComponent      -0.44738
dtype: float64
CPU times: user 57.6 ms, sys: 11.2 ms, total: 68.8 ms
Wall time: 134 ms


In [8]:
train.head()

Unnamed: 0,id,CementComponent,BlastFurnaceSlag,FlyAshComponent,WaterComponent,SuperplasticizerComponent,CoarseAggregateComponent,FineAggregateComponent,AgeInDays,Strength
0,0,525.0,0.0,0.0,186.0,0.0,1125.0,613.0,3,10.38
1,1,143.0,169.0,143.0,191.0,8.0,967.0,643.0,28,23.52
2,2,289.0,134.7,0.0,185.7,0.0,1075.0,795.3,28,36.96
3,3,304.0,76.0,0.0,228.0,0.0,932.0,670.0,365,39.05
4,4,157.0,236.0,0.0,192.0,0.0,935.4,781.2,90,74.19


In [9]:
def load_original_data(path:str) -> pd.DataFrame:
#     original = pd.read_csv(path, index_col=[0])
    original = pd.read_csv(path)

    original = original.reset_index()
    original['id'] = original['index'] + 100000
    original = original.drop(columns = ['index'])
    original = original.rename(columns = {'CementComponent ':'CementComponent'})
    original.set_index('id', inplace=True)
#     original = original[-original.depth.isna()]
    print(f"Shape={original.shape}")
    return original
#     original.head()

if Config.load_original_data:    
    original = load_original_data(Config.original_data_path)
    display(original.head())

Shape=(1030, 9)


Unnamed: 0_level_0,CementComponent,BlastFurnaceSlag,FlyAshComponent,WaterComponent,SuperplasticizerComponent,CoarseAggregateComponent,FineAggregateComponent,AgeInDays,Strength
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
100000,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
100001,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
100002,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
100003,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
100004,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [10]:
if Config.load_original_data:
    train['is_original']    = 0
    test['is_original']     = 0
    original['is_original'] = 1
#     combined = pd.concat([train, original], ignore_index=True) #.drop_duplicates()
    combined = pd.concat([train, original])

    train = combined
#     combined.head()
    print(f"Shape={combined.shape}")


Shape=(6437, 11)


In [11]:
summary_statistics(train.drop(columns=[ID], axis=1), enhanced=True)

Unnamed: 0,count,mean,std,min,25%,50%,75%,max,var,skew,kurt
CementComponent,6437.0,296.29,105.57,102.0,212.5,295.8,374.0,540.0,11145.39,0.37,-0.55
BlastFurnaceSlag,6437.0,61.06,84.06,0.0,0.0,0.0,129.9,359.4,7066.49,1.06,-0.11
FlyAshComponent,6437.0,35.44,56.8,0.0,0.0,0.0,94.0,200.1,3226.28,1.16,-0.28
WaterComponent,6437.0,184.51,19.04,121.8,173.0,186.0,192.0,247.0,362.59,-0.17,0.67
SuperplasticizerComponent,6437.0,4.44,5.79,0.0,0.0,0.0,8.7,32.2,33.51,1.3,1.92
CoarseAggregateComponent,6437.0,988.95,77.56,801.0,938.0,975.6,1047.0,1145.0,6014.85,-0.08,-0.56
FineAggregateComponent,6437.0,771.6,78.96,594.0,734.3,781.0,821.0,992.6,6234.31,-0.41,-0.02
AgeInDays,6437.0,50.78,68.99,1.0,7.0,28.0,56.0,365.0,4759.62,2.82,8.76
Strength,6437.0,35.51,16.45,2.33,23.69,33.96,45.85,82.6,270.61,0.39,-0.35
is_original,6437.0,0.16,0.37,0.0,0.0,0.0,0.0,1.0,0.13,1.86,1.44


## Outlier Detection

In [12]:
# https://www.kaggle.com/code/lyasdemir/best-algorithm-for-prediction-xgboost
    
def iqr(data:pd.DataFrame, var:str):# outliers detecion .
    q1 = np.quantile(data[var], 0.25)
    q3 = np.quantile(data[var], 0.75)
    diff = q3 - q1
    lower_t = q1 - (1.5 * diff)
    upper_t = q3 + (1.5 * diff)
    return data[(data[var] < lower_t) | (data[var] > upper_t)]

# iqr(train, "squareMeters")

In [13]:
# # https://www.kaggle.com/code/sujithmandala/playground-s3-e8-ensemble-model-98-accuracy

# def detect_outliers(data:pd.DataFrame) -> pd.DataFrame:
#     outlier_percents = {}
#     for column in data.columns:
#         if data[column].dtype != object:
#             q1 = np.quantile(data[column], 0.25)
#             q3 = np.quantile(data[column], 0.75)
#             iqr = q3 - q1
#             upper_bound = q3 + (1.5 * iqr)
#             lower_bound = q1 - (1.5 * iqr)
#             outliers = data[(data[column] > upper_bound) | (data[column] < lower_bound)][column]
#             outlier_percentage = len(outliers) / len(data[column]) * 100
#             outlier_percents[column] = outlier_percentage
#             outlier_dataframe = pd.DataFrame(data = outlier_percents.values() ,index=outlier_percents.keys() ,columns=['Outlier_percentage'])
    
#     return outlier_dataframe.sort_values(by = 'Outlier_percentage', ascending = False)

# detect_outliers(train)


In [14]:
# https://www.kaggle.com/code/sujithmandala/playground-s3-e8-ensemble-model-98-accuracy
    
def detect_outliers(data:pd.DataFrame) -> pd.DataFrame:
    outlier_percents = {}
    for column in data.columns:
        if data[column].dtype != object:
            q1 = np.quantile(data[column], 0.25)
            q3 = np.quantile(data[column], 0.75)
            iqr = q3 - q1
            upper_bound = q3 + (1.5 * iqr)
            lower_bound = q1 - (1.5 * iqr)
            outliers = data[(data[column] > upper_bound) | (data[column] < lower_bound)][column]
            outlier_percentage = len(outliers) / len(data[column]) * 100
            outlier_percents[column] = outlier_percentage
            outlier_dataframe = pd.DataFrame(data = outlier_percents.values() ,index=outlier_percents.keys() ,columns=['Outlier_percentage'])
    
    return outlier_dataframe.sort_values(by = 'Outlier_percentage', ascending = False)

detect_outliers(test)

Unnamed: 0,Outlier_percentage
FineAggregateComponent,8.54369
WaterComponent,8.2663
AgeInDays,7.93343
SuperplasticizerComponent,1.47018
BlastFurnaceSlag,0.41609
id,0.0
CementComponent,0.0
FlyAshComponent,0.0
CoarseAggregateComponent,0.0
is_original,0.0


<div style="background-color:rgba(177, 156, 217, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Feature Engineering</h1>
</div>

## Categorical/Numerical Variables

## Handle Outliers
- https://www.kaggle.com/code/lyasdemir/best-algorithm-for-prediction-xgboost
- https://www.kaggle.com/code/mnokno/paris-housing-price-prediction-using-xgboost

In [15]:
# features_with_outliers = []

In [16]:
# https://www.kaggle.com/code/mnokno/paris-housing-price-prediction-using-xgboost

def remove_outliers(df:pd.DataFrame) -> pd.DataFrame:
    for c in features_with_outliers:
        if c == 'garage':
            first_percentile = df[c].quantile(0.001)
            df = df[df[c] > first_percentile]

        ninety_ninth_percentile = df[c].quantile(0.999)
        df = df[df[c] < ninety_ninth_percentile]
        #df_t = df_t[(df_t[c] > first_percentile) & (df_t[c] < ninety_ninth_percentile)]
    return df


In [17]:
# print(f'Before: {len(train)}')
# train = remove_outliers(train)
# print(f'After: {len(train)}')

In [18]:
train.head(10)

Unnamed: 0,id,CementComponent,BlastFurnaceSlag,FlyAshComponent,WaterComponent,SuperplasticizerComponent,CoarseAggregateComponent,FineAggregateComponent,AgeInDays,Strength,is_original
0,0.0,525.0,0.0,0.0,186.0,0.0,1125.0,613.0,3,10.38,0
1,1.0,143.0,169.0,143.0,191.0,8.0,967.0,643.0,28,23.52,0
2,2.0,289.0,134.7,0.0,185.7,0.0,1075.0,795.3,28,36.96,0
3,3.0,304.0,76.0,0.0,228.0,0.0,932.0,670.0,365,39.05,0
4,4.0,157.0,236.0,0.0,192.0,0.0,935.4,781.2,90,74.19,0
5,5.0,350.0,0.0,0.0,203.0,0.0,1055.0,775.0,7,37.43,0
6,6.0,135.7,203.5,0.0,185.7,0.0,1076.2,759.3,28,35.1,0
7,7.0,332.5,142.5,0.0,228.0,0.0,932.0,594.0,28,45.94,0
8,8.0,322.0,0.0,0.0,203.0,0.0,974.0,800.0,180,42.14,0
9,9.0,133.0,200.0,0.0,192.0,0.0,927.4,839.2,3,6.94,0


In [19]:
train = train.reset_index(drop=True).copy()
train.head(10)

Unnamed: 0,id,CementComponent,BlastFurnaceSlag,FlyAshComponent,WaterComponent,SuperplasticizerComponent,CoarseAggregateComponent,FineAggregateComponent,AgeInDays,Strength,is_original
0,0.0,525.0,0.0,0.0,186.0,0.0,1125.0,613.0,3,10.38,0
1,1.0,143.0,169.0,143.0,191.0,8.0,967.0,643.0,28,23.52,0
2,2.0,289.0,134.7,0.0,185.7,0.0,1075.0,795.3,28,36.96,0
3,3.0,304.0,76.0,0.0,228.0,0.0,932.0,670.0,365,39.05,0
4,4.0,157.0,236.0,0.0,192.0,0.0,935.4,781.2,90,74.19,0
5,5.0,350.0,0.0,0.0,203.0,0.0,1055.0,775.0,7,37.43,0
6,6.0,135.7,203.5,0.0,185.7,0.0,1076.2,759.3,28,35.1,0
7,7.0,332.5,142.5,0.0,228.0,0.0,932.0,594.0,28,45.94,0
8,8.0,322.0,0.0,0.0,203.0,0.0,974.0,800.0,180,42.14,0
9,9.0,133.0,200.0,0.0,192.0,0.0,927.4,839.2,3,6.94,0


In [20]:
excluded_features = [TARGET, ID, "fold"]

In [21]:
cont_features, cat_features = feature_distribution_types(train, display=True)
show_cardinality(train, cat_features)

cont_features = [feature for feature in cont_features if feature not in excluded_features]
cat_features = [feature for feature in cat_features if feature not in excluded_features]

FEATURES = cont_features + cat_features
FEATURES

[1m[91mContinuous Features=[0m['id', 'CementComponent', 'BlastFurnaceSlag', 'FlyAshComponent', 'WaterComponent', 'SuperplasticizerComponent', 'CoarseAggregateComponent', 'FineAggregateComponent', 'AgeInDays', 'Strength', 'is_original']

[1m[91mCategorical Features=[0m[]
[1m[91m=== Cardinality ===[0m
Series([], dtype: float64)


['CementComponent',
 'BlastFurnaceSlag',
 'FlyAshComponent',
 'WaterComponent',
 'SuperplasticizerComponent',
 'CoarseAggregateComponent',
 'FineAggregateComponent',
 'AgeInDays',
 'is_original']

In [22]:
train, test = label_encoder(train, test, cat_features)
# train = pd.get_dummies(train,columns=[]) # Will remove original feature names
# test = pd.get_dummies(test,columns=[])

In [23]:
train.head()

Unnamed: 0,id,CementComponent,BlastFurnaceSlag,FlyAshComponent,WaterComponent,SuperplasticizerComponent,CoarseAggregateComponent,FineAggregateComponent,AgeInDays,Strength,is_original
0,0.0,525.0,0.0,0.0,186.0,0.0,1125.0,613.0,3,10.38,0
1,1.0,143.0,169.0,143.0,191.0,8.0,967.0,643.0,28,23.52,0
2,2.0,289.0,134.7,0.0,185.7,0.0,1075.0,795.3,28,36.96,0
3,3.0,304.0,76.0,0.0,228.0,0.0,932.0,670.0,365,39.05,0
4,4.0,157.0,236.0,0.0,192.0,0.0,935.4,781.2,90,74.19,0


In [24]:
# cont_features, cat_features = feature_distribution_types(train, display=True)
# show_cardinality(train, cat_features)

# cont_features = [feature for feature in cont_features if feature not in excluded_features]
# cat_features = [feature for feature in cat_features if feature not in excluded_features]

# FEATURES = cont_features + cat_features
# FEATURES

<div style="background-color:rgba(177, 156, 217, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Optuna Hyperparameter Optimization</h1>
</div>

In [25]:
%%time

if Config.optimize:
    y = train[TARGET]
    X = train[FEATURES].copy()

    X_test = test[FEATURES].copy()
    X_train, X_valid, y_train, y_valid = model_selection.train_test_split(
        X, y, test_size=0.2, random_state=Config.seed
    )

# === XGB ===

time_limit = 3600 * 3
best_xgb_params = {}

if Config.optimize:
    study = optuna.create_study(direction=objective_direction)
    study.optimize(
        lambda trial: objective_xgb(trial, X_train, X_valid, y_train, y_valid),
        n_trials=Config.n_optuna_trials,
        # timeout=time_limit,  # this or n_trials
    )

if Config.optimize:
    print("Number of finished trials:", len(study.trials))
    print("Best XGB trial parameters:", study.best_trial.params)
    print("Best score:", study.best_value)
    best_xgb_params = study.best_trial.params

## === LGBM ===

time_limit = 3600 * 3
best_lgbm_params = {}

if Config.optimize:
    study = optuna.create_study(direction=objective_direction)
    study.optimize(
        lambda trial: objective_lgbm(trial, X_train, X_valid, y_train, y_valid),
        n_trials=Config.n_optuna_trials,
        # timeout=time_limit,  # this or n_trials
    )

if Config.optimize:
    print("Number of finished trials:", len(study.trials))
    print("Best LGBM trial parameters:", study.best_trial.params)
    print("Best score:", study.best_value)
    best_lgbm_params = study.best_trial.params

## === CatBoost

time_limit = 3600 * 3
# best_cb_params = {}
best_cb_params = {'learning_rate': 0.45743264601999495,
                  'l2_leaf_reg': 41.338946049390074,
                  'bagging_temperature': 0.3472567739474319,
                  'random_strength': 1.7332249677756242, 
                  'depth': 1,
                  'min_data_in_leaf': 6}

if Config.optimize:
    study = optuna.create_study(direction=objective_direction)
    study.optimize(
        lambda trial: objective_cb(trial, X_train, X_valid, y_train, y_valid),
        n_trials=Config.n_optuna_trials,
        # timeout=time_limit,  # this or n_trials
    )

if Config.optimize:
    print("Number of finished trials:", len(study.trials))
    print("Best Cat trial parameters:", study.best_trial.params)
    print("Best score:", study.best_value)
    best_cb_params = study.best_trial.params

[32m[I 2023-03-09 19:51:15,533][0m A new study created in memory with name: no-name-487972a3-b7d1-4ade-a57e-d9d2b21c3253[0m
[32m[I 2023-03-09 19:51:35,773][0m Trial 0 finished with value: 11.312930530480275 and parameters: {'use_label_encoder': False, 'n_estimators': 3600, 'learning_rate': 0.02322090263802853, 'subsample': 1.0, 'colsample_bytree': 0.6100000000000001, 'max_depth': 13, 'gamma': 81.5, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 0.0473670863694933, 'reg_alpha': 8.287324504899107e-05, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 103.05755507570501}. Best is trial 0 with value: 11.312930530480275.[0m


Number of boosting rounds: 258


[32m[I 2023-03-09 19:52:02,284][0m Trial 1 finished with value: 11.465825316556202 and parameters: {'use_label_encoder': False, 'n_estimators': 3600, 'learning_rate': 0.06824487002041292, 'subsample': 0.96, 'colsample_bytree': 0.9800000000000001, 'max_depth': 10, 'gamma': 6.1000000000000005, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 5.558639532803284, 'reg_alpha': 3.443455860260427e-08, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 0.18756246006548222}. Best is trial 0 with value: 11.312930530480275.[0m


Number of boosting rounds: 58


[32m[I 2023-03-09 19:52:19,626][0m Trial 2 finished with value: 11.618240255195426 and parameters: {'use_label_encoder': False, 'n_estimators': 2900, 'learning_rate': 0.08370161548672903, 'subsample': 0.95, 'colsample_bytree': 0.79, 'max_depth': 9, 'gamma': 89.7, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 5.439695724133465e-07, 'reg_alpha': 0.02260453824819086, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 0.15337786072851037}. Best is trial 0 with value: 11.312930530480275.[0m


Number of boosting rounds: 58


[32m[I 2023-03-09 19:52:43,726][0m Trial 3 finished with value: 11.502295007494947 and parameters: {'use_label_encoder': False, 'n_estimators': 3000, 'learning_rate': 0.021964194157633603, 'subsample': 0.1, 'colsample_bytree': 0.5800000000000001, 'max_depth': 20, 'gamma': 41.900000000000006, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 3.660822329239946e-08, 'reg_alpha': 2.7548871165617865, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 0.13102469338047956}. Best is trial 0 with value: 11.312930530480275.[0m


Number of boosting rounds: 222


[32m[I 2023-03-09 19:53:17,906][0m Trial 4 finished with value: 11.390221424550992 and parameters: {'use_label_encoder': False, 'n_estimators': 5000, 'learning_rate': 0.027781227306597524, 'subsample': 0.31, 'colsample_bytree': 0.5, 'max_depth': 19, 'gamma': 88.7, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 3.92427512648757e-08, 'reg_alpha': 2.7848379061056576e-05, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 12.197030861856616}. Best is trial 0 with value: 11.312930530480275.[0m


Number of boosting rounds: 247


[32m[I 2023-03-09 19:53:43,485][0m Trial 5 finished with value: 11.282650727939512 and parameters: {'use_label_encoder': False, 'n_estimators': 4200, 'learning_rate': 0.08506613234630953, 'subsample': 0.8099999999999999, 'colsample_bytree': 0.7500000000000001, 'max_depth': 19, 'gamma': 94.60000000000001, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 35.31724720211912, 'reg_alpha': 0.00019699662385845896, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 1.3093649150752114}. Best is trial 5 with value: 11.282650727939512.[0m


Number of boosting rounds: 97


[32m[I 2023-03-09 19:54:04,570][0m Trial 6 finished with value: 11.661409638889493 and parameters: {'use_label_encoder': False, 'n_estimators': 2700, 'learning_rate': 0.044310835795947336, 'subsample': 0.19, 'colsample_bytree': 0.46, 'max_depth': 14, 'gamma': 61.900000000000006, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 0.43549230649781856, 'reg_alpha': 0.18338507058110762, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 0.20165184828491106}. Best is trial 5 with value: 11.282650727939512.[0m


Number of boosting rounds: 103


[32m[I 2023-03-09 19:54:25,793][0m Trial 7 finished with value: 11.3614372762533 and parameters: {'use_label_encoder': False, 'n_estimators': 3400, 'learning_rate': 0.2093167690926945, 'subsample': 0.16, 'colsample_bytree': 0.24, 'max_depth': 14, 'gamma': 49.400000000000006, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 9.791390671793781e-08, 'reg_alpha': 0.00014036913764214125, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 93.58367252586952}. Best is trial 5 with value: 11.282650727939512.[0m


Number of boosting rounds: 127


[32m[I 2023-03-09 19:54:58,434][0m Trial 8 finished with value: 11.412200626880187 and parameters: {'use_label_encoder': False, 'n_estimators': 4500, 'learning_rate': 0.09129580360939704, 'subsample': 0.83, 'colsample_bytree': 0.8600000000000001, 'max_depth': 11, 'gamma': 16.400000000000002, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 12.786922100568322, 'reg_alpha': 6.260532514447549, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 0.1233808384741274}. Best is trial 5 with value: 11.282650727939512.[0m


Number of boosting rounds: 49


[32m[I 2023-03-09 19:55:18,750][0m Trial 9 finished with value: 11.22179113122263 and parameters: {'use_label_encoder': False, 'n_estimators': 3400, 'learning_rate': 0.10769128759974604, 'subsample': 0.8099999999999999, 'colsample_bytree': 0.6000000000000001, 'max_depth': 4, 'gamma': 35.300000000000004, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 0.023937931129632613, 'reg_alpha': 25.06275013085232, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 3.2449962783993533}. Best is trial 9 with value: 11.22179113122263.[0m


Number of boosting rounds: 76


[32m[I 2023-03-09 19:55:28,211][0m Trial 10 finished with value: 11.627692008069245 and parameters: {'use_label_encoder': False, 'n_estimators': 1500, 'learning_rate': 0.24537522861903704, 'subsample': 0.55, 'colsample_bytree': 0.07, 'max_depth': 1, 'gamma': 26.700000000000003, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 0.0004021345936305189, 'reg_alpha': 90.81536242128827, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 658.5599984898058}. Best is trial 9 with value: 11.22179113122263.[0m


Number of boosting rounds: 1099


[32m[I 2023-03-09 19:55:52,727][0m Trial 11 finished with value: 11.248560911282809 and parameters: {'use_label_encoder': False, 'n_estimators': 4200, 'learning_rate': 0.12511332034148423, 'subsample': 0.69, 'colsample_bytree': 0.7100000000000001, 'max_depth': 3, 'gamma': 65.10000000000001, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 58.94653883990743, 'reg_alpha': 0.0059166457033313835, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 1.607270676599624}. Best is trial 9 with value: 11.22179113122263.[0m


Number of boosting rounds: 159


[32m[I 2023-03-09 19:56:05,143][0m Trial 12 finished with value: 11.313457186611085 and parameters: {'use_label_encoder': False, 'n_estimators': 2000, 'learning_rate': 0.16351523738607443, 'subsample': 0.62, 'colsample_bytree': 0.37, 'max_depth': 2, 'gamma': 67.60000000000001, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 0.009627645505563777, 'reg_alpha': 0.015886698164849337, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 2.0517098723877947}. Best is trial 9 with value: 11.22179113122263.[0m


Number of boosting rounds: 150


[32m[I 2023-03-09 19:56:32,810][0m Trial 13 finished with value: 11.243551208356124 and parameters: {'use_label_encoder': False, 'n_estimators': 4200, 'learning_rate': 0.011394536720225883, 'subsample': 0.71, 'colsample_bytree': 0.68, 'max_depth': 5, 'gamma': 34.4, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 75.07414870728441, 'reg_alpha': 45.7613715654831, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 2.348106099249147}. Best is trial 9 with value: 11.22179113122263.[0m


Number of boosting rounds: 1217


[32m[I 2023-03-09 19:56:45,966][0m Trial 14 finished with value: 11.25660889212017 and parameters: {'use_label_encoder': False, 'n_estimators': 2100, 'learning_rate': 0.011569936283709645, 'subsample': 0.39, 'colsample_bytree': 0.33999999999999997, 'max_depth': 5, 'gamma': 34.800000000000004, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 0.8316184113084412, 'reg_alpha': 45.67216399616164, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 6.6587114126030045}. Best is trial 9 with value: 11.22179113122263.[0m


Number of boosting rounds: 1438


[32m[I 2023-03-09 19:57:18,434][0m Trial 15 finished with value: 11.265563251165556 and parameters: {'use_label_encoder': False, 'n_estimators': 4900, 'learning_rate': 0.011434422911917111, 'subsample': 0.75, 'colsample_bytree': 0.64, 'max_depth': 6, 'gamma': 23.200000000000003, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 0.00035665479720623823, 'reg_alpha': 0.9901196204145595, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 5.512991554922114}. Best is trial 9 with value: 11.22179113122263.[0m


Number of boosting rounds: 533


[32m[I 2023-03-09 19:57:46,919][0m Trial 16 finished with value: 11.305054354293727 and parameters: {'use_label_encoder': False, 'n_estimators': 4000, 'learning_rate': 0.04678433791536468, 'subsample': 0.47, 'colsample_bytree': 0.89, 'max_depth': 6, 'gamma': 3.9000000000000004, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 1.444910807921543, 'reg_alpha': 16.681426391127708, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 0.88097790393219}. Best is trial 9 with value: 11.22179113122263.[0m


Number of boosting rounds: 131


[32m[I 2023-03-09 19:58:01,895][0m Trial 17 finished with value: 11.211234109183378 and parameters: {'use_label_encoder': False, 'n_estimators': 2500, 'learning_rate': 0.14788553390032833, 'subsample': 0.86, 'colsample_bytree': 0.67, 'max_depth': 4, 'gamma': 36.300000000000004, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 96.65719422764421, 'reg_alpha': 0.5051006920821137, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 16.181738840470945}. Best is trial 17 with value: 11.211234109183378.[0m


Number of boosting rounds: 108


[32m[I 2023-03-09 19:58:17,034][0m Trial 18 finished with value: 11.396851492086538 and parameters: {'use_label_encoder': False, 'n_estimators': 2400, 'learning_rate': 0.13014886273686666, 'subsample': 0.85, 'colsample_bytree': 0.43, 'max_depth': 8, 'gamma': 50.5, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 0.0278422465445791, 'reg_alpha': 0.4193905053806985, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 19.557666807190806}. Best is trial 17 with value: 11.211234109183378.[0m


Number of boosting rounds: 58


[32m[I 2023-03-09 19:58:22,859][0m Trial 19 finished with value: 11.2835793691599 and parameters: {'use_label_encoder': False, 'n_estimators': 1000, 'learning_rate': 0.15720590149971092, 'subsample': 0.88, 'colsample_bytree': 0.55, 'max_depth': 3, 'gamma': 15.5, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 2.400331346986528e-05, 'reg_alpha': 0.1735623522759394, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 31.69301588734086}. Best is trial 17 with value: 11.211234109183378.[0m


Number of boosting rounds: 108


[32m[I 2023-03-09 19:58:42,859][0m Trial 20 finished with value: 11.915270266269856 and parameters: {'use_label_encoder': False, 'n_estimators': 3200, 'learning_rate': 0.19089065499776428, 'subsample': 0.61, 'colsample_bytree': 0.22000000000000003, 'max_depth': 7, 'gamma': 50.400000000000006, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 0.089880470893052, 'reg_alpha': 4.074612541340549, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 0.5938489709097429}. Best is trial 17 with value: 11.211234109183378.[0m


Number of boosting rounds: 98


[32m[I 2023-03-09 19:59:05,662][0m Trial 21 finished with value: 11.242781751989675 and parameters: {'use_label_encoder': False, 'n_estimators': 3800, 'learning_rate': 0.10801048444216141, 'subsample': 0.72, 'colsample_bytree': 0.68, 'max_depth': 4, 'gamma': 34.4, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 96.90758628833117, 'reg_alpha': 71.55868373037052, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 4.180771959532363}. Best is trial 17 with value: 11.211234109183378.[0m


Number of boosting rounds: 174


[32m[I 2023-03-09 19:59:21,209][0m Trial 22 finished with value: 11.253781098290693 and parameters: {'use_label_encoder': False, 'n_estimators': 2500, 'learning_rate': 0.11678693506259562, 'subsample': 0.78, 'colsample_bytree': 0.8300000000000001, 'max_depth': 3, 'gamma': 32.5, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 3.4677861241263344, 'reg_alpha': 7.935819203337722, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 3.7535791797467026}. Best is trial 17 with value: 11.211234109183378.[0m


Number of boosting rounds: 146


[32m[I 2023-03-09 19:59:43,229][0m Trial 23 finished with value: 11.514877890034628 and parameters: {'use_label_encoder': False, 'n_estimators': 3800, 'learning_rate': 0.11299752210667574, 'subsample': 0.67, 'colsample_bytree': 0.67, 'max_depth': 1, 'gamma': 42.6, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 0.42434052542693906, 'reg_alpha': 85.23191222994473, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 8.325417491108443}. Best is trial 17 with value: 11.211234109183378.[0m


Number of boosting rounds: 807


[32m[I 2023-03-09 20:00:02,522][0m Trial 24 finished with value: 11.272387705909305 and parameters: {'use_label_encoder': False, 'n_estimators': 3300, 'learning_rate': 0.15657636551559503, 'subsample': 0.91, 'colsample_bytree': 0.55, 'max_depth': 4, 'gamma': 18.5, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 6.766617104614282, 'reg_alpha': 1.5803444637636495, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 4.660216266389894}. Best is trial 17 with value: 11.211234109183378.[0m


Number of boosting rounds: 80


[32m[I 2023-03-09 20:00:16,471][0m Trial 25 finished with value: 11.204513152445472 and parameters: {'use_label_encoder': False, 'n_estimators': 1900, 'learning_rate': 0.06314646270798292, 'subsample': 0.75, 'colsample_bytree': 0.9700000000000001, 'max_depth': 7, 'gamma': 41.900000000000006, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 89.95600546716807, 'reg_alpha': 20.45526813988082, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 13.773917364952407}. Best is trial 25 with value: 11.204513152445472.[0m


Number of boosting rounds: 158


[32m[I 2023-03-09 20:00:26,175][0m Trial 26 finished with value: 11.26054589694062 and parameters: {'use_label_encoder': False, 'n_estimators': 1600, 'learning_rate': 0.061967485454529334, 'subsample': 0.89, 'colsample_bytree': 0.9800000000000001, 'max_depth': 8, 'gamma': 57.1, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 13.537179908798748, 'reg_alpha': 11.16478535681499, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 17.722008448514316}. Best is trial 25 with value: 11.204513152445472.[0m


Number of boosting rounds: 105


[32m[I 2023-03-09 20:00:39,855][0m Trial 27 finished with value: 11.436375399814121 and parameters: {'use_label_encoder': False, 'n_estimators': 2000, 'learning_rate': 0.06385477863241677, 'subsample': 0.79, 'colsample_bytree': 0.8700000000000001, 'max_depth': 11, 'gamma': 43.1, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 0.003024112180317573, 'reg_alpha': 0.483147126499212, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 38.550766880162456}. Best is trial 25 with value: 11.204513152445472.[0m


Number of boosting rounds: 68


[32m[I 2023-03-09 20:00:50,321][0m Trial 28 finished with value: 11.312767239208792 and parameters: {'use_label_encoder': False, 'n_estimators': 1400, 'learning_rate': 0.09295934849367228, 'subsample': 0.51, 'colsample_bytree': 0.92, 'max_depth': 7, 'gamma': 71.0, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 0.19169140155888492, 'reg_alpha': 12.842024632819655, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 7.854745207746735}. Best is trial 25 with value: 11.204513152445472.[0m


Number of boosting rounds: 49


[32m[I 2023-03-09 20:01:03,000][0m Trial 29 finished with value: 11.466063701726092 and parameters: {'use_label_encoder': False, 'n_estimators': 2300, 'learning_rate': 0.13781890475089378, 'subsample': 0.99, 'colsample_bytree': 0.76, 'max_depth': 16, 'gamma': 73.0, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 2.1996983088128412, 'reg_alpha': 1.4494702632586272, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 12.935848999143335}. Best is trial 25 with value: 11.204513152445472.[0m


Number of boosting rounds: 28


[32m[I 2023-03-09 20:01:14,652][0m Trial 30 finished with value: 11.258537960979785 and parameters: {'use_label_encoder': False, 'n_estimators': 1700, 'learning_rate': 0.18788784439568473, 'subsample': 0.64, 'colsample_bytree': 0.38, 'max_depth': 6, 'gamma': 28.6, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 0.048503045592175185, 'reg_alpha': 0.14048502118390813, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 54.81582088651546}. Best is trial 25 with value: 11.204513152445472.[0m


Number of boosting rounds: 48


[32m[I 2023-03-09 20:01:31,005][0m Trial 31 finished with value: 11.215588382469805 and parameters: {'use_label_encoder': False, 'n_estimators': 2700, 'learning_rate': 0.10318789815531568, 'subsample': 0.76, 'colsample_bytree': 0.6200000000000001, 'max_depth': 4, 'gamma': 39.1, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 79.29519547794676, 'reg_alpha': 30.463043405181363, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 4.0611853014983375}. Best is trial 25 with value: 11.204513152445472.[0m


Number of boosting rounds: 240


[32m[I 2023-03-09 20:01:48,631][0m Trial 32 finished with value: 11.22782614499433 and parameters: {'use_label_encoder': False, 'n_estimators': 2700, 'learning_rate': 0.07456086778672817, 'subsample': 0.75, 'colsample_bytree': 0.6100000000000001, 'max_depth': 4, 'gamma': 40.900000000000006, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 18.498136715840452, 'reg_alpha': 19.007711001269016, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 3.119436460837263}. Best is trial 25 with value: 11.204513152445472.[0m


Number of boosting rounds: 146


[32m[I 2023-03-09 20:02:05,637][0m Trial 33 finished with value: 11.358134820223425 and parameters: {'use_label_encoder': False, 'n_estimators': 3000, 'learning_rate': 0.10136191994190073, 'subsample': 0.94, 'colsample_bytree': 0.5, 'max_depth': 9, 'gamma': 55.900000000000006, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 3.602285048529729, 'reg_alpha': 7.376061890779295, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 10.709635912425341}. Best is trial 25 with value: 11.204513152445472.[0m


Number of boosting rounds: 61


[32m[I 2023-03-09 20:02:28,550][0m Trial 34 finished with value: 11.194118775498694 and parameters: {'use_label_encoder': False, 'n_estimators': 3500, 'learning_rate': 0.07680931874146442, 'subsample': 0.85, 'colsample_bytree': 0.9900000000000001, 'max_depth': 5, 'gamma': 38.2, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 15.779565957054032, 'reg_alpha': 2.416814572558282, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 20.662488932605324}. Best is trial 34 with value: 11.194118775498694.[0m


Number of boosting rounds: 133


[32m[I 2023-03-09 20:02:40,761][0m Trial 35 finished with value: 11.305683501131528 and parameters: {'use_label_encoder': False, 'n_estimators': 2200, 'learning_rate': 0.07183951158333306, 'subsample': 1.0, 'colsample_bytree': 0.9600000000000001, 'max_depth': 10, 'gamma': 44.6, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 22.960945328514317, 'reg_alpha': 2.971054667026793, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 22.408086304281788}. Best is trial 34 with value: 11.194118775498694.[0m


Number of boosting rounds: 71


[32m[I 2023-03-09 20:02:57,338][0m Trial 36 finished with value: 11.23714654674021 and parameters: {'use_label_encoder': False, 'n_estimators': 2700, 'learning_rate': 0.05813204967716555, 'subsample': 0.86, 'colsample_bytree': 1.0, 'max_depth': 2, 'gamma': 57.800000000000004, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 80.56509696896153, 'reg_alpha': 0.0465521200471334, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 13.382442115102124}. Best is trial 34 with value: 11.194118775498694.[0m


Number of boosting rounds: 696


[32m[I 2023-03-09 20:03:09,202][0m Trial 37 finished with value: 11.259207975550666 and parameters: {'use_label_encoder': False, 'n_estimators': 1800, 'learning_rate': 0.0858278433601648, 'subsample': 0.94, 'colsample_bytree': 0.9400000000000001, 'max_depth': 7, 'gamma': 10.700000000000001, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 7.625690690967052, 'reg_alpha': 0.9523824390742519, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 24.98735617869322}. Best is trial 34 with value: 11.194118775498694.[0m


Number of boosting rounds: 66


[32m[I 2023-03-09 20:03:28,825][0m Trial 38 finished with value: 11.215826743642292 and parameters: {'use_label_encoder': False, 'n_estimators': 3000, 'learning_rate': 0.05330342947239267, 'subsample': 0.58, 'colsample_bytree': 0.81, 'max_depth': 5, 'gamma': 23.5, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 26.25575835913292, 'reg_alpha': 2.6741626423075435, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 49.27679818768203}. Best is trial 34 with value: 11.194118775498694.[0m


Number of boosting rounds: 243


[32m[I 2023-03-09 20:03:43,983][0m Trial 39 finished with value: 11.394400223523954 and parameters: {'use_label_encoder': False, 'n_estimators': 2500, 'learning_rate': 0.0765826400454705, 'subsample': 0.75, 'colsample_bytree': 0.78, 'max_depth': 9, 'gamma': 80.5, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 1.3196232481660068, 'reg_alpha': 0.0008432234935077532, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 6.987515987181013}. Best is trial 34 with value: 11.194118775498694.[0m


Number of boosting rounds: 59


[32m[I 2023-03-09 20:03:53,370][0m Trial 40 finished with value: 11.315498805154078 and parameters: {'use_label_encoder': False, 'n_estimators': 1200, 'learning_rate': 0.0387086623201806, 'subsample': 0.83, 'colsample_bytree': 0.91, 'max_depth': 12, 'gamma': 47.400000000000006, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 6.3964423311992995, 'reg_alpha': 0.4072177184858868, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 78.11440283478511}. Best is trial 34 with value: 11.194118775498694.[0m


Number of boosting rounds: 183


[32m[I 2023-03-09 20:04:12,214][0m Trial 41 finished with value: 11.209592240242738 and parameters: {'use_label_encoder': False, 'n_estimators': 3000, 'learning_rate': 0.0560473785175036, 'subsample': 0.56, 'colsample_bytree': 0.8400000000000001, 'max_depth': 5, 'gamma': 23.1, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 31.76064288529191, 'reg_alpha': 2.436193522779646, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 43.175713225118955}. Best is trial 34 with value: 11.194118775498694.[0m


Number of boosting rounds: 272


[32m[I 2023-03-09 20:04:28,737][0m Trial 42 finished with value: 11.30179559899142 and parameters: {'use_label_encoder': False, 'n_estimators': 2700, 'learning_rate': 0.06862905940590948, 'subsample': 0.32, 'colsample_bytree': 0.7300000000000001, 'max_depth': 2, 'gamma': 39.5, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 30.989669152264597, 'reg_alpha': 4.914052965444518, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 32.71746837734446}. Best is trial 34 with value: 11.194118775498694.[0m


Number of boosting rounds: 860


[32m[I 2023-03-09 20:04:50,250][0m Trial 43 finished with value: 11.23700675523713 and parameters: {'use_label_encoder': False, 'n_estimators': 3500, 'learning_rate': 0.08344305639160093, 'subsample': 0.66, 'colsample_bytree': 0.8600000000000001, 'max_depth': 5, 'gamma': 27.6, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 81.32384928730889, 'reg_alpha': 22.185015906408992, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 161.6788573306607}. Best is trial 34 with value: 11.194118775498694.[0m


Number of boosting rounds: 182


[32m[I 2023-03-09 20:05:12,042][0m Trial 44 finished with value: 11.270083457276826 and parameters: {'use_label_encoder': False, 'n_estimators': 3200, 'learning_rate': 0.05340607968304246, 'subsample': 0.45000000000000007, 'colsample_bytree': 0.8200000000000001, 'max_depth': 6, 'gamma': 21.0, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 26.51940689197435, 'reg_alpha': 2.2741229557214737, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 15.036010394243402}. Best is trial 34 with value: 11.194118775498694.[0m


Number of boosting rounds: 135


[32m[I 2023-03-09 20:05:30,016][0m Trial 45 finished with value: 11.24251745376718 and parameters: {'use_label_encoder': False, 'n_estimators': 2900, 'learning_rate': 0.09780792472344171, 'subsample': 0.91, 'colsample_bytree': 0.9600000000000001, 'max_depth': 8, 'gamma': 38.300000000000004, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 13.003242209763496, 'reg_alpha': 28.247001064413553, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 11.47342036878831}. Best is trial 34 with value: 11.194118775498694.[0m


Number of boosting rounds: 51


[32m[I 2023-03-09 20:05:41,405][0m Trial 46 finished with value: 11.19967513952528 and parameters: {'use_label_encoder': False, 'n_estimators': 1900, 'learning_rate': 0.037562806809015116, 'subsample': 0.8, 'colsample_bytree': 0.91, 'max_depth': 3, 'gamma': 12.200000000000001, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 2.8872452146561898, 'reg_alpha': 0.07541531083868869, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 24.05379338813589}. Best is trial 34 with value: 11.194118775498694.[0m


Number of boosting rounds: 509


[32m[I 2023-03-09 20:05:49,294][0m Trial 47 finished with value: 11.217702341303218 and parameters: {'use_label_encoder': False, 'n_estimators': 1300, 'learning_rate': 0.0420031821272122, 'subsample': 0.83, 'colsample_bytree': 1.0, 'max_depth': 3, 'gamma': 8.4, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 4.869968640962771, 'reg_alpha': 0.07457006117475885, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 18.566684074431187}. Best is trial 34 with value: 11.194118775498694.[0m


Number of boosting rounds: 476


[32m[I 2023-03-09 20:06:01,420][0m Trial 48 finished with value: 11.287814965005376 and parameters: {'use_label_encoder': False, 'n_estimators': 1900, 'learning_rate': 0.03710641154522785, 'subsample': 0.71, 'colsample_bytree': 0.91, 'max_depth': 2, 'gamma': 0.7000000000000001, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 0.7495922959659163, 'reg_alpha': 0.011264581833025842, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 26.916693284167145}. Best is trial 34 with value: 11.194118775498694.[0m


Number of boosting rounds: 1420


[32m[I 2023-03-09 20:06:14,203][0m Trial 49 finished with value: 11.524146860539838 and parameters: {'use_label_encoder': False, 'n_estimators': 2200, 'learning_rate': 0.03228458178915636, 'subsample': 0.55, 'colsample_bytree': 0.8500000000000001, 'max_depth': 1, 'gamma': 14.200000000000001, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 33.156735575195725, 'reg_alpha': 0.03235590495470784, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 64.5463269443938}. Best is trial 34 with value: 11.194118775498694.[0m
[32m[I 2023-03-09 20:06:14,223][0m A new study created in memory with name: no-name-053f14ef-bd03-44ce-8b0e-ef3fdbf3a304[0m


Number of boosting rounds: 2132
Number of finished trials: 50
Best XGB trial parameters: {'use_label_encoder': False, 'n_estimators': 3500, 'learning_rate': 0.07680931874146442, 'subsample': 0.85, 'colsample_bytree': 0.9900000000000001, 'max_depth': 5, 'gamma': 38.2, 'booster': 'gbtree', 'tree_method': 'hist', 'reg_lambda': 15.779565957054032, 'reg_alpha': 2.416814572558282, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 20.662488932605324}
Best score: 11.194118775498694
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 8.53765	valid_1's l1: 8.77165


[32m[I 2023-03-09 20:06:15,565][0m Trial 0 finished with value: 11.661212145504088 and parameters: {'objective': 'mae', 'n_estimators': 4467, 'reg_alpha': 0.0006788185396814787, 'reg_lambda': 2.001250682145989e-06, 'colsample_bytree': 0.5800000000000001, 'num_leaves': 989, 'feature_fraction': 0.11303069863074139, 'bagging_fraction': 0.46036590520487186, 'bagging_freq': 5, 'min_child_samples': 255, 'subsample': 0.75, 'learning_rate': 0.19311113260822838, 'max_depth': 83, 'random_state': 42, 'n_jobs': 4}. Best is trial 0 with value: 11.661212145504088.[0m


Early stopping, best iteration is:
[289]	training's l1: 8.6183	valid_1's l1: 8.7123
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 8.45681	valid_1's l1: 8.7168


[32m[I 2023-03-09 20:06:17,557][0m Trial 1 finished with value: 11.608537994462834 and parameters: {'objective': 'mae', 'n_estimators': 3776, 'reg_alpha': 5.4865901722607773e-05, 'reg_lambda': 0.00019537074629929528, 'colsample_bytree': 0.08, 'num_leaves': 186, 'feature_fraction': 0.2748966921333451, 'bagging_fraction': 0.46374645073359033, 'bagging_freq': 11, 'min_child_samples': 278, 'subsample': 0.73, 'learning_rate': 0.11473720696120562, 'max_depth': 85, 'random_state': 42, 'n_jobs': 4}. Best is trial 1 with value: 11.608537994462834.[0m


[1000]	training's l1: 8.30474	valid_1's l1: 8.74164
Early stopping, best iteration is:
[527]	training's l1: 8.45293	valid_1's l1: 8.69184
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 9.32603	valid_1's l1: 9.17548
[1000]	training's l1: 9.2388	valid_1's l1: 9.13818
[1500]	training's l1: 9.21362	valid_1's l1: 9.13849
[2000]	training's l1: 9.16269	valid_1's l1: 9.08533
[2500]	training's l1: 9.13295	valid_1's l1: 9.05453
[3000]	training's l1: 9.10824	valid_1's l1: 9.0517
Did not meet early stopping. Best iteration is:
[3059]	training's l1: 9.09561	valid_1's l1: 9.06646


[32m[I 2023-03-09 20:06:20,833][0m Trial 2 finished with value: 11.900654368786444 and parameters: {'objective': 'mae', 'n_estimators': 3104, 'reg_alpha': 0.10091369567443381, 'reg_lambda': 5.641960687559561e-06, 'colsample_bytree': 0.7100000000000001, 'num_leaves': 591, 'feature_fraction': 0.6375794882860933, 'bagging_fraction': 0.13667988528930056, 'bagging_freq': 8, 'min_child_samples': 228, 'subsample': 0.45000000000000007, 'learning_rate': 0.03633015798391573, 'max_depth': 16, 'random_state': 42, 'n_jobs': 4}. Best is trial 1 with value: 11.608537994462834.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's l1: 7.04457	valid_1's l1: 8.55462
Early stopping, best iteration is:
[478]	training's l1: 7.07956	valid_1's l1: 8.55123


[32m[I 2023-03-09 20:06:37,591][0m Trial 3 finished with value: 11.529128666959359 and parameters: {'objective': 'mae', 'n_estimators': 3894, 'reg_alpha': 9.101636217525843e-06, 'reg_lambda': 1.1967906353078668e-08, 'colsample_bytree': 1.0, 'num_leaves': 863, 'feature_fraction': 0.973141087031942, 'bagging_fraction': 0.93565318565853, 'bagging_freq': 13, 'min_child_samples': 37, 'subsample': 0.32, 'learning_rate': 0.010562990335991871, 'max_depth': 38, 'random_state': 42, 'n_jobs': 4}. Best is trial 3 with value: 11.529128666959359.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's l1: 6.82702	valid_1's l1: 8.69277


[32m[I 2023-03-09 20:06:45,220][0m Trial 4 finished with value: 11.588992587125386 and parameters: {'objective': 'mae', 'n_estimators': 1855, 'reg_alpha': 6.353033968230168, 'reg_lambda': 0.011495039621378867, 'colsample_bytree': 0.4, 'num_leaves': 672, 'feature_fraction': 0.6260980216622857, 'bagging_fraction': 0.6228516568379231, 'bagging_freq': 6, 'min_child_samples': 10, 'subsample': 0.33999999999999997, 'learning_rate': 0.027551573789092496, 'max_depth': 12, 'random_state': 42, 'n_jobs': 4}. Best is trial 3 with value: 11.529128666959359.[0m


Early stopping, best iteration is:
[187]	training's l1: 7.5349	valid_1's l1: 8.66323
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 7.2255	training's rmse: 9.66007	valid_1's l1: 8.83224	valid_1's rmse: 11.5514


[32m[I 2023-03-09 20:06:47,659][0m Trial 5 finished with value: 11.278063987977228 and parameters: {'objective': 'rmse', 'n_estimators': 2532, 'reg_alpha': 0.0007496547368606873, 'reg_lambda': 0.0013246865479561423, 'colsample_bytree': 0.8500000000000001, 'num_leaves': 396, 'feature_fraction': 0.9678112328812677, 'bagging_fraction': 0.9298675202454968, 'bagging_freq': 5, 'min_child_samples': 186, 'subsample': 0.31, 'learning_rate': 0.10322503427121345, 'max_depth': 63, 'random_state': 42, 'n_jobs': 4}. Best is trial 5 with value: 11.278063987977228.[0m


Early stopping, best iteration is:
[88]	training's l1: 8.05799	training's rmse: 10.73	valid_1's l1: 8.62054	valid_1's rmse: 11.2781
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 5.02466	training's rmse: 7.44182	valid_1's l1: 9.30049	valid_1's rmse: 12.2826


[32m[I 2023-03-09 20:06:54,698][0m Trial 6 finished with value: 11.502618106964816 and parameters: {'objective': 'rmse', 'n_estimators': 3639, 'reg_alpha': 6.773886626209578e-08, 'reg_lambda': 0.007918487775579968, 'colsample_bytree': 0.51, 'num_leaves': 595, 'feature_fraction': 0.7236070643203312, 'bagging_fraction': 0.9119645054045965, 'bagging_freq': 4, 'min_child_samples': 31, 'subsample': 0.36, 'learning_rate': 0.1194210970798368, 'max_depth': 13, 'random_state': 42, 'n_jobs': 4}. Best is trial 5 with value: 11.278063987977228.[0m


Early stopping, best iteration is:
[29]	training's l1: 7.54037	training's rmse: 10.0369	valid_1's l1: 8.82351	valid_1's rmse: 11.5026
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 8.36815	valid_1's l1: 8.59227
[1000]	training's l1: 8.15734	valid_1's l1: 8.58116
[1500]	training's l1: 8.03197	valid_1's l1: 8.57368
Early stopping, best iteration is:
[1066]	training's l1: 8.13768	valid_1's l1: 8.56312


[32m[I 2023-03-09 20:06:58,147][0m Trial 7 finished with value: 11.460432055593527 and parameters: {'objective': 'mae', 'n_estimators': 1702, 'reg_alpha': 0.004382990119332211, 'reg_lambda': 9.881076424809642e-05, 'colsample_bytree': 0.8200000000000001, 'num_leaves': 391, 'feature_fraction': 0.7429239498063664, 'bagging_fraction': 0.5060057230245233, 'bagging_freq': 13, 'min_child_samples': 231, 'subsample': 0.61, 'learning_rate': 0.023512920959651035, 'max_depth': 28, 'random_state': 42, 'n_jobs': 4}. Best is trial 5 with value: 11.278063987977228.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's l1: 7.6774	valid_1's l1: 8.74835


[32m[I 2023-03-09 20:07:03,121][0m Trial 8 finished with value: 11.687737552966443 and parameters: {'objective': 'mae', 'n_estimators': 4352, 'reg_alpha': 6.143039623588121, 'reg_lambda': 0.1468817334957451, 'colsample_bytree': 0.19, 'num_leaves': 118, 'feature_fraction': 0.19069137441594425, 'bagging_fraction': 0.5109387629199045, 'bagging_freq': 6, 'min_child_samples': 28, 'subsample': 0.55, 'learning_rate': 0.06454872057577826, 'max_depth': 89, 'random_state': 42, 'n_jobs': 4}. Best is trial 5 with value: 11.278063987977228.[0m


Early stopping, best iteration is:
[237]	training's l1: 8.00294	valid_1's l1: 8.71391
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 8.69831	training's rmse: 11.4443	valid_1's l1: 8.73665	valid_1's rmse: 11.4569
[1000]	training's l1: 8.48056	training's rmse: 11.2236	valid_1's l1: 8.70173	valid_1's rmse: 11.4141
[1500]	training's l1: 8.37354	training's rmse: 11.0867	valid_1's l1: 8.70648	valid_1's rmse: 11.3616


[32m[I 2023-03-09 20:07:07,331][0m Trial 9 finished with value: 11.362755018448848 and parameters: {'objective': 'rmse', 'n_estimators': 3464, 'reg_alpha': 0.5640198230293838, 'reg_lambda': 1.4227957766077337, 'colsample_bytree': 0.7500000000000001, 'num_leaves': 125, 'feature_fraction': 0.8166824062964628, 'bagging_fraction': 0.2607396630571954, 'bagging_freq': 9, 'min_child_samples': 106, 'subsample': 0.85, 'learning_rate': 0.011822160797896436, 'max_depth': 50, 'random_state': 42, 'n_jobs': 4}. Best is trial 5 with value: 11.278063987977228.[0m


Early stopping, best iteration is:
[1277]	training's l1: 8.4061	training's rmse: 11.1336	valid_1's l1: 8.68185	valid_1's rmse: 11.3628
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 7.5984	training's rmse: 10.1332	valid_1's l1: 8.72178	valid_1's rmse: 11.4173


[32m[I 2023-03-09 20:07:09,958][0m Trial 10 finished with value: 11.290048511090971 and parameters: {'objective': 'rmse', 'n_estimators': 855, 'reg_alpha': 2.7741031872386884e-06, 'reg_lambda': 4.051256165308124, 'colsample_bytree': 0.9600000000000001, 'num_leaves': 326, 'feature_fraction': 0.9943252386896863, 'bagging_fraction': 0.8023441690649826, 'bagging_freq': 1, 'min_child_samples': 171, 'subsample': 0.12000000000000001, 'learning_rate': 0.05807331953857454, 'max_depth': 66, 'random_state': 42, 'n_jobs': 4}. Best is trial 5 with value: 11.278063987977228.[0m


Early stopping, best iteration is:
[160]	training's l1: 8.10754	training's rmse: 10.765	valid_1's l1: 8.63058	valid_1's rmse: 11.29
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 7.33053	training's rmse: 9.81806	valid_1's l1: 8.71493	valid_1's rmse: 11.428


[32m[I 2023-03-09 20:07:12,867][0m Trial 11 finished with value: 11.276490888755632 and parameters: {'objective': 'rmse', 'n_estimators': 1137, 'reg_alpha': 3.2280143261382577e-06, 'reg_lambda': 8.360133787320093, 'colsample_bytree': 0.9900000000000001, 'num_leaves': 335, 'feature_fraction': 0.9932579016038974, 'bagging_fraction': 0.7992728693335135, 'bagging_freq': 0, 'min_child_samples': 163, 'subsample': 0.1, 'learning_rate': 0.06399361849071615, 'max_depth': 66, 'random_state': 42, 'n_jobs': 4}. Best is trial 11 with value: 11.276490888755632.[0m


Early stopping, best iteration is:
[121]	training's l1: 8.03519	training's rmse: 10.6943	valid_1's l1: 8.60851	valid_1's rmse: 11.2765
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 7.1268	training's rmse: 9.55389	valid_1's l1: 8.78525	valid_1's rmse: 11.5502


[32m[I 2023-03-09 20:07:15,976][0m Trial 12 finished with value: 11.289466348783131 and parameters: {'objective': 'rmse', 'n_estimators': 2320, 'reg_alpha': 2.8963278479365724e-07, 'reg_lambda': 0.15425109212627983, 'colsample_bytree': 0.8700000000000001, 'num_leaves': 374, 'feature_fraction': 0.9014148041496401, 'bagging_fraction': 0.9880997375712501, 'bagging_freq': 0, 'min_child_samples': 163, 'subsample': 0.13, 'learning_rate': 0.08726647253178504, 'max_depth': 64, 'random_state': 42, 'n_jobs': 4}. Best is trial 11 with value: 11.276490888755632.[0m


Early stopping, best iteration is:
[129]	training's l1: 7.8531	training's rmse: 10.4678	valid_1's l1: 8.64445	valid_1's rmse: 11.2895
Training until validation scores don't improve for 500 rounds


[32m[I 2023-03-09 20:07:18,895][0m Trial 13 finished with value: 11.370469267506987 and parameters: {'objective': 'rmse', 'n_estimators': 975, 'reg_alpha': 0.00019027962520433116, 'reg_lambda': 7.732750647634496, 'colsample_bytree': 0.64, 'num_leaves': 253, 'feature_fraction': 0.844924534238805, 'bagging_fraction': 0.7734637134981783, 'bagging_freq': 2, 'min_child_samples': 110, 'subsample': 0.23, 'learning_rate': 0.1657405938034054, 'max_depth': 66, 'random_state': 42, 'n_jobs': 4}. Best is trial 11 with value: 11.276490888755632.[0m


[500]	training's l1: 6.53296	training's rmse: 8.83292	valid_1's l1: 8.9925	valid_1's rmse: 11.8567
Early stopping, best iteration is:
[32]	training's l1: 8.20931	training's rmse: 10.8655	valid_1's l1: 8.67982	valid_1's rmse: 11.3705
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 7.58372	training's rmse: 10.1092	valid_1's l1: 8.75186	valid_1's rmse: 11.444


[32m[I 2023-03-09 20:07:21,250][0m Trial 14 finished with value: 11.298492704585687 and parameters: {'objective': 'rmse', 'n_estimators': 2404, 'reg_alpha': 1.1257480255282582e-08, 'reg_lambda': 0.004070664603366423, 'colsample_bytree': 0.8700000000000001, 'num_leaves': 491, 'feature_fraction': 0.9959644436027598, 'bagging_fraction': 0.7854223908789157, 'bagging_freq': 2, 'min_child_samples': 192, 'subsample': 0.21000000000000002, 'learning_rate': 0.07829414885527444, 'max_depth': 51, 'random_state': 42, 'n_jobs': 4}. Best is trial 11 with value: 11.276490888755632.[0m


Early stopping, best iteration is:
[115]	training's l1: 8.21938	training's rmse: 10.8982	valid_1's l1: 8.64875	valid_1's rmse: 11.2985
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 6.91947	training's rmse: 9.24768	valid_1's l1: 9.09787	valid_1's rmse: 11.8798


[32m[I 2023-03-09 20:07:23,809][0m Trial 15 finished with value: 11.436205503211722 and parameters: {'objective': 'rmse', 'n_estimators': 1468, 'reg_alpha': 4.1092795491042935e-06, 'reg_lambda': 0.2598713219516337, 'colsample_bytree': 0.42, 'num_leaves': 17, 'feature_fraction': 0.4818253972515385, 'bagging_fraction': 0.6826266858196883, 'bagging_freq': 4, 'min_child_samples': 105, 'subsample': 0.43000000000000005, 'learning_rate': 0.24378548233837585, 'max_depth': 75, 'random_state': 42, 'n_jobs': 4}. Best is trial 11 with value: 11.276490888755632.[0m


Early stopping, best iteration is:
[84]	training's l1: 7.92667	training's rmse: 10.5401	valid_1's l1: 8.79231	valid_1's rmse: 11.4362
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 7.5177	training's rmse: 10.0248	valid_1's l1: 8.73644	valid_1's rmse: 11.4208


[32m[I 2023-03-09 20:07:27,267][0m Trial 16 finished with value: 11.300689263668275 and parameters: {'objective': 'rmse', 'n_estimators': 2563, 'reg_alpha': 0.0024117478030424465, 'reg_lambda': 8.999291073534094, 'colsample_bytree': 0.9900000000000001, 'num_leaves': 492, 'feature_fraction': 0.8740225688567068, 'bagging_fraction': 0.8671000899335967, 'bagging_freq': 10, 'min_child_samples': 135, 'subsample': 0.23, 'learning_rate': 0.04629364923707667, 'max_depth': 49, 'random_state': 42, 'n_jobs': 4}. Best is trial 11 with value: 11.276490888755632.[0m


Early stopping, best iteration is:
[152]	training's l1: 8.07174	training's rmse: 10.7298	valid_1's l1: 8.61593	valid_1's rmse: 11.3007
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 7.23843	training's rmse: 9.68902	valid_1's l1: 8.76643	valid_1's rmse: 11.5052


[32m[I 2023-03-09 20:07:29,961][0m Trial 17 finished with value: 11.2668142834646 and parameters: {'objective': 'rmse', 'n_estimators': 1184, 'reg_alpha': 2.1041396919956156e-05, 'reg_lambda': 0.002760870636449586, 'colsample_bytree': 0.78, 'num_leaves': 716, 'feature_fraction': 0.9248814482883356, 'bagging_fraction': 0.9957826765259391, 'bagging_freq': 3, 'min_child_samples': 204, 'subsample': 0.12000000000000001, 'learning_rate': 0.10790535328734975, 'max_depth': 36, 'random_state': 42, 'n_jobs': 4}. Best is trial 17 with value: 11.2668142834646.[0m


Early stopping, best iteration is:
[69]	training's l1: 8.17026	training's rmse: 10.8383	valid_1's l1: 8.61358	valid_1's rmse: 11.2668
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 7.15201	training's rmse: 9.57881	valid_1's l1: 8.81731	valid_1's rmse: 11.5663


[32m[I 2023-03-09 20:07:33,053][0m Trial 18 finished with value: 11.292245000333399 and parameters: {'objective': 'rmse', 'n_estimators': 1336, 'reg_alpha': 1.9942522169723364e-05, 'reg_lambda': 0.06592099786300652, 'colsample_bytree': 0.7200000000000001, 'num_leaves': 818, 'feature_fraction': 0.7835814809542765, 'bagging_fraction': 0.985440810649566, 'bagging_freq': 0, 'min_child_samples': 216, 'subsample': 0.96, 'learning_rate': 0.14317718481754427, 'max_depth': 29, 'random_state': 42, 'n_jobs': 4}. Best is trial 17 with value: 11.2668142834646.[0m


Early stopping, best iteration is:
[60]	training's l1: 8.177	training's rmse: 10.8472	valid_1's l1: 8.65236	valid_1's rmse: 11.2922
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 7.86686	training's rmse: 10.4706	valid_1's l1: 8.66797	valid_1's rmse: 11.3226


[32m[I 2023-03-09 20:07:34,953][0m Trial 19 finished with value: 11.257069858857713 and parameters: {'objective': 'rmse', 'n_estimators': 2039, 'reg_alpha': 8.663147967767764e-07, 'reg_lambda': 0.8835838365452832, 'colsample_bytree': 0.93, 'num_leaves': 746, 'feature_fraction': 0.8861554002651614, 'bagging_fraction': 0.8501930327533974, 'bagging_freq': 3, 'min_child_samples': 295, 'subsample': 0.1, 'learning_rate': 0.0816757155503611, 'max_depth': 37, 'random_state': 42, 'n_jobs': 4}. Best is trial 19 with value: 11.257069858857713.[0m


Early stopping, best iteration is:
[118]	training's l1: 8.3868	training's rmse: 11.0892	valid_1's l1: 8.59917	valid_1's rmse: 11.2571
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 8.85685	training's rmse: 11.6326	valid_1's l1: 8.8054	valid_1's rmse: 11.5222
[1000]	training's l1: 8.79808	training's rmse: 11.5679	valid_1's l1: 8.78494	valid_1's rmse: 11.4984
[1500]	training's l1: 8.76777	training's rmse: 11.5306	valid_1's l1: 8.77122	valid_1's rmse: 11.4861


[32m[I 2023-03-09 20:07:37,132][0m Trial 20 finished with value: 11.491423486465075 and parameters: {'objective': 'rmse', 'n_estimators': 1879, 'reg_alpha': 5.084627144194242e-07, 'reg_lambda': 0.4552225817327293, 'colsample_bytree': 0.64, 'num_leaves': 750, 'feature_fraction': 0.8799886607179407, 'bagging_fraction': 0.993268746996505, 'bagging_freq': 3, 'min_child_samples': 299, 'subsample': 0.2, 'learning_rate': 0.08597129105329003, 'max_depth': 1, 'random_state': 42, 'n_jobs': 4}. Best is trial 19 with value: 11.257069858857713.[0m


Did not meet early stopping. Best iteration is:
[1878]	training's l1: 8.75272	training's rmse: 11.5095	valid_1's l1: 8.77653	valid_1's rmse: 11.4914
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 7.86879	training's rmse: 10.48	valid_1's l1: 8.67721	valid_1's rmse: 11.3445


[32m[I 2023-03-09 20:07:39,493][0m Trial 21 finished with value: 11.295191434109036 and parameters: {'objective': 'rmse', 'n_estimators': 734, 'reg_alpha': 8.651447322300779e-07, 'reg_lambda': 1.0005406186929688, 'colsample_bytree': 0.92, 'num_leaves': 950, 'feature_fraction': 0.9202236632141763, 'bagging_fraction': 0.8324981727560408, 'bagging_freq': 2, 'min_child_samples': 263, 'subsample': 0.1, 'learning_rate': 0.06520051962023042, 'max_depth': 36, 'random_state': 42, 'n_jobs': 4}. Best is trial 19 with value: 11.257069858857713.[0m


Early stopping, best iteration is:
[162]	training's l1: 8.28651	training's rmse: 10.9902	valid_1's l1: 8.62198	valid_1's rmse: 11.2952
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 5.735	training's rmse: 8.03001	valid_1's l1: 9.04343	valid_1's rmse: 11.9406


[32m[I 2023-03-09 20:07:44,625][0m Trial 22 finished with value: 11.36110361214871 and parameters: {'objective': 'rmse', 'n_estimators': 1325, 'reg_alpha': 2.67170651648562e-05, 'reg_lambda': 0.03133644315616893, 'colsample_bytree': 0.79, 'num_leaves': 703, 'feature_fraction': 0.839949063381658, 'bagging_fraction': 0.703374068136904, 'bagging_freq': 0, 'min_child_samples': 72, 'subsample': 0.17, 'learning_rate': 0.12262535198134933, 'max_depth': 44, 'random_state': 42, 'n_jobs': 4}. Best is trial 19 with value: 11.257069858857713.[0m


Early stopping, best iteration is:
[37]	training's l1: 7.78435	training's rmse: 10.3578	valid_1's l1: 8.66224	valid_1's rmse: 11.3611
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 7.45942	training's rmse: 9.96955	valid_1's l1: 8.72782	valid_1's rmse: 11.4252


[32m[I 2023-03-09 20:07:48,117][0m Trial 23 finished with value: 11.302400852546333 and parameters: {'objective': 'rmse', 'n_estimators': 2172, 'reg_alpha': 4.02394751960767e-06, 'reg_lambda': 1.888370527728348, 'colsample_bytree': 0.92, 'num_leaves': 614, 'feature_fraction': 0.9277973156188782, 'bagging_fraction': 0.8804784118629287, 'bagging_freq': 3, 'min_child_samples': 141, 'subsample': 0.1, 'learning_rate': 0.0488638219123843, 'max_depth': 28, 'random_state': 42, 'n_jobs': 4}. Best is trial 19 with value: 11.257069858857713.[0m


Early stopping, best iteration is:
[161]	training's l1: 8.02205	training's rmse: 10.6664	valid_1's l1: 8.63863	valid_1's rmse: 11.3024
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 7.56783	training's rmse: 10.0916	valid_1's l1: 8.76603	valid_1's rmse: 11.4544


[32m[I 2023-03-09 20:07:50,515][0m Trial 24 finished with value: 11.252932307735009 and parameters: {'objective': 'rmse', 'n_estimators': 1099, 'reg_alpha': 5.5256908175709204e-05, 'reg_lambda': 0.4745999148950493, 'colsample_bytree': 1.0, 'num_leaves': 831, 'feature_fraction': 0.7972367508375433, 'bagging_fraction': 0.8550030435185526, 'bagging_freq': 7, 'min_child_samples': 208, 'subsample': 0.28, 'learning_rate': 0.08787383928994455, 'max_depth': 59, 'random_state': 42, 'n_jobs': 4}. Best is trial 24 with value: 11.252932307735009.[0m


Early stopping, best iteration is:
[115]	training's l1: 8.17682	training's rmse: 10.8646	valid_1's l1: 8.59641	valid_1's rmse: 11.2529
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 7.47901	training's rmse: 9.98583	valid_1's l1: 8.74461	valid_1's rmse: 11.4512


[32m[I 2023-03-09 20:07:52,861][0m Trial 25 finished with value: 11.278631549357327 and parameters: {'objective': 'rmse', 'n_estimators': 1667, 'reg_alpha': 6.276965191711898e-05, 'reg_lambda': 0.023362473093070057, 'colsample_bytree': 0.79, 'num_leaves': 859, 'feature_fraction': 0.7482601442773111, 'bagging_fraction': 0.8727616216907129, 'bagging_freq': 15, 'min_child_samples': 203, 'subsample': 0.29000000000000004, 'learning_rate': 0.09486505032058797, 'max_depth': 56, 'random_state': 42, 'n_jobs': 4}. Best is trial 24 with value: 11.252932307735009.[0m


Early stopping, best iteration is:
[113]	training's l1: 8.14634	training's rmse: 10.8241	valid_1's l1: 8.59117	valid_1's rmse: 11.2786
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 7.64853	training's rmse: 10.1882	valid_1's l1: 8.76692	valid_1's rmse: 11.4741


[32m[I 2023-03-09 20:07:54,694][0m Trial 26 finished with value: 11.29206973291361 and parameters: {'objective': 'rmse', 'n_estimators': 2041, 'reg_alpha': 0.00012357908291162648, 'reg_lambda': 0.7814468764434948, 'colsample_bytree': 0.9, 'num_leaves': 781, 'feature_fraction': 0.817124417396725, 'bagging_fraction': 0.720450566805905, 'bagging_freq': 7, 'min_child_samples': 249, 'subsample': 0.42000000000000004, 'learning_rate': 0.1464771178591556, 'max_depth': 96, 'random_state': 42, 'n_jobs': 4}. Best is trial 24 with value: 11.252932307735009.[0m


Early stopping, best iteration is:
[106]	training's l1: 8.25996	training's rmse: 10.9596	valid_1's l1: 8.64481	valid_1's rmse: 11.2921
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 7.77394	training's rmse: 10.3664	valid_1's l1: 8.70755	valid_1's rmse: 11.3631


[32m[I 2023-03-09 20:07:56,871][0m Trial 27 finished with value: 11.276566737207855 and parameters: {'objective': 'rmse', 'n_estimators': 2859, 'reg_alpha': 1.0453573143560626e-05, 'reg_lambda': 0.06866360638532719, 'colsample_bytree': 0.7000000000000001, 'num_leaves': 940, 'feature_fraction': 0.8872591183827719, 'bagging_fraction': 0.923641802851759, 'bagging_freq': 7, 'min_child_samples': 280, 'subsample': 0.26, 'learning_rate': 0.07535647040447459, 'max_depth': 41, 'random_state': 42, 'n_jobs': 4}. Best is trial 24 with value: 11.252932307735009.[0m


Early stopping, best iteration is:
[162]	training's l1: 8.21373	training's rmse: 10.9029	valid_1's l1: 8.63534	valid_1's rmse: 11.2766
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 7.63246	training's rmse: 10.1704	valid_1's l1: 8.77044	valid_1's rmse: 11.4516


[32m[I 2023-03-09 20:07:59,086][0m Trial 28 finished with value: 11.338167638725068 and parameters: {'objective': 'rmse', 'n_estimators': 1239, 'reg_alpha': 1.4512568679305993e-07, 'reg_lambda': 0.21747416328590222, 'colsample_bytree': 0.29, 'num_leaves': 672, 'feature_fraction': 0.6839910573441781, 'bagging_fraction': 0.8478750719046121, 'bagging_freq': 4, 'min_child_samples': 236, 'subsample': 0.17, 'learning_rate': 0.10497696342312746, 'max_depth': 22, 'random_state': 42, 'n_jobs': 4}. Best is trial 24 with value: 11.252932307735009.[0m


Early stopping, best iteration is:
[95]	training's l1: 8.29385	training's rmse: 10.9889	valid_1's l1: 8.64699	valid_1's rmse: 11.3382
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 7.82379	training's rmse: 10.4013	valid_1's l1: 8.77637	valid_1's rmse: 11.4801


[32m[I 2023-03-09 20:08:00,757][0m Trial 29 finished with value: 11.372599704225342 and parameters: {'objective': 'rmse', 'n_estimators': 4953, 'reg_alpha': 0.0004614574860624545, 'reg_lambda': 0.0013960520969896993, 'colsample_bytree': 0.5700000000000001, 'num_leaves': 880, 'feature_fraction': 0.7810703873991469, 'bagging_fraction': 0.6399908695459209, 'bagging_freq': 5, 'min_child_samples': 298, 'subsample': 0.65, 'learning_rate': 0.19372057178839522, 'max_depth': 57, 'random_state': 42, 'n_jobs': 4}. Best is trial 24 with value: 11.252932307735009.[0m


Early stopping, best iteration is:
[150]	training's l1: 8.2334	training's rmse: 10.9409	valid_1's l1: 8.68313	valid_1's rmse: 11.3726
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 7.80002	training's rmse: 10.3968	valid_1's l1: 8.75891	valid_1's rmse: 11.4234


[32m[I 2023-03-09 20:08:03,816][0m Trial 30 finished with value: 11.304208087925865 and parameters: {'objective': 'rmse', 'n_estimators': 1525, 'reg_alpha': 1.0642932346464493e-06, 'reg_lambda': 0.03885102624044091, 'colsample_bytree': 0.64, 'num_leaves': 721, 'feature_fraction': 0.5443242855455859, 'bagging_fraction': 0.7516859712952432, 'bagging_freq': 8, 'min_child_samples': 211, 'subsample': 0.4, 'learning_rate': 0.079268688331184, 'max_depth': 32, 'random_state': 42, 'n_jobs': 4}. Best is trial 24 with value: 11.252932307735009.[0m


Early stopping, best iteration is:
[136]	training's l1: 8.30704	training's rmse: 11.0031	valid_1's l1: 8.64371	valid_1's rmse: 11.3042
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 7.57949	training's rmse: 10.1211	valid_1's l1: 8.71368	valid_1's rmse: 11.397


[32m[I 2023-03-09 20:08:06,524][0m Trial 31 finished with value: 11.263944499796132 and parameters: {'objective': 'rmse', 'n_estimators': 1072, 'reg_alpha': 1.8551194623862154e-06, 'reg_lambda': 2.8328475616511244, 'colsample_bytree': 0.9600000000000001, 'num_leaves': 556, 'feature_fraction': 0.9439055999834804, 'bagging_fraction': 0.8212877562299359, 'bagging_freq': 1, 'min_child_samples': 173, 'subsample': 0.16, 'learning_rate': 0.0604664579961592, 'max_depth': 74, 'random_state': 42, 'n_jobs': 4}. Best is trial 24 with value: 11.252932307735009.[0m


Early stopping, best iteration is:
[168]	training's l1: 8.06418	training's rmse: 10.7216	valid_1's l1: 8.60725	valid_1's rmse: 11.2639
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 7.36973	training's rmse: 9.82917	valid_1's l1: 8.80624	valid_1's rmse: 11.5075


[32m[I 2023-03-09 20:08:08,915][0m Trial 32 finished with value: 11.30780407029536 and parameters: {'objective': 'rmse', 'n_estimators': 1071, 'reg_alpha': 4.048039553774624e-05, 'reg_lambda': 2.4832484899650393, 'colsample_bytree': 0.93, 'num_leaves': 545, 'feature_fraction': 0.9398533998268671, 'bagging_fraction': 0.855480460675309, 'bagging_freq': 3, 'min_child_samples': 181, 'subsample': 0.17, 'learning_rate': 0.09428670973961414, 'max_depth': 77, 'random_state': 42, 'n_jobs': 4}. Best is trial 24 with value: 11.252932307735009.[0m


Early stopping, best iteration is:
[65]	training's l1: 8.28488	training's rmse: 10.9802	valid_1's l1: 8.63413	valid_1's rmse: 11.3078
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 7.49953	training's rmse: 10.0044	valid_1's l1: 8.74283	valid_1's rmse: 11.4413


[32m[I 2023-03-09 20:08:11,230][0m Trial 33 finished with value: 11.298410369992752 and parameters: {'objective': 'rmse', 'n_estimators': 738, 'reg_alpha': 1.25596249886939e-06, 'reg_lambda': 0.5553250589428943, 'colsample_bytree': 0.8500000000000001, 'num_leaves': 783, 'feature_fraction': 0.9194841437824557, 'bagging_fraction': 0.9440428592067418, 'bagging_freq': 1, 'min_child_samples': 254, 'subsample': 0.5, 'learning_rate': 0.10996217137560463, 'max_depth': 75, 'random_state': 42, 'n_jobs': 4}. Best is trial 24 with value: 11.252932307735009.[0m


Early stopping, best iteration is:
[134]	training's l1: 8.06001	training's rmse: 10.7194	valid_1's l1: 8.64789	valid_1's rmse: 11.2984
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 7.80002	valid_1's l1: 8.52955
[1000]	training's l1: 7.59074	valid_1's l1: 8.51175


[32m[I 2023-03-09 20:08:14,655][0m Trial 34 finished with value: 11.407863207898973 and parameters: {'objective': 'mae', 'n_estimators': 1122, 'reg_alpha': 1.9947199671709082e-05, 'reg_lambda': 2.720920289565095, 'colsample_bytree': 1.0, 'num_leaves': 641, 'feature_fraction': 0.8660697787396618, 'bagging_fraction': 0.8248588331647093, 'bagging_freq': 5, 'min_child_samples': 273, 'subsample': 0.26, 'learning_rate': 0.0710781590596512, 'max_depth': 45, 'random_state': 42, 'n_jobs': 4}. Best is trial 24 with value: 11.252932307735009.[0m


Did not meet early stopping. Best iteration is:
[1122]	training's l1: 7.55734	valid_1's l1: 8.51054
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 7.48414	valid_1's l1: 8.46493
[1000]	training's l1: 7.2209	valid_1's l1: 8.47753


[32m[I 2023-03-09 20:08:20,396][0m Trial 35 finished with value: 11.393029063827232 and parameters: {'objective': 'mae', 'n_estimators': 1569, 'reg_alpha': 9.121157099417728e-06, 'reg_lambda': 0.5383399678531874, 'colsample_bytree': 0.9400000000000001, 'num_leaves': 531, 'feature_fraction': 0.8061576792407689, 'bagging_fraction': 0.7568380372732623, 'bagging_freq': 11, 'min_child_samples': 132, 'subsample': 0.16, 'learning_rate': 0.05486989404255662, 'max_depth': 86, 'random_state': 42, 'n_jobs': 4}. Best is trial 24 with value: 11.252932307735009.[0m


Early stopping, best iteration is:
[715]	training's l1: 7.34537	valid_1's l1: 8.45665
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 7.74683	training's rmse: 10.3299	valid_1's l1: 8.68144	valid_1's rmse: 11.3444


[32m[I 2023-03-09 20:08:23,168][0m Trial 36 finished with value: 11.263238610005398 and parameters: {'objective': 'rmse', 'n_estimators': 2027, 'reg_alpha': 9.764029616389882e-05, 'reg_lambda': 0.09371159002416805, 'colsample_bytree': 0.8, 'num_leaves': 902, 'feature_fraction': 0.9309948390511111, 'bagging_fraction': 0.9156003054532769, 'bagging_freq': 6, 'min_child_samples': 201, 'subsample': 0.35, 'learning_rate': 0.046533061325759024, 'max_depth': 57, 'random_state': 42, 'n_jobs': 4}. Best is trial 24 with value: 11.252932307735009.[0m


Early stopping, best iteration is:
[178]	training's l1: 8.18019	training's rmse: 10.8569	valid_1's l1: 8.6055	valid_1's rmse: 11.2632
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 7.89079	training's rmse: 10.5039	valid_1's l1: 8.65224	valid_1's rmse: 11.3125


[32m[I 2023-03-09 20:08:25,912][0m Trial 37 finished with value: 11.266735068744262 and parameters: {'objective': 'rmse', 'n_estimators': 2882, 'reg_alpha': 0.0003295083653031715, 'reg_lambda': 0.14736750420013867, 'colsample_bytree': 0.8500000000000001, 'num_leaves': 996, 'feature_fraction': 0.9495927037241338, 'bagging_fraction': 0.8940315528299444, 'bagging_freq': 6, 'min_child_samples': 227, 'subsample': 0.37, 'learning_rate': 0.04118484380625394, 'max_depth': 54, 'random_state': 42, 'n_jobs': 4}. Best is trial 24 with value: 11.252932307735009.[0m


Early stopping, best iteration is:
[235]	training's l1: 8.19291	training's rmse: 10.8728	valid_1's l1: 8.60003	valid_1's rmse: 11.2667
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 7.91529	valid_1's l1: 8.55066
[1000]	training's l1: 7.67204	valid_1's l1: 8.54928


[32m[I 2023-03-09 20:08:30,007][0m Trial 38 finished with value: 11.412475362705944 and parameters: {'objective': 'mae', 'n_estimators': 1918, 'reg_alpha': 0.00014114485424902037, 'reg_lambda': 0.013572990009146406, 'colsample_bytree': 0.9, 'num_leaves': 892, 'feature_fraction': 0.8542899891563046, 'bagging_fraction': 0.5846274811324985, 'bagging_freq': 9, 'min_child_samples': 157, 'subsample': 0.49, 'learning_rate': 0.036091174363382744, 'max_depth': 72, 'random_state': 42, 'n_jobs': 4}. Best is trial 24 with value: 11.252932307735009.[0m


Early stopping, best iteration is:
[721]	training's l1: 7.78976	valid_1's l1: 8.53698
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 7.67165	training's rmse: 10.2385	valid_1's l1: 8.70814	valid_1's rmse: 11.3687


[32m[I 2023-03-09 20:08:33,331][0m Trial 39 finished with value: 11.281492831087144 and parameters: {'objective': 'rmse', 'n_estimators': 3215, 'reg_alpha': 0.0011153785839254219, 'reg_lambda': 1.912378620360779, 'colsample_bytree': 0.8200000000000001, 'num_leaves': 809, 'feature_fraction': 0.6981132646040489, 'bagging_fraction': 0.9338438441700866, 'bagging_freq': 7, 'min_child_samples': 187, 'subsample': 0.32, 'learning_rate': 0.053395464840269344, 'max_depth': 59, 'random_state': 42, 'n_jobs': 4}. Best is trial 24 with value: 11.252932307735009.[0m


Early stopping, best iteration is:
[212]	training's l1: 8.05688	training's rmse: 10.7095	valid_1's l1: 8.63052	valid_1's rmse: 11.2815
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 7.74371	valid_1's l1: 8.56156
[1000]	training's l1: 7.51806	valid_1's l1: 8.5587
[1500]	training's l1: 7.40917	valid_1's l1: 8.5563
[2000]	training's l1: 7.33847	valid_1's l1: 8.55705
Early stopping, best iteration is:
[1606]	training's l1: 7.39228	valid_1's l1: 8.55275


[32m[I 2023-03-09 20:08:41,111][0m Trial 40 finished with value: 11.429112180808236 and parameters: {'objective': 'mae', 'n_estimators': 2651, 'reg_alpha': 0.005826257455838805, 'reg_lambda': 0.701638066586978, 'colsample_bytree': 0.11, 'num_leaves': 920, 'feature_fraction': 0.9582444025160511, 'bagging_fraction': 0.8279039917660267, 'bagging_freq': 5, 'min_child_samples': 237, 'subsample': 0.29000000000000004, 'learning_rate': 0.06300570555782344, 'max_depth': 71, 'random_state': 42, 'n_jobs': 4}. Best is trial 24 with value: 11.252932307735009.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's l1: 7.87047	training's rmse: 10.4831	valid_1's l1: 8.66601	valid_1's rmse: 11.3057


[32m[I 2023-03-09 20:08:43,561][0m Trial 41 finished with value: 11.271581534640614 and parameters: {'objective': 'rmse', 'n_estimators': 2996, 'reg_alpha': 0.000397922488352631, 'reg_lambda': 0.09763365649364225, 'colsample_bytree': 0.9700000000000001, 'num_leaves': 841, 'feature_fraction': 0.9710055501996371, 'bagging_fraction': 0.8940796004506526, 'bagging_freq': 6, 'min_child_samples': 228, 'subsample': 0.38, 'learning_rate': 0.04462881216495284, 'max_depth': 54, 'random_state': 42, 'n_jobs': 4}. Best is trial 24 with value: 11.252932307735009.[0m


Early stopping, best iteration is:
[144]	training's l1: 8.37866	training's rmse: 11.0837	valid_1's l1: 8.58472	valid_1's rmse: 11.2716
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 7.88864	training's rmse: 10.518	valid_1's l1: 8.64532	valid_1's rmse: 11.2944


[32m[I 2023-03-09 20:08:46,665][0m Trial 42 finished with value: 11.243430552142563 and parameters: {'objective': 'rmse', 'n_estimators': 3178, 'reg_alpha': 0.00019334036586894884, 'reg_lambda': 0.19107588354665422, 'colsample_bytree': 0.8500000000000001, 'num_leaves': 996, 'feature_fraction': 0.9442666456714747, 'bagging_fraction': 0.9006993638999077, 'bagging_freq': 8, 'min_child_samples': 220, 'subsample': 0.36, 'learning_rate': 0.03990683788171536, 'max_depth': 61, 'random_state': 42, 'n_jobs': 4}. Best is trial 42 with value: 11.243430552142563.[0m


Early stopping, best iteration is:
[309]	training's l1: 8.0896	training's rmse: 10.7508	valid_1's l1: 8.60588	valid_1's rmse: 11.2434
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 7.77236	training's rmse: 10.3474	valid_1's l1: 8.72898	valid_1's rmse: 11.3819


[32m[I 2023-03-09 20:08:49,273][0m Trial 43 finished with value: 11.26320015686333 and parameters: {'objective': 'rmse', 'n_estimators': 3241, 'reg_alpha': 0.00011728027211182194, 'reg_lambda': 0.3284391201743375, 'colsample_bytree': 0.9500000000000001, 'num_leaves': 974, 'feature_fraction': 0.8992139040726337, 'bagging_fraction': 0.7390050119927085, 'bagging_freq': 9, 'min_child_samples': 176, 'subsample': 0.33999999999999997, 'learning_rate': 0.05451101122091559, 'max_depth': 80, 'random_state': 42, 'n_jobs': 4}. Best is trial 42 with value: 11.243430552142563.[0m


Early stopping, best iteration is:
[180]	training's l1: 8.17332	training's rmse: 10.8623	valid_1's l1: 8.62238	valid_1's rmse: 11.2632
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 7.83466	training's rmse: 10.4475	valid_1's l1: 8.6724	valid_1's rmse: 11.3181


[32m[I 2023-03-09 20:08:52,154][0m Trial 44 finished with value: 11.294428943215406 and parameters: {'objective': 'rmse', 'n_estimators': 4016, 'reg_alpha': 6.938569239592673e-05, 'reg_lambda': 0.2988623779221827, 'colsample_bytree': 0.7400000000000001, 'num_leaves': 969, 'feature_fraction': 0.8947796330455584, 'bagging_fraction': 0.9431337438500591, 'bagging_freq': 9, 'min_child_samples': 203, 'subsample': 0.48, 'learning_rate': 0.03615717183313165, 'max_depth': 80, 'random_state': 42, 'n_jobs': 4}. Best is trial 42 with value: 11.243430552142563.[0m


Early stopping, best iteration is:
[189]	training's l1: 8.24799	training's rmse: 10.9296	valid_1's l1: 8.62502	valid_1's rmse: 11.2944
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 7.98554	training's rmse: 10.6291	valid_1's l1: 8.62152	valid_1's rmse: 11.2597


[32m[I 2023-03-09 20:08:55,390][0m Trial 45 finished with value: 11.253124838267926 and parameters: {'objective': 'rmse', 'n_estimators': 3339, 'reg_alpha': 8.306439418425231e-05, 'reg_lambda': 0.01884965179010764, 'colsample_bytree': 0.89, 'num_leaves': 999, 'feature_fraction': 0.8373905386120342, 'bagging_fraction': 0.7995275267416404, 'bagging_freq': 8, 'min_child_samples': 193, 'subsample': 0.5700000000000001, 'learning_rate': 0.03191851413608007, 'max_depth': 96, 'random_state': 42, 'n_jobs': 4}. Best is trial 42 with value: 11.243430552142563.[0m


Early stopping, best iteration is:
[320]	training's l1: 8.15439	training's rmse: 10.8312	valid_1's l1: 8.60307	valid_1's rmse: 11.2531
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 8.22943	training's rmse: 10.9128	valid_1's l1: 8.66502	valid_1's rmse: 11.3103


[32m[I 2023-03-09 20:08:57,938][0m Trial 46 finished with value: 11.342711974226212 and parameters: {'objective': 'rmse', 'n_estimators': 3305, 'reg_alpha': 0.0010702583813344364, 'reg_lambda': 0.014295208691526347, 'colsample_bytree': 0.89, 'num_leaves': 1000, 'feature_fraction': 0.7738255535550557, 'bagging_fraction': 0.7512409118493533, 'bagging_freq': 11, 'min_child_samples': 247, 'subsample': 0.5700000000000001, 'learning_rate': 0.030806728123304098, 'max_depth': 95, 'random_state': 42, 'n_jobs': 4}. Best is trial 42 with value: 11.243430552142563.[0m


Early stopping, best iteration is:
[285]	training's l1: 8.42129	training's rmse: 11.1413	valid_1's l1: 8.64527	valid_1's rmse: 11.3427
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 7.77349	training's rmse: 10.3555	valid_1's l1: 8.66813	valid_1's rmse: 11.3236


[32m[I 2023-03-09 20:09:02,750][0m Trial 47 finished with value: 11.302511899296116 and parameters: {'objective': 'rmse', 'n_estimators': 3675, 'reg_alpha': 0.00025077622677866505, 'reg_lambda': 0.033335664562826994, 'colsample_bytree': 1.0, 'num_leaves': 936, 'feature_fraction': 0.8125398092443393, 'bagging_fraction': 0.7976164774168805, 'bagging_freq': 10, 'min_child_samples': 117, 'subsample': 0.6799999999999999, 'learning_rate': 0.025198646927366394, 'max_depth': 99, 'random_state': 42, 'n_jobs': 4}. Best is trial 42 with value: 11.243430552142563.[0m


Early stopping, best iteration is:
[370]	training's l1: 7.90971	training's rmse: 10.531	valid_1's l1: 8.65578	valid_1's rmse: 11.3025
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 7.94833	valid_1's l1: 8.53951
[1000]	training's l1: 7.69624	valid_1's l1: 8.53049
[1500]	training's l1: 7.54741	valid_1's l1: 8.52072
Early stopping, best iteration is:
[1480]	training's l1: 7.5514	valid_1's l1: 8.51933


[32m[I 2023-03-09 20:09:11,766][0m Trial 48 finished with value: 11.402905445047502 and parameters: {'objective': 'mae', 'n_estimators': 3445, 'reg_alpha': 5.6586727808589285e-05, 'reg_lambda': 0.0003355129712106877, 'colsample_bytree': 0.45, 'num_leaves': 862, 'feature_fraction': 0.8373751428257092, 'bagging_fraction': 0.6664200351600754, 'bagging_freq': 8, 'min_child_samples': 149, 'subsample': 0.53, 'learning_rate': 0.02214661215913074, 'max_depth': 91, 'random_state': 42, 'n_jobs': 4}. Best is trial 42 with value: 11.243430552142563.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's l1: 7.66014	training's rmse: 10.2014	valid_1's l1: 8.79963	valid_1's rmse: 11.4784


[32m[I 2023-03-09 20:09:14,091][0m Trial 49 finished with value: 11.347162833225125 and parameters: {'objective': 'rmse', 'n_estimators': 4284, 'reg_alpha': 0.012694098572295007, 'reg_lambda': 0.007757012017985964, 'colsample_bytree': 0.8500000000000001, 'num_leaves': 970, 'feature_fraction': 0.9925457919015537, 'bagging_fraction': 0.7292277142183364, 'bagging_freq': 12, 'min_child_samples': 177, 'subsample': 0.8, 'learning_rate': 0.07216984412987447, 'max_depth': 85, 'random_state': 42, 'n_jobs': 4}. Best is trial 42 with value: 11.243430552142563.[0m
[32m[I 2023-03-09 20:09:14,111][0m A new study created in memory with name: no-name-e3b0c97d-9cb1-429b-bcab-e46b5061e77d[0m


Early stopping, best iteration is:
[108]	training's l1: 8.31883	training's rmse: 11.0212	valid_1's l1: 8.6242	valid_1's rmse: 11.3472
Number of finished trials: 50
Best LGBM trial parameters: {'objective': 'rmse', 'n_estimators': 3178, 'reg_alpha': 0.00019334036586894884, 'reg_lambda': 0.19107588354665422, 'colsample_bytree': 0.8500000000000001, 'num_leaves': 996, 'feature_fraction': 0.9442666456714747, 'bagging_fraction': 0.9006993638999077, 'bagging_freq': 8, 'min_child_samples': 220, 'subsample': 0.36, 'learning_rate': 0.03990683788171536, 'max_depth': 61, 'random_state': 42, 'n_jobs': 4}
Best score: 11.243430552142563


[32m[I 2023-03-09 20:09:14,455][0m Trial 0 finished with value: 11.161201401381254 and parameters: {'learning_rate': 0.3706382021594186, 'l2_leaf_reg': 99.08884668380527, 'bagging_temperature': 0.44990725289104233, 'random_strength': 1.2351644605920575, 'depth': 4, 'min_data_in_leaf': 205}. Best is trial 0 with value: 11.161201401381254.[0m
[32m[I 2023-03-09 20:09:14,616][0m Trial 1 finished with value: 11.37570498089918 and parameters: {'learning_rate': 0.17333489151406417, 'l2_leaf_reg': 24.10950580510785, 'bagging_temperature': 2.858527299777863, 'random_strength': 1.3570820813951427, 'depth': 2, 'min_data_in_leaf': 9}. Best is trial 0 with value: 11.161201401381254.[0m
[32m[I 2023-03-09 20:09:14,892][0m Trial 2 finished with value: 11.178612379139526 and parameters: {'learning_rate': 0.2262571871529166, 'l2_leaf_reg': 89.84650558278662, 'bagging_temperature': 1.3259962487112793, 'random_strength': 1.2784968914908483, 'depth': 6, 'min_data_in_leaf': 6}. Best is trial 0 with 

Number of finished trials: 50
Best Cat trial parameters: {'learning_rate': 0.1743079243263554, 'l2_leaf_reg': 8.667256605954844, 'bagging_temperature': 0.18681216056626537, 'random_strength': 1.0667074773083682, 'depth': 6, 'min_data_in_leaf': 175}
Best score: 11.073772740217834
CPU times: user 1h 2min 38s, sys: 1min 58s, total: 1h 4min 37s
Wall time: 18min 20s


<div style="background-color:rgba(177, 156, 217, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Train Models with Cross Validation</h1>
</div>

In [26]:
def show_tree_model_fi(model, features:List[str]) -> None:
    print("\n=== Model Feature Importance ===")
    for i in model.feature_importances_.argsort()[::-1]:
        print(features[i], model.feature_importances_[i]/model.feature_importances_.sum())

def save_oof_predictions(model_name:str, final_valid_predictions, oof:pd.DataFrame) -> pd.DataFrame:
    final_valid_predictions_df = process_valid_predictions(
        final_valid_predictions, ID, model_name
    )
    display(final_valid_predictions_df.head())
    oof[f"pred_{model_name}"] = final_valid_predictions_df[f"pred_{model_name}"]

    return oof

def save_test_predictions(model_name:str, final_test_predictions, submission_df:pd.DataFrame, result_field:str=TARGET) -> None:
    result = merge_test_predictions(final_test_predictions, Config.calc_probability)
    # result[:20]
    submission_df[f"target_{model_name}"] = result #.astype(int)
    #     submission_df.head(10)
    ss = submission_df[[ID, f"target_{model_name}"]].copy().reset_index(drop=True)
    ss.rename(columns={f"target_{model_name}": result_field}, inplace=True)
    ss.to_csv(
        f"submission_{model_name}.csv", index=False
    )  # Can submit the individual model
    print("=== Target Value Counts ===")
#     display(ss[TARGET].value_counts())
    ss.head(10)

def process_valid_predictions(final_valid_predictions, train_id, model_name:str) -> pd.DataFrame:
    model = f"pred_{model_name}"
    final_valid_predictions_df = pd.DataFrame.from_dict(
        final_valid_predictions, orient="index"
    ).reset_index()
    final_valid_predictions_df.columns = [train_id, model]
    final_valid_predictions_df.set_index(train_id, inplace=True)
    final_valid_predictions_df.sort_index(inplace=True)
    final_valid_predictions_df.to_csv(f"train_pred_{model_name}.csv", index=True)

    return final_valid_predictions_df

def add_score(score_df:pd.DataFrame, model_name:str, score:float, std:float):
    dict1 = {"Model": model_name, "Score": cv_score, "StdDev": std_dev}
    score_df = score_df.append(dict1, ignore_index=True)
    return score_df

In [27]:
def train_cv_model(
    df:pd.DataFrame,
    test:pd.DataFrame,
    get_model_fn,
    FEATURES:List[str],
    TARGET:str,
    calc_probability:bool,
    rowid,
    params,
    n_folds:int=5,
    seed:int=42,
):

    final_test_predictions = []
    final_valid_predictions = {}
    fold_scores = []  # Scores of Validation Set
    feature_importance_lst = []

    test = test[FEATURES].copy()

    for fold in range(n_folds):
        print(10 * "=", f"Fold {fold+1}/{n_folds}", 10 * "=")

        start_time = time.time()

        xtrain = df[df.fold != fold].reset_index(
            drop=True
        )  # Everything not in validation fold
        xvalid = df[df.fold == fold].reset_index(drop=True)
        xtest = test.copy()

        valid_ids = xvalid.id.values.tolist()  # Id's of everything in validation fold

        ytrain = xtrain[TARGET]
        yvalid = xvalid[TARGET]

        xtrain = xtrain[FEATURES]
        xvalid = xvalid[FEATURES]

        scaler = preprocessing.StandardScaler()
#         scaler = preprocessing.MinMaxScaler()
        xtrain = scaler.fit(xtrain).transform(xtrain)
        xvalid = scaler.transform(xvalid)
        xtest = scaler.transform(xtest)

        model = get_model_fn # ()

        model.fit(
            xtrain,
            ytrain,
        )
        if calc_probability:
            preds_valid = model.predict_proba(xvalid)[:, 1]
            test_preds = model.predict_proba(xtest)[:, 1]
        else:
            preds_valid = model.predict(xvalid)
            test_preds = model.predict(xtest)

        preds_valid_class = model.predict(xvalid)
        
        final_test_predictions.append(test_preds)
        final_valid_predictions.update(dict(zip(valid_ids, preds_valid)))

#         fold_score = metrics.accuracy_score(yvalid, preds_valid_class)  # Validation Set Score
        fold_score = metrics.mean_absolute_error(
            yvalid, preds_valid
        ) 
#         fold_score = metrics.roc_auc_score(yvalid.values, preds_valid)  # Validation Set Score
#         show_classification_scores(yvalid.values, preds_valid_class)

#         fold_score = metrics.roc_auc_score(yvalid, preds_valid)  # Validation Set Score
#         fold_score = metrics.mean_squared_error(yvalid, preds_valid, squared=False)
        fold_scores.append(fold_score)
        #         importance_list.append(model.coef_.ravel())

        fi = []
        # Feature importance
#         fi = pd.DataFrame(
#             index=FEATURES,
#             data=model.coef_.ravel(),
#             columns=[f"{fold}_importance"],
#         )
        
        feature_importance_lst.append(fi)

        run_time = time.time() - start_time

        print(f"fold: {fold+1}, Score: {fold_score}, Run Time: {run_time:.2f}")

    return (
        model,
        feature_importance_lst,
        fold_scores,
        final_valid_predictions,
        final_test_predictions,
    )


def train_xgb_model(
    df:pd.DataFrame,
    test:pd.DataFrame,
    get_model_fn,
    FEATURES:List[str],
    TARGET:str,
    calc_probability:bool,
    rowid:str,
    params,
    n_folds:int=5,
    seed:int=42,
):

    print(params)
    final_test_predictions = []
    final_valid_predictions = {}
    fold_scores = []  # Scores of Validation Set
    feature_importance_lst = []

    test = test[FEATURES].copy()

    for fold in range(n_folds):
        print(10 * "=", f"Fold {fold+1}/{n_folds}", 10 * "=")

        start_time = time.time()

        xtrain = df[df.fold != fold].reset_index(
            drop=True
        )  # Everything not in validation fold
        xvalid = df[df.fold == fold].reset_index(drop=True)
        xtest = test.copy()

        valid_ids = xvalid.id.values.tolist()  # Id's of everything in validation fold

        ytrain = xtrain[TARGET]
        yvalid = xvalid[TARGET]

        xtrain = xtrain[FEATURES]
        xvalid = xvalid[FEATURES]

        model = get_model_fn # (params)

        model.fit(
            xtrain,
            ytrain,
            eval_set=[(xvalid, yvalid)],
            #             eval_metric="acc",  # auc
            verbose=0,
            #             early_stopping_rounds=3000,
            #             callbacks=[
            #                 xgb.log_evaluation(0),
            #                 xgb.early_stopping(500, False, True),
            #             ],
        )

        if calc_probability:
            preds_valid = model.predict_proba(xvalid)[:, 1]
            test_preds = model.predict_proba(xtest)[:, 1]
        else:
            preds_valid = model.predict(xvalid)
            test_preds = model.predict(xtest)

        preds_valid_class = model.predict(xvalid)
        
        final_test_predictions.append(test_preds)
        if Config.debug:
            print(f"GT Type: {type(yvalid.values)}")
            print(f"Preds Type: {type(preds_valid_class)}")
            print(f"         GT:{yvalid.values[:20]}")
            print(f"Preds Class:{preds_valid_class[:20]}")
            print(f"Preds Prob:{preds_valid[:20]}")
        final_valid_predictions.update(dict(zip(valid_ids, preds_valid_class)))

#         fold_score = metrics.cohen_kappa_score(yvalid,  preds_valid_class, weights = "quadratic")
#         fold_score = metrics.roc_auc_score(yvalid.values, preds_valid)  # Validation Set Score
#         show_classification_scores(yvalid.values, preds_valid_class)
        fold_score = metrics.mean_absolute_error(
            yvalid, preds_valid
        )  # Validation Set Score
#         fold_score = metrics.mean_squared_error(yvalid, preds_valid, squared=False)
        fold_scores.append(fold_score)

        # Feature importance
        fi = pd.DataFrame(
            index=FEATURES,
            data=model.feature_importances_,
            columns=[f"{fold}_importance"],
        )
        feature_importance_lst.append(fi)

        run_time = time.time() - start_time

        print(f"fold: {fold+1}, Score: {fold_score}, Run Time: {run_time:.2f}")

    return (
        model,
        feature_importance_lst,
        fold_scores,
        final_valid_predictions,
        final_test_predictions,
    )        

In [28]:
def run_linear_model(train:pd.DataFrame, test:pd.DataFrame, model_dict, model_name:str, features:List[str], oof:pd.DataFrame) -> (float, float, pd.DataFrame):
    (
        model,
        feature_importance_lst,
        fold_scores,
        final_valid_predictions,
        final_test_predictions,
    ) = train_cv_model(
        train,
        test,
        model_dict[model_name],
        features,
        TARGET,
        False, #Config.calc_probability,
        ID,
        {},
        Config.N_FOLDS,
        Config.seed,
    )

    cv_score, std_dev = show_fold_scores(fold_scores)

    oof = save_oof_predictions(model_name, final_valid_predictions, oof)
    oof.head()
    save_test_predictions(model_name, final_test_predictions, sample_submission, TARGET)

    return cv_score, std_dev, oof


def run_tree_model(train:pd.DataFrame, test:pd.DataFrame, model_dict, model_name:str, features:List[str], params, oof:pd.DataFrame) -> (float, float, pd.DataFrame):
    (
        model,
        feature_importance_lst,
        fold_scores,
        final_valid_predictions,
        final_test_predictions,
    ) = train_xgb_model(
        train,
        test,
        model_dict[model_name],
        features,
        TARGET,
        Config.calc_probability,
        ID,
        params,
        Config.N_FOLDS,
        Config.seed,
    )

    cv_score, std_dev = show_fold_scores(fold_scores)
    show_tree_model_fi(model, features)

    oof = save_oof_predictions(model_name, final_valid_predictions, oof)
    oof.head()
    save_test_predictions(model_name, final_test_predictions, sample_submission, TARGET)

    return cv_score, std_dev, oof

In [29]:
%%time

def run_models4features(train:pd.DataFrame, test:pd.DataFrame, model_dict, model_lst:List[str], target:str, feature_lst:List[str], all_cv_scores:pd.DataFrame, linear_models:bool=True) -> pd.DataFrame:

    oof = train[[ID, target, "fold"]].copy().reset_index(drop=True).copy()
    oof.set_index(ID, inplace=True)

    for idx, m in enumerate(model_lst):
        model = model_lst[idx]
        start_time = time.time()

        print(f"Model={model}")

        params = {}
        if linear_models:
                cv_score, std_dev, oof = run_linear_model(train, test, model_dict, model, feature_lst, oof)

        else:
            cv_score, std_dev, oof = run_tree_model(train, test, model_dict, model, feature_lst, params, oof)

        run_time = time.time() - start_time

        score_dict = {"Model": model, "Score": cv_score, "StdDev": std_dev, "RunTime": run_time, "n_estimators": Config.N_ESTIMATORS, "n_folds": Config.N_FOLDS, "comments": ""}
        all_cv_scores = all_cv_scores.append(score_dict, ignore_index=True)
        print(f"Model Run Time: {run_time:.2f}")

    return all_cv_scores




CPU times: user 18 µs, sys: 0 ns, total: 18 µs
Wall time: 23.4 µs


In [30]:
lgbm_params = {'n_estimators': Config.N_ESTIMATORS,
                 'num_rounds': 404,
                 'learning_rate': 0.19,
                 'num_leaves': 17,
                 'max_depth': 8,
                 'min_data_in_leaf': 36,
                 'lambda_l1': 0.96,
                 'lambda_l2': 0.01,
                 'min_gain_to_split': 11.32,
                 'bagging_fraction': 0.6,
                 'feature_fraction': 0.9}


lgbm_params3 = {
    "n_estimators": Config.N_ESTIMATORS,
    'max_depth': 9,
    'learning_rate': 0.01,
    'min_data_in_leaf': 36, 
    'num_leaves': 100, 
    'feature_fraction': 0.8, 
    'bagging_fraction': 0.89, 
    'bagging_freq': 5, 
    'lambda_l2': 28,
    
    'seed': Config.seed,
    'objective': 'regression',
#     'boosting_type': 'gbdt',
#     'device': 'gpu', 
#     'gpu_platform_id': 0,
#     'gpu_device_id': 0,
    'n_jobs': -1,
    'metric': 'rmse',
    'verbose': -1
}
    
lgbm_params = gpu_ify_lgbm(lgbm_params)

In [31]:
xgb_params = {
    "n_estimators": Config.N_ESTIMATORS,  # 10_000,
    "max_depth": 10,  # 10
    "objective": "reg:squarederror",
    #     "enable_categorical": True,  # Only works with gpu_hist
    #     "eval_metric": "mae",
    #     "metric": "mae",
    #     "enable_categorical": True,
    "n_jobs": 8,  # 4
    "seed": Config.seed,
    "tree_method": "hist",
    #         "gpu_id": 0,
    "subsample": 0.9,  # 0.7
    "colsample_bytree": 0.7,
    "use_label_encoder": False,
    "learning_rate": 0.05,  # 0.01
}

xgb_params3 = {
    'n_estimators': Config.N_ESTIMATORS,
    'learning_rate': 0.05,
    'max_depth': 10,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'objective': 'reg:squarederror'
}

if Config.gpu:
    xgb_params["tree_method"] = "gpu_hist"
else:
    xgb_params["tree_method"] = "hist"

In [32]:
cb_params = {
    #     "learning_rate": 0.3277295792305584,
    "learning_rate": 0.05,
    "l2_leaf_reg": 3.1572972266001518,
    "bagging_temperature": 0.6799604234141348,
    "random_strength": 1.99590400593318,
    "depth": 10,
    "min_data_in_leaf": 93,
    # "iterations": 100,  # 10000
    "n_estimators": Config.N_ESTIMATORS,  # 10000
    "use_best_model": True,
    #     "task_type": "GPU",
    "random_seed": Config.seed,
}

cb_params = gpu_ify_cb(cb_params)

In [33]:
model_estimator_dict = {
    "xgb2": xgb.XGBRegressor(**xgb_params),
    "xgb_best_params": xgb.XGBRegressor(**best_xgb_params),
    "xgb3": xgb.XGBRegressor(**xgb_params3),


    "lgbm1": lgb.LGBMRegressor(**lgbm_params),

    "cat1": cb.CatBoostRegressor(),
    "cat2": cb.CatBoostRegressor(**cb_params),
    "cat_best_params": cb.CatBoostRegressor(**best_cb_params),

    "xgb1": xgb.XGBRegressor(),
    "lgbm0": lgb.LGBMRegressor(),
    "lgbm3": lgb.LGBMRegressor(lgbm_params3),
    "lgbm2": lgb.LGBMRegressor(
        learning_rate=0.05,
        max_depth=15,
        num_leaves=11,
        feature_fraction=0.3,
        subsample=0.1,
        n_jobs=-1,
    ),
    "lgbm3": lgb.LGBMRegressor(**lgbm_params),
    "lgbm_best_params": lgb.LGBMRegressor(**best_lgbm_params),


    "lin_reg": linear_model.LinearRegression(),
    "lasso": linear_model.Lasso(),
    "ridge": linear_model.Ridge(max_iter=7000),
    "ridge_25": linear_model.Ridge(fit_intercept=True, solver='auto', alpha=0.25, max_iter=7000),
    "ridge_50": linear_model.Ridge(fit_intercept=True, solver='auto', alpha=0.5, max_iter=7000),
}

In [34]:
all_cv_scores = pd.DataFrame(
    {
        "Model": pd.Series(dtype="str"),
        "Score": pd.Series(dtype="float"),
        "StdDev": pd.Series(dtype="float"),
        "RunTime": pd.Series(dtype="float"),
        "n_estimators": pd.Series(dtype="int"),
        "n_folds": pd.Series(dtype="int"),
        "comments": pd.Series(dtype="str"),
    }
)



## Tree Models

In [35]:
%%time

# model_lst = ["xgb3","xgb_best_params", "lgbm_best_params", "cat_best_params", "xgb1", "xgb2", "lgbm1", "lgbm2", "cat1", "cat2"]
# model_lst = = []

def run_tree_models(X_tr, test, n_folds, model_lst, all_cv_scores):
        all_cv_scores = run_models4features(X_tr, test, model_estimator_dict, model_lst, TARGET, FEATURES, all_cv_scores, linear_models=False)    

        all_cv_scores.sort_values(by=["Score"], ascending=False)
        return all_cv_scores

CPU times: user 5 µs, sys: 0 ns, total: 5 µs
Wall time: 10 µs


## Linear Models

In [36]:
def run_linear_models(X_tr, test, n_folds, model_lst, all_cv_scores):
    for training in [train, train]:

    #     all_cv_scores = run_models4features(train, test, model_estimator_dict, model_lst, TARGET, FEATURES, all_cv_scores, linear_models=False)    
        all_cv_scores = run_models4features(X_tr, test, model_estimator_dict, model_lst, TARGET, FEATURES, all_cv_scores, linear_models=True)    
        return all_cv_scores  

In [37]:
%%time


tree_model_lst = ["xgb_best_params", "lgbm_best_params", "cat_best_params","xgb3", "xgb1", "xgb2", "lgbm0", "lgbm1", "lgbm2", "lgbm3", "cat1", "cat2"]
linear_model_lst = ["lin_reg", "lasso", "ridge", "ridge_25", "ridge_50"]
linear_model_lst = ["lasso", "ridge",  "ridge_50"]

Config.N_FOLDS = 10

for training in [train, train]:
    X_tr = create_folds(train, Config.N_FOLDS)
    
    oof = train[[ID, TARGET, "fold"]].copy().reset_index(drop=True).copy()
    oof.set_index(ID, inplace=True)
    oof.head()

    all_cv_scores = run_tree_models(X_tr, test, Config.N_FOLDS, tree_model_lst, all_cv_scores)
    all_cv_scores = run_linear_models(X_tr, test, Config.N_FOLDS, linear_model_lst, all_cv_scores)
    Config.N_FOLDS = 5



n_folds=10, seed=42
Model=xgb_best_params
{}
fold: 1, Score: 9.545842549193722, Run Time: 15.95
fold: 2, Score: 8.618881004315726, Run Time: 17.02
fold: 3, Score: 8.890871874116222, Run Time: 15.85
fold: 4, Score: 8.67049984056757, Run Time: 16.51
fold: 5, Score: 8.584261000171212, Run Time: 15.67
fold: 6, Score: 9.1563313566935, Run Time: 16.71
fold: 7, Score: 9.220801551016221, Run Time: 15.88
fold: 8, Score: 8.970055288994109, Run Time: 16.89
fold: 9, Score: 9.131186889274678, Run Time: 15.73
fold: 10, Score: 8.803590667466539, Run Time: 16.55
Scores -> Adjusted: 8.66772406 , mean: 8.95923220, std: 0.29150815

=== Model Feature Importance ===
AgeInDays 0.3811004
is_original 0.10202914
CementComponent 0.089053184
SuperplasticizerComponent 0.08611963
WaterComponent 0.078768104
FlyAshComponent 0.06899821
BlastFurnaceSlag 0.06778828
FineAggregateComponent 0.063094676
CoarseAggregateComponent 0.06304841


Unnamed: 0_level_0,pred_xgb_best_params
id,Unnamed: 1_level_1
0.0,25.76403
1.0,33.38162
2.0,45.21269
3.0,40.71827
4.0,46.41655


Mode
=== Target Value Counts ===
Model Run Time: 162.99
Model=lgbm_best_params
{}
fold: 1, Score: 9.30726273914041, Run Time: 15.12
fold: 2, Score: 8.54928485908331, Run Time: 14.13
fold: 3, Score: 8.64773056250605, Run Time: 13.23
fold: 4, Score: 8.575387911484238, Run Time: 13.02
fold: 5, Score: 8.544413553583889, Run Time: 14.00
fold: 6, Score: 9.01213170328958, Run Time: 13.38
fold: 7, Score: 9.15573692825669, Run Time: 14.72
fold: 8, Score: 8.779073877978066, Run Time: 13.94
fold: 9, Score: 8.85091361764811, Run Time: 15.16
fold: 10, Score: 8.759606365596957, Run Time: 14.76
Scores -> Adjusted: 8.56674833 , mean: 8.81815421, std: 0.25140588

=== Model Feature Importance ===
FineAggregateComponent 0.19838056680161945
CementComponent 0.18177952785018395
CoarseAggregateComponent 0.17847041206810493
WaterComponent 0.13339988538258185
SuperplasticizerComponent 0.08962342632133548
BlastFurnaceSlag 0.0825430277485072
AgeInDays 0.07934483204850905
FlyAshComponent 0.04444197955373154
is_or

Unnamed: 0_level_0,pred_lgbm_best_params
id,Unnamed: 1_level_1
0.0,21.38728
1.0,34.04771
2.0,42.93428
3.0,42.24447
4.0,46.51778


Mode
=== Target Value Counts ===
Model Run Time: 141.62
Model=cat_best_params
{}
fold: 1, Score: 8.884871316728866, Run Time: 2.29
fold: 2, Score: 8.027557466397464, Run Time: 2.32
fold: 3, Score: 8.407129533125818, Run Time: 2.28
fold: 4, Score: 8.307315610533102, Run Time: 2.26
fold: 5, Score: 8.142282033359145, Run Time: 2.62
fold: 6, Score: 8.84025909442841, Run Time: 2.30
fold: 7, Score: 8.92142970770193, Run Time: 2.33
fold: 8, Score: 8.398775992886327, Run Time: 2.29
fold: 9, Score: 8.631436517147183, Run Time: 2.37
fold: 10, Score: 8.577334700032038, Run Time: 2.35
Scores -> Adjusted: 8.21888009 , mean: 8.51383920, std: 0.29495911

=== Model Feature Importance ===
AgeInDays 0.5137672605650883
CementComponent 0.1581089376021641
WaterComponent 0.0762473359255142
BlastFurnaceSlag 0.05353904848726209
is_original 0.05241070250325555
SuperplasticizerComponent 0.04755038162706786
CoarseAggregateComponent 0.03527891781314682
FineAggregateComponent 0.0327323890450361
FlyAshComponent 0.0

Unnamed: 0_level_0,pred_cat_best_params
id,Unnamed: 1_level_1
0.0,20.96868
1.0,35.2778
2.0,38.05452
3.0,42.92285
4.0,46.25621


Mode
=== Target Value Counts ===
Model Run Time: 23.56
Model=xgb3
{}
fold: 1, Score: 9.115072100858512, Run Time: 1.62
fold: 2, Score: 8.281834796319096, Run Time: 1.60
fold: 3, Score: 8.623970200319468, Run Time: 1.61
fold: 4, Score: 8.570510627320093, Run Time: 1.60
fold: 5, Score: 8.29798080177781, Run Time: 1.61
fold: 6, Score: 8.957940034807098, Run Time: 1.61
fold: 7, Score: 9.060638760157994, Run Time: 1.61
fold: 8, Score: 8.466117990858633, Run Time: 1.61
fold: 9, Score: 8.916727939030245, Run Time: 1.62
fold: 10, Score: 8.514586782025328, Run Time: 1.61
Scores -> Adjusted: 8.38725146 , mean: 8.68053800, std: 0.29328655

=== Model Feature Importance ===
AgeInDays 0.5902954
SuperplasticizerComponent 0.09135971
WaterComponent 0.05379744
CementComponent 0.049766593
FineAggregateComponent 0.04716308
FlyAshComponent 0.044641245
CoarseAggregateComponent 0.043902498
is_original 0.039931543
BlastFurnaceSlag 0.039142568


Unnamed: 0_level_0,pred_xgb3
id,Unnamed: 1_level_1
0.0,25.28448
1.0,34.14724
2.0,39.66658
3.0,42.89977
4.0,46.05143


Mode
=== Target Value Counts ===
Model Run Time: 16.29
Model=xgb1
{}
fold: 1, Score: 9.236125352930578, Run Time: 1.00
fold: 2, Score: 8.703685671548296, Run Time: 1.06
fold: 3, Score: 8.790395913894132, Run Time: 1.02
fold: 4, Score: 8.73717150128406, Run Time: 1.00
fold: 5, Score: 8.644312141637625, Run Time: 1.00
fold: 6, Score: 8.939861057056403, Run Time: 1.00
fold: 7, Score: 9.326767193338146, Run Time: 1.00
fold: 8, Score: 8.745134027223008, Run Time: 1.04
fold: 9, Score: 8.898621553507043, Run Time: 1.02
fold: 10, Score: 8.725353428468363, Run Time: 1.00
Scores -> Adjusted: 8.65400568 , mean: 8.87474278, std: 0.22073710

=== Model Feature Importance ===
AgeInDays 0.54389334
SuperplasticizerComponent 0.10120986
CementComponent 0.06652754
WaterComponent 0.06457985
is_original 0.048552986
FineAggregateComponent 0.04808326
FlyAshComponent 0.044471428
CoarseAggregateComponent 0.042678196
BlastFurnaceSlag 0.04000359


Unnamed: 0_level_0,pred_xgb1
id,Unnamed: 1_level_1
0.0,24.19326
1.0,32.04249
2.0,40.09577
3.0,42.7277
4.0,46.48205


Mode
=== Target Value Counts ===
Model Run Time: 10.30
Model=xgb2
{}
fold: 1, Score: 9.082725963888702, Run Time: 1.62
fold: 2, Score: 8.320198714925636, Run Time: 1.59
fold: 3, Score: 8.621016952265864, Run Time: 1.62
fold: 4, Score: 8.513195698305687, Run Time: 1.62
fold: 5, Score: 8.278828196821744, Run Time: 1.68
fold: 6, Score: 9.022919389594415, Run Time: 1.59
fold: 7, Score: 9.120848483743133, Run Time: 1.62
fold: 8, Score: 8.474450437121591, Run Time: 1.62
fold: 9, Score: 8.828915088113693, Run Time: 1.61
fold: 10, Score: 8.472513008325318, Run Time: 1.61
Scores -> Adjusted: 8.37307986 , mean: 8.67356119, std: 0.30048134

=== Model Feature Importance ===
AgeInDays 0.6973176
SuperplasticizerComponent 0.065859735
WaterComponent 0.041117102
CementComponent 0.03641771
is_original 0.035176545
CoarseAggregateComponent 0.03202776
BlastFurnaceSlag 0.031194871
FineAggregateComponent 0.030968588
FlyAshComponent 0.029920096


Unnamed: 0_level_0,pred_xgb2
id,Unnamed: 1_level_1
0.0,24.12436
1.0,33.35629
2.0,38.3877
3.0,42.50555
4.0,45.39285


Mode
=== Target Value Counts ===
Model Run Time: 16.36
Model=lgbm0
{}
fold: 1, Score: 9.055783731666184, Run Time: 0.63
fold: 2, Score: 8.12570301676443, Run Time: 0.63
fold: 3, Score: 8.41910621068709, Run Time: 0.63
fold: 4, Score: 8.280065603579596, Run Time: 0.63
fold: 5, Score: 8.376722433105561, Run Time: 0.70
fold: 6, Score: 8.875543922011909, Run Time: 0.62
fold: 7, Score: 8.981318038612422, Run Time: 0.61
fold: 8, Score: 8.606421036424909, Run Time: 0.64
fold: 9, Score: 8.68732967779213, Run Time: 0.67
fold: 10, Score: 8.516035214633478, Run Time: 1.45
Scores -> Adjusted: 8.30032440 , mean: 8.59240289, std: 0.29207849

=== Model Feature Importance ===
FineAggregateComponent 0.14966666666666667
CementComponent 0.14733333333333334
CoarseAggregateComponent 0.14466666666666667
WaterComponent 0.14266666666666666
BlastFurnaceSlag 0.10333333333333333
AgeInDays 0.10266666666666667
SuperplasticizerComponent 0.101
FlyAshComponent 0.056
is_original 0.05266666666666667


Unnamed: 0_level_0,pred_lgbm0
id,Unnamed: 1_level_1
0.0,22.92241
1.0,34.31604
2.0,43.05568
3.0,43.02717
4.0,46.00049


Mode
=== Target Value Counts ===
Model Run Time: 7.37
Model=lgbm1
{}
fold: 1, Score: 9.149096182972482, Run Time: 0.59
fold: 2, Score: 8.386915375190465, Run Time: 0.63
fold: 3, Score: 8.7326271354246, Run Time: 0.62
fold: 4, Score: 8.523533015867649, Run Time: 0.62
fold: 5, Score: 8.581593311077551, Run Time: 0.65
fold: 6, Score: 9.060754399783715, Run Time: 0.69
fold: 7, Score: 9.187189331925973, Run Time: 0.65
fold: 8, Score: 8.808269834090497, Run Time: 0.65
fold: 9, Score: 8.904172034143704, Run Time: 0.70
fold: 10, Score: 8.832885886383586, Run Time: 0.72
Scores -> Adjusted: 8.56208597 , mean: 8.81670365, std: 0.25461768

=== Model Feature Importance ===
CoarseAggregateComponent 0.19626615605552897
CementComponent 0.16419339396840593
WaterComponent 0.14648157012924845
FineAggregateComponent 0.14073719483006222
SuperplasticizerComponent 0.11680229775011967
BlastFurnaceSlag 0.09478219243657253
AgeInDays 0.0852082336045955
FlyAshComponent 0.04786979415988511
is_original 0.0076591670

Unnamed: 0_level_0,pred_lgbm1
id,Unnamed: 1_level_1
0.0,23.24892
1.0,32.43066
2.0,39.92815
3.0,43.543
4.0,45.65339


Mode
=== Target Value Counts ===
Model Run Time: 6.67
Model=lgbm2
{}
fold: 1, Score: 9.16817995985692, Run Time: 0.29
fold: 2, Score: 8.40666759997171, Run Time: 0.30
fold: 3, Score: 8.701669366084758, Run Time: 0.30
fold: 4, Score: 8.580113637331726, Run Time: 0.30
fold: 5, Score: 8.586103436072968, Run Time: 0.31
fold: 6, Score: 9.350413237916252, Run Time: 0.31
fold: 7, Score: 9.270965198898608, Run Time: 0.31
fold: 8, Score: 8.749308044524904, Run Time: 0.31
fold: 9, Score: 8.958358604702708, Run Time: 0.31
fold: 10, Score: 9.00700636896053, Run Time: 0.34
Scores -> Adjusted: 8.57314119 , mean: 8.87787855, std: 0.30473736

=== Model Feature Importance ===
AgeInDays 0.176
WaterComponent 0.15
CementComponent 0.149
CoarseAggregateComponent 0.112
FineAggregateComponent 0.102
SuperplasticizerComponent 0.102
FlyAshComponent 0.089
BlastFurnaceSlag 0.076
is_original 0.044


Unnamed: 0_level_0,pred_lgbm2
id,Unnamed: 1_level_1
0.0,22.64701
1.0,33.45124
2.0,35.71802
3.0,44.7279
4.0,42.36713


Mode
=== Target Value Counts ===
Model Run Time: 3.23
Model=lgbm3
{}
fold: 1, Score: 9.149096182972482, Run Time: 0.61
fold: 2, Score: 8.386915375190465, Run Time: 0.66
fold: 3, Score: 8.7326271354246, Run Time: 0.64
fold: 4, Score: 8.523533015867649, Run Time: 0.66
fold: 5, Score: 8.581593311077551, Run Time: 0.71
fold: 6, Score: 9.060754399783715, Run Time: 0.65
fold: 7, Score: 9.187189331925973, Run Time: 0.67
fold: 8, Score: 8.808269834090497, Run Time: 0.67
fold: 9, Score: 8.904172034143704, Run Time: 0.71
fold: 10, Score: 8.832885886383586, Run Time: 0.64
Scores -> Adjusted: 8.56208597 , mean: 8.81670365, std: 0.25461768

=== Model Feature Importance ===
CoarseAggregateComponent 0.19626615605552897
CementComponent 0.16419339396840593
WaterComponent 0.14648157012924845
FineAggregateComponent 0.14073719483006222
SuperplasticizerComponent 0.11680229775011967
BlastFurnaceSlag 0.09478219243657253
AgeInDays 0.0852082336045955
FlyAshComponent 0.04786979415988511
is_original 0.0076591670

Unnamed: 0_level_0,pred_lgbm3
id,Unnamed: 1_level_1
0.0,23.24892
1.0,32.43066
2.0,39.92815
3.0,43.543
4.0,45.65339


Mode
=== Target Value Counts ===
Model Run Time: 6.79
Model=cat1
{}
fold: 1, Score: 8.842385316986515, Run Time: 2.31
fold: 2, Score: 7.9850569512126235, Run Time: 2.28
fold: 3, Score: 8.455137852119595, Run Time: 2.29
fold: 4, Score: 8.22780940403716, Run Time: 2.30
fold: 5, Score: 8.210470434922678, Run Time: 2.22
fold: 6, Score: 8.749027982444677, Run Time: 2.45
fold: 7, Score: 8.839097959786251, Run Time: 2.43
fold: 8, Score: 8.379434844390742, Run Time: 2.33
fold: 9, Score: 8.639260675573494, Run Time: 2.34
fold: 10, Score: 8.521292199445387, Run Time: 2.39
Scores -> Adjusted: 8.21077584 , mean: 8.48489736, std: 0.27412152

=== Model Feature Importance ===
AgeInDays 0.4659912684569474
CementComponent 0.1450925884244876
WaterComponent 0.08228424883304461
SuperplasticizerComponent 0.06569281289389269
FineAggregateComponent 0.0556882130509553
is_original 0.055160898704424415
BlastFurnaceSlag 0.05356532705306306
CoarseAggregateComponent 0.047055417013662205
FlyAshComponent 0.029469225

Unnamed: 0_level_0,pred_cat1
id,Unnamed: 1_level_1
0.0,20.50967
1.0,34.97334
2.0,38.34672
3.0,44.36916
4.0,44.46599


Mode
=== Target Value Counts ===
Model Run Time: 23.52
Model=cat2
{}
fold: 1, Score: 8.882004718483344, Run Time: 1.23
fold: 2, Score: 7.977759088419156, Run Time: 1.23
fold: 3, Score: 8.416674977791695, Run Time: 1.22
fold: 4, Score: 8.272077847777137, Run Time: 1.27
fold: 5, Score: 8.278552727957184, Run Time: 1.22
fold: 6, Score: 9.081202663446089, Run Time: 1.23
fold: 7, Score: 8.967409469424815, Run Time: 1.25
fold: 8, Score: 8.536536695994418, Run Time: 1.22
fold: 9, Score: 8.815959292973847, Run Time: 1.26
fold: 10, Score: 8.578699546054292, Run Time: 1.20
Scores -> Adjusted: 8.24526028 , mean: 8.58068770, std: 0.33542742

=== Model Feature Importance ===
AgeInDays 0.5032084746157022
CementComponent 0.12009707901092799
WaterComponent 0.07202595607906485
is_original 0.06236545944307235
SuperplasticizerComponent 0.0623574286520829
BlastFurnaceSlag 0.049131756153854254
FineAggregateComponent 0.04804344794046502
CoarseAggregateComponent 0.047013309389005185
FlyAshComponent 0.0357570

Unnamed: 0_level_0,pred_cat2
id,Unnamed: 1_level_1
0.0,21.2265
1.0,35.53554
2.0,38.21673
3.0,43.40681
4.0,43.63925


Mode
=== Target Value Counts ===
Model Run Time: 12.49
Model=lasso
fold: 1, Score: 11.204515037347154, Run Time: 0.04
fold: 2, Score: 11.217786983001021, Run Time: 0.07
fold: 3, Score: 11.100359495215368, Run Time: 0.07
fold: 4, Score: 11.10732069883224, Run Time: 0.08
fold: 5, Score: 11.69170119923506, Run Time: 0.08
fold: 6, Score: 11.947710850062732, Run Time: 0.08
fold: 7, Score: 11.782637387492358, Run Time: 0.08
fold: 8, Score: 11.531262211198314, Run Time: 0.09
fold: 9, Score: 11.508755075128008, Run Time: 0.08
fold: 10, Score: 11.162851684415475, Run Time: 0.08
Scores -> Adjusted: 11.13271899 , mean: 11.42549006, std: 0.29277107


Unnamed: 0_level_0,pred_lasso
id,Unnamed: 1_level_1
0.0,33.93727
1.0,33.63498
2.0,32.0581
3.0,54.75081
4.0,34.67705


Mode
=== Target Value Counts ===
Model Run Time: 0.99
Model=ridge
fold: 1, Score: 10.997892105033113, Run Time: 0.08
fold: 2, Score: 10.869717132593685, Run Time: 0.07
fold: 3, Score: 10.827627026803574, Run Time: 0.07
fold: 4, Score: 10.940017480888818, Run Time: 0.07
fold: 5, Score: 11.45307944778129, Run Time: 0.08
fold: 6, Score: 11.582842537369332, Run Time: 0.07
fold: 7, Score: 11.465743733770681, Run Time: 0.07
fold: 8, Score: 11.182484659869058, Run Time: 0.08
fold: 9, Score: 11.257265503201364, Run Time: 0.08
fold: 10, Score: 10.85309697029356, Run Time: 0.08
Scores -> Adjusted: 10.87300713 , mean: 11.14297666, std: 0.26996953


Unnamed: 0_level_0,pred_ridge
id,Unnamed: 1_level_1
0.0,35.24243
1.0,34.90703
2.0,32.1831
3.0,58.32234
4.0,36.45027


Mode
=== Target Value Counts ===
Model Run Time: 1.00
Model=ridge_50
fold: 1, Score: 10.997908414405188, Run Time: 0.04
fold: 2, Score: 10.869648312563903, Run Time: 0.07
fold: 3, Score: 10.82760886302, Run Time: 0.07
fold: 4, Score: 10.939997935200502, Run Time: 0.07
fold: 5, Score: 11.453055147701754, Run Time: 0.07
fold: 6, Score: 11.58279325583396, Run Time: 0.08
fold: 7, Score: 11.465683747008704, Run Time: 0.07
fold: 8, Score: 11.182428493438755, Run Time: 0.07
fold: 9, Score: 11.257222975245227, Run Time: 0.08
fold: 10, Score: 10.853010573137023, Run Time: 0.07
Scores -> Adjusted: 10.87296790 , mean: 11.14293577, std: 0.26996788


Unnamed: 0_level_0,pred_ridge_50
id,Unnamed: 1_level_1
0.0,35.24257
1.0,34.90685
2.0,32.18391
3.0,58.32428
4.0,36.45071


Mode
=== Target Value Counts ===
Model Run Time: 0.94
n_folds=5, seed=42
Model=xgb_best_params
{}
fold: 1, Score: 9.228931914438371, Run Time: 16.71
fold: 2, Score: 8.809786246933552, Run Time: 15.92
fold: 3, Score: 9.023708423490014, Run Time: 16.69
fold: 4, Score: 9.123578293740334, Run Time: 15.75
fold: 5, Score: 9.102317817007373, Run Time: 16.72
Scores -> Adjusted: 8.91747344 , mean: 9.05766454, std: 0.14019110

=== Model Feature Importance ===
AgeInDays 0.3738207
is_original 0.09788507
SuperplasticizerComponent 0.089335606
CementComponent 0.088907294
WaterComponent 0.08027625
BlastFurnaceSlag 0.06939101
FlyAshComponent 0.06859837
CoarseAggregateComponent 0.06627423
FineAggregateComponent 0.0655115


Unnamed: 0_level_0,pred_xgb_best_params
id,Unnamed: 1_level_1
0.0,24.45964
1.0,31.55924
2.0,44.18363
3.0,40.35679
4.0,46.59731


Mode
=== Target Value Counts ===
Model Run Time: 82.00
Model=lgbm_best_params
{}
fold: 1, Score: 8.941001307946268, Run Time: 12.24
fold: 2, Score: 8.659173193268549, Run Time: 12.94
fold: 3, Score: 8.896375887202693, Run Time: 12.18
fold: 4, Score: 8.972409828605471, Run Time: 12.31
fold: 5, Score: 8.8642080695157, Run Time: 13.31
Scores -> Adjusted: 8.75649786 , mean: 8.86663366, std: 0.11013580

=== Model Feature Importance ===
FineAggregateComponent 0.19938887842447836
CementComponent 0.18287603859274607
CoarseAggregateComponent 0.18124359054854441
WaterComponent 0.13101441995772378
SuperplasticizerComponent 0.08838241141876478
BlastFurnaceSlag 0.08189447688411712
AgeInDays 0.07975973713400725
FlyAshComponent 0.042108788011971286
is_original 0.013331659027646972


Unnamed: 0_level_0,pred_lgbm_best_params
id,Unnamed: 1_level_1
0.0,22.94459
1.0,32.42459
2.0,43.54321
3.0,41.86699
4.0,46.80415


Mode
=== Target Value Counts ===
Model Run Time: 63.14
Model=cat_best_params
{}
fold: 1, Score: 8.455788080742218, Run Time: 2.25
fold: 2, Score: 8.413158231540246, Run Time: 2.22
fold: 3, Score: 8.644794895304575, Run Time: 2.27
fold: 4, Score: 8.656051558962266, Run Time: 2.19
fold: 5, Score: 8.650992841565207, Run Time: 2.20
Scores -> Adjusted: 8.45735638 , mean: 8.56415712, std: 0.10680074

=== Model Feature Importance ===
AgeInDays 0.5348865031530253
CementComponent 0.14566389456496975
WaterComponent 0.06556770596761975
SuperplasticizerComponent 0.051754224362490504
is_original 0.05000950995679352
BlastFurnaceSlag 0.0491031246019896
FineAggregateComponent 0.0405575505026373
CoarseAggregateComponent 0.03426097400048003
FlyAshComponent 0.028196512889994335


Unnamed: 0_level_0,pred_cat_best_params
id,Unnamed: 1_level_1
0.0,21.42759
1.0,34.03799
2.0,37.85206
3.0,43.24887
4.0,45.86338


Mode
=== Target Value Counts ===
Model Run Time: 11.29
Model=xgb3
{}
fold: 1, Score: 8.837577945667764, Run Time: 1.53
fold: 2, Score: 8.636069847234287, Run Time: 1.52
fold: 3, Score: 8.73279698480815, Run Time: 1.52
fold: 4, Score: 8.846140630258176, Run Time: 1.52
fold: 5, Score: 8.854638926321533, Run Time: 1.61
Scores -> Adjusted: 8.69636332 , mean: 8.78144487, std: 0.08508155

=== Model Feature Importance ===
AgeInDays 0.61559093
SuperplasticizerComponent 0.07770578
WaterComponent 0.055866014
CementComponent 0.052290294
FineAggregateComponent 0.046200264
CoarseAggregateComponent 0.042677015
FlyAshComponent 0.041050218
is_original 0.035590023
BlastFurnaceSlag 0.03302941


Unnamed: 0_level_0,pred_xgb3
id,Unnamed: 1_level_1
0.0,25.13547
1.0,34.39598
2.0,40.5027
3.0,41.39053
4.0,45.74621


Mode
=== Target Value Counts ===
Model Run Time: 7.89
Model=xgb1
{}
fold: 1, Score: 8.992121124506367, Run Time: 0.99
fold: 2, Score: 8.733414161464442, Run Time: 0.98
fold: 3, Score: 8.956214543374454, Run Time: 0.99
fold: 4, Score: 9.118459456383764, Run Time: 0.98
fold: 5, Score: 9.041821897368887, Run Time: 0.99
Scores -> Adjusted: 8.83891636 , mean: 8.96840624, std: 0.12948987

=== Model Feature Importance ===
AgeInDays 0.5348705
SuperplasticizerComponent 0.09560716
WaterComponent 0.0679491
CementComponent 0.06512399
FineAggregateComponent 0.051458344
is_original 0.04879414
FlyAshComponent 0.045836072
BlastFurnaceSlag 0.045422416
CoarseAggregateComponent 0.04493825


Unnamed: 0_level_0,pred_xgb1
id,Unnamed: 1_level_1
0.0,22.93364
1.0,32.67008
2.0,41.96727
3.0,40.7112
4.0,46.85235


Mode
=== Target Value Counts ===
Model Run Time: 5.11
Model=xgb2
{}
fold: 1, Score: 8.790288359511713, Run Time: 1.62
fold: 2, Score: 8.631925358031847, Run Time: 1.65
fold: 3, Score: 8.720934793521197, Run Time: 1.65
fold: 4, Score: 8.813794754475888, Run Time: 1.64
fold: 5, Score: 8.801228067506257, Run Time: 1.66
Scores -> Adjusted: 8.68367797 , mean: 8.75163427, std: 0.06795630

=== Model Feature Importance ===
AgeInDays 0.6741591
SuperplasticizerComponent 0.06544539
WaterComponent 0.044892494
CementComponent 0.039817963
CoarseAggregateComponent 0.038065217
is_original 0.036948487
FineAggregateComponent 0.034848586
FlyAshComponent 0.033209722
BlastFurnaceSlag 0.032613065


Unnamed: 0_level_0,pred_xgb2
id,Unnamed: 1_level_1
0.0,24.95548
1.0,32.44896
2.0,40.59565
3.0,42.27273
4.0,45.51218


Mode
=== Target Value Counts ===
Model Run Time: 8.41
Model=lgbm0
{}
fold: 1, Score: 8.6469613848716, Run Time: 0.77
fold: 2, Score: 8.458575302905318, Run Time: 0.78
fold: 3, Score: 8.62103563334192, Run Time: 0.79
fold: 4, Score: 8.81514767626793, Run Time: 0.72
fold: 5, Score: 8.665335619615586, Run Time: 0.67
Scores -> Adjusted: 8.52771708 , mean: 8.64141112, std: 0.11369404

=== Model Feature Importance ===
FineAggregateComponent 0.15566666666666668
CementComponent 0.151
WaterComponent 0.14833333333333334
CoarseAggregateComponent 0.133
AgeInDays 0.103
BlastFurnaceSlag 0.10166666666666667
SuperplasticizerComponent 0.09533333333333334
FlyAshComponent 0.06633333333333333
is_original 0.04566666666666667


Unnamed: 0_level_0,pred_lgbm0
id,Unnamed: 1_level_1
0.0,23.26141
1.0,33.89003
2.0,40.82895
3.0,42.66877
4.0,47.15585


Mode
=== Target Value Counts ===
Model Run Time: 3.88
Model=lgbm1
{}
fold: 1, Score: 8.781139640071684, Run Time: 0.74
fold: 2, Score: 8.709040659877498, Run Time: 0.62
fold: 3, Score: 8.80640952313024, Run Time: 0.74
fold: 4, Score: 8.96679561544107, Run Time: 0.63
fold: 5, Score: 8.987212454352488, Run Time: 0.77
Scores -> Adjusted: 8.74151036 , mean: 8.85011958, std: 0.10860922

=== Model Feature Importance ===
CoarseAggregateComponent 0.23054040654437283
FineAggregateComponent 0.15270203272186417
CementComponent 0.14774417451660882
WaterComponent 0.14377788795240456
SuperplasticizerComponent 0.09519087754090233
BlastFurnaceSlag 0.0842835894893406
AgeInDays 0.08031730292513634
FlyAshComponent 0.05899851264253842
is_original 0.006445215666831929


Unnamed: 0_level_0,pred_lgbm1
id,Unnamed: 1_level_1
0.0,23.09081
1.0,30.3692
2.0,43.27919
3.0,42.85879
4.0,46.98142


Mode
=== Target Value Counts ===
Model Run Time: 3.66
Model=lgbm2
{}
fold: 1, Score: 8.809860521234734, Run Time: 0.31
fold: 2, Score: 8.646333881713847, Run Time: 0.31
fold: 3, Score: 9.008811788025513, Run Time: 0.31
fold: 4, Score: 9.015505359172197, Run Time: 0.33
fold: 5, Score: 8.983053770048388, Run Time: 0.32
Scores -> Adjusted: 8.74829316 , mean: 8.89271306, std: 0.14441991

=== Model Feature Importance ===
AgeInDays 0.179
WaterComponent 0.148
CementComponent 0.141
CoarseAggregateComponent 0.121
FineAggregateComponent 0.108
SuperplasticizerComponent 0.105
FlyAshComponent 0.084
BlastFurnaceSlag 0.071
is_original 0.043


Unnamed: 0_level_0,pred_lgbm2
id,Unnamed: 1_level_1
0.0,22.90256
1.0,33.08388
2.0,36.38186
3.0,44.98889
4.0,42.88206


Mode
=== Target Value Counts ===
Model Run Time: 1.74
Model=lgbm3
{}
fold: 1, Score: 8.781139640071684, Run Time: 0.68
fold: 2, Score: 8.709040659877498, Run Time: 0.70
fold: 3, Score: 8.80640952313024, Run Time: 0.80
fold: 4, Score: 8.96679561544107, Run Time: 0.79
fold: 5, Score: 8.987212454352488, Run Time: 0.97
Scores -> Adjusted: 8.74151036 , mean: 8.85011958, std: 0.10860922

=== Model Feature Importance ===
CoarseAggregateComponent 0.23054040654437283
FineAggregateComponent 0.15270203272186417
CementComponent 0.14774417451660882
WaterComponent 0.14377788795240456
SuperplasticizerComponent 0.09519087754090233
BlastFurnaceSlag 0.0842835894893406
AgeInDays 0.08031730292513634
FlyAshComponent 0.05899851264253842
is_original 0.006445215666831929


Unnamed: 0_level_0,pred_lgbm3
id,Unnamed: 1_level_1
0.0,23.09081
1.0,30.3692
2.0,43.27919
3.0,42.85879
4.0,46.98142


Mode
=== Target Value Counts ===
Model Run Time: 4.10
Model=cat1
{}
fold: 1, Score: 8.431749298401597, Run Time: 2.33
fold: 2, Score: 8.388387899200259, Run Time: 2.50
fold: 3, Score: 8.61088159648203, Run Time: 2.26
fold: 4, Score: 8.647884863106468, Run Time: 2.23
fold: 5, Score: 8.616519213641785, Run Time: 2.23
Scores -> Adjusted: 8.43210907 , mean: 8.53908457, std: 0.10697550

=== Model Feature Importance ===
AgeInDays 0.5011350696894802
CementComponent 0.14023811194223915
WaterComponent 0.07174849751558048
SuperplasticizerComponent 0.06377808703707621
is_original 0.0548454901720651
BlastFurnaceSlag 0.05274810508637863
FineAggregateComponent 0.048259980049346736
CoarseAggregateComponent 0.04016481254801164
FlyAshComponent 0.027081845959821868


Unnamed: 0_level_0,pred_cat1
id,Unnamed: 1_level_1
0.0,21.56087
1.0,34.4374
2.0,38.23524
3.0,44.26784
4.0,45.42224


Mode
=== Target Value Counts ===
Model Run Time: 11.71
Model=cat2
{}
fold: 1, Score: 8.486452076257459, Run Time: 1.26
fold: 2, Score: 8.398097025948712, Run Time: 1.25
fold: 3, Score: 8.729117006170345, Run Time: 1.26
fold: 4, Score: 8.736941053990376, Run Time: 1.26
fold: 5, Score: 8.75059953339561, Run Time: 1.31
Scores -> Adjusted: 8.47211055 , mean: 8.62024134, std: 0.14813079

=== Model Feature Importance ===
AgeInDays 0.4920662095046869
CementComponent 0.1162907484817497
WaterComponent 0.07683575070505587
is_original 0.06263189700783435
SuperplasticizerComponent 0.05715114341889712
CoarseAggregateComponent 0.05447293703831537
FineAggregateComponent 0.053917382732094135
BlastFurnaceSlag 0.04940941702954806
FlyAshComponent 0.037224514081818566


Unnamed: 0_level_0,pred_cat2
id,Unnamed: 1_level_1
0.0,21.78553
1.0,36.22236
2.0,39.11054
3.0,44.02174
4.0,44.64331


Mode
=== Target Value Counts ===
Model Run Time: 6.49
Model=lasso
fold: 1, Score: 11.195269849347316, Run Time: 0.05
fold: 2, Score: 11.095622470716382, Run Time: 0.07
fold: 3, Score: 11.825413657275954, Run Time: 0.09
fold: 4, Score: 11.672098094494755, Run Time: 0.09
fold: 5, Score: 11.33590531548821, Run Time: 0.10
Scores -> Adjusted: 11.14536525 , mean: 11.42486188, std: 0.27949663


Unnamed: 0_level_0,pred_lasso
id,Unnamed: 1_level_1
0.0,33.79405
1.0,33.69611
2.0,31.94575
3.0,54.65122
4.0,34.76774


Mode
=== Target Value Counts ===
Model Run Time: 0.64
Model=ridge
fold: 1, Score: 10.924089987588308, Run Time: 0.04
fold: 2, Score: 10.895494126528128, Run Time: 0.08
fold: 3, Score: 11.526104063096483, Run Time: 0.09
fold: 4, Score: 11.3306436121218, Run Time: 0.08
fold: 5, Score: 11.047532527047172, Run Time: 0.10
Scores -> Adjusted: 10.89971862 , mean: 11.14477286, std: 0.24505425


Unnamed: 0_level_0,pred_ridge
id,Unnamed: 1_level_1
0.0,35.17172
1.0,35.082
2.0,31.94759
3.0,58.22468
4.0,36.62562


Mode
=== Target Value Counts ===
Model Run Time: 0.63
Model=ridge_50
fold: 1, Score: 10.924059453271678, Run Time: 0.04
fold: 2, Score: 10.895481824132732, Run Time: 0.08
fold: 3, Score: 11.52606366402642, Run Time: 0.10
fold: 4, Score: 11.33057166300521, Run Time: 0.09
fold: 5, Score: 11.047455427963495, Run Time: 0.09
Scores -> Adjusted: 10.89968152 , mean: 11.14472641, std: 0.24504488


Unnamed: 0_level_0,pred_ridge_50
id,Unnamed: 1_level_1
0.0,35.17185
1.0,35.08182
2.0,31.94836
3.0,58.22688
4.0,36.62616


Mode
=== Target Value Counts ===
Model Run Time: 0.63
CPU times: user 26min 30s, sys: 2min 53s, total: 29min 23s
Wall time: 10min 45s


In [38]:
sample_submission.head(20)

Unnamed: 0,id,Strength,target_xgb_best_params,target_lgbm_best_params,target_cat_best_params,target_xgb3,target_xgb1,target_xgb2,target_lgbm0,target_lgbm1,target_lgbm2,target_lgbm3,target_cat1,target_cat2,target_lasso,target_ridge,target_ridge_50
0,5407,35.452,47.11875,48.39299,47.53011,45.34196,42.92241,44.72165,46.22262,47.79622,46.32879,47.79622,47.22347,46.89835,33.58043,34.80228,34.80263
1,5408,35.452,17.36256,17.15562,18.51541,19.14786,15.81407,20.09818,19.50923,18.35496,20.34535,18.35496,19.44139,19.59299,29.08704,26.47911,26.47779
2,5409,35.452,32.31214,31.39071,32.23055,32.01067,30.90379,31.91684,32.49093,31.18286,32.63635,31.18286,32.3648,33.84536,29.10303,25.1279,25.12697
3,5410,35.452,45.16241,45.61252,47.25766,46.08764,45.73134,46.58258,46.34991,46.69429,44.36107,46.69429,46.98858,46.42902,38.28218,37.72009,37.72052
4,5411,35.452,20.74086,21.10538,27.1486,21.67327,21.63098,20.63199,25.66189,20.96297,26.67288,20.96297,28.10152,28.12529,31.12518,30.32645,30.32574
5,5412,35.452,45.25645,44.86199,40.21578,42.37252,44.30632,44.87197,44.74317,45.1892,41.9858,45.1892,39.89685,39.5019,34.09403,34.57516,34.57514
6,5413,35.452,24.52942,29.61152,31.11844,29.7678,26.66411,30.0085,28.56655,27.99033,33.42085,27.99033,30.83093,33.02627,29.21252,26.20105,26.19749
7,5414,35.452,15.742,18.96684,21.76689,18.62709,21.06854,20.43243,21.61859,20.87957,22.61033,20.87957,22.06883,21.25458,30.36277,31.68782,31.68781
8,5415,35.452,47.74851,51.51879,44.22834,48.83333,44.88307,43.13742,48.99787,52.16774,45.27036,52.16774,44.28766,43.50256,36.89103,41.79684,41.79798
9,5416,35.452,32.67274,34.62768,37.88497,31.31363,30.00051,34.27808,36.15427,35.37121,38.09412,35.37121,38.20545,37.06937,32.34939,31.1574,31.1565


<div style="background-color:rgba(177, 156, 217, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Blend Models</h1>
</div>

In [39]:
all_blend_scores = pd.DataFrame(
    {
        "Model": pd.Series(dtype="str"),
        "Score": pd.Series(dtype="float"),
        "StdDev": pd.Series(dtype="float"),
    }
)

In [40]:
model_lst = ["xgb1", "xgb2", "cat1", "lgbm0", "lgbm1"]

In [41]:
len(model_lst)

5

In [42]:
target_names = [f"target_{model}" for model in model_lst]
target_names

['target_xgb1', 'target_xgb2', 'target_cat1', 'target_lgbm0', 'target_lgbm1']

In [43]:
sample_submission[TARGET] = sample_submission[target_names].sum(axis=1) / len(model_lst)

In [44]:
sample_submission[[ID, TARGET]].to_csv("submission_models_wt_avg.csv", index=False)
sample_submission[[ID, TARGET]].tail(8)

Unnamed: 0,id,Strength
3597,9004,18.2941
3598,9005,39.15304
3599,9006,16.89599
3600,9007,31.29621
3601,9008,32.9624
3602,9009,41.32826
3603,9010,31.09395
3604,9011,19.6966


In [45]:
sample_submission[TARGET] = (
#     (sample_submission["target_xgb_bp"] * 2 )
#     + (sample_submission["target_lgbm_bp"]  )
    (sample_submission["target_xgb1"] * 3 )
    + (sample_submission["target_lgbm1"])
#     + (sample_submission["target_lgbm2"])    
#     + (sample_submission["target_lgbm2"])
    + (sample_submission["target_cat1"] )
    + (sample_submission["target_cat2"] )    
#     + (sample_submission["target_cat_bp"] )
#     + (sample_submission["target_svc"] )
#     + (sample_submission["target_log_reg3"] )
#     + (sample_submission["target_cat2"] )
)/6

# sample_submission[TARGET] = sample_submission[TARGET].astype(int)

In [46]:
sample_submission[[ID, TARGET]].to_csv("submission_wt_avg.csv", index=False)
sample_submission[[ID, TARGET]].tail(8)

Unnamed: 0,id,Strength
3597,9004,18.16883
3598,9005,39.17904
3599,9006,17.40593
3600,9007,30.6512
3601,9008,32.51728
3602,9009,40.80834
3603,9010,30.51272
3604,9011,19.95918


In [47]:
all_cv_scores.sort_values(by=["Score"], ascending=False)

Unnamed: 0,Model,Score,StdDev,RunTime,n_estimators,n_folds,comments
12,lasso,11.42549,0.29277,0.99004,100,10,
27,lasso,11.42486,0.2795,0.63606,100,5,
28,ridge,11.14477,0.24505,0.63308,100,5,
29,ridge_50,11.14473,0.24504,0.62702,100,5,
13,ridge,11.14298,0.26997,0.99572,100,10,
14,ridge_50,11.14294,0.26997,0.9402,100,10,
15,xgb_best_params,9.05766,0.14019,81.99597,100,5,
19,xgb1,8.96841,0.12949,5.11198,100,5,
0,xgb_best_params,8.95923,0.29151,162.99291,100,10,
23,lgbm2,8.89271,0.14442,1.74116,100,5,


<div style="background-color:rgba(177, 156, 217, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Level 1 Stack Models</h1>
</div>

In [48]:
## TODO: Generate these dictionaries from model names

train_oof_dict = {
    "train_pred_cat1": "train_pred_cat1.csv",
    "train_pred_cat2": "train_pred_cat2.csv",
    "train_pred_lgbm1": "train_pred_lgbm1.csv",    
    "train_pred_lgbm2": "train_pred_lgbm2.csv",    
    "train_pred_xgb1": "train_pred_xgb1.csv"
}

test_pred_dict = {
    "submission_cat1": "submission_cat1.csv",
    "submission_cat2": "submission_cat2.csv",
    "submission_lgbm1": "submission_lgbm1.csv",
    "submission_lgbm2": "submission_lgbm2.csv",
    "submission_xgb1": "submission_xgb1.csv",
}

In [49]:
def blend_results(train_oof_dict, test_pred_dict):
    oof_df = pd.DataFrame()
    test_preds_df = pd.DataFrame()

    for name, train_oof_fname in train_oof_dict.items():
        fname = "../working/" + train_oof_fname
        print(f"Processing {name}, {train_oof_fname}")
        df = pd.read_csv(fname)
        print(df.head())
#         print(df.iloc[:,1])
        preds = pd.Series(df.iloc[:,1], name=name)
#         print(preds[:5])
        oof_df = pd.concat([oof_df, preds], axis=1)
    #     oof_df = pd.concat([oof_df, pd.Series(np.load(TRAIN_PATH / train_oof), name=name)], axis=1)

    for name, test_pred_fname in test_pred_dict.items():
        fname = "../working/" + test_pred_fname
        print(f"{name}, {test_pred_fname}")
        df = pd.read_csv(fname)
        print(df.head())
        preds = pd.Series(df.iloc[:,1], name=name)
        test_preds_df = pd.concat([test_preds_df, preds], axis=1)

    print("=== oof ===")
    print(oof_df.head())
    print("=== test_preds ===")
    print(test_preds_df.head())
    return oof_df, test_preds_df
    
# (oof_df, preds_df) = blend_results(train_oof_dict, test_pred_dict)    

In [50]:
def load_oof_results(train_oof_dict, test_pred_dict):
    oof_df = pd.DataFrame()
    test_preds_df = pd.DataFrame()

    for name, train_oof_fname in train_oof_dict.items():
        fname = "../working/" + train_oof_fname
        print(f"Processing {name}, {train_oof_fname}")
        df = pd.read_csv(fname)
        print(df.head())
#         print(df.iloc[:,1])
        preds = pd.Series(df.iloc[:,1], name=name)
#         print(preds[:5])
        oof_df = pd.concat([oof_df, preds], axis=1)
    #     oof_df = pd.concat([oof_df, pd.Series(np.load(TRAIN_PATH / train_oof), name=name)], axis=1)

    for name, test_pred_fname in test_pred_dict.items():
        fname = "../working/" + test_pred_fname
        print(f"{name}, {test_pred_fname}")
        df = pd.read_csv(fname)
        print(df.head())
        preds = pd.Series(df.iloc[:,1], name=name)
        test_preds_df = pd.concat([test_preds_df, preds], axis=1)

    print("=== oof ===")
    print(oof_df.head())
    print("=== test_preds ===")
    print(test_preds_df.head())
    return oof_df, test_preds_df
    
(oof_df, preds_df) = load_oof_results(train_oof_dict, test_pred_dict) 

Processing train_pred_cat1, train_pred_cat1.csv
    id  pred_cat1
0  0.0   21.56087
1  1.0   34.43740
2  2.0   38.23524
3  3.0   44.26784
4  4.0   45.42224
Processing train_pred_cat2, train_pred_cat2.csv
    id  pred_cat2
0  0.0   21.78553
1  1.0   36.22236
2  2.0   39.11054
3  3.0   44.02174
4  4.0   44.64331
Processing train_pred_lgbm1, train_pred_lgbm1.csv
    id  pred_lgbm1
0  0.0    23.09081
1  1.0    30.36920
2  2.0    43.27919
3  3.0    42.85879
4  4.0    46.98142
Processing train_pred_lgbm2, train_pred_lgbm2.csv
    id  pred_lgbm2
0  0.0    22.90256
1  1.0    33.08388
2  2.0    36.38186
3  3.0    44.98889
4  4.0    42.88206
Processing train_pred_xgb1, train_pred_xgb1.csv
    id  pred_xgb1
0  0.0   22.93364
1  1.0   32.67008
2  2.0   41.96727
3  3.0   40.71120
4  4.0   46.85235
submission_cat1, submission_cat1.csv
     id  Strength
0  5407  47.22347
1  5408  19.44139
2  5409  32.36480
3  5410  46.98858
4  5411  28.10152
submission_cat2, submission_cat2.csv
     id  Strength
0  5

In [51]:
oof_df.head()

Unnamed: 0,train_pred_cat1,train_pred_cat2,train_pred_lgbm1,train_pred_lgbm2,train_pred_xgb1
0,21.56087,21.78553,23.09081,22.90256,22.93364
1,34.4374,36.22236,30.3692,33.08388,32.67008
2,38.23524,39.11054,43.27919,36.38186,41.96727
3,44.26784,44.02174,42.85879,44.98889,40.7112
4,45.42224,44.64331,46.98142,42.88206,46.85235


In [52]:
preds_df.head()

Unnamed: 0,submission_cat1,submission_cat2,submission_lgbm1,submission_lgbm2,submission_xgb1
0,47.22347,46.89835,47.79622,46.32879,42.92241
1,19.44139,19.59299,18.35496,20.34535,15.81407
2,32.3648,33.84536,31.18286,32.63635,30.90379
3,46.98858,46.42902,46.69429,44.36107,45.73134
4,28.10152,28.12529,20.96297,26.67288,21.63098


In [53]:
type(preds_df)

pandas.core.frame.DataFrame

In [54]:
def run_lr(useful_features:List[str], TARGET:str, train_df:pd.DataFrame, test_df:pd.DataFrame) -> (List[float],List[float]):
    final_predictions = []
    scores = []

    kfold = model_selection.KFold(n_splits=Config.N_FOLDS, shuffle=True, random_state=Config.seed)

    for fold, (train_idx, valid_idx) in enumerate(kfold.split(train_df)):
        xtrain = train_df.iloc[train_idx].reset_index(drop=True)
        xvalid = train_df.iloc[valid_idx].reset_index(drop=True)

        xtest = test_df[useful_features].copy()

        ytrain = xtrain[TARGET]
        yvalid = xvalid[TARGET]

        xtrain = xtrain[useful_features]
        xvalid = xvalid[useful_features]

#         model = LogisticRegression()
        model = linear_model.LinearRegression()
        # Smaller C means more regularization; default=1.0
        # 2947.0517025518097
#         model = LogisticRegression(max_iter=500, C=2947.0517025518097, penalty='l2',solver='newton-cg')
#         model = LogisticRegression(C = 2947.0517025518097,
#                         max_iter = 500,
#                         penalty = 'l2',
#                         solver = 'liblinear')
        model.fit(xtrain, ytrain)

        preds_valid = model.predict_proba(xvalid)[:,-1]
        test_preds = model.predict_proba(xtest)[:,-1]

        final_predictions.append(test_preds)
#         score = metrics.roc_auc_score(yvalid, preds_valid)
        score = metrics.mean_squared_error(yvalid, preds_valid, squared=False)
        print(f"Fold={fold}, Score={score}")
        scores.append(score)
    return scores, final_predictions


In [55]:
# useful_features = ["pred_lda", "pred_gbc","pred_gbc2", "pred_cat_bp", "pred_cat1", "pred_lgbm1", "pred_lgbm2", "pred_lgbm_bp", "pred_xgb1", "pred_xgb_bp"]
useful_features = [ "train_pred_cat1", "train_pred_cat2", "train_pred_lgbm1", "train_pred_lgbm2", "train_pred_xgb1"]

In [56]:
oof_df[useful_features].head()

Unnamed: 0,train_pred_cat1,train_pred_cat2,train_pred_lgbm1,train_pred_lgbm2,train_pred_xgb1
0,21.56087,21.78553,23.09081,22.90256,22.93364
1,34.4374,36.22236,30.3692,33.08388,32.67008
2,38.23524,39.11054,43.27919,36.38186,41.96727
3,44.26784,44.02174,42.85879,44.98889,40.7112
4,45.42224,44.64331,46.98142,42.88206,46.85235


In [57]:
# preds_df[useful_features].head()

In [58]:
# fold_scores, final_predictions = run_lr(useful_features, TARGET, oof_df, preds_df)
# test_preds = np.mean(np.column_stack(final_predictions), axis=1)
# cv_score, std_dev = show_fold_scores(fold_scores)
# create_submission("level1_lr", TARGET, test_preds)

In [59]:
pd.options.display.max_colwidth = 100
pd.set_option("display.max_rows", 999)
pd.set_option("display.precision", 5)
pd.options.display.float_format = '{:.2f}'.format
pd.options.display.max_colwidth

100

In [60]:
all_cv_scores.sort_values(by=["Score"], ascending=False)

Unnamed: 0,Model,Score,StdDev,RunTime,n_estimators,n_folds,comments
12,lasso,11.43,0.29,0.99,100,10,
27,lasso,11.42,0.28,0.64,100,5,
28,ridge,11.14,0.25,0.63,100,5,
29,ridge_50,11.14,0.25,0.63,100,5,
13,ridge,11.14,0.27,1.0,100,10,
14,ridge_50,11.14,0.27,0.94,100,10,
15,xgb_best_params,9.06,0.14,82.0,100,5,
19,xgb1,8.97,0.13,5.11,100,5,
0,xgb_best_params,8.96,0.29,162.99,100,10,
23,lgbm2,8.89,0.14,1.74,100,5,
