<a href="https://www.kaggle.com/code/mmellinger66/s3e8-gemstone-pricing-models?scriptVersionId=121622544" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

 <div style="background-color:rgba(177, 156, 217, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Playground Season 3: Episode 8 - Gemstone Pricing Models</h1>
</div>

## Problem Type

Regression

## Evaluation Metric

$$RMSE = \sqrt{\frac{1}{N} \sum_{i=1}^N (y_i - \hat{y_i})^2}$$

```python
score = metrics.mean_squared_error(yvalid, preds_valid, squared=False)
```

<div style="background-color:rgba(177, 156, 217, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Import Libraries</h1>
</div>

In [1]:
from typing import List, Set, Dict, Tuple, Optional

import os
import time
from pathlib import Path
import glob
import gc

import pandas as pd
import numpy as np

from sklearn import impute
from sklearn import metrics
from sklearn import preprocessing
from sklearn import linear_model
from sklearn import svm
from sklearn import cluster
from sklearn import model_selection
from sklearn import ensemble
from sklearn import datasets

import xgboost as xgb
import catboost as cb
import lightgbm as lgb

import optuna
from optuna.visualization import plot_optimization_history, plot_param_importances

from scipy.special import boxcox1p
from scipy.stats import boxcox_normmax

# Visualization Libraries
import matplotlib as mpl
import matplotlib.pylab as plt
import seaborn as sns
import missingno as msno
from folium import Map
from folium.plugins import HeatMap
from IPython.display import display_html, display_markdown, display_latex
from colorama import Fore, Style

import warnings
warnings.filterwarnings('ignore')

pd.set_option("display.max_rows", 999)
pd.set_option("display.precision", 5)

<div style="background-color:rgba(177, 156, 217, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Configuration</h1>
</div>

In [2]:
TARGET="price"
ID="id"

# Optuna
objective_direction = "minimize"  # minimize, maximize

In [3]:
class Config:
    path:str = "../input/playground-series-s3e8/"
    gpu:bool = True
    optimize:bool = True
    n_optuna_trials:int = 30 # 5, 10, 30
    fast_render:bool = False
    calc_probability:bool = False
    debug:bool = False
    seed:int = 42
    N_ESTIMATORS:int = 100  # 100, 300, 1000, 2000, 5000, 15_000, 20_000 GBDT
    GPU_N_ESTIMATORS:int = 2000 # Want models to run fast during dev
    N_FOLDS:int = 5

In [4]:
class clr:
    S = Style.BRIGHT + Fore.LIGHTRED_EX
    E = Style.RESET_ALL

<div style="background-color:rgba(177, 156, 217, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Library</h1>
</div>

In [5]:
def read_data(path: str, analyze:bool=True) -> (pd.DataFrame, pd.DataFrame, pd.DataFrame):
    data_dir = Path(path)

    train = pd.read_csv(data_dir / "train.csv")
    test = pd.read_csv(data_dir / "test.csv")
    submission_df = pd.read_csv(data_dir / "sample_submission.csv")

    if analyze:
        print(clr.S + "=== Shape of Data ==="+clr.E)
        print(f" train data: Rows={train.shape[0]}, Columns={train.shape[1]}")
        print(f" test data : Rows={test.shape[0]}, Columns={test.shape[1]}")

        print(clr.S + "\n=== Train Data: First 5 Rows ===\n"+clr.E)
        display(train.head())
        print(f"\n{clr.S}=== Train Column Names ==={clr.E}\n")
        display(train.columns)
        print(f"\n{clr.S}=== Features/Explanatory Variables ==={clr.E}\n")
        eval_features(train)
        print(f"\n{clr.S}=== Skewness ==={clr.E}\n")
        check_skew(train)
    return train, test, submission_df

def create_submission(model_name: str, target, preds, seed:int=42, nfolds:int=5) -> pd.DataFrame:
    sample_submission[target] = preds #.astype(int)

    if len(model_name) > 0:
        fname = f"submission_{model_name}_k{nfolds}_s{seed}.csv"
    else:
        fname = "submission.csv"

    sample_submission.to_csv(fname, index=False)

    return sample_submission

def show_classification_scores(ground_truth:List[int], yhat:List[int]) -> None:
    accuracy = metrics.accuracy_score(ground_truth, yhat)
    precision = metrics.precision_score(ground_truth, yhat)
    recall = metrics.recall_score(ground_truth, yhat)
    roc = metrics.roc_auc_score(ground_truth, yhat)
    f1 = metrics.f1_score(ground_truth, yhat)

    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"ROC: {roc:.4f}")
    print(f"f1: {f1:.4f}")
    

def label_encoder(train:pd.DataFrame, test:pd.DataFrame, columns:List[str]) -> (pd.DataFrame, pd.DataFrame) :
    for col in columns:
        train[col] = train[col].astype(str)
        test[col] = test[col].astype(str)
        train[col] = preprocessing.LabelEncoder().fit_transform(train[col])
        test[col] = preprocessing.LabelEncoder().fit_transform(test[col])
    return train, test   

def create_strat_folds(df:pd.DataFrame, TARGET, n_folds:int=5, seed:int=42) -> pd.DataFrame:
    print(f"TARGET={TARGET}, n_folds={n_folds}, seed={seed}")
    df["fold"] = -1

    kf = model_selection.StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=seed)
    # kf = GroupKFold(n_splits=Config.N_FOLDS)
    for fold, (train_idx, valid_idx) in enumerate(kf.split(df, df[TARGET])):
        df.loc[valid_idx, "fold"] = fold

    # df.to_csv(f"train_fold{num_folds}.csv", index=False)
    return df


def create_folds(df:pd.DataFrame, n_folds:int=5, seed:int=42) -> pd.DataFrame:
    print(f"n_folds={n_folds}, seed={seed}")
    df["fold"] = -1

    kf = model_selection.KFold(n_splits=n_folds, shuffle=True, random_state=seed)

    for fold, (train_idx, valid_idx) in enumerate(kf.split(df)):
        df.loc[valid_idx, "fold"] = fold

    # df.to_csv(f"train_fold{num_folds}.csv", index=False)
    return df

def show_fold_scores(scores: List[float]) -> (float, float):
    cv_score = np.mean(scores)  # Used in filename
    std_dev = np.std(scores)
    print(
        f"Scores -> Adjusted: {np.mean(scores) - np.std(scores):.8f} , mean: {np.mean(scores):.8f}, std: {np.std(scores):.8f}"
    )
    return cv_score, std_dev


def feature_distribution_types(df:pd.DataFrame, display:bool=True) -> (List[str], List[str]):
    continuous_features = list(df.select_dtypes(include=['int64', 'float64', 'uint8']).columns)
    categorical_features = list(df.select_dtypes(include=['object', 'bool']).columns)
    if display:
        print(f"{clr.S}Continuous Features={continuous_features}{clr.E}\n")
        print(f"{clr.S}Categorical Features={categorical_features}{clr.E}")
    return continuous_features, categorical_features   

def show_cardinality(df:pd.DataFrame, features:List[str]) -> None:
    print("=== Cardinality ===")
    print(df[features].nunique())

## === Model Support ===    

from scipy.stats import mode


def merge_test_predictions(final_test_predictions:List[float], calc_probability:bool=True) -> List[float]:

    if calc_probability:
        print("Mean")
        result = np.mean(np.column_stack(final_test_predictions), axis=1)
    else:
        print("Mode")
        mode_result = mode(np.column_stack(final_test_predictions), axis=1)
        result = mode_result[0].ravel()

    return result

def summary_statistics(X:pd.DataFrame, enhanced=True) -> None:
    desc = X.describe()
    if enhanced:
        desc.loc["var"] = X.var(numeric_only=True).tolist()
        desc.loc["skew"] = X.skew(numeric_only=True).tolist()
        desc.loc["kurt"] = X.kurtosis(numeric_only=True).tolist()

    with pd.option_context("display.precision", 2):
        style = desc.transpose().style.background_gradient(
            cmap="coolwarm"
        )  # .set_precision(4)
    display(style)
    
def show_missing_features(df:pd.DataFrame) -> None:
    missing_vals = df.isna().sum().sort_values(ascending=False)
    print(missing_vals[missing_vals > 0])


def show_duplicate_records(df:pd.DataFrame) -> None:
    dups = df.duplicated()
    print(dups.sum())


def eval_features(df:pd.DataFrame) -> (List[str], List[str], List[str]):
    ## Separate Categorical and Numerical Features
    categorical_features = list(
        df.select_dtypes(include=["category", "object"]).columns
    )
    continuous_features = list(df.select_dtypes(include=["number"]).columns)

    print(f"{clr.S}Continuous features:{clr.E} {continuous_features}")
    print(f"{clr.S}Categorical features:{clr.E} {categorical_features}")
    print("\n --- Cardinality of Categorical Features ---\n")

    for feature in categorical_features:
        cardinality = df[feature].nunique()
        if cardinality < 10:
            print(f"{clr.S}{feature}{clr.E}: cardinality={cardinality}, {df[feature].unique()}")
        else:
            print(f"{clr.S}{feature}{clr.E}: cardinality={cardinality}")
    all_features = categorical_features + continuous_features
    return all_features, categorical_features, continuous_features


def show_feature_importance(feature_importance_lst:List[str]) -> None:
    fis_df = pd.concat(feature_importance_lst, axis=1)

    fis_df.sort_values("0_importance", ascending=True).head(40).plot(
        kind="barh", figsize=(12, 12), title="Feature Importance Across Folds"
    )
    plt.show()


def show_feature_target_crosstab(df:pd.DataFrame, feature_lst:List[str], target:str) -> None:
    for feature in feature_lst:
        print(f"\n=== {feature} vs {target} ===\n")
        display(
            pd.crosstab(df[feature], df[target], margins=True)
        )  # display keeps bold formatting


def show_cardinality(df:pd.DataFrame, features:List[str]) -> None:
    print(f"{clr.S}=== Cardinality ==={clr.E}")
    print(df[features].nunique())


def show_unique_features(df:pd.DataFrame, features:List[str]) -> None:
    for col in features:
        print(col, sorted(df[col].dropna().unique()))


def feature_distribution_types(df:pd.DataFrame, display:bool=True) -> (List[str], List[str]):
    continuous_features = list(
        df.select_dtypes(include=["int64", "float64", "uint8"]).columns
    )
    categorical_features = list(df.select_dtypes(include=["object", "bool"]).columns)
    if display:
        print(f"{clr.S}Continuous Features={clr.E}{continuous_features}\n")
        print(f"{clr.S}Categorical Features={clr.E}{categorical_features}")
    return continuous_features, categorical_features


def describe(X:pd.DataFrame) -> None:
    """Deprecated: Use summary_statistics()"""
    desc = X.describe()
    desc.loc['var'] = X.var(numeric_only=True).tolist()
    desc.loc['skew'] = X.skew(numeric_only=True).tolist()
    desc.loc['kurt'] = X.kurtosis(numeric_only=True).tolist()

    with pd.option_context('display.precision', 2):
        style = desc.transpose().style.background_gradient(cmap='coolwarm') #.set_precision(4)
    display(style)
  

def check_skew(df:pd.DataFrame) -> None:
    skew = df.skew(skipna=True,numeric_only=True).sort_values(ascending=False)
    print(skew)
    
def gpu_ify_lgbm(lgbm_dict):
    if Config.gpu:
        lgbm_dict["device"] = "gpu"
        lgbm_dict["boosting_type"] = "gbdt"
        lgbm_dict["gpu_platform_id"] = 0
        lgbm_dict["gpu_device_id"] = 0
    return lgbm_dict

def gpu_ify_cb(params):
    if Config.gpu:
        params["task_type"] = "GPU"
    return params    


<div style="background-color:rgba(177, 156, 217, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Optuna Hyperparameter Optimization Library</h1>
</div>

In [6]:
def objective_xgb(trial, X_train, X_valid, y_train, y_valid):

    xgb_params = {
        #         "objective": trial.suggest_categorical("objective", ["multi:softmax"]),
        #         "eval_metric": "mlogloss",
        #         "objective": "multi:softmax",
#         "eval_metric": "rmse",  # auc, rmse, mae
        "eval_metric": trial.suggest_categorical("eval_metric", ["rmse", "mae"]),
        "objective": trial.suggest_categorical("objective", ["reg:squarederror"]), # "reg:squarederror",
        #         "enable_categorical": trial.suggest_categorical("use_label_encoder", [True]),
        "use_label_encoder": trial.suggest_categorical("use_label_encoder", [False]),
        "n_estimators": trial.suggest_int("n_estimators", 1000, 5000, 100),
        "learning_rate": trial.suggest_loguniform("learning_rate", 1e-2, 0.25),
        "subsample": trial.suggest_float("subsample", 0.1, 1, step=0.01),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.05, 1, step=0.01),
        "max_depth": trial.suggest_int("max_depth", 1, 20),  # 10
        "gamma": trial.suggest_float("gamma", 0, 100, step=0.1),
        "booster": trial.suggest_categorical("booster", ["gbtree"]),
        "tree_method": trial.suggest_categorical(
            "tree_method", ["gpu_hist"]
        ),  # hist, gpu_hist
        "predictor": "gpu_predictor",
        "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 100),
        "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 100),
        "random_state": trial.suggest_categorical("random_state", [42]),
        "n_jobs": trial.suggest_categorical("n_jobs", [4]),
        "min_child_weight": trial.suggest_loguniform("min_child_weight", 1e-1, 1e3),
        # "min_child_weight": trial.suggest_categorical("min_child_weight", [256]),
    }

    # Model loading and training
    model = xgb.XGBRegressor(**xgb_params)
    model.fit(
        X_train,
        y_train,
        eval_set=[(X_train, y_train), (X_valid, y_valid)],
        early_stopping_rounds=5000,
        verbose=0,
    )

    print(f"Number of boosting rounds: {model.best_iteration}")
    #     oof = model.predict_proba(X_valid)[:, 1] # Probability
    oof = model.predict(X_valid)  # Classification: 0,1

    return metrics.mean_squared_error(y_valid, oof, squared=False)


def objective_lgbm(trial, X_train, X_valid, y_train, y_valid):

    lgbm_params = {
        "objective": trial.suggest_categorical("objective", ["mae", "rmse"]),
        #         "n_estimators": trial.suggest_categorical("n_estimators", [1_000]),
        #         "n_estimators": trial.suggest_categorical("n_estimators", [5000]),
        "n_estimators": trial.suggest_int("n_estimators", 700, 5000),
        "importance_type": "gain",
        "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
        "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.05, 1, step=0.01),
        "num_leaves": trial.suggest_int("num_leaves", 2, 1000),
        "feature_fraction": trial.suggest_uniform("feature_fraction", 0.1, 1.0),
        "bagging_fraction": trial.suggest_uniform("bagging_fraction", 0.1, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 0, 15),
        "min_child_samples": trial.suggest_int("min_child_samples", 1, 300),
        "subsample": trial.suggest_float("subsample", 0.1, 1, step=0.01),
        "learning_rate": trial.suggest_loguniform("learning_rate", 1e-2, 0.25),
        "max_depth": trial.suggest_int("max_depth", 1, 100),
        "random_state": trial.suggest_categorical("random_state", [42]),
        "n_jobs": trial.suggest_categorical("n_jobs", [4]),
        #         'min_child_weight': trial.suggest_loguniform('min_child_weight', 1e-1, 1e3),
        # "min_child_weight": trial.suggest_categorical("min_child_weight", [256]),
    }

    # Model loading and training
    model = lgb.LGBMRegressor(**lgbm_params)
    model.fit(
        X_train,
        y_train,
        eval_set=[(X_train, y_train), (X_valid, y_valid)],
        eval_metric="mae",
        callbacks=[
            lgb.log_evaluation(500),
            lgb.early_stopping(500, False, True),
        ],
    )

    #     print(f"Number of boosting rounds: {model.best_iteration}")
    oof = model.predict(X_valid)

    return metrics.mean_squared_error(y_valid, oof, squared=False)
#     return metrics.mean_absolute_error(y_valid, oof)


def objective_clf_lgbm(trial, X_train, X_valid, y_train, y_valid):

    params = {
        "boosting_type": "gbdt",
        # "objective": trial.suggest_categorical("objective", ["mae", "rmse"]),
        #         "objective": trial.suggest_categorical("objective", ["multi:softprob"]),
        #         "n_estimators": trial.suggest_categorical("n_estimators", [1_000]),
        #         "n_estimators": trial.suggest_categorical("n_estimators", [5000]),
        "n_estimators": trial.suggest_int("n_estimators", 700, 1000),
        "importance_type": "gain",
        "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 10.0),
        "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 10.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.05, 1, step=0.01),
        "num_leaves": trial.suggest_int("num_leaves", 2, 1000),
        "feature_fraction": trial.suggest_uniform("feature_fraction", 0.1, 1.0),
        "bagging_fraction": trial.suggest_uniform("bagging_fraction", 0.1, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 0, 15),
        "min_child_samples": trial.suggest_int("min_child_samples", 1, 300),
        "subsample": trial.suggest_float("subsample", 0.1, 1, step=0.01),
        "learning_rate": trial.suggest_loguniform("learning_rate", 1e-2, 0.25),
        "max_depth": trial.suggest_int("max_depth", 1, 100),
        "random_state": trial.suggest_categorical("random_state", [42]),
        "n_jobs": trial.suggest_categorical("n_jobs", [4]),
        #         'min_child_weight': trial.suggest_loguniform('min_child_weight', 1e-1, 1e3),
        # "min_child_weight": trial.suggest_categorical("min_child_weight", [256]),
    }
    if Config.gpu:
        params["device_type"] = "gpu"

    # Model loading and training
    model = lgb.LGBMClassifier(**params)
    model.fit(
        X_train,
        y_train,
        eval_set=[(X_train, y_train), (X_valid, y_valid)],
        # eval_metric="mae",
        callbacks=[
            lgb.log_evaluation(500),
            lgb.early_stopping(500, False, True),
        ],
    )

    #     print(f"Number of boosting rounds: {model.best_iteration}")
    oof = model.predict(X_valid)

    #     return accuracy_score(y_valid, oof)
    return metrics.roc_auc_score(y_valid, oof)


def objective_cb(trial, X_train, X_valid, y_train, y_valid):

    cb_params = {
        "iterations": 100,
        "learning_rate": trial.suggest_loguniform("learning_rate", 0.1, 1.0),
        "l2_leaf_reg": trial.suggest_loguniform("l2_leaf_reg", 1, 100),
        "bagging_temperature": trial.suggest_loguniform(
            "bagging_temperature", 0.1, 20.0
        ),
        "random_strength": trial.suggest_float("random_strength", 1.0, 2.0),
        "depth": trial.suggest_int("depth", 1, 10),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 300),
          "use_best_model": True,
#         "task_type": "GPU",
        "random_seed": 42,
    }

    # Model loading and training
    model = cb.CatBoostRegressor(**cb_params)

    model.fit(
        X_train,
        y_train,
        eval_set=[(X_train, y_train), (X_valid, y_valid)],
        # eval_metric="accuracy",
        early_stopping_rounds=500,
        verbose=False,
    )

#     print(f"Number of boosting rounds: {model.best_iteration}")
    # oof = model.predict_proba(X_valid)[:, 1]
    oof = model.predict(X_valid)  # Classification
    return metrics.mean_squared_error(y_valid, oof, squared=False)
#     return metrics.mean_absolute_error(y_valid, oof)
# 
#     return accuracy_score(y_valid, oof)

def objective_clf_cb(trial, X_train, X_valid, y_train, y_valid):

    cb_params = {
        "iterations": 10,  # 1000
        "learning_rate": trial.suggest_loguniform("learning_rate", 0.1, 1.0),
        "l2_leaf_reg": trial.suggest_loguniform("l2_leaf_reg", 1, 100),
        "bagging_temperature": trial.suggest_loguniform(
            "bagging_temperature", 0.1, 20.0
        ),
        "random_strength": trial.suggest_float("random_strength", 1.0, 2.0),
        "depth": trial.suggest_int("depth", 1, 10),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 300),
        "use_best_model": True,
#             "task_type": "GPU",
        "random_seed": 42,
    }

    # Model loading and training
    model = cb.CatBoostClassifier(**cb_params)
    model.fit(
        X_train,
        y_train,
        eval_set=[(X_train, y_train), (X_valid, y_valid)],
        # eval_metric="accuracy",
        early_stopping_rounds=500,
        verbose=False,
    )

    # print(f"Number of boosting rounds: {model.best_iteration}")
    # oof = model.predict_proba(X_valid)[:, 1]
    oof = model.predict(X_valid)  # Classification

    return metrics.accuracy_score(y_valid, oof)

<div style="background-color:rgba(177, 156, 217, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Load Train/Test Data and Analyze</h1>
</div>

## Load the following files

 - train.csv - Data used to build our machine learning model
 - test.csv - Data used to build our machine learning model. Does not contain the target variable
 - sample_submission.csv - A file in the proper format to submit test predictions

In [7]:
%%time
train, test, sample_submission = read_data(Config.path, analyze=True)                                

[1m[91m=== Shape of Data ===[0m
 train data: Rows=193573, Columns=11
 test data : Rows=129050, Columns=10
[1m[91m
=== Train Data: First 5 Rows ===
[0m


Unnamed: 0,id,carat,cut,color,clarity,depth,table,x,y,z,price
0,0,1.52,Premium,F,VS2,62.2,58.0,7.27,7.33,4.55,13619
1,1,2.03,Very Good,J,SI2,62.0,58.0,8.06,8.12,5.05,13387
2,2,0.7,Ideal,G,VS1,61.2,57.0,5.69,5.73,3.5,2772
3,3,0.32,Ideal,G,VS1,61.6,56.0,4.38,4.41,2.71,666
4,4,1.7,Premium,G,VS2,62.6,59.0,7.65,7.61,4.77,14453



[1m[91m=== Train Column Names ===[0m



Index(['id', 'carat', 'cut', 'color', 'clarity', 'depth', 'table', 'x', 'y',
       'z', 'price'],
      dtype='object')


[1m[91m=== Features/Explanatory Variables ===[0m

[1m[91mContinuous features:[0m ['id', 'carat', 'depth', 'table', 'x', 'y', 'z', 'price']
[1m[91mCategorical features:[0m ['cut', 'color', 'clarity']

 --- Cardinality of Categorical Features ---

[1m[91mcut[0m: cardinality=5, ['Premium' 'Very Good' 'Ideal' 'Good' 'Fair']
[1m[91mcolor[0m: cardinality=7, ['F' 'J' 'G' 'E' 'D' 'H' 'I']
[1m[91mclarity[0m: cardinality=8, ['VS2' 'SI2' 'VS1' 'SI1' 'IF' 'VVS2' 'VVS1' 'I1']

[1m[91m=== Skewness ===[0m

price    1.60558
carat    0.99513
z        0.68567
table    0.61906
x        0.36105
y        0.35676
id       0.00000
depth   -0.27638
dtype: float64
CPU times: user 337 ms, sys: 64.5 ms, total: 401 ms
Wall time: 656 ms


In [8]:
train.head()

Unnamed: 0,id,carat,cut,color,clarity,depth,table,x,y,z,price
0,0,1.52,Premium,F,VS2,62.2,58.0,7.27,7.33,4.55,13619
1,1,2.03,Very Good,J,SI2,62.0,58.0,8.06,8.12,5.05,13387
2,2,0.7,Ideal,G,VS1,61.2,57.0,5.69,5.73,3.5,2772
3,3,0.32,Ideal,G,VS1,61.6,56.0,4.38,4.41,2.71,666
4,4,1.7,Premium,G,VS2,62.6,59.0,7.65,7.61,4.77,14453


In [9]:
original = pd.read_csv("../input/gemstone-price-prediction/cubic_zirconia.csv", index_col=[0])
original = original[-original.depth.isna()]
original.head()

Unnamed: 0,carat,cut,color,clarity,depth,table,x,y,z,price
1,0.3,Ideal,E,SI1,62.1,58.0,4.27,4.29,2.66,499
2,0.33,Premium,G,IF,60.8,58.0,4.42,4.46,2.7,984
3,0.9,Very Good,E,VVS2,62.2,60.0,6.04,6.12,3.78,6289
4,0.42,Ideal,F,VS1,61.6,56.0,4.82,4.8,2.96,1082
5,0.31,Ideal,F,VVS1,60.4,59.0,4.35,4.43,2.65,779


In [10]:
original.shape

(26270, 10)

In [11]:
train['is_original']    = 0
test['is_original']     = 0
original['is_original'] = 1
combined = pd.concat([train, original], ignore_index=True).drop_duplicates()
train = combined

In [12]:
combined.head()

Unnamed: 0,id,carat,cut,color,clarity,depth,table,x,y,z,price,is_original
0,0.0,1.52,Premium,F,VS2,62.2,58.0,7.27,7.33,4.55,13619,0
1,1.0,2.03,Very Good,J,SI2,62.0,58.0,8.06,8.12,5.05,13387,0
2,2.0,0.7,Ideal,G,VS1,61.2,57.0,5.69,5.73,3.5,2772,0
3,3.0,0.32,Ideal,G,VS1,61.6,56.0,4.38,4.41,2.71,666,0
4,4.0,1.7,Premium,G,VS2,62.6,59.0,7.65,7.61,4.77,14453,0


In [13]:
summary_statistics(train.drop(columns=[ID], axis=1), enhanced=True)

Unnamed: 0,count,mean,std,min,25%,50%,75%,max,var,skew,kurt
carat,219809.0,0.79,0.46,0.2,0.4,0.7,1.03,4.5,0.22,1.01,0.63
depth,219809.0,61.81,1.13,50.8,61.2,61.9,62.4,73.6,1.27,-0.24,3.07
table,219809.0,57.25,1.96,49.0,56.0,57.0,58.0,79.0,3.84,0.66,1.04
x,219809.0,5.72,1.11,0.0,4.7,5.7,6.52,10.23,1.24,0.36,-0.78
y,219809.0,5.72,1.11,0.0,4.71,5.72,6.51,58.9,1.23,0.85,23.12
z,219809.0,3.53,0.69,0.0,2.9,3.53,4.03,31.3,0.48,0.65,11.15
price,219809.0,3965.19,4032.64,326.0,949.0,2398.0,5405.0,18818.0,16262215.44,1.61,2.11
is_original,219809.0,0.12,0.32,0.0,0.0,0.0,0.0,1.0,0.11,2.35,3.51


## Outlier Detection

In [14]:
# https://www.kaggle.com/code/lyasdemir/best-algorithm-for-prediction-xgboost
    
def iqr(data:pd.DataFrame, var:str):# outliers detecion .
    q1 = np.quantile(data[var], 0.25)
    q3 = np.quantile(data[var], 0.75)
    diff = q3 - q1
    lower_t = q1 - (1.5 * diff)
    upper_t = q3 + (1.5 * diff)
    return data[(data[var] < lower_t) | (data[var] > upper_t)]

# iqr(train, "squareMeters")

In [15]:
# https://www.kaggle.com/code/sujithmandala/playground-s3-e8-ensemble-model-98-accuracy

def detect_outliers(data:pd.DataFrame) -> pd.DataFrame:
    outlier_percents = {}
    for column in data.columns:
        if data[column].dtype != object:
            q1 = np.quantile(data[column], 0.25)
            q3 = np.quantile(data[column], 0.75)
            iqr = q3 - q1
            upper_bound = q3 + (1.5 * iqr)
            lower_bound = q1 - (1.5 * iqr)
            outliers = data[(data[column] > upper_bound) | (data[column] < lower_bound)][column]
            outlier_percentage = len(outliers) / len(data[column]) * 100
            outlier_percents[column] = outlier_percentage
            outlier_dataframe = pd.DataFrame(data = outlier_percents.values() ,index=outlier_percents.keys() ,columns=['Outlier_percentage'])
    
    return outlier_dataframe.sort_values(by = 'Outlier_percentage', ascending = False)

detect_outliers(train)


Unnamed: 0,Outlier_percentage
is_original,11.93582
price,6.53067
depth,4.58989
carat,3.96799
table,2.54721
z,0.0182
x,0.01456
y,0.01319
id,0.0


In [16]:
# https://www.kaggle.com/code/sujithmandala/playground-s3-e8-ensemble-model-98-accuracy
    
def detect_outliers(data:pd.DataFrame) -> pd.DataFrame:
    outlier_percents = {}
    for column in data.columns:
        if data[column].dtype != object:
            q1 = np.quantile(data[column], 0.25)
            q3 = np.quantile(data[column], 0.75)
            iqr = q3 - q1
            upper_bound = q3 + (1.5 * iqr)
            lower_bound = q1 - (1.5 * iqr)
            outliers = data[(data[column] > upper_bound) | (data[column] < lower_bound)][column]
            outlier_percentage = len(outliers) / len(data[column]) * 100
            outlier_percents[column] = outlier_percentage
            outlier_dataframe = pd.DataFrame(data = outlier_percents.values() ,index=outlier_percents.keys() ,columns=['Outlier_percentage'])
    
    return outlier_dataframe.sort_values(by = 'Outlier_percentage', ascending = False)

detect_outliers(test)


Unnamed: 0,Outlier_percentage
depth,5.06083
carat,3.92096
table,2.30918
z,0.01937
x,0.00697
y,0.00697
id,0.0
is_original,0.0


In [17]:
# iqr(train,"floors")

<div style="background-color:rgba(177, 156, 217, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Feature Engineering</h1>
</div>

## Categorical/Numerical Variables

In [18]:
# train.drop(['cityCode'], axis=1, inplace=True)
# test.drop(['cityCode'], axis=1, inplace=True)

## Handle Outliers
- https://www.kaggle.com/code/lyasdemir/best-algorithm-for-prediction-xgboost
- https://www.kaggle.com/code/mnokno/paris-housing-price-prediction-using-xgboost

In [19]:
# features_with_outliers = ['attic', 'garage', 'made', 'basement', 'floors', 'cityCode', 'squareMeters']
# features_with_outliers = ['attic', 'garage', 'made', 'basement', 'floors',  'squareMeters']

In [20]:
# https://www.kaggle.com/code/mnokno/paris-housing-price-prediction-using-xgboost

def remove_outliers(df:pd.DataFrame) -> pd.DataFrame:
    for c in features_with_outliers:
        if c == 'garage':
            first_percentile = df[c].quantile(0.001)
            df = df[df[c] > first_percentile]

        ninety_ninth_percentile = df[c].quantile(0.999)
        df = df[df[c] < ninety_ninth_percentile]
        #df_t = df_t[(df_t[c] > first_percentile) & (df_t[c] < ninety_ninth_percentile)]
    return df


In [21]:
# print(f'Before: {len(train)}')
# train = remove_outliers(train)
# print(f'After: {len(train)}')

In [22]:
train.head(10)

Unnamed: 0,id,carat,cut,color,clarity,depth,table,x,y,z,price,is_original
0,0.0,1.52,Premium,F,VS2,62.2,58.0,7.27,7.33,4.55,13619,0
1,1.0,2.03,Very Good,J,SI2,62.0,58.0,8.06,8.12,5.05,13387,0
2,2.0,0.7,Ideal,G,VS1,61.2,57.0,5.69,5.73,3.5,2772,0
3,3.0,0.32,Ideal,G,VS1,61.6,56.0,4.38,4.41,2.71,666,0
4,4.0,1.7,Premium,G,VS2,62.6,59.0,7.65,7.61,4.77,14453,0
5,5.0,1.51,Very Good,J,SI1,62.8,58.0,7.34,7.29,4.59,7506,0
6,6.0,0.74,Ideal,E,VS2,61.8,57.0,5.76,5.79,3.57,3229,0
7,7.0,1.34,Premium,G,SI2,62.5,57.0,7.0,7.05,4.38,6224,0
8,8.0,0.3,Ideal,F,IF,62.0,56.0,4.35,4.37,2.7,886,0
9,9.0,0.3,Good,J,VS1,63.6,57.0,4.26,4.28,2.72,421,0


In [23]:
train = train.reset_index(drop=True).copy()
train.head(10)

Unnamed: 0,id,carat,cut,color,clarity,depth,table,x,y,z,price,is_original
0,0.0,1.52,Premium,F,VS2,62.2,58.0,7.27,7.33,4.55,13619,0
1,1.0,2.03,Very Good,J,SI2,62.0,58.0,8.06,8.12,5.05,13387,0
2,2.0,0.7,Ideal,G,VS1,61.2,57.0,5.69,5.73,3.5,2772,0
3,3.0,0.32,Ideal,G,VS1,61.6,56.0,4.38,4.41,2.71,666,0
4,4.0,1.7,Premium,G,VS2,62.6,59.0,7.65,7.61,4.77,14453,0
5,5.0,1.51,Very Good,J,SI1,62.8,58.0,7.34,7.29,4.59,7506,0
6,6.0,0.74,Ideal,E,VS2,61.8,57.0,5.76,5.79,3.57,3229,0
7,7.0,1.34,Premium,G,SI2,62.5,57.0,7.0,7.05,4.38,6224,0
8,8.0,0.3,Ideal,F,IF,62.0,56.0,4.35,4.37,2.7,886,0
9,9.0,0.3,Good,J,VS1,63.6,57.0,4.26,4.28,2.72,421,0


In [24]:
excluded_features = [TARGET, ID, "fold", "is_original"]

In [25]:
cont_features, cat_features = feature_distribution_types(train, display=True)
show_cardinality(train, cat_features)

cont_features = [feature for feature in cont_features if feature not in excluded_features]
cat_features = [feature for feature in cat_features if feature not in excluded_features]

FEATURES = cont_features + cat_features
FEATURES

[1m[91mContinuous Features=[0m['id', 'carat', 'depth', 'table', 'x', 'y', 'z', 'price', 'is_original']

[1m[91mCategorical Features=[0m['cut', 'color', 'clarity']
[1m[91m=== Cardinality ===[0m
cut        5
color      7
clarity    8
dtype: int64


['carat', 'depth', 'table', 'x', 'y', 'z', 'cut', 'color', 'clarity']

In [26]:
# train, test = label_encoder(train, test, cat_features)
train = pd.get_dummies(train,columns=['cut','color','clarity']) # Will remove original feature names
test = pd.get_dummies(test,columns=['cut','color','clarity'])

In [27]:
train.head()

Unnamed: 0,id,carat,depth,table,x,y,z,price,is_original,cut_Fair,...,color_I,color_J,clarity_I1,clarity_IF,clarity_SI1,clarity_SI2,clarity_VS1,clarity_VS2,clarity_VVS1,clarity_VVS2
0,0.0,1.52,62.2,58.0,7.27,7.33,4.55,13619,0,0,...,0,0,0,0,0,0,0,1,0,0
1,1.0,2.03,62.0,58.0,8.06,8.12,5.05,13387,0,0,...,0,1,0,0,0,1,0,0,0,0
2,2.0,0.7,61.2,57.0,5.69,5.73,3.5,2772,0,0,...,0,0,0,0,0,0,1,0,0,0
3,3.0,0.32,61.6,56.0,4.38,4.41,2.71,666,0,0,...,0,0,0,0,0,0,1,0,0,0
4,4.0,1.7,62.6,59.0,7.65,7.61,4.77,14453,0,0,...,0,0,0,0,0,0,0,1,0,0


In [28]:
cont_features, cat_features = feature_distribution_types(train, display=True)
show_cardinality(train, cat_features)

cont_features = [feature for feature in cont_features if feature not in excluded_features]
cat_features = [feature for feature in cat_features if feature not in excluded_features]

FEATURES = cont_features + cat_features
FEATURES

[1m[91mContinuous Features=[0m['id', 'carat', 'depth', 'table', 'x', 'y', 'z', 'price', 'is_original', 'cut_Fair', 'cut_Good', 'cut_Ideal', 'cut_Premium', 'cut_Very Good', 'color_D', 'color_E', 'color_F', 'color_G', 'color_H', 'color_I', 'color_J', 'clarity_I1', 'clarity_IF', 'clarity_SI1', 'clarity_SI2', 'clarity_VS1', 'clarity_VS2', 'clarity_VVS1', 'clarity_VVS2']

[1m[91mCategorical Features=[0m[]
[1m[91m=== Cardinality ===[0m
Series([], dtype: float64)


['carat',
 'depth',
 'table',
 'x',
 'y',
 'z',
 'cut_Fair',
 'cut_Good',
 'cut_Ideal',
 'cut_Premium',
 'cut_Very Good',
 'color_D',
 'color_E',
 'color_F',
 'color_G',
 'color_H',
 'color_I',
 'color_J',
 'clarity_I1',
 'clarity_IF',
 'clarity_SI1',
 'clarity_SI2',
 'clarity_VS1',
 'clarity_VS2',
 'clarity_VVS1',
 'clarity_VVS2']

<div style="background-color:rgba(177, 156, 217, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Optuna Hyperparameter Optimization</h1>
</div>

In [29]:
%%time

if Config.optimize:
    y = train[TARGET]
    X = train[FEATURES].copy()

    X_test = test[FEATURES].copy()
    X_train, X_valid, y_train, y_valid = model_selection.train_test_split(
        X, y, test_size=0.2, random_state=Config.seed
    )

# === XGB ===

time_limit = 3600 * 3
best_xgb_params = {}

if Config.optimize:
    study = optuna.create_study(direction=objective_direction)
    study.optimize(
        lambda trial: objective_xgb(trial, X_train, X_valid, y_train, y_valid),
        n_trials=Config.n_optuna_trials,
        # timeout=time_limit,  # this or n_trials
    )

if Config.optimize:
    print("Number of finished trials:", len(study.trials))
    print("Best XGB trial parameters:", study.best_trial.params)
    print("Best score:", study.best_value)
    best_xgb_params = study.best_trial.params

## === LGBM ===

time_limit = 3600 * 3
best_lgbm_params = {}

if Config.optimize:
    study = optuna.create_study(direction=objective_direction) # minimize, maximize
    study.optimize(
        lambda trial: objective_lgbm(trial, X_train, X_valid, y_train, y_valid),
        n_trials=Config.n_optuna_trials,
        # timeout=time_limit,  # this or n_trials
    )

if Config.optimize:
    print("Number of finished trials:", len(study.trials))
    print("Best LGBM trial parameters:", study.best_trial.params)
    print("Best score:", study.best_value)
    best_lgbm_params = study.best_trial.params

## === CatBoost

time_limit = 3600 * 3
# best_cb_params = {}
best_cb_params = {'learning_rate': 0.45743264601999495,
                  'l2_leaf_reg': 41.338946049390074,
                  'bagging_temperature': 0.3472567739474319,
                  'random_strength': 1.7332249677756242, 
                  'depth': 1,
                  'min_data_in_leaf': 6}

if Config.optimize:
    study = optuna.create_study(direction=objective_direction) # minimize, maximize
    study.optimize(
        lambda trial: objective_cb(trial, X_train, X_valid, y_train, y_valid),
        n_trials=Config.n_optuna_trials,
        # timeout=time_limit,  # this or n_trials
    )

if Config.optimize:
    print("Number of finished trials:", len(study.trials))
    print("Best Cat trial parameters:", study.best_trial.params)
    print("Best score:", study.best_value)
    best_cb_params = study.best_trial.params

[32m[I 2023-03-10 02:54:54,018][0m A new study created in memory with name: no-name-46a12985-57dc-42c1-8534-067ff1075d91[0m
[32m[I 2023-03-10 02:55:23,434][0m Trial 0 finished with value: 624.5305730801115 and parameters: {'eval_metric': 'rmse', 'objective': 'reg:squarederror', 'use_label_encoder': False, 'n_estimators': 4600, 'learning_rate': 0.012037697314187875, 'subsample': 0.44000000000000006, 'colsample_bytree': 0.5700000000000001, 'max_depth': 10, 'gamma': 32.6, 'booster': 'gbtree', 'tree_method': 'gpu_hist', 'reg_lambda': 0.6670124844497746, 'reg_alpha': 8.390149321281254e-06, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 499.1067910283315}. Best is trial 0 with value: 624.5305730801115.[0m


Number of boosting rounds: 4599


[32m[I 2023-03-10 02:55:49,035][0m Trial 1 finished with value: 589.5477445871165 and parameters: {'eval_metric': 'mae', 'objective': 'reg:squarederror', 'use_label_encoder': False, 'n_estimators': 4200, 'learning_rate': 0.05485034970180515, 'subsample': 0.22, 'colsample_bytree': 0.2, 'max_depth': 10, 'gamma': 27.400000000000002, 'booster': 'gbtree', 'tree_method': 'gpu_hist', 'reg_lambda': 5.1407716447583686e-08, 'reg_alpha': 5.775387079856684e-07, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 33.032065072111024}. Best is trial 1 with value: 589.5477445871165.[0m


Number of boosting rounds: 2244


[32m[I 2023-03-10 02:56:12,505][0m Trial 2 finished with value: 623.7102613055756 and parameters: {'eval_metric': 'rmse', 'objective': 'reg:squarederror', 'use_label_encoder': False, 'n_estimators': 1500, 'learning_rate': 0.06282743617745183, 'subsample': 0.48, 'colsample_bytree': 0.14, 'max_depth': 12, 'gamma': 84.60000000000001, 'booster': 'gbtree', 'tree_method': 'gpu_hist', 'reg_lambda': 0.0007145143925952154, 'reg_alpha': 3.210110408777323e-06, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 2.585302620327552}. Best is trial 1 with value: 589.5477445871165.[0m


Number of boosting rounds: 1498


[32m[I 2023-03-10 03:16:08,233][0m Trial 3 finished with value: 612.4115749160492 and parameters: {'eval_metric': 'mae', 'objective': 'reg:squarederror', 'use_label_encoder': False, 'n_estimators': 3600, 'learning_rate': 0.054172751947204084, 'subsample': 0.48, 'colsample_bytree': 0.49, 'max_depth': 20, 'gamma': 56.400000000000006, 'booster': 'gbtree', 'tree_method': 'gpu_hist', 'reg_lambda': 1.4359104574589307e-07, 'reg_alpha': 3.5374225673700653e-06, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 3.068305085880294}. Best is trial 1 with value: 589.5477445871165.[0m


Number of boosting rounds: 145


[32m[I 2023-03-10 03:16:26,062][0m Trial 4 finished with value: 578.0545102605746 and parameters: {'eval_metric': 'mae', 'objective': 'reg:squarederror', 'use_label_encoder': False, 'n_estimators': 4000, 'learning_rate': 0.06687075085073052, 'subsample': 0.56, 'colsample_bytree': 0.46, 'max_depth': 6, 'gamma': 91.10000000000001, 'booster': 'gbtree', 'tree_method': 'gpu_hist', 'reg_lambda': 1.1522424492421137, 'reg_alpha': 1.123802551524816e-07, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 2.5025321782284706}. Best is trial 4 with value: 578.0545102605746.[0m


Number of boosting rounds: 959


[32m[I 2023-03-10 03:16:34,959][0m Trial 5 finished with value: 739.7681868444187 and parameters: {'eval_metric': 'rmse', 'objective': 'reg:squarederror', 'use_label_encoder': False, 'n_estimators': 3800, 'learning_rate': 0.0461553662111415, 'subsample': 0.15000000000000002, 'colsample_bytree': 0.21000000000000002, 'max_depth': 13, 'gamma': 42.800000000000004, 'booster': 'gbtree', 'tree_method': 'gpu_hist', 'reg_lambda': 0.0001948816059429619, 'reg_alpha': 4.400452447072481e-06, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 723.9586273931059}. Best is trial 4 with value: 578.0545102605746.[0m


Number of boosting rounds: 3799


[32m[I 2023-03-10 03:16:42,885][0m Trial 6 finished with value: 633.2939330323846 and parameters: {'eval_metric': 'mae', 'objective': 'reg:squarederror', 'use_label_encoder': False, 'n_estimators': 3300, 'learning_rate': 0.09178878676083803, 'subsample': 0.1, 'colsample_bytree': 0.11, 'max_depth': 5, 'gamma': 84.0, 'booster': 'gbtree', 'tree_method': 'gpu_hist', 'reg_lambda': 0.0001658748794856428, 'reg_alpha': 6.736054468687154e-05, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 0.23726950872414562}. Best is trial 4 with value: 578.0545102605746.[0m


Number of boosting rounds: 3246


[32m[I 2023-03-10 03:16:46,218][0m Trial 7 finished with value: 606.6547019092785 and parameters: {'eval_metric': 'mae', 'objective': 'reg:squarederror', 'use_label_encoder': False, 'n_estimators': 1400, 'learning_rate': 0.10503404279241096, 'subsample': 0.51, 'colsample_bytree': 0.13, 'max_depth': 4, 'gamma': 90.0, 'booster': 'gbtree', 'tree_method': 'gpu_hist', 'reg_lambda': 0.0006517653665776923, 'reg_alpha': 40.999799726103994, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 0.46839123968328944}. Best is trial 4 with value: 578.0545102605746.[0m


Number of boosting rounds: 1398


[32m[I 2023-03-10 03:16:58,290][0m Trial 8 finished with value: 585.9204165129878 and parameters: {'eval_metric': 'mae', 'objective': 'reg:squarederror', 'use_label_encoder': False, 'n_estimators': 4200, 'learning_rate': 0.13324819645071662, 'subsample': 0.42000000000000004, 'colsample_bytree': 0.5700000000000001, 'max_depth': 5, 'gamma': 45.2, 'booster': 'gbtree', 'tree_method': 'gpu_hist', 'reg_lambda': 1.023524646648754, 'reg_alpha': 0.03762950794801685, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 19.262580909860144}. Best is trial 4 with value: 578.0545102605746.[0m


Number of boosting rounds: 881


[32m[I 2023-03-10 03:17:07,738][0m Trial 9 finished with value: 614.4442073147036 and parameters: {'eval_metric': 'mae', 'objective': 'reg:squarederror', 'use_label_encoder': False, 'n_estimators': 1400, 'learning_rate': 0.03104547050732262, 'subsample': 0.33999999999999997, 'colsample_bytree': 0.9800000000000001, 'max_depth': 13, 'gamma': 84.2, 'booster': 'gbtree', 'tree_method': 'gpu_hist', 'reg_lambda': 9.17126631906028e-07, 'reg_alpha': 0.02572762602669489, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 744.3885328824358}. Best is trial 4 with value: 578.0545102605746.[0m


Number of boosting rounds: 1399


[32m[I 2023-03-10 03:17:11,375][0m Trial 10 finished with value: 907.9779176776975 and parameters: {'eval_metric': 'rmse', 'objective': 'reg:squarederror', 'use_label_encoder': False, 'n_estimators': 2300, 'learning_rate': 0.17619894723852647, 'subsample': 0.85, 'colsample_bytree': 0.8700000000000001, 'max_depth': 1, 'gamma': 6.7, 'booster': 'gbtree', 'tree_method': 'gpu_hist', 'reg_lambda': 50.4707530950398, 'reg_alpha': 3.01179792732117e-08, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 0.14774523065803574}. Best is trial 4 with value: 578.0545102605746.[0m


Number of boosting rounds: 2299


[32m[I 2023-03-10 03:17:29,773][0m Trial 11 finished with value: 578.3965848160024 and parameters: {'eval_metric': 'mae', 'objective': 'reg:squarederror', 'use_label_encoder': False, 'n_estimators': 4600, 'learning_rate': 0.18954426225816415, 'subsample': 0.75, 'colsample_bytree': 0.54, 'max_depth': 6, 'gamma': 62.2, 'booster': 'gbtree', 'tree_method': 'gpu_hist', 'reg_lambda': 0.28434718976466505, 'reg_alpha': 0.00265812751306177, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 23.421901112615302}. Best is trial 4 with value: 578.0545102605746.[0m


Number of boosting rounds: 297


[32m[I 2023-03-10 03:17:54,391][0m Trial 12 finished with value: 576.134023347808 and parameters: {'eval_metric': 'mae', 'objective': 'reg:squarederror', 'use_label_encoder': False, 'n_estimators': 5000, 'learning_rate': 0.1874584918010829, 'subsample': 0.72, 'colsample_bytree': 0.4, 'max_depth': 7, 'gamma': 62.0, 'booster': 'gbtree', 'tree_method': 'gpu_hist', 'reg_lambda': 0.06439186842552587, 'reg_alpha': 1.7305876706839657e-08, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 53.940621955554384}. Best is trial 12 with value: 576.134023347808.[0m


Number of boosting rounds: 200


[32m[I 2023-03-10 03:18:22,119][0m Trial 13 finished with value: 582.5890942765091 and parameters: {'eval_metric': 'mae', 'objective': 'reg:squarederror', 'use_label_encoder': False, 'n_estimators': 5000, 'learning_rate': 0.1941878581315089, 'subsample': 0.67, 'colsample_bytree': 0.37, 'max_depth': 8, 'gamma': 68.10000000000001, 'booster': 'gbtree', 'tree_method': 'gpu_hist', 'reg_lambda': 0.031946895620813, 'reg_alpha': 1.5451992720931906e-08, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 91.44010877428948}. Best is trial 12 with value: 576.134023347808.[0m


Number of boosting rounds: 384


[32m[I 2023-03-10 03:18:26,164][0m Trial 14 finished with value: 963.9685081283988 and parameters: {'eval_metric': 'mae', 'objective': 'reg:squarederror', 'use_label_encoder': False, 'n_estimators': 2700, 'learning_rate': 0.24304275455004112, 'subsample': 0.99, 'colsample_bytree': 0.7300000000000001, 'max_depth': 1, 'gamma': 97.30000000000001, 'booster': 'gbtree', 'tree_method': 'gpu_hist', 'reg_lambda': 53.91354093357375, 'reg_alpha': 1.1279391274618044e-08, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 4.843761302571517}. Best is trial 12 with value: 576.134023347808.[0m


Number of boosting rounds: 114


[32m[I 2023-03-10 03:19:24,586][0m Trial 15 finished with value: 581.1759583425589 and parameters: {'eval_metric': 'mae', 'objective': 'reg:squarederror', 'use_label_encoder': False, 'n_estimators': 5000, 'learning_rate': 0.12660073188652085, 'subsample': 0.64, 'colsample_bytree': 0.36, 'max_depth': 8, 'gamma': 69.8, 'booster': 'gbtree', 'tree_method': 'gpu_hist', 'reg_lambda': 0.022506832929213218, 'reg_alpha': 1.3464494531816523e-07, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 1.2821785933390761}. Best is trial 12 with value: 576.134023347808.[0m


Number of boosting rounds: 160


[32m[I 2023-03-10 03:32:56,590][0m Trial 16 finished with value: 595.7099985600242 and parameters: {'eval_metric': 'mae', 'objective': 'reg:squarederror', 'use_label_encoder': False, 'n_estimators': 4100, 'learning_rate': 0.09772437426994433, 'subsample': 0.83, 'colsample_bytree': 0.36, 'max_depth': 20, 'gamma': 75.3, 'booster': 'gbtree', 'tree_method': 'gpu_hist', 'reg_lambda': 6.003682086976881, 'reg_alpha': 2.2164251213459298e-07, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 7.066206413085981}. Best is trial 12 with value: 576.134023347808.[0m


Number of boosting rounds: 140


[32m[I 2023-03-10 03:51:25,066][0m Trial 17 finished with value: 606.725291477452 and parameters: {'eval_metric': 'mae', 'objective': 'reg:squarederror', 'use_label_encoder': False, 'n_estimators': 2900, 'learning_rate': 0.02815981840170122, 'subsample': 0.67, 'colsample_bytree': 0.68, 'max_depth': 16, 'gamma': 16.5, 'booster': 'gbtree', 'tree_method': 'gpu_hist', 'reg_lambda': 0.021895262790167357, 'reg_alpha': 2.1533180638330874e-07, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 0.9268561422332516}. Best is trial 12 with value: 576.134023347808.[0m


Number of boosting rounds: 185


[32m[I 2023-03-10 03:51:33,933][0m Trial 18 finished with value: 587.5230402326401 and parameters: {'eval_metric': 'rmse', 'objective': 'reg:squarederror', 'use_label_encoder': False, 'n_estimators': 4600, 'learning_rate': 0.24607984512217448, 'subsample': 0.59, 'colsample_bytree': 0.45, 'max_depth': 3, 'gamma': 53.1, 'booster': 'gbtree', 'tree_method': 'gpu_hist', 'reg_lambda': 4.626410429427926, 'reg_alpha': 6.994357472196623e-05, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 137.182533931613}. Best is trial 12 with value: 576.134023347808.[0m


Number of boosting rounds: 2016


[32m[I 2023-03-10 03:51:53,212][0m Trial 19 finished with value: 576.1760564150361 and parameters: {'eval_metric': 'mae', 'objective': 'reg:squarederror', 'use_label_encoder': False, 'n_estimators': 3500, 'learning_rate': 0.07541627921947658, 'subsample': 0.96, 'colsample_bytree': 0.27, 'max_depth': 7, 'gamma': 76.10000000000001, 'booster': 'gbtree', 'tree_method': 'gpu_hist', 'reg_lambda': 0.005388274989609351, 'reg_alpha': 1.091442430983454e-08, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 9.959346532302192}. Best is trial 12 with value: 576.134023347808.[0m


Number of boosting rounds: 1105


[32m[I 2023-03-10 03:52:09,312][0m Trial 20 finished with value: 579.2827234453364 and parameters: {'eval_metric': 'mae', 'objective': 'reg:squarederror', 'use_label_encoder': False, 'n_estimators': 2400, 'learning_rate': 0.14114457862096377, 'subsample': 0.99, 'colsample_bytree': 0.27, 'max_depth': 8, 'gamma': 74.0, 'booster': 'gbtree', 'tree_method': 'gpu_hist', 'reg_lambda': 1.740500607818068e-05, 'reg_alpha': 2.3239621490060868e-08, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 13.07476952160792}. Best is trial 12 with value: 576.134023347808.[0m


Number of boosting rounds: 386


[32m[I 2023-03-10 03:52:30,190][0m Trial 21 finished with value: 573.4349262701497 and parameters: {'eval_metric': 'mae', 'objective': 'reg:squarederror', 'use_label_encoder': False, 'n_estimators': 3400, 'learning_rate': 0.07701220723171703, 'subsample': 0.86, 'colsample_bytree': 0.41, 'max_depth': 7, 'gamma': 99.60000000000001, 'booster': 'gbtree', 'tree_method': 'gpu_hist', 'reg_lambda': 0.005703793079379129, 'reg_alpha': 1.1446734976615705e-08, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 7.6682871998800515}. Best is trial 21 with value: 573.4349262701497.[0m


Number of boosting rounds: 545


[32m[I 2023-03-10 03:52:56,235][0m Trial 22 finished with value: 580.4535556323713 and parameters: {'eval_metric': 'mae', 'objective': 'reg:squarederror', 'use_label_encoder': False, 'n_estimators': 3400, 'learning_rate': 0.08236406999314436, 'subsample': 0.87, 'colsample_bytree': 0.26, 'max_depth': 8, 'gamma': 77.0, 'booster': 'gbtree', 'tree_method': 'gpu_hist', 'reg_lambda': 0.00428378871441047, 'reg_alpha': 1.1758528421028888e-08, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 7.894206564111571}. Best is trial 21 with value: 573.4349262701497.[0m


Number of boosting rounds: 798


[32m[I 2023-03-10 03:53:00,275][0m Trial 23 finished with value: 580.5092054370672 and parameters: {'eval_metric': 'mae', 'objective': 'reg:squarederror', 'use_label_encoder': False, 'n_estimators': 2000, 'learning_rate': 0.07554288278107171, 'subsample': 0.92, 'colsample_bytree': 0.33999999999999997, 'max_depth': 3, 'gamma': 63.400000000000006, 'booster': 'gbtree', 'tree_method': 'gpu_hist', 'reg_lambda': 0.0044890774040941, 'reg_alpha': 6.281120611209907e-07, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 45.720737299361055}. Best is trial 21 with value: 573.4349262701497.[0m


Number of boosting rounds: 1994


[32m[I 2023-03-10 03:53:18,916][0m Trial 24 finished with value: 577.3913251172947 and parameters: {'eval_metric': 'mae', 'objective': 'reg:squarederror', 'use_label_encoder': False, 'n_estimators': 3000, 'learning_rate': 0.11926998827759015, 'subsample': 0.75, 'colsample_bytree': 0.65, 'max_depth': 7, 'gamma': 99.7, 'booster': 'gbtree', 'tree_method': 'gpu_hist', 'reg_lambda': 0.0991180184666747, 'reg_alpha': 6.689570484333844e-08, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 9.706208242647524}. Best is trial 21 with value: 573.4349262701497.[0m


Number of boosting rounds: 355


[32m[I 2023-03-10 03:54:02,264][0m Trial 25 finished with value: 579.7746213480914 and parameters: {'eval_metric': 'mae', 'objective': 'reg:squarederror', 'use_label_encoder': False, 'n_estimators': 3300, 'learning_rate': 0.14803179383324266, 'subsample': 0.77, 'colsample_bytree': 0.42, 'max_depth': 11, 'gamma': 59.400000000000006, 'booster': 'gbtree', 'tree_method': 'gpu_hist', 'reg_lambda': 0.005156865674150792, 'reg_alpha': 1.0910964859640747e-08, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 45.305511080936355}. Best is trial 21 with value: 573.4349262701497.[0m


Number of boosting rounds: 113


[32m[I 2023-03-10 03:54:05,650][0m Trial 26 finished with value: 912.6476192614176 and parameters: {'eval_metric': 'rmse', 'objective': 'reg:squarederror', 'use_label_encoder': False, 'n_estimators': 1000, 'learning_rate': 0.09155301330742681, 'subsample': 0.92, 'colsample_bytree': 0.05, 'max_depth': 9, 'gamma': 92.4, 'booster': 'gbtree', 'tree_method': 'gpu_hist', 'reg_lambda': 0.15800267284674285, 'reg_alpha': 7.566413796399177e-07, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 16.51352688939014}. Best is trial 21 with value: 573.4349262701497.[0m


Number of boosting rounds: 999


[32m[I 2023-03-10 03:58:49,469][0m Trial 27 finished with value: 606.2120438149793 and parameters: {'eval_metric': 'mae', 'objective': 'reg:squarederror', 'use_label_encoder': False, 'n_estimators': 3700, 'learning_rate': 0.07698598691991783, 'subsample': 0.79, 'colsample_bytree': 0.3, 'max_depth': 15, 'gamma': 46.1, 'booster': 'gbtree', 'tree_method': 'gpu_hist', 'reg_lambda': 4.670179170133207e-05, 'reg_alpha': 7.21590891847967e-08, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 6.175927165588413}. Best is trial 21 with value: 573.4349262701497.[0m


Number of boosting rounds: 352


[32m[I 2023-03-10 03:58:55,824][0m Trial 28 finished with value: 578.342486488544 and parameters: {'eval_metric': 'mae', 'objective': 'reg:squarederror', 'use_label_encoder': False, 'n_estimators': 2700, 'learning_rate': 0.11066705863820196, 'subsample': 0.91, 'colsample_bytree': 0.22000000000000003, 'max_depth': 4, 'gamma': 78.30000000000001, 'booster': 'gbtree', 'tree_method': 'gpu_hist', 'reg_lambda': 0.0026419619633284027, 'reg_alpha': 6.437706180505102e-08, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 78.7123480171194}. Best is trial 21 with value: 573.4349262701497.[0m


Number of boosting rounds: 2699


[32m[I 2023-03-10 03:59:26,109][0m Trial 29 finished with value: 581.536361153118 and parameters: {'eval_metric': 'rmse', 'objective': 'reg:squarederror', 'use_label_encoder': False, 'n_estimators': 4400, 'learning_rate': 0.011172627768170313, 'subsample': 0.73, 'colsample_bytree': 0.6200000000000001, 'max_depth': 10, 'gamma': 37.4, 'booster': 'gbtree', 'tree_method': 'gpu_hist', 'reg_lambda': 0.07075765212294023, 'reg_alpha': 7.341489082753956e-07, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 427.9268387003987}. Best is trial 21 with value: 573.4349262701497.[0m
[32m[I 2023-03-10 03:59:26,126][0m A new study created in memory with name: no-name-fe4ff815-a20d-4bdf-bcfd-2fbed2a8540f[0m


Number of boosting rounds: 4399
Number of finished trials: 30
Best XGB trial parameters: {'eval_metric': 'mae', 'objective': 'reg:squarederror', 'use_label_encoder': False, 'n_estimators': 3400, 'learning_rate': 0.07701220723171703, 'subsample': 0.86, 'colsample_bytree': 0.41, 'max_depth': 7, 'gamma': 99.60000000000001, 'booster': 'gbtree', 'tree_method': 'gpu_hist', 'reg_lambda': 0.005703793079379129, 'reg_alpha': 1.1446734976615705e-08, 'random_state': 42, 'n_jobs': 4, 'min_child_weight': 7.6682871998800515}
Best score: 573.4349262701497
Training until validation scores don't improve for 500 rounds
[500]	training's l1: 283.325	valid_1's l1: 302.708
Did not meet early stopping. Best iteration is:
[800]	training's l1: 278.057	valid_1's l1: 301.581


[32m[I 2023-03-10 04:00:08,481][0m Trial 0 finished with value: 610.1800501901016 and parameters: {'objective': 'mae', 'n_estimators': 800, 'reg_alpha': 5.222870102314783, 'reg_lambda': 1.9068385257322892e-06, 'colsample_bytree': 0.060000000000000005, 'num_leaves': 645, 'feature_fraction': 0.5352322318264991, 'bagging_fraction': 0.40778374052765687, 'bagging_freq': 2, 'min_child_samples': 299, 'subsample': 0.76, 'learning_rate': 0.097857619457139, 'max_depth': 63, 'random_state': 42, 'n_jobs': 4}. Best is trial 0 with value: 610.1800501901016.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's l1: 279.579	training's rmse: 541.371	valid_1's l1: 301.893	valid_1's rmse: 586.307
[1000]	training's l1: 272.734	training's rmse: 522.826	valid_1's l1: 302.025	valid_1's rmse: 586.303
Early stopping, best iteration is:
[785]	training's l1: 275.45	training's rmse: 530.099	valid_1's l1: 301.756	valid_1's rmse: 585.349


[32m[I 2023-03-10 04:01:00,408][0m Trial 1 finished with value: 585.3486869902777 and parameters: {'objective': 'rmse', 'n_estimators': 3025, 'reg_alpha': 0.9185262938626741, 'reg_lambda': 1.3350764528696037e-05, 'colsample_bytree': 0.060000000000000005, 'num_leaves': 862, 'feature_fraction': 0.355357721874337, 'bagging_fraction': 0.6000420540459005, 'bagging_freq': 1, 'min_child_samples': 136, 'subsample': 0.32, 'learning_rate': 0.055305047038007316, 'max_depth': 14, 'random_state': 42, 'n_jobs': 4}. Best is trial 1 with value: 585.3486869902777.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's l1: 226.97	training's rmse: 401.237	valid_1's l1: 302.484	valid_1's rmse: 598.604
Early stopping, best iteration is:
[110]	training's l1: 269.849	training's rmse: 505.443	valid_1's l1: 298.101	valid_1's rmse: 577.248


[32m[I 2023-03-10 04:01:48,175][0m Trial 2 finished with value: 577.248161870919 and parameters: {'objective': 'rmse', 'n_estimators': 1611, 'reg_alpha': 0.00017984676786724502, 'reg_lambda': 6.417273148257477e-07, 'colsample_bytree': 0.68, 'num_leaves': 449, 'feature_fraction': 0.8915913195564961, 'bagging_fraction': 0.5438016925873219, 'bagging_freq': 3, 'min_child_samples': 24, 'subsample': 0.9, 'learning_rate': 0.0462972157091811, 'max_depth': 27, 'random_state': 42, 'n_jobs': 4}. Best is trial 2 with value: 577.248161870919.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's l1: 307.232	training's rmse: 594.08	valid_1's l1: 312.444	valid_1's rmse: 603.719
[1000]	training's l1: 295.5	training's rmse: 571.78	valid_1's l1: 302.035	valid_1's rmse: 583.722
[1500]	training's l1: 292.672	training's rmse: 565.206	valid_1's l1: 300.643	valid_1's rmse: 580.816
[2000]	training's l1: 291.011	training's rmse: 560.975	valid_1's l1: 300.169	valid_1's rmse: 580.172
[2500]	training's l1: 290.004	training's rmse: 557.83	valid_1's l1: 299.998	valid_1's rmse: 579.725
Did not meet early stopping. Best iteration is:
[2516]	training's l1: 289.987	training's rmse: 557.675	valid_1's l1: 300.043	valid_1's rmse: 579.816


[32m[I 2023-03-10 04:02:40,675][0m Trial 3 finished with value: 579.8162453520144 and parameters: {'objective': 'rmse', 'n_estimators': 2516, 'reg_alpha': 0.002695679882826887, 'reg_lambda': 0.006291379242099273, 'colsample_bytree': 0.8600000000000001, 'num_leaves': 532, 'feature_fraction': 0.4966002333958688, 'bagging_fraction': 0.28778221268651516, 'bagging_freq': 14, 'min_child_samples': 58, 'subsample': 0.83, 'learning_rate': 0.01214995561143258, 'max_depth': 7, 'random_state': 42, 'n_jobs': 4}. Best is trial 2 with value: 577.248161870919.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's l1: 282.156	valid_1's l1: 304.986
[1000]	training's l1: 271.764	valid_1's l1: 302.707
[1500]	training's l1: 266.38	valid_1's l1: 302.271
Did not meet early stopping. Best iteration is:
[1917]	training's l1: 263.188	valid_1's l1: 301.87


[32m[I 2023-03-10 04:04:17,689][0m Trial 4 finished with value: 597.7114844567701 and parameters: {'objective': 'mae', 'n_estimators': 1917, 'reg_alpha': 0.014675768181173322, 'reg_lambda': 1.8177255713524397e-06, 'colsample_bytree': 0.14, 'num_leaves': 236, 'feature_fraction': 0.20448207867214613, 'bagging_fraction': 0.5592822698952267, 'bagging_freq': 8, 'min_child_samples': 237, 'subsample': 0.13, 'learning_rate': 0.17101301536117924, 'max_depth': 44, 'random_state': 42, 'n_jobs': 4}. Best is trial 2 with value: 577.248161870919.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's l1: 268.239	training's rmse: 526.895	valid_1's l1: 299.323	valid_1's rmse: 584.719
Early stopping, best iteration is:
[258]	training's l1: 277.095	training's rmse: 547.407	valid_1's l1: 298.217	valid_1's rmse: 585.235


[32m[I 2023-03-10 04:05:20,829][0m Trial 5 finished with value: 585.2351942989777 and parameters: {'objective': 'rmse', 'n_estimators': 4544, 'reg_alpha': 0.00011312528338952662, 'reg_lambda': 3.96548660172774e-08, 'colsample_bytree': 0.22000000000000003, 'num_leaves': 747, 'feature_fraction': 0.6205380563591816, 'bagging_fraction': 0.23907172501752924, 'bagging_freq': 1, 'min_child_samples': 57, 'subsample': 0.91, 'learning_rate': 0.04734307585597945, 'max_depth': 32, 'random_state': 42, 'n_jobs': 4}. Best is trial 2 with value: 577.248161870919.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's l1: 252.273	training's rmse: 459.96	valid_1's l1: 295.939	valid_1's rmse: 578.73
Early stopping, best iteration is:
[395]	training's l1: 259.452	training's rmse: 478.194	valid_1's l1: 296.012	valid_1's rmse: 577.429


[32m[I 2023-03-10 04:06:37,957][0m Trial 6 finished with value: 577.4290338828614 and parameters: {'objective': 'rmse', 'n_estimators': 2890, 'reg_alpha': 2.2394079840406564, 'reg_lambda': 0.059032779206919794, 'colsample_bytree': 0.46, 'num_leaves': 452, 'feature_fraction': 0.6698035657456157, 'bagging_fraction': 0.7432353208503566, 'bagging_freq': 2, 'min_child_samples': 23, 'subsample': 0.88, 'learning_rate': 0.017270560286700248, 'max_depth': 40, 'random_state': 42, 'n_jobs': 4}. Best is trial 2 with value: 577.248161870919.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's l1: 227.171	valid_1's l1: 293.559
Early stopping, best iteration is:
[150]	training's l1: 262.543	valid_1's l1: 290.633


[32m[I 2023-03-10 04:07:20,212][0m Trial 7 finished with value: 578.0672774112347 and parameters: {'objective': 'mae', 'n_estimators': 4927, 'reg_alpha': 2.4901693134852595e-06, 'reg_lambda': 3.3068146653342474, 'colsample_bytree': 0.8400000000000001, 'num_leaves': 318, 'feature_fraction': 0.9066891329150719, 'bagging_fraction': 0.4500256915393296, 'bagging_freq': 6, 'min_child_samples': 12, 'subsample': 0.53, 'learning_rate': 0.09177460546032865, 'max_depth': 47, 'random_state': 42, 'n_jobs': 4}. Best is trial 2 with value: 577.248161870919.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's l1: 419.586	valid_1's l1: 430.831
Did not meet early stopping. Best iteration is:
[786]	training's l1: 349.098	valid_1's l1: 363.269


[32m[I 2023-03-10 04:08:18,991][0m Trial 8 finished with value: 706.3550621488447 and parameters: {'objective': 'mae', 'n_estimators': 786, 'reg_alpha': 9.321137533891373e-05, 'reg_lambda': 0.00027923662583012696, 'colsample_bytree': 0.89, 'num_leaves': 853, 'feature_fraction': 0.1878535012938784, 'bagging_fraction': 0.9310009763168541, 'bagging_freq': 13, 'min_child_samples': 185, 'subsample': 0.95, 'learning_rate': 0.013817850312304106, 'max_depth': 21, 'random_state': 42, 'n_jobs': 4}. Best is trial 2 with value: 577.248161870919.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's l1: 276.308	valid_1's l1: 294.285
[1000]	training's l1: 266.808	valid_1's l1: 293.623
[1500]	training's l1: 260.758	valid_1's l1: 293.645
Early stopping, best iteration is:
[1269]	training's l1: 263.28	valid_1's l1: 293.469


[32m[I 2023-03-10 04:10:41,640][0m Trial 9 finished with value: 595.5773905204687 and parameters: {'objective': 'mae', 'n_estimators': 3588, 'reg_alpha': 0.0844389066148225, 'reg_lambda': 3.7718125560022013, 'colsample_bytree': 0.6900000000000001, 'num_leaves': 294, 'feature_fraction': 0.9263248485981144, 'bagging_fraction': 0.5111227226252406, 'bagging_freq': 3, 'min_child_samples': 168, 'subsample': 0.42000000000000004, 'learning_rate': 0.02556061928092162, 'max_depth': 47, 'random_state': 42, 'n_jobs': 4}. Best is trial 2 with value: 577.248161870919.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's l1: 302.506	training's rmse: 574.162	valid_1's l1: 315.963	valid_1's rmse: 609.171
Early stopping, best iteration is:
[240]	training's l1: 305.973	training's rmse: 582.787	valid_1's l1: 313.637	valid_1's rmse: 602.92


[32m[I 2023-03-10 04:10:57,162][0m Trial 10 finished with value: 602.9203106295739 and parameters: {'objective': 'rmse', 'n_estimators': 1688, 'reg_alpha': 7.82525078553057e-08, 'reg_lambda': 1.3927723504910945e-08, 'colsample_bytree': 0.53, 'num_leaves': 26, 'feature_fraction': 0.9866554830223505, 'bagging_fraction': 0.1604196543079337, 'bagging_freq': 7, 'min_child_samples': 106, 'subsample': 0.64, 'learning_rate': 0.2278137426597049, 'max_depth': 95, 'random_state': 42, 'n_jobs': 4}. Best is trial 2 with value: 577.248161870919.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's l1: 229.231	training's rmse: 393.671	valid_1's l1: 297.077	valid_1's rmse: 584.156
Early stopping, best iteration is:
[240]	training's l1: 256.353	training's rmse: 457.525	valid_1's l1: 297.499	valid_1's rmse: 577.992


[32m[I 2023-03-10 04:11:52,377][0m Trial 11 finished with value: 577.991810000909 and parameters: {'objective': 'rmse', 'n_estimators': 3641, 'reg_alpha': 0.23728314026817246, 'reg_lambda': 0.009976862070996188, 'colsample_bytree': 0.41, 'num_leaves': 457, 'feature_fraction': 0.7312949091682852, 'bagging_fraction': 0.7071076078635398, 'bagging_freq': 4, 'min_child_samples': 7, 'subsample': 0.67, 'learning_rate': 0.024011610399501154, 'max_depth': 72, 'random_state': 42, 'n_jobs': 4}. Best is trial 2 with value: 577.248161870919.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's l1: 273.444	training's rmse: 524.937	valid_1's l1: 295.938	valid_1's rmse: 575.84
[1000]	training's l1: 261.016	training's rmse: 500.255	valid_1's l1: 292.792	valid_1's rmse: 574.07
Early stopping, best iteration is:
[778]	training's l1: 262.711	training's rmse: 505.477	valid_1's l1: 292.737	valid_1's rmse: 573.57


[32m[I 2023-03-10 04:13:14,825][0m Trial 12 finished with value: 573.5699366535382 and parameters: {'objective': 'rmse', 'n_estimators': 1920, 'reg_alpha': 8.925460392245258, 'reg_lambda': 0.04159071849947123, 'colsample_bytree': 0.5, 'num_leaves': 492, 'feature_fraction': 0.8002599860483741, 'bagging_fraction': 0.7274584728092232, 'bagging_freq': 0, 'min_child_samples': 68, 'subsample': 0.99, 'learning_rate': 0.010050853525499873, 'max_depth': 25, 'random_state': 42, 'n_jobs': 4}. Best is trial 12 with value: 573.5699366535382.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's l1: 270.412	training's rmse: 525.633	valid_1's l1: 294.878	valid_1's rmse: 575.538
[1000]	training's l1: 259.267	training's rmse: 501.968	valid_1's l1: 292.682	valid_1's rmse: 574.438
Early stopping, best iteration is:
[715]	training's l1: 261.771	training's rmse: 509.744	valid_1's l1: 292.604	valid_1's rmse: 573.806


[32m[I 2023-03-10 04:14:47,093][0m Trial 13 finished with value: 573.8059645890795 and parameters: {'objective': 'rmse', 'n_estimators': 1675, 'reg_alpha': 0.0016991986206591346, 'reg_lambda': 0.00012663407129597916, 'colsample_bytree': 0.64, 'num_leaves': 630, 'feature_fraction': 0.8367988398614714, 'bagging_fraction': 0.7921834397618899, 'bagging_freq': 0, 'min_child_samples': 83, 'subsample': 1.0, 'learning_rate': 0.01060550599803384, 'max_depth': 29, 'random_state': 42, 'n_jobs': 4}. Best is trial 12 with value: 573.5699366535382.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's l1: 454.174	training's rmse: 816.617	valid_1's l1: 456.83	valid_1's rmse: 820.393
[1000]	training's l1: 374.896	training's rmse: 689.446	valid_1's l1: 379.11	valid_1's rmse: 695.663
[1500]	training's l1: 354.622	training's rmse: 651.871	valid_1's l1: 358.971	valid_1's rmse: 657.721
[2000]	training's l1: 343.814	training's rmse: 633.843	valid_1's l1: 348.168	valid_1's rmse: 639.424
Did not meet early stopping. Best iteration is:
[2097]	training's l1: 342.353	training's rmse: 631.654	valid_1's l1: 346.753	valid_1's rmse: 637.254


[32m[I 2023-03-10 04:15:11,632][0m Trial 14 finished with value: 637.2541909224542 and parameters: {'objective': 'rmse', 'n_estimators': 2098, 'reg_alpha': 7.032476164734587, 'reg_lambda': 0.00016449218978079982, 'colsample_bytree': 0.6100000000000001, 'num_leaves': 975, 'feature_fraction': 0.8210905617033171, 'bagging_fraction': 0.8546869775926317, 'bagging_freq': 0, 'min_child_samples': 110, 'subsample': 0.75, 'learning_rate': 0.010780261776947344, 'max_depth': 3, 'random_state': 42, 'n_jobs': 4}. Best is trial 12 with value: 573.5699366535382.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's l1: 255.408	training's rmse: 493.615	valid_1's l1: 293.508	valid_1's rmse: 574.674
Early stopping, best iteration is:
[411]	training's l1: 259.539	training's rmse: 503.416	valid_1's l1: 293.359	valid_1's rmse: 574.229


[32m[I 2023-03-10 04:17:13,840][0m Trial 15 finished with value: 574.2289966651664 and parameters: {'objective': 'rmse', 'n_estimators': 1367, 'reg_alpha': 0.018516625084192058, 'reg_lambda': 0.0025119406488414786, 'colsample_bytree': 0.36, 'num_leaves': 660, 'feature_fraction': 0.7751913218896728, 'bagging_fraction': 0.9869617147179042, 'bagging_freq': 11, 'min_child_samples': 76, 'subsample': 0.99, 'learning_rate': 0.01786862917212392, 'max_depth': 62, 'random_state': 42, 'n_jobs': 4}. Best is trial 12 with value: 573.5699366535382.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's l1: 276.354	training's rmse: 544.347	valid_1's l1: 295.144	valid_1's rmse: 579.128
[1000]	training's l1: 264.742	training's rmse: 519.102	valid_1's l1: 292.77	valid_1's rmse: 574.623
Early stopping, best iteration is:
[851]	training's l1: 266.61	training's rmse: 524.241	valid_1's l1: 292.617	valid_1's rmse: 574.476


[32m[I 2023-03-10 04:19:27,826][0m Trial 16 finished with value: 574.4755156479529 and parameters: {'objective': 'rmse', 'n_estimators': 2362, 'reg_alpha': 0.0018626849994770097, 'reg_lambda': 0.12735243855241876, 'colsample_bytree': 0.28, 'num_leaves': 626, 'feature_fraction': 0.7674961985352846, 'bagging_fraction': 0.7920652192430024, 'bagging_freq': 5, 'min_child_samples': 105, 'subsample': 1.0, 'learning_rate': 0.010853602252379703, 'max_depth': 29, 'random_state': 42, 'n_jobs': 4}. Best is trial 12 with value: 573.5699366535382.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's l1: 287.28	training's rmse: 551.213	valid_1's l1: 298.682	valid_1's rmse: 576.99
[1000]	training's l1: 276.942	training's rmse: 531.465	valid_1's l1: 294.798	valid_1's rmse: 574.449
Early stopping, best iteration is:
[830]	training's l1: 278.635	training's rmse: 535.875	valid_1's l1: 294.902	valid_1's rmse: 573.891


[32m[I 2023-03-10 04:20:35,774][0m Trial 17 finished with value: 573.8914512676984 and parameters: {'objective': 'rmse', 'n_estimators': 1364, 'reg_alpha': 0.2102627052551674, 'reg_lambda': 0.0001273467570118969, 'colsample_bytree': 0.9800000000000001, 'num_leaves': 174, 'feature_fraction': 0.9914419965832135, 'bagging_fraction': 0.67382824837449, 'bagging_freq': 10, 'min_child_samples': 75, 'subsample': 0.8, 'learning_rate': 0.010349926254376833, 'max_depth': 17, 'random_state': 42, 'n_jobs': 4}. Best is trial 12 with value: 573.5699366535382.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's l1: 267.501	training's rmse: 524.827	valid_1's l1: 293.467	valid_1's rmse: 574.753
Early stopping, best iteration is:
[466]	training's l1: 268.091	training's rmse: 526.51	valid_1's l1: 293.427	valid_1's rmse: 574.671


[32m[I 2023-03-10 04:21:37,855][0m Trial 18 finished with value: 574.6711294303051 and parameters: {'objective': 'rmse', 'n_estimators': 1108, 'reg_alpha': 1.1203839607782128e-05, 'reg_lambda': 2.875312424483345e-05, 'colsample_bytree': 0.5700000000000001, 'num_leaves': 575, 'feature_fraction': 0.6584752917812849, 'bagging_fraction': 0.8634536775316342, 'bagging_freq': 0, 'min_child_samples': 148, 'subsample': 0.59, 'learning_rate': 0.023603207409696257, 'max_depth': 35, 'random_state': 42, 'n_jobs': 4}. Best is trial 12 with value: 573.5699366535382.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's l1: 282.187	training's rmse: 562.853	valid_1's l1: 296.486	valid_1's rmse: 589.008
[1000]	training's l1: 273.334	training's rmse: 544.102	valid_1's l1: 296.027	valid_1's rmse: 584.707
Early stopping, best iteration is:
[832]	training's l1: 275.767	training's rmse: 549.432	valid_1's l1: 295.905	valid_1's rmse: 585.425


[32m[I 2023-03-10 04:24:10,466][0m Trial 19 finished with value: 585.4249973254223 and parameters: {'objective': 'rmse', 'n_estimators': 3224, 'reg_alpha': 0.6693379251193204, 'reg_lambda': 0.0014420341399687632, 'colsample_bytree': 0.7400000000000001, 'num_leaves': 774, 'feature_fraction': 0.8459815747000572, 'bagging_fraction': 0.6527463461999577, 'bagging_freq': 8, 'min_child_samples': 201, 'subsample': 0.45999999999999996, 'learning_rate': 0.01664264000019769, 'max_depth': 85, 'random_state': 42, 'n_jobs': 4}. Best is trial 12 with value: 573.5699366535382.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's l1: 278.371	training's rmse: 541.072	valid_1's l1: 293.336	valid_1's rmse: 571.474
[1000]	training's l1: 273.426	training's rmse: 526.007	valid_1's l1: 293.074	valid_1's rmse: 571.87
Early stopping, best iteration is:
[612]	training's l1: 276.857	training's rmse: 536.707	valid_1's l1: 293.035	valid_1's rmse: 571.018


[32m[I 2023-03-10 04:24:58,647][0m Trial 20 finished with value: 571.0176259512263 and parameters: {'objective': 'rmse', 'n_estimators': 2471, 'reg_alpha': 0.03799027760623744, 'reg_lambda': 0.13858303376079512, 'colsample_bytree': 0.48, 'num_leaves': 369, 'feature_fraction': 0.7317990147282567, 'bagging_fraction': 0.7817700320980545, 'bagging_freq': 5, 'min_child_samples': 48, 'subsample': 0.6799999999999999, 'learning_rate': 0.015222000529148337, 'max_depth': 10, 'random_state': 42, 'n_jobs': 4}. Best is trial 20 with value: 571.0176259512263.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's l1: 278.297	training's rmse: 541.245	valid_1's l1: 292.944	valid_1's rmse: 570.994
[1000]	training's l1: 273.245	training's rmse: 525.771	valid_1's l1: 292.679	valid_1's rmse: 571.535
Early stopping, best iteration is:
[562]	training's l1: 277.541	training's rmse: 539.178	valid_1's l1: 292.683	valid_1's rmse: 570.577


[32m[I 2023-03-10 04:25:45,863][0m Trial 21 finished with value: 570.5765990241852 and parameters: {'objective': 'rmse', 'n_estimators': 2455, 'reg_alpha': 9.94552434220875, 'reg_lambda': 0.29343449822461276, 'colsample_bytree': 0.49, 'num_leaves': 357, 'feature_fraction': 0.8120251736442161, 'bagging_fraction': 0.7940228434992287, 'bagging_freq': 5, 'min_child_samples': 46, 'subsample': 0.71, 'learning_rate': 0.01441490776807522, 'max_depth': 10, 'random_state': 42, 'n_jobs': 4}. Best is trial 21 with value: 570.5765990241852.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's l1: 282.596	training's rmse: 547.855	valid_1's l1: 294.672	valid_1's rmse: 571.878
[1000]	training's l1: 277.234	training's rmse: 532.438	valid_1's l1: 293.844	valid_1's rmse: 571.461
Early stopping, best iteration is:
[631]	training's l1: 280.545	training's rmse: 542.345	valid_1's l1: 294.037	valid_1's rmse: 570.867


[32m[I 2023-03-10 04:26:28,643][0m Trial 22 finished with value: 570.8671320473082 and parameters: {'objective': 'rmse', 'n_estimators': 2522, 'reg_alpha': 9.185866561068051, 'reg_lambda': 0.4106934145111453, 'colsample_bytree': 0.48, 'num_leaves': 336, 'feature_fraction': 0.7254775358840269, 'bagging_fraction': 0.7563496376133395, 'bagging_freq': 5, 'min_child_samples': 44, 'subsample': 0.7, 'learning_rate': 0.015093325109687304, 'max_depth': 9, 'random_state': 42, 'n_jobs': 4}. Best is trial 21 with value: 570.5765990241852.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's l1: 274.654	training's rmse: 533.053	valid_1's l1: 292.518	valid_1's rmse: 570.324
[1000]	training's l1: 269.193	training's rmse: 516.264	valid_1's l1: 292.43	valid_1's rmse: 570.981
Early stopping, best iteration is:
[683]	training's l1: 272.48	training's rmse: 526.486	valid_1's l1: 292.257	valid_1's rmse: 570.029


[32m[I 2023-03-10 04:27:25,461][0m Trial 23 finished with value: 570.0292723996309 and parameters: {'objective': 'rmse', 'n_estimators': 2409, 'reg_alpha': 1.0629024047409223, 'reg_lambda': 0.7767503938596835, 'colsample_bytree': 0.31, 'num_leaves': 363, 'feature_fraction': 0.7234332443227668, 'bagging_fraction': 0.8772040134478133, 'bagging_freq': 5, 'min_child_samples': 43, 'subsample': 0.71, 'learning_rate': 0.015226580462754451, 'max_depth': 11, 'random_state': 42, 'n_jobs': 4}. Best is trial 23 with value: 570.0292723996309.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's l1: 374.702	training's rmse: 690.13	valid_1's l1: 378.626	valid_1's rmse: 695.874
[1000]	training's l1: 338.868	training's rmse: 626.917	valid_1's l1: 342.991	valid_1's rmse: 631.789
[1500]	training's l1: 327.091	training's rmse: 607.467	valid_1's l1: 331.622	valid_1's rmse: 613.6
[2000]	training's l1: 321.644	training's rmse: 598.079	valid_1's l1: 326.628	valid_1's rmse: 606.121
[2500]	training's l1: 317.893	training's rmse: 591.324	valid_1's l1: 323.331	valid_1's rmse: 601.047
[3000]	training's l1: 316.006	training's rmse: 587.87	valid_1's l1: 322.104	valid_1's rmse: 599.556
Did not meet early stopping. Best iteration is:
[3431]	training's l1: 314.824	training's rmse: 585.532	valid_1's l1: 321.358	valid_1's rmse: 598.593


[32m[I 2023-03-10 04:28:13,029][0m Trial 24 finished with value: 598.5931424003682 and parameters: {'objective': 'rmse', 'n_estimators': 3432, 'reg_alpha': 1.0112023755866533, 'reg_lambda': 0.8021977399546524, 'colsample_bytree': 0.3, 'num_leaves': 138, 'feature_fraction': 0.606192842997636, 'bagging_fraction': 0.8770395066821992, 'bagging_freq': 6, 'min_child_samples': 32, 'subsample': 0.7, 'learning_rate': 0.021135275731562285, 'max_depth': 3, 'random_state': 42, 'n_jobs': 4}. Best is trial 23 with value: 570.0292723996309.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's l1: 232.282	training's rmse: 402.091	valid_1's l1: 294.352	valid_1's rmse: 577.919
Early stopping, best iteration is:
[213]	training's l1: 261.067	training's rmse: 469.001	valid_1's l1: 296.274	valid_1's rmse: 574.209


[32m[I 2023-03-10 04:28:59,540][0m Trial 25 finished with value: 574.2089074666535 and parameters: {'objective': 'rmse', 'n_estimators': 4074, 'reg_alpha': 9.759407188061033, 'reg_lambda': 1.1172858938510666, 'colsample_bytree': 0.38, 'num_leaves': 371, 'feature_fraction': 0.699560119739213, 'bagging_fraction': 0.9320206449093876, 'bagging_freq': 9, 'min_child_samples': 1, 'subsample': 0.5700000000000001, 'learning_rate': 0.02829853009234547, 'max_depth': 17, 'random_state': 42, 'n_jobs': 4}. Best is trial 23 with value: 570.0292723996309.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's l1: 688.178	valid_1's l1: 691.487
[1000]	training's l1: 611.355	valid_1's l1: 615.757
[1500]	training's l1: 578.286	valid_1's l1: 582.862
[2000]	training's l1: 560.608	valid_1's l1: 565.13
Did not meet early stopping. Best iteration is:
[2212]	training's l1: 554.758	valid_1's l1: 559.323


[32m[I 2023-03-10 04:29:23,351][0m Trial 26 finished with value: 1074.0276911728529 and parameters: {'objective': 'mae', 'n_estimators': 2212, 'reg_alpha': 1.6220538572109424, 'reg_lambda': 6.658726877290712, 'colsample_bytree': 0.22000000000000003, 'num_leaves': 90, 'feature_fraction': 0.7158120439575003, 'bagging_fraction': 0.814992357213612, 'bagging_freq': 4, 'min_child_samples': 125, 'subsample': 0.5, 'learning_rate': 0.014187821449488906, 'max_depth': 1, 'random_state': 42, 'n_jobs': 4}. Best is trial 23 with value: 570.0292723996309.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's l1: 272.689	training's rmse: 520.751	valid_1's l1: 293.744	valid_1's rmse: 572.145
Early stopping, best iteration is:
[395]	training's l1: 274.534	training's rmse: 525.502	valid_1's l1: 293.965	valid_1's rmse: 571.82


[32m[I 2023-03-10 04:29:57,677][0m Trial 27 finished with value: 571.8201112739556 and parameters: {'objective': 'rmse', 'n_estimators': 2730, 'reg_alpha': 0.3076438906003267, 'reg_lambda': 0.375640194461802, 'colsample_bytree': 0.31, 'num_leaves': 236, 'feature_fraction': 0.578317154480727, 'bagging_fraction': 0.98754452727254, 'bagging_freq': 6, 'min_child_samples': 36, 'subsample': 0.61, 'learning_rate': 0.03189702269077777, 'max_depth': 10, 'random_state': 42, 'n_jobs': 4}. Best is trial 23 with value: 570.0292723996309.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's l1: 259.373	training's rmse: 487.097	valid_1's l1: 294.541	valid_1's rmse: 574.92
Early stopping, best iteration is:
[351]	training's l1: 268.201	training's rmse: 508.905	valid_1's l1: 294.578	valid_1's rmse: 572.968


[32m[I 2023-03-10 04:31:07,893][0m Trial 28 finished with value: 572.9677571014942 and parameters: {'objective': 'rmse', 'n_estimators': 2721, 'reg_alpha': 2.222430640946913, 'reg_lambda': 0.3702129218150611, 'colsample_bytree': 0.44, 'num_leaves': 379, 'feature_fraction': 0.766119584724596, 'bagging_fraction': 0.7040586127888544, 'bagging_freq': 7, 'min_child_samples': 39, 'subsample': 0.37, 'learning_rate': 0.018180417461531043, 'max_depth': 56, 'random_state': 42, 'n_jobs': 4}. Best is trial 23 with value: 570.0292723996309.[0m


Training until validation scores don't improve for 500 rounds
[500]	training's l1: 296.39	valid_1's l1: 304.625
[1000]	training's l1: 281.356	valid_1's l1: 293.66
[1500]	training's l1: 276.727	valid_1's l1: 292.44
[2000]	training's l1: 273.549	valid_1's l1: 292.002
[2500]	training's l1: 271.117	valid_1's l1: 291.78
[3000]	training's l1: 269.121	valid_1's l1: 291.65
[3500]	training's l1: 267.398	valid_1's l1: 291.605
Did not meet early stopping. Best iteration is:
[3897]	training's l1: 266.193	valid_1's l1: 291.576


[32m[I 2023-03-10 04:35:26,857][0m Trial 29 finished with value: 582.735801196882 and parameters: {'objective': 'mae', 'n_estimators': 3897, 'reg_alpha': 2.237205792538993, 'reg_lambda': 8.326943680293194, 'colsample_bytree': 0.5700000000000001, 'num_leaves': 233, 'feature_fraction': 0.5249004084896574, 'bagging_fraction': 0.6116398018366342, 'bagging_freq': 4, 'min_child_samples': 272, 'subsample': 0.77, 'learning_rate': 0.013939807815791148, 'max_depth': 21, 'random_state': 42, 'n_jobs': 4}. Best is trial 23 with value: 570.0292723996309.[0m
[32m[I 2023-03-10 04:35:26,867][0m A new study created in memory with name: no-name-a1524808-e777-4bc1-b89a-24fecdd8875e[0m


Number of finished trials: 30
Best LGBM trial parameters: {'objective': 'rmse', 'n_estimators': 2409, 'reg_alpha': 1.0629024047409223, 'reg_lambda': 0.7767503938596835, 'colsample_bytree': 0.31, 'num_leaves': 363, 'feature_fraction': 0.7234332443227668, 'bagging_fraction': 0.8772040134478133, 'bagging_freq': 5, 'min_child_samples': 43, 'subsample': 0.71, 'learning_rate': 0.015226580462754451, 'max_depth': 11, 'random_state': 42, 'n_jobs': 4}
Best score: 570.0292723996309


[32m[I 2023-03-10 04:35:28,686][0m Trial 0 finished with value: 971.8280420844575 and parameters: {'learning_rate': 0.5685438042246679, 'l2_leaf_reg': 5.029342087273549, 'bagging_temperature': 13.507582455939339, 'random_strength': 1.8509745816512462, 'depth': 1, 'min_data_in_leaf': 51}. Best is trial 0 with value: 971.8280420844575.[0m
[32m[I 2023-03-10 04:35:32,314][0m Trial 1 finished with value: 581.0607840072049 and parameters: {'learning_rate': 0.7584440387017448, 'l2_leaf_reg': 17.636436454608514, 'bagging_temperature': 0.9540300951894065, 'random_strength': 1.353285081243388, 'depth': 8, 'min_data_in_leaf': 104}. Best is trial 1 with value: 581.0607840072049.[0m
[32m[I 2023-03-10 04:35:34,345][0m Trial 2 finished with value: 658.4272919680703 and parameters: {'learning_rate': 0.39494832421837367, 'l2_leaf_reg': 49.30753434124007, 'bagging_temperature': 17.61732587522274, 'random_strength': 1.4255257012599225, 'depth': 3, 'min_data_in_leaf': 246}. Best is trial 1 with va

Number of finished trials: 30
Best Cat trial parameters: {'learning_rate': 0.23925516239349473, 'l2_leaf_reg': 3.49171040093455, 'bagging_temperature': 0.10109036926693746, 'random_strength': 1.9887587374486673, 'depth': 10, 'min_data_in_leaf': 72}
Best score: 575.2221729712609
CPU times: user 2h 7min 26s, sys: 7min, total: 2h 14min 27s
Wall time: 1h 42min 47s


<div style="background-color:rgba(177, 156, 217, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Train Models with Cross Validation</h1>
</div>

In [30]:
train = create_folds(train, Config.N_FOLDS)
# train = create_strat_folds(train, TARGET, Config.N_FOLDS)

n_folds=5, seed=42


In [31]:
all_cv_scores = pd.DataFrame(
    {
        "Model": pd.Series(dtype="str"),
        "Score": pd.Series(dtype="float"),
        "StdDev": pd.Series(dtype="float"),
        "RunTime": pd.Series(dtype="float"),
    }
)

oof = train[[ID, TARGET, "fold"]].copy().reset_index(drop=True).copy()
oof.set_index(ID, inplace=True)
oof.head()

Unnamed: 0_level_0,price,fold
id,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,13619,1
1.0,13387,2
2.0,2772,3
3.0,666,2
4.0,14453,0


In [32]:
def show_tree_model_fi(model, features:List[str]) -> None:
    print("\n=== Model Feature Importance ===")
    for i in model.feature_importances_.argsort()[::-1]:
        print(features[i], model.feature_importances_[i]/model.feature_importances_.sum())

def save_oof_predictions(model_name:str, final_valid_predictions, oof:pd.DataFrame) -> pd.DataFrame:
    final_valid_predictions_df = process_valid_predictions(
        final_valid_predictions, ID, model_name
    )
    display(final_valid_predictions_df.head())
    oof[f"pred_{model_name}"] = final_valid_predictions_df[f"pred_{model_name}"]

    return oof

def save_test_predictions(model_name:str, final_test_predictions, submission_df:pd.DataFrame, result_field:str=TARGET) -> None:
    result = merge_test_predictions(final_test_predictions, Config.calc_probability)
    # result[:20]
    submission_df[f"target_{model_name}"] = result #.astype(int)
    #     submission_df.head(10)
    ss = submission_df[[ID, f"target_{model_name}"]].copy().reset_index(drop=True)
    ss.rename(columns={f"target_{model_name}": result_field}, inplace=True)
    ss.to_csv(
        f"submission_{model_name}.csv", index=False
    )  # Can submit the individual model
    print("=== Target Value Counts ===")
#     display(ss[TARGET].value_counts())
    ss.head(10)

def process_valid_predictions(final_valid_predictions, train_id, model_name:str) -> pd.DataFrame:
    model = f"pred_{model_name}"
    final_valid_predictions_df = pd.DataFrame.from_dict(
        final_valid_predictions, orient="index"
    ).reset_index()
    final_valid_predictions_df.columns = [train_id, model]
    final_valid_predictions_df.set_index(train_id, inplace=True)
    final_valid_predictions_df.sort_index(inplace=True)
    final_valid_predictions_df.to_csv(f"train_pred_{model_name}.csv", index=True)

    return final_valid_predictions_df

def add_score(score_df:pd.DataFrame, model_name:str, score:float, std:float):
    dict1 = {"Model": model_name, "Score": cv_score, "StdDev": std_dev}
    score_df = score_df.append(dict1, ignore_index=True)
    return score_df

In [33]:
def train_cv_model(
    df:pd.DataFrame,
    test:pd.DataFrame,
    get_model_fn,
    FEATURES:List[str],
    TARGET:str,
    calc_probability:bool,
    rowid,
    params,
    n_folds:int=5,
    seed:int=42,
):

    final_test_predictions = []
    final_valid_predictions = {}
    fold_scores = []  # Scores of Validation Set
    feature_importance_lst = []

    test = test[FEATURES].copy()

    for fold in range(n_folds):
        print(10 * "=", f"Fold {fold+1}/{n_folds}", 10 * "=")

        start_time = time.time()

        xtrain = df[df.fold != fold].reset_index(
            drop=True
        )  # Everything not in validation fold
        xvalid = df[df.fold == fold].reset_index(drop=True)
        xtest = test.copy()

        valid_ids = xvalid.id.values.tolist()  # Id's of everything in validation fold

        ytrain = xtrain[TARGET]
        yvalid = xvalid[TARGET]

        xtrain = xtrain[FEATURES]
        xvalid = xvalid[FEATURES]

        scaler = preprocessing.StandardScaler()
#         scaler = preprocessing.MinMaxScaler()
        xtrain = scaler.fit(xtrain).transform(xtrain)
        xvalid = scaler.transform(xvalid)
        xtest = scaler.transform(xtest)

        model = get_model_fn # ()

        model.fit(
            xtrain,
            ytrain,
        )
        if calc_probability:
            preds_valid = model.predict_proba(xvalid)[:, 1]
            test_preds = model.predict_proba(xtest)[:, 1]
        else:
            preds_valid = model.predict(xvalid)
            test_preds = model.predict(xtest)

        preds_valid_class = model.predict(xvalid)
        
        final_test_predictions.append(test_preds)
        final_valid_predictions.update(dict(zip(valid_ids, preds_valid)))

#         fold_score = metrics.accuracy_score(yvalid, preds_valid_class)  # Validation Set Score
        fold_score = metrics.mean_absolute_error(
            yvalid, preds_valid
        ) 
#         fold_score = metrics.roc_auc_score(yvalid.values, preds_valid)  # Validation Set Score
#         show_classification_scores(yvalid.values, preds_valid_class)

#         fold_score = metrics.roc_auc_score(yvalid, preds_valid)  # Validation Set Score
#         fold_score = metrics.mean_squared_error(yvalid, preds_valid, squared=False)
        fold_scores.append(fold_score)
        #         importance_list.append(model.coef_.ravel())

        fi = []
        # Feature importance
#         fi = pd.DataFrame(
#             index=FEATURES,
#             data=model.coef_.ravel(),
#             columns=[f"{fold}_importance"],
#         )
        
        feature_importance_lst.append(fi)

        run_time = time.time() - start_time

        print(f"fold: {fold+1}, Score: {fold_score}, Run Time: {run_time:.2f}")

    return (
        model,
        feature_importance_lst,
        fold_scores,
        final_valid_predictions,
        final_test_predictions,
    )


def train_xgb_model(
    df:pd.DataFrame,
    test:pd.DataFrame,
    get_model_fn,
    FEATURES:List[str],
    TARGET:str,
    calc_probability:bool,
    rowid:str,
    params,
    n_folds:int=5,
    seed:int=42,
):

    print(params)
    final_test_predictions = []
    final_valid_predictions = {}
    fold_scores = []  # Scores of Validation Set
    feature_importance_lst = []

    test = test[FEATURES].copy()

    for fold in range(n_folds):
        print(10 * "=", f"Fold {fold+1}/{n_folds}", 10 * "=")

        start_time = time.time()

        xtrain = df[df.fold != fold].reset_index(
            drop=True
        )  # Everything not in validation fold
        xvalid = df[df.fold == fold].reset_index(drop=True)
        xtest = test.copy()

        valid_ids = xvalid.id.values.tolist()  # Id's of everything in validation fold

        ytrain = xtrain[TARGET]
        yvalid = xvalid[TARGET]

        xtrain = xtrain[FEATURES]
        xvalid = xvalid[FEATURES]

        model = get_model_fn # (params)

        model.fit(
            xtrain,
            ytrain,
            eval_set=[(xvalid, yvalid)],
            #             eval_metric="acc",  # auc
            verbose=0,
            #             early_stopping_rounds=3000,
            #             callbacks=[
            #                 xgb.log_evaluation(0),
            #                 xgb.early_stopping(500, False, True),
            #             ],
        )

        if calc_probability:
            preds_valid = model.predict_proba(xvalid)[:, 1]
            test_preds = model.predict_proba(xtest)[:, 1]
        else:
            preds_valid = model.predict(xvalid)
            test_preds = model.predict(xtest)

        preds_valid_class = model.predict(xvalid)
        
        final_test_predictions.append(test_preds)
        if Config.debug:
            print(f"GT Type: {type(yvalid.values)}")
            print(f"Preds Type: {type(preds_valid_class)}")
            print(f"         GT:{yvalid.values[:20]}")
            print(f"Preds Class:{preds_valid_class[:20]}")
            print(f"Preds Prob:{preds_valid[:20]}")
        final_valid_predictions.update(dict(zip(valid_ids, preds_valid_class)))

#         fold_score = metrics.cohen_kappa_score(yvalid,  preds_valid_class, weights = "quadratic")
#         fold_score = metrics.roc_auc_score(yvalid.values, preds_valid)  # Validation Set Score
#         show_classification_scores(yvalid.values, preds_valid_class)
        fold_score = metrics.mean_absolute_error(
            yvalid, preds_valid
        )  # Validation Set Score
#         fold_score = metrics.mean_squared_error(yvalid, preds_valid, squared=False)
        fold_scores.append(fold_score)

        # Feature importance
        fi = pd.DataFrame(
            index=FEATURES,
            data=model.feature_importances_,
            columns=[f"{fold}_importance"],
        )
        feature_importance_lst.append(fi)

        run_time = time.time() - start_time

        print(f"fold: {fold+1}, Score: {fold_score}, Run Time: {run_time:.2f}")

    return (
        model,
        feature_importance_lst,
        fold_scores,
        final_valid_predictions,
        final_test_predictions,
    )        

In [34]:
def run_linear_model(model_dict, model_name:str, features:List[str], oof:pd.DataFrame) -> (float, float, pd.DataFrame):
    (
        model,
        feature_importance_lst,
        fold_scores,
        final_valid_predictions,
        final_test_predictions,
    ) = train_cv_model(
        train,
        test,
        model_dict[model_name],
        features,
        TARGET,
        False, #Config.calc_probability,
        ID,
        {},
        Config.N_FOLDS,
        Config.seed,
    )

    cv_score, std_dev = show_fold_scores(fold_scores)

    oof = save_oof_predictions(model_name, final_valid_predictions, oof)
    oof.head()
    save_test_predictions(model_name, final_test_predictions, sample_submission, TARGET)

    return cv_score, std_dev, oof


def run_tree_model(model_dict, model_name:str, features:List[str], params, oof:pd.DataFrame) -> (float, float, pd.DataFrame):
    (
        model,
        feature_importance_lst,
        fold_scores,
        final_valid_predictions,
        final_test_predictions,
    ) = train_xgb_model(
        train,
        test,
        model_dict[model_name],
        features,
        TARGET,
        Config.calc_probability,
        ID,
        params,
        Config.N_FOLDS,
        Config.seed,
    )

    cv_score, std_dev = show_fold_scores(fold_scores)
    show_tree_model_fi(model, features)

    oof = save_oof_predictions(model_name, final_valid_predictions, oof)
    oof.head()
    save_test_predictions(model_name, final_test_predictions, sample_submission, TARGET)

    return cv_score, std_dev, oof

In [35]:
%%time

def run_models4features(model_dict, model_lst:List[str], target:str, feature_lst:List[str], all_cv_scores:pd.DataFrame, linear_models:bool=True) -> pd.DataFrame:

    oof = train[[ID, target, "fold"]].copy().reset_index(drop=True).copy()
    oof.set_index(ID, inplace=True)

    for idx, m in enumerate(model_lst):
        model = model_lst[idx]
        start_time = time.time()

        print(f"Model={model}")

        params = {}
        if linear_models:
                cv_score, std_dev, oof = run_linear_model(model_dict, model, feature_lst, oof)

        else:
            cv_score, std_dev, oof = run_tree_model(model_dict, model, feature_lst, params, oof)

        run_time = time.time() - start_time

        score_dict = {"Model": model, "Score": cv_score, "StdDev": std_dev, "RunTime": run_time}
        all_cv_scores = all_cv_scores.append(score_dict, ignore_index=True)
        print(f"Model Run Time: {run_time:.2f}")

    return all_cv_scores




CPU times: user 11 µs, sys: 0 ns, total: 11 µs
Wall time: 15.5 µs


In [36]:
lgbm_params = {'n_estimators': Config.N_ESTIMATORS,
                 'num_rounds': 404,
                 'learning_rate': 0.19,
                 'num_leaves': 17,
                 'max_depth': 8,
                 'min_data_in_leaf': 36,
                 'lambda_l1': 0.96,
                 'lambda_l2': 0.01,
                 'min_gain_to_split': 11.32,
                 'bagging_fraction': 0.6,
                 'feature_fraction': 0.9}


lgbm_params3 = {
    "n_estimators": Config.N_ESTIMATORS,
    'max_depth': 9,
    'learning_rate': 0.01,
    'min_data_in_leaf': 36, 
    'num_leaves': 100, 
    'feature_fraction': 0.8, 
    'bagging_fraction': 0.89, 
    'bagging_freq': 5, 
    'lambda_l2': 28,
    
    'seed': Config.seed,
    'objective': 'regression',
#     'boosting_type': 'gbdt',
#     'device': 'gpu', 
#     'gpu_platform_id': 0,
#     'gpu_device_id': 0,
    'n_jobs': -1,
    'metric': 'rmse',
    'verbose': -1
}
    
lgbm_params = gpu_ify_lgbm(lgbm_params)

In [37]:
xgb_params = {
    "n_estimators": Config.N_ESTIMATORS,  # 10_000,
    "max_depth": 10,  # 10
    "objective": "reg:squarederror", # Normal dist
#     "objective": "reg:gamma", # Gamma dist
    #     "enable_categorical": True,  # Only works with gpu_hist
    #     "eval_metric": "mae",
    #     "metric": "mae",
    #     "enable_categorical": True,
    "n_jobs": 8,  # 4
    "seed": Config.seed,
    "tree_method": "hist",
    #         "gpu_id": 0,
    "subsample": 0.9,  # 0.7
    "colsample_bytree": 0.7,
    "use_label_encoder": False,
    "learning_rate": 0.05,  # 0.01
}

xgb_params3 = {
    'n_estimators': Config.N_ESTIMATORS,
    'learning_rate': 0.05,
    'max_depth': 10,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'objective': 'reg:squarederror'
}

xgb_params_gamma = {
    "n_estimators": Config.N_ESTIMATORS,  # 10_000,
    "max_depth": 10,  # 10
    "objective": "reg:gamma", # "reg:gamma", "reg:squarederror"
    #     "enable_categorical": True,  # Only works with gpu_hist
    #     "eval_metric": "mae",
    #     "metric": "mae",
    #     "enable_categorical": True,
    "n_jobs": 8,  # 4
    "seed": Config.seed,
    "tree_method": "hist",
    #         "gpu_id": 0,
    "subsample": 0.9,  # 0.7
    "colsample_bytree": 0.7,
    "use_label_encoder": False,
    "learning_rate": 0.05,  # 0.01
}

xgb_params_gpu1 = {'objective': 'reg:squarederror',
              'booster': 'gbtree',
              'eval_metric': 'rmse',
              'n_estimators': 50000,
              'learning_rate': 0.1,
              'max_depth': 8,
              'colsample_bytree': 0.4,
              'subsample': 0.6,
              'alpha': 8,
              'lambda': 2,
              'random_state': Config.seed,
              'tree_method': 'gpu_hist'
              }

if Config.gpu:
    xgb_params["tree_method"] = "gpu_hist"
else:
    xgb_params["tree_method"] = "hist"

In [38]:
cb_params = {
    #     "learning_rate": 0.3277295792305584,
    "learning_rate": 0.05,
    "l2_leaf_reg": 3.1572972266001518,
    "bagging_temperature": 0.6799604234141348,
    "random_strength": 1.99590400593318,
    "depth": 10,
    "min_data_in_leaf": 93,
    # "iterations": 100,  # 10000
    "n_estimators": Config.N_ESTIMATORS,  # 10000
    "use_best_model": True,
    #     "task_type": "GPU",
    "random_seed": Config.seed,
}

cb_params = gpu_ify_cb(cb_params)

In [39]:
model_estimator_dict = {
    "xgb2": xgb.XGBRegressor(**xgb_params),
    "xgb_best_params": xgb.XGBRegressor(**best_xgb_params),
    "xgb3": xgb.XGBRegressor(**xgb_params3),
    "xgb_params_gamma": xgb.XGBRegressor(**xgb_params_gamma),
    "xgb_params_gpu1": xgb.XGBRegressor(**xgb_params_gpu1),
    
    
    "lgbm1": lgb.LGBMRegressor(**lgbm_params),

    "cat1": cb.CatBoostRegressor(),
    "cat2": cb.CatBoostRegressor(**cb_params),
    "cat_best_params": cb.CatBoostRegressor(**best_cb_params),

    "xgb1": xgb.XGBRegressor(),
    "lgbm0": lgb.LGBMRegressor(),
    "lgbm3": lgb.LGBMRegressor(lgbm_params3),
    "lgbm2": lgb.LGBMRegressor(
        learning_rate=0.05,
        max_depth=15,
        num_leaves=11,
        feature_fraction=0.3,
        subsample=0.1,
        n_jobs=-1,
    ),
    "lgbm3": lgb.LGBMRegressor(**lgbm_params),
    "lgbm_best_params": lgb.LGBMRegressor(**best_lgbm_params),


    "lin_reg": linear_model.LinearRegression(),
    "lasso": linear_model.Lasso(),
    "ridge": linear_model.Ridge(max_iter=7000),
    "ridge_25": linear_model.Ridge(fit_intercept=True, solver='auto', alpha=0.25, max_iter=7000),
    "ridge_50": linear_model.Ridge(fit_intercept=True, solver='auto', alpha=0.5, max_iter=7000),
}

## Tree Models

In [40]:
%%time

# model_lst = ["xgb3","xgb_best_params", "lgbm_best_params", "cat_best_params", "xgb1", "xgb2", "lgbm1", "lgbm2", "cat1", "cat2"]
# model_lst = ["xgb_params_gpu1","xgb_best_params", "lgbm_best_params", "cat_best_params", "xgb_params_gamma", "xgb3", "xgb1", "xgb2", "lgbm0", "lgbm1", "lgbm2", "lgbm3", "cat1", "cat2"]
model_lst = ["xgb3", "xgb1", "xgb2", "lgbm0", "lgbm1", "lgbm2", "lgbm3", "cat1", "cat2"]
# model_lst = = []
all_cv_scores = run_models4features(model_estimator_dict, model_lst, TARGET, FEATURES, all_cv_scores, linear_models=False)    

all_cv_scores.sort_values(by=["Score"], ascending=False)

Model=xgb3
{}
fold: 1, Score: 294.71503615332693, Run Time: 40.51
fold: 2, Score: 291.05066557309306, Run Time: 43.11
fold: 3, Score: 292.23438813737243, Run Time: 43.95
fold: 4, Score: 292.8416222447851, Run Time: 46.38
fold: 5, Score: 298.1499453915773, Run Time: 48.20
Scores -> Adjusted: 291.32092276 , mean: 293.79833150, std: 2.47740874

=== Model Feature Importance ===
y 0.47607616
carat 0.19923441
clarity_SI2 0.06468832
clarity_SI1 0.036328815
clarity_VVS2 0.02682921
color_J 0.024244588
clarity_VS1 0.020305462
color_I 0.01972157
clarity_VVS1 0.017993124
clarity_VS2 0.017387941
clarity_I1 0.01685976
color_H 0.01548362
clarity_IF 0.014319754
z 0.012763981
x 0.008592768
color_G 0.0072064423
color_D 0.005380176
color_E 0.004773336
color_F 0.004442274
cut_Ideal 0.0037930745
cut_Premium 0.000742291
cut_Fair 0.00070001045
depth 0.0005908223
table 0.0005829938
cut_Good 0.0005030157
cut_Very Good 0.00045608432


Unnamed: 0_level_0,pred_xgb3
id,Unnamed: 1_level_1
0.0,13567.95312
1.0,12602.54102
2.0,2834.5459
3.0,695.54175
4.0,15015.99316


Mode
=== Target Value Counts ===
Model Run Time: 226.04
Model=xgb1
{}
fold: 1, Score: 301.78501917080354, Run Time: 24.04
fold: 2, Score: 297.8963373027232, Run Time: 25.31
fold: 3, Score: 297.7470416011804, Run Time: 26.35
fold: 4, Score: 300.20821226194704, Run Time: 27.70
fold: 5, Score: 306.47107039979386, Run Time: 30.00
Scores -> Adjusted: 297.62056729 , mean: 300.82153615, std: 3.20096885

=== Model Feature Importance ===
y 0.4663739
carat 0.17956698
clarity_SI2 0.0562305
color_J 0.046717048
clarity_SI1 0.03725314
clarity_VVS2 0.034353077
clarity_I1 0.03210804
color_I 0.029947074
clarity_VVS1 0.019486794
clarity_IF 0.015827313
color_H 0.0152229965
color_E 0.009459043
color_D 0.008805475
clarity_VS2 0.008360341
z 0.008231128
clarity_VS1 0.0077900467
cut_Ideal 0.005499829
color_F 0.0053427014
x 0.0046494547
color_G 0.004503285
cut_Fair 0.0013055301
cut_Premium 0.0006932913
depth 0.0006715329
cut_Good 0.00061646994
table 0.00058547436
cut_Very Good 0.0003996174


Unnamed: 0_level_0,pred_xgb1
id,Unnamed: 1_level_1
0.0,13894.94043
1.0,12555.09473
2.0,2880.69775
3.0,712.01849
4.0,14758.49219


Mode
=== Target Value Counts ===
Model Run Time: 138.20
Model=xgb2
{}
fold: 1, Score: 295.44386523152053, Run Time: 5.68
fold: 2, Score: 292.22021563928126, Run Time: 7.12
fold: 3, Score: 293.20933100864943, Run Time: 8.27
fold: 4, Score: 293.93570258531605, Run Time: 10.69
fold: 5, Score: 299.5412034565594, Run Time: 11.60
Scores -> Adjusted: 292.30875324 , mean: 294.87006358, std: 2.56131034

=== Model Feature Importance ===
y 0.442372
carat 0.15849876
clarity_SI2 0.077546015
z 0.0483618
x 0.041224793
clarity_SI1 0.032756325
clarity_VVS2 0.03054043
color_J 0.022573877
clarity_VS1 0.017445697
clarity_I1 0.016119752
color_I 0.015596995
clarity_VVS1 0.015179364
clarity_VS2 0.0130287055
clarity_IF 0.011732769
color_H 0.010919328
color_F 0.00988803
color_D 0.009852844
color_E 0.009641757
color_G 0.0093454085
cut_Ideal 0.0029493654
table 0.0012894304
cut_Fair 0.0008043124
cut_Premium 0.0006351716
cut_Good 0.0006000064
cut_Very Good 0.0005620582
depth 0.0005349868


Unnamed: 0_level_0,pred_xgb2
id,Unnamed: 1_level_1
0.0,13731.49902
1.0,12486.21582
2.0,2838.27539
3.0,687.6286
4.0,14984.20898


Mode
=== Target Value Counts ===
Model Run Time: 47.48
Model=lgbm0
{}
fold: 1, Score: 302.19847624854765, Run Time: 4.07
fold: 2, Score: 298.4864215073444, Run Time: 5.57
fold: 3, Score: 299.3847727001143, Run Time: 6.98
fold: 4, Score: 300.0509082932714, Run Time: 9.01
fold: 5, Score: 306.1245280350128, Run Time: 10.14
Scores -> Adjusted: 298.52083933 , mean: 301.24902136, std: 2.72818203

=== Model Feature Importance ===
carat 0.11433333333333333
y 0.10333333333333333
z 0.07833333333333334
x 0.07466666666666667
depth 0.07
clarity_SI2 0.057666666666666665
color_J 0.050666666666666665
clarity_SI1 0.042666666666666665
color_I 0.04133333333333333
color_D 0.03666666666666667
clarity_I1 0.036
color_H 0.029666666666666668
table 0.029
color_E 0.029
clarity_VVS1 0.029
clarity_VS1 0.027666666666666666
clarity_IF 0.02666666666666667
color_F 0.025666666666666667
clarity_VS2 0.025333333333333333
clarity_VVS2 0.023666666666666666
color_G 0.021
cut_Ideal 0.012666666666666666
cut_Fair 0.005333333333

Unnamed: 0_level_0,pred_lgbm0
id,Unnamed: 1_level_1
0.0,13897.79108
1.0,12373.6771
2.0,2798.94058
3.0,691.11222
4.0,14956.9832


Mode
=== Target Value Counts ===
Model Run Time: 39.89
Model=lgbm1
{}
fold: 1, Score: 298.62565554027316, Run Time: 14.50
fold: 2, Score: 293.6234258624857, Run Time: 9.13
fold: 3, Score: 295.08833523342406, Run Time: 11.01
fold: 4, Score: 296.9794249489986, Run Time: 12.69
fold: 5, Score: 301.9545087848611, Run Time: 13.56
Scores -> Adjusted: 294.35866011 , mean: 297.25427007, std: 2.89560996

=== Model Feature Importance ===
carat 0.15433932759968724
y 0.14433150899139954
z 0.1385457388584832
x 0.12134480062548866
depth 0.11743549648162627
table 0.058326817826426895
clarity_SI2 0.031743549648162624
color_J 0.02329945269741986
color_I 0.020641125879593433
color_H 0.01892103205629398
clarity_SI1 0.017200938232994525
color_G 0.01641907740422205
color_E 0.014855355746677092
color_F 0.01344800625488663
color_D 0.012822517591868648
clarity_VS1 0.012509773260359656
cut_Premium 0.012040656763096168
clarity_VS2 0.011727912431587178
cut_Ideal 0.01125879593432369
clarity_VVS2 0.0096950742767787

Unnamed: 0_level_0,pred_lgbm1
id,Unnamed: 1_level_1
0.0,13434.91358
1.0,12224.46144
2.0,2811.78792
3.0,698.31991
4.0,14824.09103


Mode
=== Target Value Counts ===
Model Run Time: 64.91
Model=lgbm2
{}
fold: 1, Score: 428.3906869875579, Run Time: 3.71
fold: 2, Score: 421.9205384919484, Run Time: 4.41
fold: 3, Score: 425.29325104183187, Run Time: 6.12
fold: 4, Score: 427.91966866814465, Run Time: 7.83
fold: 5, Score: 431.2938167118121, Run Time: 8.65
Scores -> Adjusted: 423.80372035 , mean: 426.96359238, std: 3.15987203

=== Model Feature Importance ===
carat 0.204
y 0.182
color_I 0.083
clarity_VVS2 0.055
clarity_SI2 0.045
cut_Ideal 0.042
clarity_VVS1 0.041
color_E 0.041
clarity_VS2 0.039
clarity_VS1 0.038
clarity_I1 0.034
x 0.033
z 0.033
clarity_SI1 0.022
color_J 0.021
color_G 0.021
color_F 0.018
clarity_IF 0.015
color_H 0.014
color_D 0.011
cut_Fair 0.004
depth 0.002
cut_Good 0.001
table 0.001
cut_Premium 0.0
cut_Very Good 0.0


Unnamed: 0_level_0,pred_lgbm2
id,Unnamed: 1_level_1
0.0,12399.22458
1.0,13781.01161
2.0,2667.72767
3.0,811.73983
4.0,14161.82557


Mode
=== Target Value Counts ===
Model Run Time: 35.50
Model=lgbm3
{}
fold: 1, Score: 298.6266755101849, Run Time: 7.50
fold: 2, Score: 293.63084731820453, Run Time: 8.78
fold: 3, Score: 295.08796833287164, Run Time: 10.35
fold: 4, Score: 296.9793997198946, Run Time: 12.47
fold: 5, Score: 301.95557811202633, Run Time: 13.13
Scores -> Adjusted: 294.36184479 , mean: 297.25609380, std: 2.89424901

=== Model Feature Importance ===
carat 0.15433932759968724
y 0.14433150899139954
z 0.13870211102423768
x 0.12118842845973417
depth 0.11743549648162627
table 0.058326817826426895
clarity_SI2 0.031743549648162624
color_J 0.02329945269741986
color_I 0.020641125879593433
color_H 0.01892103205629398
clarity_SI1 0.017200938232994525
color_G 0.01641907740422205
color_E 0.014855355746677092
color_F 0.01344800625488663
color_D 0.012822517591868648
clarity_VS1 0.012509773260359656
cut_Premium 0.012040656763096168
clarity_VS2 0.011727912431587178
cut_Ideal 0.01125879593432369
clarity_VVS2 0.009695074276778

Unnamed: 0_level_0,pred_lgbm3
id,Unnamed: 1_level_1
0.0,13434.91361
1.0,12224.46145
2.0,2811.78792
3.0,698.31991
4.0,14824.09094


Mode
=== Target Value Counts ===
Model Run Time: 56.52
Model=cat1
{}
fold: 1, Score: 296.9685761360375, Run Time: 25.75
fold: 2, Score: 293.5278621777346, Run Time: 27.25
fold: 3, Score: 293.7082309629823, Run Time: 29.31
fold: 4, Score: 296.24575497664415, Run Time: 32.12
fold: 5, Score: 299.71177887936295, Run Time: 32.03
Scores -> Adjusted: 293.74621026 , mean: 296.03244063, std: 2.28623037

=== Model Feature Importance ===
carat 0.2963475012110215
y 0.21842774444767615
x 0.13178745405086806
z 0.09359989921603121
clarity_SI2 0.07220402337059788
color_J 0.03533876670018171
color_I 0.030202386090710676
clarity_SI1 0.029989421678705382
clarity_VVS2 0.016205565572363065
color_H 0.015876066241822563
clarity_I1 0.0119735342508382
clarity_VVS1 0.008410665145976721
color_D 0.00679388015359674
clarity_VS1 0.006398824176447105
color_E 0.0058582410987825795
clarity_IF 0.005663455306059709
color_F 0.004571180273704562
cut_Ideal 0.002673356994814298
depth 0.0023378187551711745
table 0.0017101135

Unnamed: 0_level_0,pred_cat1
id,Unnamed: 1_level_1
0.0,13892.14899
1.0,12362.21555
2.0,2913.91113
3.0,705.66616
4.0,15079.71822


Mode
=== Target Value Counts ===
Model Run Time: 150.71
Model=cat2
{}
fold: 1, Score: 320.037851041598, Run Time: 82.91
fold: 2, Score: 315.81998707369047, Run Time: 4.26
fold: 3, Score: 317.78885873725426, Run Time: 5.81
fold: 4, Score: 318.4137478472341, Run Time: 7.65
fold: 5, Score: 321.84694772817016, Run Time: 8.26
Scores -> Adjusted: 316.73861163 , mean: 318.78147849, std: 2.04286685

=== Model Feature Importance ===
y 0.2220849393769301
z 0.15991288714178273
carat 0.14566057646601596
clarity_SI2 0.11406477514508412
x 0.08218364605166953
color_J 0.05527856355288489
color_I 0.04730916406137489
clarity_SI1 0.04624125127618493
color_H 0.02338201866751044
clarity_VVS2 0.020566422195874994
clarity_I1 0.017669199424920297
clarity_VVS1 0.012010767430532981
color_D 0.010672195536827408
clarity_VS1 0.00959378590779735
color_E 0.009513695499838716
clarity_IF 0.0078115913826419595
color_F 0.007004937451387138
color_G 0.00393424763620508
cut_Ideal 0.0025445429040955388
clarity_VS2 0.0012628

Unnamed: 0_level_0,pred_cat2
id,Unnamed: 1_level_1
0.0,13344.28006
1.0,12542.9373
2.0,2826.30733
3.0,765.32848
4.0,14471.47673


Mode
=== Target Value Counts ===
Model Run Time: 113.04
CPU times: user 22min 5s, sys: 22.5 s, total: 22min 27s
Wall time: 14min 32s


Unnamed: 0,Model,Score,StdDev,RunTime
5,lgbm2,426.96359,3.15987,35.49974
8,cat2,318.78148,2.04287,113.04039
3,lgbm0,301.24902,2.72818,39.89433
1,xgb1,300.82154,3.20097,138.19885
6,lgbm3,297.25609,2.89425,56.51562
4,lgbm1,297.25427,2.89561,64.90748
7,cat1,296.03244,2.28623,150.70988
2,xgb2,294.87006,2.56131,47.48469
0,xgb3,293.79833,2.47741,226.04151


## Linear Models

In [41]:
model_lst = ["lin_reg", "lasso", "ridge", "ridge_25", "ridge_50"]
model_lst = ["lasso", "ridge",  "ridge_50"]
# model_lst = []
# all_cv_scores = run_models4features(model_lst, TARGET, FEATURES, all_cv_scores, linear_models=True)    
all_cv_scores = run_models4features(model_estimator_dict, model_lst, TARGET, FEATURES, all_cv_scores, linear_models=True)    

all_cv_scores.head()

Model=lasso
fold: 1, Score: 643.7941963585171, Run Time: 4.14
fold: 2, Score: 638.4343948412637, Run Time: 5.62
fold: 3, Score: 645.5670602320827, Run Time: 6.60
fold: 4, Score: 644.5535423176394, Run Time: 9.01
fold: 5, Score: 640.202803389652, Run Time: 11.51
Scores -> Adjusted: 639.78626450 , mean: 642.51039943, std: 2.72413493


Unnamed: 0_level_0,pred_lasso
id,Unnamed: 1_level_1
0.0,11369.31683
1.0,13264.20051
2.0,3575.87479
3.0,959.69054
4.0,12895.68808


Mode
=== Target Value Counts ===
Model Run Time: 41.04
Model=ridge
fold: 1, Score: 643.6029350439413, Run Time: 1.48
fold: 2, Score: 638.3798525169367, Run Time: 2.68
fold: 3, Score: 645.378745790205, Run Time: 3.86
fold: 4, Score: 644.3953419698855, Run Time: 6.07
fold: 5, Score: 640.2103311869689, Run Time: 7.26
Scores -> Adjusted: 639.73797942 , mean: 642.39344130, std: 2.65546188


Unnamed: 0_level_0,pred_ridge
id,Unnamed: 1_level_1
0.0,11365.59766
1.0,13281.57554
2.0,3570.67077
3.0,970.86667
4.0,12907.27292


Mode
=== Target Value Counts ===
Model Run Time: 25.66
Model=ridge_50
fold: 1, Score: 643.592054918037, Run Time: 1.31
fold: 2, Score: 638.368888637275, Run Time: 2.67
fold: 3, Score: 645.367922401013, Run Time: 4.06
fold: 4, Score: 644.3846726899225, Run Time: 5.85
fold: 5, Score: 640.1994547538842, Run Time: 7.25
Scores -> Adjusted: 639.72706754 , mean: 642.38259868, std: 2.65553114


Unnamed: 0_level_0,pred_ridge_50
id,Unnamed: 1_level_1
0.0,11365.90386
1.0,13281.94324
2.0,3570.60367
3.0,971.02095
4.0,12907.41615


Mode
=== Target Value Counts ===
Model Run Time: 25.64


Unnamed: 0,Model,Score,StdDev,RunTime
0,xgb3,293.79833,2.47741,226.04151
1,xgb1,300.82154,3.20097,138.19885
2,xgb2,294.87006,2.56131,47.48469
3,lgbm0,301.24902,2.72818,39.89433
4,lgbm1,297.25427,2.89561,64.90748


In [42]:
sample_submission.head(20)

Unnamed: 0,id,price,target_xgb3,target_xgb1,target_xgb2,target_lgbm0,target_lgbm1,target_lgbm2,target_lgbm3,target_cat1,target_cat2,target_lasso,target_ridge,target_ridge_50
0,193573,3969.155,850.64008,863.39423,873.41058,845.00626,831.73106,849.0311,831.73105,849.66503,893.66363,1122.77336,1126.43677,1126.54149
1,193574,3969.155,2453.35107,2453.50879,2429.81934,2516.13285,2455.03421,2628.9233,2455.03422,2428.91431,2528.77597,2336.61303,2326.17711,2326.0792
2,193575,3969.155,2299.41772,2277.79492,2271.19141,2312.48726,2282.05047,2455.92987,2282.05049,2249.38889,2313.05169,2275.41267,2258.40432,2258.18195
3,193576,3969.155,824.37177,829.64874,812.18494,833.12755,844.59355,825.05498,844.59356,841.44512,843.07182,1286.60594,1295.7163,1295.78686
4,193577,3969.155,5778.32568,5604.96094,5742.00977,5677.22418,5545.02071,5723.92561,5545.02066,5689.12993,5841.71126,6793.76562,6781.87932,6781.79378
5,193578,3969.155,679.14783,721.74066,683.8111,646.85482,711.75287,829.72593,711.75287,733.2797,724.89829,599.70772,609.83071,609.93063
6,193579,3969.155,12227.13965,12271.94434,12286.02148,12230.90206,12346.93689,11580.68038,12346.93681,12378.75111,12151.24082,11051.9774,11056.75891,11056.85506
7,193580,3969.155,2878.64893,2871.62061,2928.54932,2885.45318,2850.52843,2781.81996,2850.52842,2932.60449,2936.2026,3522.07652,3514.0807,3513.96258
8,193581,3969.155,14572.83594,14974.13379,15552.91113,14940.3296,15141.13342,14354.42963,15141.13346,15356.94329,15079.58421,15537.07988,15545.16514,15545.66964
9,193582,3969.155,1840.70691,1868.4032,1849.61938,1953.83815,1927.65446,1973.66732,1927.65446,1795.6559,1831.69432,2243.52578,2236.53769,2236.65566


<div style="background-color:rgba(177, 156, 217, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Blend Models</h1>
</div>

In [43]:
all_blend_scores = pd.DataFrame(
    {
        "Model": pd.Series(dtype="str"),
        "Score": pd.Series(dtype="float"),
        "StdDev": pd.Series(dtype="float"),
    }
)

In [44]:
model_lst

['lasso', 'ridge', 'ridge_50']

In [45]:
# model_lst = ["xgb_params_gpu1", "xgb_best_params","xgb1", "xgb2","xgb3", "cat1", "cat_best_params","lgbm0", "lgbm1", "lgbm3"]
model_lst = ["xgb1", "xgb2","xgb3", "cat1", "lgbm0", "lgbm1", "lgbm3"]

In [46]:
len(model_lst)

7

In [47]:
target_names = [f"target_{model}" for model in model_lst]
target_names

['target_xgb1',
 'target_xgb2',
 'target_xgb3',
 'target_cat1',
 'target_lgbm0',
 'target_lgbm1',
 'target_lgbm3']

In [48]:
sample_submission[TARGET] = sample_submission[target_names].sum(axis=1) / len(model_lst)

In [49]:
sample_submission[[ID, TARGET]].to_csv("submission_models_wt_avg.csv", index=False)
sample_submission[[ID, TARGET]].tail(8)

Unnamed: 0,id,price
129042,322615,2923.95301
129043,322616,655.18117
129044,322617,4062.08885
129045,322618,3822.17963
129046,322619,2504.89758
129047,322620,7611.92323
129048,322621,5453.39414
129049,322622,4117.32635


In [50]:
sample_submission[TARGET] = (
#     (sample_submission["target_xgb_bp"] * 2 )
#     + (sample_submission["target_lgbm_bp"]  )
    (sample_submission["target_xgb1"] * 3 )
    + (sample_submission["target_lgbm1"])
#     + (sample_submission["target_lgbm2"])    
#     + (sample_submission["target_lgbm2"])
    + (sample_submission["target_cat1"] )
    + (sample_submission["target_cat2"] )    
#     + (sample_submission["target_cat_bp"] )
#     + (sample_submission["target_svc"] )
#     + (sample_submission["target_log_reg3"] )
#     + (sample_submission["target_cat2"] )
)/6

# sample_submission[TARGET] = sample_submission[TARGET].astype(int)

In [51]:
sample_submission[[ID, TARGET]].to_csv("submission_wt_avg.csv", index=False)
sample_submission[[ID, TARGET]].tail(8)

Unnamed: 0,id,price
129042,322615,2853.78809
129043,322616,665.7152
129044,322617,4074.52658
129045,322618,3783.59696
129046,322619,2482.38025
129047,322620,7663.32922
129048,322621,5524.38807
129049,322622,4095.62464


In [52]:
all_cv_scores.sort_values(by=["Score"], ascending=False)

Unnamed: 0,Model,Score,StdDev,RunTime
9,lasso,642.5104,2.72413,41.04177
10,ridge,642.39344,2.65546,25.66121
11,ridge_50,642.3826,2.65553,25.64283
5,lgbm2,426.96359,3.15987,35.49974
8,cat2,318.78148,2.04287,113.04039
3,lgbm0,301.24902,2.72818,39.89433
1,xgb1,300.82154,3.20097,138.19885
6,lgbm3,297.25609,2.89425,56.51562
4,lgbm1,297.25427,2.89561,64.90748
7,cat1,296.03244,2.28623,150.70988


<div style="background-color:rgba(177, 156, 217, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Level 1 Stack Models</h1>
</div>

In [53]:
## TODO: Generate these dictionaries from model names

train_oof_dict = {
    "train_pred_cat1": "train_pred_cat1.csv",
    "train_pred_cat2": "train_pred_cat2.csv",
    "train_pred_lgbm1": "train_pred_lgbm1.csv",    
    "train_pred_lgbm2": "train_pred_lgbm2.csv",    
    "train_pred_xgb1": "train_pred_xgb1.csv"
}

test_pred_dict = {
    "submission_cat1": "submission_cat1.csv",
    "submission_cat2": "submission_cat2.csv",
    "submission_lgbm1": "submission_lgbm1.csv",
    "submission_lgbm2": "submission_lgbm2.csv",
    "submission_xgb1": "submission_xgb1.csv",
}

In [54]:
def blend_results(train_oof_dict, test_pred_dict):
    oof_df = pd.DataFrame()
    test_preds_df = pd.DataFrame()

    for name, train_oof_fname in train_oof_dict.items():
        fname = "../working/" + train_oof_fname
        print(f"Processing {name}, {train_oof_fname}")
        df = pd.read_csv(fname)
        print(df.head())
#         print(df.iloc[:,1])
        preds = pd.Series(df.iloc[:,1], name=name)
#         print(preds[:5])
        oof_df = pd.concat([oof_df, preds], axis=1)
    #     oof_df = pd.concat([oof_df, pd.Series(np.load(TRAIN_PATH / train_oof), name=name)], axis=1)

    for name, test_pred_fname in test_pred_dict.items():
        fname = "../working/" + test_pred_fname
        print(f"{name}, {test_pred_fname}")
        df = pd.read_csv(fname)
        print(df.head())
        preds = pd.Series(df.iloc[:,1], name=name)
        test_preds_df = pd.concat([test_preds_df, preds], axis=1)

    print("=== oof ===")
    print(oof_df.head())
    print("=== test_preds ===")
    print(test_preds_df.head())
    return oof_df, test_preds_df
    
# (oof_df, preds_df) = blend_results(train_oof_dict, test_pred_dict)    

In [55]:
def load_oof_results(train_oof_dict, test_pred_dict):
    oof_df = pd.DataFrame()
    test_preds_df = pd.DataFrame()

    for name, train_oof_fname in train_oof_dict.items():
        fname = "../working/" + train_oof_fname
        print(f"Processing {name}, {train_oof_fname}")
        df = pd.read_csv(fname)
        print(df.head())
#         print(df.iloc[:,1])
        preds = pd.Series(df.iloc[:,1], name=name)
#         print(preds[:5])
        oof_df = pd.concat([oof_df, preds], axis=1)
    #     oof_df = pd.concat([oof_df, pd.Series(np.load(TRAIN_PATH / train_oof), name=name)], axis=1)

    for name, test_pred_fname in test_pred_dict.items():
        fname = "../working/" + test_pred_fname
        print(f"{name}, {test_pred_fname}")
        df = pd.read_csv(fname)
        print(df.head())
        preds = pd.Series(df.iloc[:,1], name=name)
        test_preds_df = pd.concat([test_preds_df, preds], axis=1)

    print("=== oof ===")
    print(oof_df.head())
    print("=== test_preds ===")
    print(test_preds_df.head())
    return oof_df, test_preds_df
    
(oof_df, preds_df) = load_oof_results(train_oof_dict, test_pred_dict) 

Processing train_pred_cat1, train_pred_cat1.csv
    id    pred_cat1
0  0.0  13892.14899
1  1.0  12362.21555
2  2.0   2913.91113
3  3.0    705.66616
4  4.0  15079.71822
Processing train_pred_cat2, train_pred_cat2.csv
    id    pred_cat2
0  0.0  13344.28006
1  1.0  12542.93730
2  2.0   2826.30733
3  3.0    765.32848
4  4.0  14471.47673
Processing train_pred_lgbm1, train_pred_lgbm1.csv
    id   pred_lgbm1
0  0.0  13434.91358
1  1.0  12224.46144
2  2.0   2811.78792
3  3.0    698.31991
4  4.0  14824.09103
Processing train_pred_lgbm2, train_pred_lgbm2.csv
    id   pred_lgbm2
0  0.0  12399.22458
1  1.0  13781.01161
2  2.0   2667.72767
3  3.0    811.73983
4  4.0  14161.82557
Processing train_pred_xgb1, train_pred_xgb1.csv
    id   pred_xgb1
0  0.0  13894.9400
1  1.0  12555.0950
2  2.0   2880.6978
3  3.0    712.0185
4  4.0  14758.4920
submission_cat1, submission_cat1.csv
       id       price
0  193573   849.66503
1  193574  2428.91431
2  193575  2249.38889
3  193576   841.44512
4  193577  5689

In [56]:
oof_df.head()

Unnamed: 0,train_pred_cat1,train_pred_cat2,train_pred_lgbm1,train_pred_lgbm2,train_pred_xgb1
0,13892.14899,13344.28006,13434.91358,12399.22458,13894.94
1,12362.21555,12542.9373,12224.46144,13781.01161,12555.095
2,2913.91113,2826.30733,2811.78792,2667.72767,2880.6978
3,705.66616,765.32848,698.31991,811.73983,712.0185
4,15079.71822,14471.47673,14824.09103,14161.82557,14758.492


In [57]:
preds_df.head()

Unnamed: 0,submission_cat1,submission_cat2,submission_lgbm1,submission_lgbm2,submission_xgb1
0,849.66503,893.66363,831.73106,849.0311,863.3942
1,2428.91431,2528.77597,2455.03421,2628.9233,2453.5088
2,2249.38889,2313.05169,2282.05047,2455.92987,2277.795
3,841.44512,843.07182,844.59355,825.05498,829.64874
4,5689.12993,5841.71126,5545.02071,5723.92561,5604.961


In [58]:
type(preds_df)

pandas.core.frame.DataFrame

In [59]:
def run_lr(useful_features:List[str], TARGET:str, train_df:pd.DataFrame, test_df:pd.DataFrame) -> (List[float],List[float]):
    final_predictions = []
    scores = []

    kfold = model_selection.KFold(n_splits=Config.N_FOLDS, shuffle=True, random_state=Config.seed)

    for fold, (train_idx, valid_idx) in enumerate(kfold.split(train_df)):
        xtrain = train_df.iloc[train_idx].reset_index(drop=True)
        xvalid = train_df.iloc[valid_idx].reset_index(drop=True)

        xtest = test_df[useful_features].copy()

        ytrain = xtrain[TARGET]
        yvalid = xvalid[TARGET]

        xtrain = xtrain[useful_features]
        xvalid = xvalid[useful_features]

#         model = LogisticRegression()
        model = linear_model.LinearRegression()
        # Smaller C means more regularization; default=1.0
        # 2947.0517025518097
#         model = LogisticRegression(max_iter=500, C=2947.0517025518097, penalty='l2',solver='newton-cg')
#         model = LogisticRegression(C = 2947.0517025518097,
#                         max_iter = 500,
#                         penalty = 'l2',
#                         solver = 'liblinear')
        model.fit(xtrain, ytrain)

        preds_valid = model.predict_proba(xvalid)[:,-1]
        test_preds = model.predict_proba(xtest)[:,-1]

        final_predictions.append(test_preds)
#         score = metrics.roc_auc_score(yvalid, preds_valid)
        score = metrics.mean_squared_error(yvalid, preds_valid, squared=False)
        print(f"Fold={fold}, Score={score}")
        scores.append(score)
    return scores, final_predictions


In [60]:
# useful_features = ["pred_lda", "pred_gbc","pred_gbc2", "pred_cat_bp", "pred_cat1", "pred_lgbm1", "pred_lgbm2", "pred_lgbm_bp", "pred_xgb1", "pred_xgb_bp"]
useful_features = [ "train_pred_cat1", "train_pred_cat2", "train_pred_lgbm1", "train_pred_lgbm2", "train_pred_xgb1"]

In [61]:
oof_df[useful_features].head()

Unnamed: 0,train_pred_cat1,train_pred_cat2,train_pred_lgbm1,train_pred_lgbm2,train_pred_xgb1
0,13892.14899,13344.28006,13434.91358,12399.22458,13894.94
1,12362.21555,12542.9373,12224.46144,13781.01161,12555.095
2,2913.91113,2826.30733,2811.78792,2667.72767,2880.6978
3,705.66616,765.32848,698.31991,811.73983,712.0185
4,15079.71822,14471.47673,14824.09103,14161.82557,14758.492


In [62]:
# preds_df[useful_features].head()

In [63]:
# fold_scores, final_predictions = run_lr(useful_features, TARGET, oof_df, preds_df)
# test_preds = np.mean(np.column_stack(final_predictions), axis=1)
# cv_score, std_dev = show_fold_scores(fold_scores)
# create_submission("level1_lr", TARGET, test_preds)

In [64]:
pd.options.display.max_colwidth = 100
pd.set_option("display.max_rows", 999)
pd.set_option("display.precision", 5)
pd.options.display.float_format = '{:.2f}'.format
pd.options.display.max_colwidth

100

In [65]:
all_cv_scores.sort_values(by=["Score"], ascending=False)

Unnamed: 0,Model,Score,StdDev,RunTime
9,lasso,642.51,2.72,41.04
10,ridge,642.39,2.66,25.66
11,ridge_50,642.38,2.66,25.64
5,lgbm2,426.96,3.16,35.5
8,cat2,318.78,2.04,113.04
3,lgbm0,301.25,2.73,39.89
1,xgb1,300.82,3.2,138.2
6,lgbm3,297.26,2.89,56.52
4,lgbm1,297.25,2.9,64.91
7,cat1,296.03,2.29,150.71
