# Feature Engineering

These techniques are adapted from the ones covered in the [Feature Engineering](https://www.kaggle.com/learn/feature-engineering) course on Kaggle.

In [1]:
# Global variables for testing changes to this notebook quickly
FOLD_SEED = 0    # random_state handed to StratifiedKFold when the folds are assigned
NUM_FOLDS = 3    # number of cross-validation folds; also sizes the per-fold score arrays
EARLY_STOP = 50  # early-stopping rounds passed to the boosted models in the scoring functions
TRIALS = 100     # not referenced in the visible cells; presumably a tuning trial budget - TODO confirm

In [2]:
# Essentials
import inspect
import os
import time
import warnings

import numpy as np
import pandas as pd

# Preprocessing
from sklearn.preprocessing import StandardScaler, KBinsDiscretizer
from sklearn.feature_selection import mutual_info_regression
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import StratifiedKFold, KFold
from functools import partial 
from sklearn.impute import SimpleImputer, KNNImputer
from category_encoders import OrdinalEncoder, OneHotEncoder

# Feature Engineering
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from category_encoders import MEstimateEncoder

# Models
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor

# Hyperparameter Tuning
import optuna
from optuna.samplers import GridSampler
from optuna.visualization import plot_parallel_coordinate

# Mute warnings
warnings.filterwarnings('ignore')

# Create Folds and Preprocessing

In [3]:
# Read the train and test CSVs (paths are relative to the notebook)
train = pd.read_csv("../data/train.csv")
test = pd.read_csv("../data/test.csv")

# Keep only the training rows that actually have a target value
train = train.dropna(axis=0, subset=['SalePrice'])

# Clean data, static transformations
def clean_data(*data):
    """Apply the fixed, data-independent clean-up to every frame passed in.

    Each frame is modified in place: a few category spellings are rewritten,
    GarageYrBlt values that are not <= 2010 (which includes missing values,
    since that comparison is False for NaN) are replaced by YearBuilt, and an
    MSClass column is derived from MSZoning.

    Returns the same tuple of frames that was received.
    """
    # Spelling fixes so the categories match the data documentation
    zoning_fixes = {'C (all)': 'C'}
    exterior_fixes = {"Brk Cmn":"BrkComm","Wd Shng": "WdShing"}
    neighborhood_fixes = {'NAmes':'Names'}

    # optional feature: A - agriculture, C - commercial, R - residential, I - industrial
    zoning_class = {'A': 'A','C': 'C',"FV": 'R','I': 'I',
                    "RH": 'R',"RL": 'R',"RP": 'R',"RM": 'R', np.nan:np.nan}

    for frame in data:
        frame['MSZoning'] = frame['MSZoning'].replace(zoning_fixes)
        frame["Exterior2nd"] = frame["Exterior2nd"].replace(exterior_fixes)
        frame['Neighborhood'] = frame['Neighborhood'].replace(neighborhood_fixes)

        # Some values of GarageYrBlt are corrupt, replace them with YearBuilt
        plausible_year = frame["GarageYrBlt"] <= 2010
        frame["GarageYrBlt"] = frame["GarageYrBlt"].where(plausible_year, frame["YearBuilt"])

        frame["MSClass"] = frame['MSZoning'].map(zoning_class)
    return data
    
# Apply the static clean-up to both frames
train, test = clean_data(train, test)

# List of categorical/numerical columns
columns = [col for col in test.columns if col not in ["Id","MSClass"]]
object_cols = [col for col in columns if train[col].dtype == "object"]
number_cols = [col for col in columns if train[col].dtype != "object"]

# Define bins: the continuous target is cut into equal-width bins so that
# StratifiedKFold can balance the price distribution across folds
binner = KBinsDiscretizer(n_bins = 45, encode = 'ordinal', strategy = 'uniform')
y_bins = binner.fit_transform(pd.DataFrame(data=train['SalePrice']))

# Define folds
train["kfold"] = -1
kf = StratifiedKFold(NUM_FOLDS, shuffle = True, random_state = FOLD_SEED) 
# kf.split yields *positional* row numbers, so they must be written with
# iloc. Label-based .loc only happens to agree while the index is an untouched
# 0..n-1 range; after rows have been dropped it would tag the wrong rows (or
# append new ones for labels that no longer exist).
kfold_pos = train.columns.get_loc("kfold")
for fold, (train_idx, valid_idx) in enumerate(kf.split(train, y_bins)):
    train.iloc[valid_idx, kfold_pos] = fold

In [4]:
def preprocessing(X_train, X_valid, X_test):
    """Impute and encode one train / validation / test split.

    Every imputer and encoder is fitted on X_train and then applied to
    X_valid and X_test. The imputation steps assign into the frames that were
    passed in; the encoding steps rebind the names to whatever the encoders
    return.

    Steps, in order:
      1. mean-impute the non-object columns
      2. fill missing values in object columns with the string 'None'
      3. map the 1-10 ratings to 0-9
      4. map the Po/Fa/TA/Gd/Ex ratings to 0-4
      5. map the remaining ordered categoricals to integer ranks
      6. ordinal-encode leftover object columns with >= 10 training values
      7. one-hot encode the leftover object columns with < 10 training values

    Returns the transformed (X_train, X_valid, X_test).
    """

    def fit_apply(encoder, train_df, valid_df, test_df):
        # Fit on the training split first, then reuse the fitted encoder
        return (encoder.fit_transform(train_df),
                encoder.transform(valid_df),
                encoder.transform(test_df))

    def ranked(levels):
        # Ordered list of levels -> {level: position in the list}
        return {level: rank for rank, level in enumerate(levels)}

    # 1. impute numerical data
    numeric_cols = [c for c in X_train.columns if X_train[c].dtype != "object"]
    if numeric_cols:
        mean_imputer = SimpleImputer(strategy='mean')
        X_train[numeric_cols] = mean_imputer.fit_transform(X_train[numeric_cols])
        for frame in (X_valid, X_test):
            frame[numeric_cols] = mean_imputer.transform(frame[numeric_cols])

    # 2. impute categorical data
    text_cols = [c for c in X_train.columns if X_train[c].dtype == "object"]
    if text_cols:
        none_imputer = SimpleImputer(strategy='constant', fill_value = 'None')
        X_train[text_cols] = none_imputer.fit_transform(X_train[text_cols])
        for frame in (X_valid, X_test):
            frame[text_cols] = none_imputer.transform(frame[text_cols])

    # 3. encode 1-10 ratings (keys are floats because step 1 went through the imputer)
    scale_cols = [c for c in ["OverallQual","OverallCond"] if c in X_train.columns]
    ten_point = {float(score): score - 1 for score in range(1, 11)}
    encoder = OrdinalEncoder(cols = scale_cols,
                             mapping = [{'col': c, 'mapping': ten_point} for c in scale_cols])
    X_train, X_valid, X_test = fit_apply(encoder, X_train, X_valid, X_test)

    # 4. encode Poor, Fair, Avg, Good, Ex ratings
    quality_cols = ["ExterQual","ExterCond","BsmtQual","BsmtCond","HeatingQC",
                    "KitchenQual","FireplaceQu","GarageQual","GarageCond",'PoolQC']
    quality_cols = [c for c in quality_cols if c in X_train.columns]
    five_point = ranked(["Po", "Fa", "TA", "Gd", "Ex"])
    encoder = OrdinalEncoder(cols = quality_cols,
                             mapping = [{'col': c, 'mapping': five_point} for c in quality_cols])
    X_train, X_valid, X_test = fit_apply(encoder, X_train, X_valid, X_test)

    # 5. encode remaining ordinal data (levels listed from rank 0 upwards)
    ordered_levels = {
        "LotShape": ["Reg", "IR1", "IR2", "IR3"],
        "LandSlope": ["Sev", "Mod", "Gtl"],
        "BsmtExposure": ["No", "Mn", "Av", "Gd"],
        "BsmtFinType1": ["Unf", "LwQ", "Rec", "BLQ", "ALQ", "GLQ"],
        "BsmtFinType2": ["Unf", "LwQ", "Rec", "BLQ", "ALQ", "GLQ"],
        "Functional": ["Sal", "Sev", "Maj1", "Maj2", "Mod", "Min2", "Min1", "Typ"],
        "GarageFinish": ["Unf", "RFn", "Fin"],
        "PavedDrive": ["N", "P", "Y"],
        "Utilities": ["NoSeWa", "NoSewr", "AllPub"],
        "CentralAir": ["N", "Y"],
        "Electrical": ["Mix", "FuseP", "FuseF", "FuseA", "SBrkr"],
        "Fence": ["MnWw", "GdWo", "MnPrv", "GdPrv"],
    }
    ordered_cols = [c for c in ordered_levels if c in X_train.columns]
    encoder = OrdinalEncoder(cols = ordered_cols,
                             mapping = [{'col': c, 'mapping': ranked(ordered_levels[c])}
                                        for c in ordered_cols])
    X_train, X_valid, X_test = fit_apply(encoder, X_train, X_valid, X_test)

    # Determine cardinality of remaining categorical data
    high_cols, low_cols = [], []
    for c in X_train.columns:
        if X_train[c].dtype != 'object':
            continue
        (high_cols if X_train[c].nunique() >= 10 else low_cols).append(c)

    # 6. ordinal encode high cardinality data
    if high_cols:
        X_train, X_valid, X_test = fit_apply(OrdinalEncoder(cols = high_cols),
                                             X_train, X_valid, X_test)

    # 7. one-hot encode low cardinality data
    if low_cols:
        X_train, X_valid, X_test = fit_apply(OneHotEncoder(cols = low_cols, use_cat_names = True),
                                             X_train, X_valid, X_test)

    return X_train, X_valid, X_test

# Scoring Functions

In [5]:
def score_xgboost(transforms = [preprocessing], params = {}, cols = columns, verbose = True):
    """Cross-validate an XGBoost regressor on the pre-assigned folds.

    Parameters
    ----------
    transforms : list of callables
        Applied in order to (X_train, X_valid, X_test) inside every fold. A
        transform whose signature has a ``y_train`` parameter (or accepts
        ``**kwargs``) also receives the fold's training target. The default
        list is never mutated here.
    params : dict
        Keyword overrides merged on top of the default XGBRegressor settings.
    cols : list of str
        Feature columns taken from the global ``train`` and ``test`` frames.
    verbose : bool
        Print the mean and worst fold MAE plus the elapsed time.

    Returns
    -------
    tuple of (mean MAE, max MAE) over the folds, each rounded to 4 decimals.
    """
    start = time.time()
    
    X = train.copy()
    scores = np.zeros(NUM_FOLDS)
    
    for i in range(NUM_FOLDS):
        X_train = X[X.kfold != i][cols].reset_index(drop=True)
        X_valid = X[X.kfold == i][cols].reset_index(drop=True)
        y_train = X[X.kfold != i]['SalePrice'].reset_index(drop=True)
        y_valid = X[X.kfold == i]['SalePrice'].reset_index(drop=True)
        X_test = test[cols].copy()
        
        # loop for applying the transformations
        for transform in transforms:
            # Decide from the signature whether the transform wants the target.
            # The previous try / bare-except retried on *any* failure, which
            # hid real errors raised inside a transform (and KeyboardInterrupt).
            try:
                parameters = inspect.signature(transform).parameters
            except (TypeError, ValueError):
                parameters = {}
            wants_target = "y_train" in parameters or any(
                p.kind is inspect.Parameter.VAR_KEYWORD for p in parameters.values())
            if wants_target:
                X_train, X_valid, X_test = transform(X_train, X_valid, X_test, y_train = y_train)
            else:
                X_train, X_valid, X_test = transform(X_train, X_valid, X_test)
        
        # NOTE(review): newer XGBoost releases expect eval_metric and
        # early_stopping_rounds on the constructor rather than fit() - confirm
        # against the installed version.
        model = XGBRegressor(**{**{'random_state':0, 'n_estimators': 3000},**params})
        model.fit(X_train, y_train,
                  verbose = False,
                  eval_set = [(X_valid, y_valid)],
                  eval_metric = "mae",
                  early_stopping_rounds = EARLY_STOP)

        preds = model.predict(X_valid)
        scores[i] = mean_absolute_error(y_valid, preds)
    end = time.time()
    if verbose:
        # The fold count in the label follows NUM_FOLDS instead of a literal 3
        print(f"XGBoost  ({NUM_FOLDS}-fold Avg):", 
              round(scores.mean(), 4))
        print(f"XGBoost  ({NUM_FOLDS}-fold Max):", 
              round(scores.max(), 4), "\t",
              str(round(end-start, 3))+"s")

    return round(scores.mean(), 4), round(scores.max(), 4)

In [6]:
def score_lightgbm(transforms = [preprocessing], params = {}, cols = columns, verbose = True):
    """Cross-validate a LightGBM regressor on the pre-assigned folds.

    Parameters
    ----------
    transforms : list of callables
        Applied in order to (X_train, X_valid, X_test) inside every fold. A
        transform whose signature has a ``y_train`` parameter (or accepts
        ``**kwargs``) also receives the fold's training target. The default
        list is never mutated here.
    params : dict
        Keyword overrides merged on top of the default LGBMRegressor settings.
    cols : list of str
        Feature columns taken from the global ``train`` and ``test`` frames.
    verbose : bool
        Print the mean and worst fold MAE plus the elapsed time.

    Returns
    -------
    tuple of (mean MAE, max MAE) over the folds, each rounded to 4 decimals.
    """
    start = time.time()
    
    X = train.copy()
    scores = np.zeros(NUM_FOLDS)
    
    for i in range(NUM_FOLDS):
        X_train = X[X.kfold != i][cols].reset_index(drop=True)
        X_valid = X[X.kfold == i][cols].reset_index(drop=True)
        y_train = X[X.kfold != i]['SalePrice'].reset_index(drop=True)
        y_valid = X[X.kfold == i]['SalePrice'].reset_index(drop=True)
        X_test = test[cols].copy()
        
        for transform in transforms:
            # Decide from the signature whether the transform wants the target.
            # The previous try / bare-except retried on *any* failure, which
            # hid real errors raised inside a transform (and KeyboardInterrupt).
            try:
                parameters = inspect.signature(transform).parameters
            except (TypeError, ValueError):
                parameters = {}
            wants_target = "y_train" in parameters or any(
                p.kind is inspect.Parameter.VAR_KEYWORD for p in parameters.values())
            if wants_target:
                X_train, X_valid, X_test = transform(X_train, X_valid, X_test, y_train = y_train)
            else:
                X_train, X_valid, X_test = transform(X_train, X_valid, X_test)
                
        # Columns that were object-typed in the raw data and still exist under
        # the same name after the transforms are flagged as categorical
        cat_cols = [x for x in X_train.columns if x in object_cols]
                
        # NOTE(review): newer LightGBM releases take early stopping / logging
        # through callbacks instead of fit() keywords - confirm against the
        # installed version.
        model = LGBMRegressor(**{**{'random_state':0, 'n_estimators': 3000},**params})
        model.fit(X_train, y_train,
                  verbose = False,
                  eval_set = [(X_valid, y_valid)],
                  eval_metric = "mae",
                  categorical_feature = cat_cols,
                  early_stopping_rounds = EARLY_STOP)

        valid_preds = model.predict(X_valid)
        scores[i] = mean_absolute_error(y_valid, valid_preds)
    end = time.time()
    if verbose:
        # The fold count in the label follows NUM_FOLDS instead of a literal 3
        print(f"LightGBM ({NUM_FOLDS}-fold Avg):", 
              round(scores.mean(), 4))
        print(f"LightGBM ({NUM_FOLDS}-fold Max):", 
              round(scores.max(), 4), "\t",
              str(round(end-start, 3))+"s")

    return round(scores.mean(), 4), round(scores.max(), 4)

In [7]:
def score_catboost(transforms = [preprocessing], params = {}, cols = columns, verbose = True):
    """Cross-validate a CatBoost regressor on the pre-assigned folds.

    Parameters
    ----------
    transforms : list of callables
        Applied in order to (X_train, X_valid, X_test) inside every fold. A
        transform whose signature has a ``y_train`` parameter (or accepts
        ``**kwargs``) also receives the fold's training target. The default
        list is never mutated here.
    params : dict
        Keyword overrides merged on top of the default CatBoostRegressor
        settings.
    cols : list of str
        Feature columns taken from the global ``train`` and ``test`` frames.
    verbose : bool
        Print the mean and worst fold MAE plus the elapsed time.

    Returns
    -------
    tuple of (mean MAE, max MAE) over the folds, each rounded to 4 decimals.
    """
    start = time.time()
    
    X = train.copy()
    scores = np.zeros(NUM_FOLDS)
    
    for i in range(NUM_FOLDS):
        X_train = X[X.kfold != i][cols].reset_index(drop=True)
        X_valid = X[X.kfold == i][cols].reset_index(drop=True)
        y_train = X[X.kfold != i]['SalePrice'].reset_index(drop=True)
        y_valid = X[X.kfold == i]['SalePrice'].reset_index(drop=True)
        X_test = test[cols].copy()
        
        # loop for applying the transformations
        for transform in transforms:
            # Decide from the signature whether the transform wants the target.
            # The previous try / bare-except retried on *any* failure, which
            # hid real errors raised inside a transform (and KeyboardInterrupt).
            try:
                parameters = inspect.signature(transform).parameters
            except (TypeError, ValueError):
                parameters = {}
            wants_target = "y_train" in parameters or any(
                p.kind is inspect.Parameter.VAR_KEYWORD for p in parameters.values())
            if wants_target:
                X_train, X_valid, X_test = transform(X_train, X_valid, X_test, y_train = y_train)
            else:
                X_train, X_valid, X_test = transform(X_train, X_valid, X_test)
        
        model = CatBoostRegressor(**{**{'random_state':0, 
                                        'n_estimators': 3000,
                                        'eval_metric':"MAE",
                                        'early_stopping_rounds': EARLY_STOP,
                                        'verbose': False}, **params})
        model.fit(X_train, y_train,
                  eval_set = (X_valid, y_valid),
                  use_best_model=True)

        valid_preds = model.predict(X_valid)
        scores[i] = mean_absolute_error(y_valid, valid_preds)
    end = time.time()
    if verbose:
        # The fold count in the label follows NUM_FOLDS instead of a literal 3
        print(f"CatBoost ({NUM_FOLDS}-fold Avg):", 
              round(scores.mean(), 4))
        print(f"CatBoost ({NUM_FOLDS}-fold Max):", 
              round(scores.max(), 4), "\t",
              str(round(end-start, 3))+"s")

    return round(scores.mean(), 4), round(scores.max(), 4)

In [8]:
def get_baseline():
    """Score the three models with the default pipeline and summarise them.

    Returns (overall_avg, overall_max): the mean and the maximum of the three
    per-model fold averages, rounded to 4 decimals. Note that "Overall (Max)"
    is the largest per-model *average*; the per-model worst-fold scores are
    printed by the scorers but not used here.
    """
    print("\nBaseline\n")

    # Same order as always: XGBoost, LightGBM, CatBoost
    model_avgs = []
    for scorer in (score_xgboost, score_lightgbm, score_catboost):
        fold_avg, _ = scorer()
        model_avgs.append(fold_avg)

    overall_avg = round(np.mean(model_avgs), 4)
    overall_max = round(np.max(model_avgs), 4)
    print("\nOverall (Avg):".ljust(23), overall_avg)
    print("Overall (Max):".ljust(22), overall_max)

    return overall_avg, overall_max
    
BASELINE_AVG, BASELINE_MAX = get_baseline()


Baseline

XGBoost  (3-fold Avg): 17818.5785
XGBoost  (3-fold Max): 18420.5786 	 2.004s
LightGBM (3-fold Avg): 16481.3275
LightGBM (3-fold Max): 17172.4876 	 1.665s
CatBoost (3-fold Avg): 15152.8277
CatBoost (3-fold Max): 16049.8435 	 12.3s

Overall (Avg):         16484.2446
Overall (Max):         17818.5785


# Mutual Information

In [9]:
def remove_uninformative(X_train, X_valid, X_test, y_train, verbose = False, random_state = 0):
    """Drop every column whose mutual information with the target is zero.

    The scores are estimated on the training split only; the same columns are
    then removed, in place, from all three frames.

    Parameters
    ----------
    X_train, X_valid, X_test : DataFrame
        Already-numeric feature frames sharing the same columns.
    y_train : Series
        Training target used for the mutual information estimate.
    verbose : bool
        Print the names of the dropped columns.
    random_state : int
        Seed forwarded to mutual_info_regression. The estimator is stochastic,
        so without a seed the set of dropped columns could change between runs.

    Returns
    -------
    The three frames (same objects) with the uninformative columns removed.
    """
    
    # 1. Determine uninformative columns
    scores = mutual_info_regression(X_train, y_train, random_state = random_state)
    cols = [col for col, score in zip(X_train.columns, scores) if score == 0]
    
    # 2. Drop the uninformative columns
    X_train.drop(cols, axis = 1, inplace = True)
    X_valid.drop(cols, axis = 1, inplace = True)
    X_test.drop(cols, axis = 1, inplace = True)
    
    if verbose:
        print("Dropped columns:", *cols)
    
    return X_train, X_valid, X_test

def test_uninformative():
    """Print the baseline, then re-score all three models with the
    zero-mutual-information columns dropped after preprocessing.

    "Overall (Max)" is the largest of the three per-model fold averages.
    """
    print("\nBaseline\n")
    print("Overall (Avg):".ljust(22), BASELINE_AVG)
    print("Overall (Max):".ljust(22), BASELINE_MAX)

    print("\nDrop Uninformative\n")
    pipeline = [preprocessing, remove_uninformative]
    model_avgs = []
    for scorer in (score_xgboost, score_lightgbm, score_catboost):
        fold_avg, _ = scorer(pipeline)
        model_avgs.append(fold_avg)
    print("\nOverall (Avg):".ljust(23), round(np.mean(model_avgs), 4))
    print("Overall (Max):".ljust(22),round(np.max(model_avgs), 4))
    
test_uninformative()


Baseline

Overall (Avg):         16484.2446
Overall (Max):         17818.5785

Drop Uninformative

XGBoost  (3-fold Avg): 17908.9726
XGBoost  (3-fold Max): 18301.5848 	 4.426s
LightGBM (3-fold Avg): 16625.3983
LightGBM (3-fold Max): 17315.2156 	 3.986s
CatBoost (3-fold Avg): 15054.8003
CatBoost (3-fold Max): 15586.0086 	 16.549s

Overall (Avg):         16529.7237
Overall (Max):         17908.9726


# Mathematical Transformations

In [10]:
def mathematical_transformations(X_train, X_valid, X_test):
    """Add ratio and sum features to each of the three frames, in place.

    New columns (same for every frame, appended in this order): LivLotRatio,
    Spaciousness, TotalOutsideSF, TotalLot, TotalBsmtFin, TotalSF, TotalBath,
    TotalPorch. Each is computed row-wise from that frame's own columns, so
    nothing is learned from the training split.

    Returns the same three frame objects.
    """
    for frame in (X_train, X_valid, X_test):
        # Ratios
        frame["LivLotRatio"] = frame["GrLivArea"] / frame["LotArea"]
        frame["Spaciousness"] = (frame["1stFlrSF"]+frame["2ndFlrSF"]) / frame["TotRmsAbvGrd"]

        # Plain sums; written with + so a missing operand still yields NaN
        frame["TotalOutsideSF"] = (frame["WoodDeckSF"] + frame["OpenPorchSF"]
                                   + frame["EnclosedPorch"] + frame["3SsnPorch"]
                                   + frame["ScreenPorch"])
        frame['TotalLot'] = frame['LotFrontage'] + frame['LotArea']
        frame['TotalBsmtFin'] = frame['BsmtFinSF1'] + frame['BsmtFinSF2']
        frame['TotalSF'] = frame['TotalBsmtSF'] + frame['2ndFlrSF'] + frame['1stFlrSF']

        # Half baths count as 0.5
        frame['TotalBath'] = (frame['FullBath'] + frame['HalfBath'] * 0.5
                              + frame['BsmtFullBath'] + frame['BsmtHalfBath'] * 0.5)

        # Like TotalOutsideSF but without the three-season porch
        frame['TotalPorch'] = (frame['OpenPorchSF'] + frame['EnclosedPorch']
                               + frame['ScreenPorch'] + frame['WoodDeckSF'])

    return X_train, X_valid, X_test

def test_transformations():
    """Print the baseline, then re-score all three models with the
    arithmetic features added after preprocessing.

    "Overall (Max)" is the largest of the three per-model fold averages.
    """
    print("\nBaseline\n")
    print("Overall (Avg):".ljust(22), BASELINE_AVG)
    print("Overall (Max):".ljust(22), BASELINE_MAX)

    print("\nMathematical Transformations\n")
    pipeline = [preprocessing, mathematical_transformations]
    model_avgs = []
    for scorer in (score_xgboost, score_lightgbm, score_catboost):
        fold_avg, _ = scorer(pipeline)
        model_avgs.append(fold_avg)
    print("\nOverall (Avg):".ljust(23), round(np.mean(model_avgs), 4))
    print("Overall (Max):".ljust(22),round(np.max(model_avgs), 4))
    
test_transformations()
    


Baseline

Overall (Avg):         16484.2446
Overall (Max):         17818.5785

Mathematical Transformations

XGBoost  (3-fold Avg): 17238.8075
XGBoost  (3-fold Max): 17584.7709 	 1.975s
LightGBM (3-fold Avg): 16471.4805
LightGBM (3-fold Max): 17043.6677 	 1.793s
CatBoost (3-fold Avg): 14850.5287
CatBoost (3-fold Max): 15603.5834 	 12.814s

Overall (Avg):         16186.9389
Overall (Max):         17238.8075


# Encoding Interactions

In [11]:
def encode_interaction(X_train, X_valid, X_test, cat_col = "BldgType", num_col = "GrLivArea"):
    """Add one `<cat_col>_<level>_<num_col>` column per training category level.

    Each new column holds `num_col` for rows belonging to that level and 0
    elsewhere. The set of levels comes from the training split only: a level
    that is missing from the validation or test split yields an all-zero
    column there (previously this raised KeyError), and a level that only
    appears outside the training split is ignored.

    The three frames are modified in place and returned.
    """
    train_dummies = pd.get_dummies(X_train[cat_col], prefix=cat_col)
    levels = train_dummies.columns

    # Align the other splits to the training levels so every lookup below exists
    valid_dummies = pd.get_dummies(X_valid[cat_col], prefix=cat_col).reindex(columns=levels, fill_value=0)
    test_dummies = pd.get_dummies(X_test[cat_col], prefix=cat_col).reindex(columns=levels, fill_value=0)

    for col in levels:
        X_train[col+"_"+num_col] = train_dummies[col]*X_train[num_col]
        X_valid[col+"_"+num_col] = valid_dummies[col]*X_valid[num_col]
        X_test[col+"_"+num_col] = test_dummies[col]*X_test[num_col]
    
    return X_train, X_valid, X_test

def test_interaction():
    """Print the baseline, then re-score all three models with the
    category x numeric interaction columns added *before* preprocessing
    (the interaction needs the raw category labels).

    "Overall (Max)" is the largest of the three per-model fold averages.
    """
    print("\nBaseline\n")
    print("Overall (Avg):".ljust(22), BASELINE_AVG)
    print("Overall (Max):".ljust(22), BASELINE_MAX)

    pipeline = [encode_interaction, preprocessing]
    print("\nInteraction Term:\n")
    model_avgs = []
    for scorer in (score_xgboost, score_lightgbm, score_catboost):
        fold_avg, _ = scorer(pipeline)
        model_avgs.append(fold_avg)
    print("\nOverall (Avg):".ljust(23), round(np.mean(model_avgs), 4))
    print("Overall (Max):".ljust(22),round(np.max(model_avgs), 4))
    
test_interaction()


Baseline

Overall (Avg):         16484.2446
Overall (Max):         17818.5785

Interaction Term:

XGBoost  (3-fold Avg): 18108.8183
XGBoost  (3-fold Max): 18803.2203 	 2.649s
LightGBM (3-fold Avg): 16724.7195
LightGBM (3-fold Max): 17621.4513 	 1.755s
CatBoost (3-fold Avg): 15163.728
CatBoost (3-fold Max): 15694.709 	 11.808s

Overall (Avg):         16665.7553
Overall (Max):         18108.8183


# Count Features

In [12]:
def count_porch_types(X_train, X_valid, X_test):
    """Add a PorchTypes column: how many of the five outdoor-area columns
    are greater than zero in each row.

    The three frames are modified in place and returned.
    """
    outdoor_cols = ["WoodDeckSF","OpenPorchSF","EnclosedPorch","3SsnPorch","ScreenPorch"]

    for frame in (X_train, X_valid, X_test):
        has_area = frame[outdoor_cols] > 0
        frame["PorchTypes"] = has_area.sum(axis=1)

    return X_train, X_valid, X_test

def test_count_feature():
    """Print the baseline, then re-score all three models with the
    PorchTypes count feature added after preprocessing.

    "Overall (Max)" is the largest of the three per-model fold averages.
    """
    print("\nBaseline\n")
    print("Overall (Avg):".ljust(22), BASELINE_AVG)
    print("Overall (Max):".ljust(22), BASELINE_MAX)

    pipeline = [preprocessing, count_porch_types]
    print("\nCount Feature:\n")
    model_avgs = []
    for scorer in (score_xgboost, score_lightgbm, score_catboost):
        fold_avg, _ = scorer(pipeline)
        model_avgs.append(fold_avg)
    print("\nOverall (Avg):".ljust(23), round(np.mean(model_avgs), 4))
    print("Overall (Max):".ljust(22),round(np.max(model_avgs), 4))
      
test_count_feature()


Baseline

Overall (Avg):         16484.2446
Overall (Max):         17818.5785

Count Feature:

XGBoost  (3-fold Avg): 17770.3514
XGBoost  (3-fold Max): 18389.5722 	 2.03s
LightGBM (3-fold Avg): 16577.4662
LightGBM (3-fold Max): 17139.9331 	 1.609s
CatBoost (3-fold Avg): 15150.8127
CatBoost (3-fold Max): 15669.522 	 11.695s

Overall (Avg):         16499.5434
Overall (Max):         17770.3514


# Break Down Feature

In [13]:
def test_breakdown():
    """Print the baseline, then re-score all three models with the derived
    MSClass column added to the feature list (default pipeline otherwise).

    "Overall (Max)" is the largest of the three per-model fold averages.
    """
    print("\nBaseline\n")
    print("Overall (Avg):".ljust(22), BASELINE_AVG)
    print("Overall (Max):".ljust(22), BASELINE_MAX)

    print("\nBreak Down:\n")
    model_avgs = []
    for scorer in (score_xgboost, score_lightgbm, score_catboost):
        fold_avg, _ = scorer(cols = columns + ["MSClass"])
        model_avgs.append(fold_avg)
    print("\nOverall (Avg):".ljust(23), round(np.mean(model_avgs), 4))
    print("Overall (Max):".ljust(22),round(np.max(model_avgs), 4))
      
    
test_breakdown()


Baseline

Overall (Avg):         16484.2446
Overall (Max):         17818.5785

Break Down:

XGBoost  (3-fold Avg): 17818.5785
XGBoost  (3-fold Max): 18420.5786 	 2.098s
LightGBM (3-fold Avg): 16481.3275
LightGBM (3-fold Max): 17172.4876 	 1.683s
CatBoost (3-fold Avg): 15156.8618
CatBoost (3-fold Max): 15920.7323 	 12.108s

Overall (Avg):         16485.5893
Overall (Max):         17818.5785


# Grouped Transform

In [14]:
def group_transformation(X_train, X_valid, X_test):
    """Add MedNhbdLvArea: the median GrLivArea of each row's Neighborhood.

    The medians are computed from the training split only and then looked up
    for all three frames, so no validation/test information is used. A
    neighborhood that does not occur in the training split gets NaN.

    The three frames are modified in place and returned.
    """
    # One median per neighborhood, learned from the training rows. A direct
    # groupby lookup replaces the row-by-row Series.iteritems() walk, which
    # no longer exists in pandas 2.x.
    medians = X_train.groupby("Neighborhood")["GrLivArea"].median()

    for frame in (X_train, X_valid, X_test):
        frame["MedNhbdLvArea"] = frame["Neighborhood"].map(medians)
    
    return X_train, X_valid, X_test

def test_group():
    """Print the baseline, then re-score all three models with the
    per-neighborhood median living area added after preprocessing.

    "Overall (Max)" is the largest of the three per-model fold averages.
    """
    print("\nBaseline\n")
    print("Overall (Avg):".ljust(22), BASELINE_AVG)
    print("Overall (Max):".ljust(22), BASELINE_MAX)

    pipeline = [preprocessing, group_transformation]
    print("\nGroup Transformation:\n")
    model_avgs = []
    for scorer in (score_xgboost, score_lightgbm, score_catboost):
        fold_avg, _ = scorer(pipeline)
        model_avgs.append(fold_avg)
    print("\nOverall (Avg):".ljust(23), round(np.mean(model_avgs), 4))
    print("Overall (Max):".ljust(22),round(np.max(model_avgs), 4))
      
    
test_group()


Baseline

Overall (Avg):         16484.2446
Overall (Max):         17818.5785

Group Transformation:

XGBoost  (3-fold Avg): 17904.3209
XGBoost  (3-fold Max): 19005.0376 	 2.013s
LightGBM (3-fold Avg): 16662.3771
LightGBM (3-fold Max): 17680.7477 	 1.713s
CatBoost (3-fold Avg): 14713.3128
CatBoost (3-fold Max): 15112.7124 	 10.748s

Overall (Avg):         16426.6703
Overall (Max):         17904.3209


# Clustering (Labels)

In [15]:
def generate_cluster_labels(X_train, X_valid, X_test, name = "Area", features = ['LotArea', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF','GrLivArea']):
    """Add a `<name>_Cluster` column holding a k-means cluster label.

    The scaler and the 10-cluster KMeans model are fitted on the selected
    `features` of the training split; validation and test rows are scaled
    with the same scaler and assigned with `predict`.

    The three frames are modified in place and returned.
    """
    label_col = name + "_Cluster"
    scaler = StandardScaler()
    kmeans = KMeans(n_clusters = 10, n_init = 10, random_state=0)

    # Fit scaler and clusters on the training rows only
    X_train[label_col] = kmeans.fit_predict(scaler.fit_transform(X_train[features]))

    # Reuse both fitted objects for the other splits
    for frame in (X_valid, X_test):
        frame[label_col] = kmeans.predict(scaler.transform(frame[features]))

    return X_train, X_valid, X_test

def test_cluster_labels():
    """Print the baseline, then re-score all three models with the k-means
    cluster label added after preprocessing.

    "Overall (Max)" is the largest of the three per-model fold averages.
    """
    print("\nBaseline\n")
    print("Overall (Avg):".ljust(22), BASELINE_AVG)
    print("Overall (Max):".ljust(22), BASELINE_MAX)

    pipeline = [preprocessing, generate_cluster_labels]
    print("\nCluster Labels:\n")
    model_avgs = []
    for scorer in (score_xgboost, score_lightgbm, score_catboost):
        fold_avg, _ = scorer(pipeline)
        model_avgs.append(fold_avg)
    print("\nOverall (Avg):".ljust(23), round(np.mean(model_avgs), 4))
    print("Overall (Max):".ljust(22),round(np.max(model_avgs), 4))
     
test_cluster_labels()


Baseline

Overall (Avg):         16484.2446
Overall (Max):         17818.5785

Cluster Labels:

XGBoost  (3-fold Avg): 18035.7252
XGBoost  (3-fold Max): 18523.0575 	 2.765s
LightGBM (3-fold Avg): 16448.1335
LightGBM (3-fold Max): 16955.8606 	 1.816s
CatBoost (3-fold Avg): 14997.3111
CatBoost (3-fold Max): 15618.4954 	 10.332s

Overall (Avg):         16493.7233
Overall (Max):         18035.7252


# Clustering (Distances)

In [16]:
def generate_cluster_distances(X_train, X_valid, X_test, name = "Area", features = ['LotArea', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF','GrLivArea']):
    """Append one `<name>_Centroid_<i>` column per k-means cluster.

    The scaler and the 10-cluster KMeans model are fitted on the selected
    `features` of the training split; `KMeans.transform` then supplies one
    column per cluster for every row of each split.

    The new columns are built on each input frame's own index, so the join
    lines up row for row even when a frame does not carry a default 0..n-1
    index (previously such rows would have been filled with NaN).

    Returns three new frames: the inputs joined with their distance columns.
    """
    
    # 1. normalize based on training data
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X_train[features])
    X_valid_scaled = scaler.transform(X_valid[features])
    X_test_scaled = scaler.transform(X_test[features])
    
    # 2. generate cluster distances (use transform)
    kmeans = KMeans(n_clusters = 10, n_init = 10, random_state=0)
    X_cd = kmeans.fit_transform(X_scaled)
    X_valid_cd = kmeans.transform(X_valid_scaled)
    X_test_cd = kmeans.transform(X_test_scaled)
    
    # 3. column labels, shared by all three splits
    centroid_cols = [name + "_Centroid_" + str(i) for i in range(X_cd.shape[1])]
    X_cd = pd.DataFrame(X_cd, columns=centroid_cols, index=X_train.index)
    X_valid_cd = pd.DataFrame(X_valid_cd, columns=centroid_cols, index=X_valid.index)
    X_test_cd = pd.DataFrame(X_test_cd, columns=centroid_cols, index=X_test.index)
    
    return X_train.join(X_cd), X_valid.join(X_valid_cd), X_test.join(X_test_cd)

def test_cluster_distances():
    """Print the baseline, then re-score all three models with the k-means
    centroid-distance columns added after preprocessing.

    "Overall (Max)" is the largest of the three per-model fold averages.
    """
    print("\nBaseline\n")
    print("Overall (Avg):".ljust(22), BASELINE_AVG)
    print("Overall (Max):".ljust(22), BASELINE_MAX)

    pipeline = [preprocessing, generate_cluster_distances]
    print("\nCluster Distances:\n")
    model_avgs = []
    for scorer in (score_xgboost, score_lightgbm, score_catboost):
        fold_avg, _ = scorer(pipeline)
        model_avgs.append(fold_avg)
    print("\nOverall (Avg):".ljust(23), round(np.mean(model_avgs), 4))
    print("Overall (Max):".ljust(22),round(np.max(model_avgs), 4))
     
    
test_cluster_distances()


Baseline

Overall (Avg):         16484.2446
Overall (Max):         17818.5785

Cluster Distances:

XGBoost  (3-fold Avg): 17902.8628
XGBoost  (3-fold Max): 18883.7932 	 2.393s
LightGBM (3-fold Avg): 16404.7344
LightGBM (3-fold Max): 16685.1865 	 2.077s
CatBoost (3-fold Avg): 14736.1507
CatBoost (3-fold Max): 15103.5523 	 11.909s

Overall (Avg):         16347.916
Overall (Max):         17902.8628


# Principal Component Analysis

In [17]:
# Performs PCA on a selected subset of columns and appends the components
def pca_transform(X_train, X_valid, X_test, 
                  features = ["GarageArea","YearRemodAdd","TotalBsmtSF","GrLivArea"], 
                  n_components = 2):
    """Append principal components PC1..PCn computed from `features`.

    The scaler and the PCA are fitted on the training split only and then
    applied to the validation and test splits.

    The component frames are built on each input frame's own index, so the
    join lines up row for row even when a frame does not carry a default
    0..n-1 index (previously such rows would have been filled with NaN).

    Returns three new frames: the inputs joined with their component columns.
    """
    
    # Normalize based on training data
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X_train[features])
    X_valid_scaled = scaler.transform(X_valid[features])
    X_test_scaled = scaler.transform(X_test[features])
    
    # Create principal components
    pca = PCA(n_components)
    X_pca = pca.fit_transform(X_scaled)
    X_valid_pca = pca.transform(X_valid_scaled)
    X_test_pca = pca.transform(X_test_scaled)
    
    # Convert to dataframe, keeping each split's original row labels
    component_names = [f"PC{i+1}" for i in range(X_pca.shape[1])]
    X_pca = pd.DataFrame(X_pca, columns=component_names, index=X_train.index)
    X_valid_pca = pd.DataFrame(X_valid_pca, columns=component_names, index=X_valid.index)
    X_test_pca = pd.DataFrame(X_test_pca, columns=component_names, index=X_test.index)
    
    return X_train.join(X_pca), X_valid.join(X_valid_pca), X_test.join(X_test_pca)

def test_pca_features():
    """Print the baseline, then re-score all three models with 1, 2 and 3
    principal components appended after preprocessing.

    "Overall (Max)" is the largest of the three per-model fold averages.
    """
    print("\nBaseline\n")
    print("Overall (Avg):".ljust(22), BASELINE_AVG)
    print("Overall (Max):".ljust(22), BASELINE_MAX)

    # (number of components, heading printed before that run)
    runs = (
        (1, "\nPCA (1 components):\n"),
        (2, "\nPCA (2 components):\n"),
        (3, "\nPCA (3 components):\n"),
    )
    for n_components, heading in runs:
        transforms = [preprocessing, partial(pca_transform, n_components = n_components)]
        print(heading)
        model_avgs = []
        for scorer in (score_xgboost, score_lightgbm, score_catboost):
            fold_avg, _ = scorer(transforms)
            model_avgs.append(fold_avg)
        print("\nOverall (Avg):".ljust(23), round(np.mean(model_avgs), 4))
        print("Overall (Max):".ljust(22),round(np.max(model_avgs), 4))
   
    
test_pca_features()


Baseline

Overall (Avg):         16484.2446
Overall (Max):         17818.5785

PCA (1 components):

XGBoost  (3-fold Avg): 17325.0409
XGBoost  (3-fold Max): 18070.6054 	 1.905s
LightGBM (3-fold Avg): 16144.3925
LightGBM (3-fold Max): 16545.4439 	 1.858s
CatBoost (3-fold Avg): 14844.5589
CatBoost (3-fold Max): 15355.6194 	 10.502s

Overall (Avg):         16104.6641
Overall (Max):         17325.0409

PCA (2 components):

XGBoost  (3-fold Avg): 17173.8727
XGBoost  (3-fold Max): 18136.1141 	 1.94s
LightGBM (3-fold Avg): 16096.8338
LightGBM (3-fold Max): 16434.0747 	 1.826s
CatBoost (3-fold Avg): 14811.3066
CatBoost (3-fold Max): 15259.5257 	 11.896s

Overall (Avg):         16027.3377
Overall (Max):         17173.8727

PCA (3 components):

XGBoost  (3-fold Avg): 17227.3685
XGBoost  (3-fold Max): 18312.4517 	 1.939s
LightGBM (3-fold Avg): 16300.9474
LightGBM (3-fold Max): 16790.1549 	 1.861s
CatBoost (3-fold Avg): 14897.8518
CatBoost (3-fold Max): 15515.0018 	 11.524s

Overall (Avg):       

# Target Encoding

In [18]:
class CrossFoldEncoder:
    """Cross-fold wrapper around a category-encoder class.

    One encoder is fitted per fold produced by ``self.cv_`` (a 5-split
    ``KFold``). During ``fit_transform`` each encoder is fitted on one set
    of rows and used to encode the other set, so no row is encoded by an
    encoder that was fitted on it. ``transform`` then averages the output
    of all fitted encoders.
    """

    def __init__(self, encoder, **kwargs):
        """Store the encoder class and the keyword arguments it is built with.

        Parameters
        ----------
        encoder : callable
            Encoder class/factory; it is called as ``encoder(cols=..., **kwargs)``.
        **kwargs
            Extra keyword arguments forwarded to every encoder instance.
        """
        self.encoder_ = encoder
        self.kwargs_ = kwargs
        self.cv_ = KFold(n_splits=5)

    def fit_transform(self, X, y, cols):
        """Fit one encoder per fold and return the out-of-fold encodings.

        For every pair of index arrays yielded by ``self.cv_.split(X)``, a
        fresh encoder is fitted on the rows of the first array and used to
        encode the rows of the second. The encoded pieces are concatenated
        in the order the folds are produced.

        Parameters
        ----------
        X : pandas.DataFrame
            Feature frame containing the columns listed in ``cols``.
        y : pandas.Series
            Target values, indexed positionally like ``X``.
        cols : list
            Names of the columns to encode.

        Returns
        -------
        pandas.DataFrame
            The encoded columns only, each renamed to ``<name>_encoded``.
        """
        self.fitted_encoders_ = []
        self.cols_ = cols
        pieces = []
        for fit_rows, apply_rows in self.cv_.split(X):
            fold_encoder = self.encoder_(cols=cols, **self.kwargs_)
            fold_encoder.fit(X.iloc[fit_rows, :], y.iloc[fit_rows])
            held_out = X.iloc[apply_rows, :]
            pieces.append(fold_encoder.transform(held_out)[cols])
            # Keep the encoder so transform() can reuse it on unseen data.
            self.fitted_encoders_.append(fold_encoder)
        out_of_fold = pd.concat(pieces)
        out_of_fold.columns = [name + "_encoded" for name in out_of_fold.columns]
        return out_of_fold

    def transform(self, X):
        """Encode ``X`` with every fitted encoder and average the results.

        Parameters
        ----------
        X : pandas.DataFrame
            Frame containing the columns passed to ``fit_transform``.

        Returns
        -------
        pandas.DataFrame
            Mean of the per-encoder encodings of ``self.cols_``, with each
            column renamed to ``<name>_encoded``.
        """
        from functools import reduce

        def _add_frames(left, right):
            # fill_value=0 is passed through to DataFrame.add unchanged.
            return left.add(right, fill_value=0)

        per_fold = [
            fold_encoder.transform(X)[self.cols_]
            for fold_encoder in self.fitted_encoders_
        ]
        averaged = reduce(_add_frames, per_fold) / len(per_fold)
        averaged.columns = [name + "_encoded" for name in averaged.columns]
        return averaged

In [19]:
def encode_neighborhood(X_train, X_valid, X_test, y_train):
    """Append a cross-fold target encoding of ``Neighborhood`` to each frame.

    A ``CrossFoldEncoder`` built on ``MEstimateEncoder`` (``m=1``) is fitted
    on the training frame and target, then applied to the validation and
    test frames.

    Returns
    -------
    tuple
        ``(X_train, X_valid, X_test)``, each joined with its
        ``Neighborhood_encoded`` column.
    """
    target_encoder = CrossFoldEncoder(MEstimateEncoder, m=1)
    frames = (X_train, X_valid, X_test)
    # Fit on the training frame first, then encode the other two frames.
    encodings = (
        target_encoder.fit_transform(X_train, y_train, cols=["Neighborhood"]),
        target_encoder.transform(X_valid),
        target_encoder.transform(X_test),
    )
    return tuple(frame.join(extra) for frame, extra in zip(frames, encodings))

def test_neighborhood_encoding():
    """Print the baseline scores, then the scores with ``Neighborhood`` target encoding.

    Each of the three scoring functions is run on the pipeline
    ``[preprocessing, encode_neighborhood]``. The "Overall" lines report
    the mean and the maximum of the three per-model average scores.
    """
    print("\nBaseline\n")
    print("Overall (Avg):".ljust(22), BASELINE_AVG)
    print("Overall (Max):".ljust(22), BASELINE_MAX)

    transforms = [preprocessing, encode_neighborhood]
    print("\nTarget Encoding ('Neighborhood'):\n")

    model_avgs = []
    for scorer in (score_xgboost, score_lightgbm, score_catboost):
        # Each scorer returns (average, maximum); only the average is aggregated.
        fold_avg, _fold_max = scorer(transforms)
        model_avgs.append(fold_avg)

    print("\nOverall (Avg):".ljust(23), round(np.mean(model_avgs), 4))
    print("Overall (Max):".ljust(22), round(np.max(model_avgs), 4))


test_neighborhood_encoding()


Baseline

Overall (Avg):         16484.2446
Overall (Max):         17818.5785

Target Encoding ('Neighborhood'):

XGBoost  (3-fold Avg): 17457.4707
XGBoost  (3-fold Max): 18292.309 	 2.043s
LightGBM (3-fold Avg): 16597.4962
LightGBM (3-fold Max): 17185.1397 	 1.921s
CatBoost (3-fold Avg): 14639.1022
CatBoost (3-fold Max): 14970.7286 	 10.842s

Overall (Avg):         16231.3564
Overall (Max):         17457.4707


In [20]:
def encode_subclass(X_train, X_valid, X_test, y_train):
    """Append a cross-fold target encoding of ``MSSubClass`` to each frame.

    A ``CrossFoldEncoder`` built on ``MEstimateEncoder`` (``m=1``) is fitted
    on the training frame and target, then applied to the validation and
    test frames.

    Returns
    -------
    tuple
        ``(X_train, X_valid, X_test)``, each joined with its
        ``MSSubClass_encoded`` column.
    """
    target_encoder = CrossFoldEncoder(MEstimateEncoder, m=1)
    frames = (X_train, X_valid, X_test)
    # Fit on the training frame first, then encode the other two frames.
    encodings = (
        target_encoder.fit_transform(X_train, y_train, cols=["MSSubClass"]),
        target_encoder.transform(X_valid),
        target_encoder.transform(X_test),
    )
    return tuple(frame.join(extra) for frame, extra in zip(frames, encodings))

def test_subclass_encoding():
    """Print the baseline scores, then the scores with ``MSSubClass`` target encoding.

    Each of the three scoring functions is run on the pipeline
    ``[preprocessing, encode_subclass]``. The "Overall" lines report the
    mean and the maximum of the three per-model average scores.
    """
    print("\nBaseline\n")
    print("Overall (Avg):".ljust(22), BASELINE_AVG)
    print("Overall (Max):".ljust(22), BASELINE_MAX)

    transforms = [preprocessing, encode_subclass]
    print("\nTarget Encoding ('SubClass'):\n")

    model_avgs = []
    for scorer in (score_xgboost, score_lightgbm, score_catboost):
        # Each scorer returns (average, maximum); only the average is aggregated.
        fold_avg, _fold_max = scorer(transforms)
        model_avgs.append(fold_avg)

    print("\nOverall (Avg):".ljust(23), round(np.mean(model_avgs), 4))
    print("Overall (Max):".ljust(22), round(np.max(model_avgs), 4))


test_subclass_encoding()


Baseline

Overall (Avg):         16484.2446
Overall (Max):         17818.5785

Target Encoding ('SubClass'):

XGBoost  (3-fold Avg): 18093.2523
XGBoost  (3-fold Max): 18547.7723 	 2.261s
LightGBM (3-fold Avg): 16692.4652
LightGBM (3-fold Max): 17203.6731 	 1.963s
CatBoost (3-fold Avg): 15037.7819
CatBoost (3-fold Max): 15669.5137 	 10.8s

Overall (Avg):         16607.8331
Overall (Max):         18093.2523


# Test Strategies

We test every on/off combination of the promising strategies with an exhaustive Optuna grid search.


In [21]:
def feature_search():
    """Grid-search every on/off combination of the optional feature steps.

    Seven boolean flags each switch one transformation on or off. Every
    trial builds a pipeline starting with ``preprocessing``, scores it with
    the XGBoost, LightGBM and CatBoost scorers, and reports the mean of
    their average scores, which the study minimizes.

    Returns
    -------
    optuna.study.Study
        The study after ``2 ** 7`` grid trials have been run.
    """
    # Flag names, in the order they are suggested to each trial.
    flag_names = ['math', 'count', 'group', 'cluster_dist', 'pca', 'target', 'drop_last']

    def objective(trial):
        enabled = {
            flag: trial.suggest_categorical(flag, [True, False])
            for flag in flag_names
        }

        # Optional steps, listed in the order they are applied after preprocessing.
        optional_steps = (
            ('math', mathematical_transformations),
            ('count', count_porch_types),
            ('group', group_transformation),
            ('cluster_dist', generate_cluster_distances),
            ('pca', pca_transform),
            ('target', encode_neighborhood),
            ('drop_last', remove_uninformative),
        )
        transforms = [preprocessing]
        transforms.extend(step for flag, step in optional_steps if enabled[flag])

        scorers = (score_xgboost, score_lightgbm, score_catboost)
        scores = np.array([scorer(transforms=transforms)[0] for scorer in scorers])
        # NOTE(review): one-second pause per trial; presumably keeps the printed
        # scores and the Optuna log line from interleaving — confirm.
        time.sleep(1)
        return scores.mean()

    search_space = {flag: [True, False] for flag in flag_names}
    optuna.logging.set_verbosity(optuna.logging.DEBUG)
    study = optuna.create_study(
        sampler=GridSampler(search_space),
        direction="minimize",
    )
    # One trial per grid point.
    study.optimize(objective, n_trials=2 ** len(search_space))
    return study


study = feature_search()

[32m[I 2021-09-07 19:19:35,334][0m A new study created in memory with name: no-name-c83d4d39-a1c8-4d81-afef-4e76cd8b8895[0m


XGBoost  (3-fold Avg): 17573.9972
XGBoost  (3-fold Max): 18235.9336 	 4.153s
LightGBM (3-fold Avg): 16089.2872
LightGBM (3-fold Max): 16498.7109 	 3.893s
CatBoost (3-fold Avg): 14904.1363
CatBoost (3-fold Max): 15453.2451 	 12.357s


[32m[I 2021-09-07 19:19:56,750][0m Trial 0 finished with value: 16189.140233333334 and parameters: {'math': False, 'count': True, 'group': False, 'cluster_dist': False, 'pca': True, 'target': False, 'drop_last': True}. Best is trial 0 with value: 16189.140233333334.[0m


XGBoost  (3-fold Avg): 17579.3272
XGBoost  (3-fold Max): 18261.5149 	 4.726s
LightGBM (3-fold Avg): 16160.661
LightGBM (3-fold Max): 16761.8491 	 4.415s
CatBoost (3-fold Avg): 14637.9043
CatBoost (3-fold Max): 15191.5539 	 14.861s


[32m[I 2021-09-07 19:20:21,773][0m Trial 1 finished with value: 16125.964166666667 and parameters: {'math': True, 'count': True, 'group': False, 'cluster_dist': True, 'pca': True, 'target': False, 'drop_last': True}. Best is trial 1 with value: 16125.964166666667.[0m


XGBoost  (3-fold Avg): 17494.2584
XGBoost  (3-fold Max): 18261.0356 	 2.507s
LightGBM (3-fold Avg): 16403.4604
LightGBM (3-fold Max): 16900.2917 	 2.292s
CatBoost (3-fold Avg): 14325.8916
CatBoost (3-fold Max): 14611.6419 	 10.203s


[32m[I 2021-09-07 19:20:37,787][0m Trial 2 finished with value: 16074.536800000002 and parameters: {'math': False, 'count': True, 'group': False, 'cluster_dist': True, 'pca': False, 'target': True, 'drop_last': False}. Best is trial 2 with value: 16074.536800000002.[0m


XGBoost  (3-fold Avg): 17732.561
XGBoost  (3-fold Max): 18285.4702 	 4.448s
LightGBM (3-fold Avg): 16477.2731
LightGBM (3-fold Max): 16657.612 	 4.185s
CatBoost (3-fold Avg): 14705.1184
CatBoost (3-fold Max): 15046.4876 	 13.732s


[32m[I 2021-09-07 19:21:01,171][0m Trial 3 finished with value: 16304.984166666667 and parameters: {'math': False, 'count': False, 'group': False, 'cluster_dist': True, 'pca': False, 'target': False, 'drop_last': True}. Best is trial 2 with value: 16074.536800000002.[0m


XGBoost  (3-fold Avg): 17201.3622
XGBoost  (3-fold Max): 18363.8773 	 2.079s
LightGBM (3-fold Avg): 16553.4289
LightGBM (3-fold Max): 17172.6484 	 1.817s
CatBoost (3-fold Avg): 14358.7316
CatBoost (3-fold Max): 14996.528 	 10.604s


[32m[I 2021-09-07 19:21:16,684][0m Trial 4 finished with value: 16037.840900000001 and parameters: {'math': True, 'count': True, 'group': True, 'cluster_dist': False, 'pca': True, 'target': False, 'drop_last': False}. Best is trial 4 with value: 16037.840900000001.[0m


XGBoost  (3-fold Avg): 17305.2243
XGBoost  (3-fold Max): 17871.5301 	 1.812s
LightGBM (3-fold Avg): 16455.4277
LightGBM (3-fold Max): 17338.4018 	 1.637s
CatBoost (3-fold Avg): 14476.7801
CatBoost (3-fold Max): 15164.3212 	 9.919s


[32m[I 2021-09-07 19:21:31,071][0m Trial 5 finished with value: 16079.144033333336 and parameters: {'math': True, 'count': False, 'group': True, 'cluster_dist': False, 'pca': False, 'target': False, 'drop_last': False}. Best is trial 4 with value: 16037.840900000001.[0m


XGBoost  (3-fold Avg): 16979.1497
XGBoost  (3-fold Max): 17982.1462 	 2.421s
LightGBM (3-fold Avg): 16130.2844
LightGBM (3-fold Max): 16972.4943 	 2.099s
CatBoost (3-fold Avg): 14534.0076
CatBoost (3-fold Max): 15102.6598 	 10.695s


[32m[I 2021-09-07 19:21:47,307][0m Trial 6 finished with value: 15881.147233333331 and parameters: {'math': False, 'count': True, 'group': False, 'cluster_dist': True, 'pca': True, 'target': False, 'drop_last': False}. Best is trial 6 with value: 15881.147233333331.[0m


XGBoost  (3-fold Avg): 17055.057
XGBoost  (3-fold Max): 17382.824 	 4.094s
LightGBM (3-fold Avg): 16670.2915
LightGBM (3-fold Max): 16954.3406 	 3.975s
CatBoost (3-fold Avg): 14540.2857
CatBoost (3-fold Max): 14867.7663 	 14.936s


[32m[I 2021-09-07 19:22:11,331][0m Trial 7 finished with value: 16088.544733333334 and parameters: {'math': False, 'count': False, 'group': True, 'cluster_dist': False, 'pca': False, 'target': True, 'drop_last': True}. Best is trial 6 with value: 15881.147233333331.[0m


XGBoost  (3-fold Avg): 17182.7315
XGBoost  (3-fold Max): 17843.2409 	 4.046s
LightGBM (3-fold Avg): 16466.9651
LightGBM (3-fold Max): 17186.4011 	 3.901s
CatBoost (3-fold Avg): 14515.3711
CatBoost (3-fold Max): 15030.7271 	 13.118s


[32m[I 2021-09-07 19:22:33,407][0m Trial 8 finished with value: 16055.022566666667 and parameters: {'math': False, 'count': True, 'group': True, 'cluster_dist': False, 'pca': True, 'target': False, 'drop_last': True}. Best is trial 6 with value: 15881.147233333331.[0m


XGBoost  (3-fold Avg): 17377.3754
XGBoost  (3-fold Max): 17487.3996 	 4.849s
LightGBM (3-fold Avg): 16497.031
LightGBM (3-fold Max): 16844.476 	 4.513s
CatBoost (3-fold Avg): 14275.2167
CatBoost (3-fold Max): 14766.5891 	 10.897s


[32m[I 2021-09-07 19:22:54,690][0m Trial 9 finished with value: 16049.874366666665 and parameters: {'math': False, 'count': True, 'group': True, 'cluster_dist': True, 'pca': False, 'target': True, 'drop_last': True}. Best is trial 6 with value: 15881.147233333331.[0m


XGBoost  (3-fold Avg): 17397.5811
XGBoost  (3-fold Max): 18679.5039 	 2.268s
LightGBM (3-fold Avg): 16463.7704
LightGBM (3-fold Max): 17226.0353 	 2.152s
CatBoost (3-fold Avg): 14300.8707
CatBoost (3-fold Max): 14767.2258 	 10.236s


[32m[I 2021-09-07 19:23:10,364][0m Trial 10 finished with value: 16054.074066666668 and parameters: {'math': True, 'count': True, 'group': True, 'cluster_dist': True, 'pca': True, 'target': False, 'drop_last': False}. Best is trial 6 with value: 15881.147233333331.[0m


XGBoost  (3-fold Avg): 17553.9371
XGBoost  (3-fold Max): 17976.5817 	 2.178s
LightGBM (3-fold Avg): 16126.8996
LightGBM (3-fold Max): 16786.669 	 2.014s
CatBoost (3-fold Avg): 14367.8879
CatBoost (3-fold Max): 14887.773 	 10.431s


[32m[I 2021-09-07 19:23:26,006][0m Trial 11 finished with value: 16016.241533333334 and parameters: {'math': False, 'count': True, 'group': True, 'cluster_dist': True, 'pca': False, 'target': False, 'drop_last': False}. Best is trial 6 with value: 15881.147233333331.[0m


XGBoost  (3-fold Avg): 17626.5549
XGBoost  (3-fold Max): 18279.8297 	 4.431s
LightGBM (3-fold Avg): 16178.0749
LightGBM (3-fold Max): 16462.5548 	 4.321s
CatBoost (3-fold Avg): 14616.6845
CatBoost (3-fold Max): 14924.7103 	 14.407s


[32m[I 2021-09-07 19:23:50,177][0m Trial 12 finished with value: 16140.4381 and parameters: {'math': False, 'count': True, 'group': False, 'cluster_dist': True, 'pca': False, 'target': False, 'drop_last': True}. Best is trial 6 with value: 15881.147233333331.[0m


XGBoost  (3-fold Avg): 17324.6739
XGBoost  (3-fold Max): 17650.7112 	 2.539s
LightGBM (3-fold Avg): 16385.3553
LightGBM (3-fold Max): 16754.6821 	 2.239s
CatBoost (3-fold Avg): 14302.1132
CatBoost (3-fold Max): 14682.2153 	 11.175s


[32m[I 2021-09-07 19:24:07,152][0m Trial 13 finished with value: 16004.047466666669 and parameters: {'math': False, 'count': True, 'group': True, 'cluster_dist': True, 'pca': False, 'target': True, 'drop_last': False}. Best is trial 6 with value: 15881.147233333331.[0m


XGBoost  (3-fold Avg): 17340.9658
XGBoost  (3-fold Max): 17966.5153 	 4.409s
LightGBM (3-fold Avg): 16135.4552
LightGBM (3-fold Max): 16604.9433 	 4.219s
CatBoost (3-fold Avg): 14744.4465
CatBoost (3-fold Max): 15255.4781 	 16.297s


[32m[I 2021-09-07 19:24:33,085][0m Trial 14 finished with value: 16073.6225 and parameters: {'math': False, 'count': False, 'group': False, 'cluster_dist': True, 'pca': True, 'target': False, 'drop_last': True}. Best is trial 6 with value: 15881.147233333331.[0m


XGBoost  (3-fold Avg): 17175.1544
XGBoost  (3-fold Max): 18160.1309 	 4.641s
LightGBM (3-fold Avg): 16522.9466
LightGBM (3-fold Max): 17132.3856 	 4.448s
CatBoost (3-fold Avg): 14432.4575
CatBoost (3-fold Max): 15262.5529 	 12.42s


[32m[I 2021-09-07 19:24:55,612][0m Trial 15 finished with value: 16043.5195 and parameters: {'math': True, 'count': False, 'group': True, 'cluster_dist': True, 'pca': True, 'target': False, 'drop_last': True}. Best is trial 6 with value: 15881.147233333331.[0m


XGBoost  (3-fold Avg): 17293.6474
XGBoost  (3-fold Max): 18339.2861 	 4.162s
LightGBM (3-fold Avg): 16645.442
LightGBM (3-fold Max): 17254.1629 	 3.94s
CatBoost (3-fold Avg): 14515.3506
CatBoost (3-fold Max): 14805.9743 	 13.319s


[32m[I 2021-09-07 19:25:18,057][0m Trial 16 finished with value: 16151.479999999998 and parameters: {'math': False, 'count': False, 'group': False, 'cluster_dist': False, 'pca': False, 'target': True, 'drop_last': True}. Best is trial 6 with value: 15881.147233333331.[0m


XGBoost  (3-fold Avg): 16903.0544
XGBoost  (3-fold Max): 17545.8152 	 2.138s
LightGBM (3-fold Avg): 16609.0517
LightGBM (3-fold Max): 17131.4914 	 2.088s
CatBoost (3-fold Avg): 14199.4963
CatBoost (3-fold Max): 14918.8101 	 10.948s


[32m[I 2021-09-07 19:25:34,251][0m Trial 17 finished with value: 15903.867466666668 and parameters: {'math': True, 'count': False, 'group': True, 'cluster_dist': False, 'pca': True, 'target': True, 'drop_last': False}. Best is trial 6 with value: 15881.147233333331.[0m


XGBoost  (3-fold Avg): 17793.3409
XGBoost  (3-fold Max): 18446.6145 	 4.372s
LightGBM (3-fold Avg): 16190.6447
LightGBM (3-fold Max): 16685.2492 	 4.23s
CatBoost (3-fold Avg): 14621.0766
CatBoost (3-fold Max): 15285.3694 	 12.592s


[32m[I 2021-09-07 19:25:56,455][0m Trial 18 finished with value: 16201.6874 and parameters: {'math': False, 'count': True, 'group': True, 'cluster_dist': True, 'pca': False, 'target': False, 'drop_last': True}. Best is trial 6 with value: 15881.147233333331.[0m


XGBoost  (3-fold Avg): 17308.1559
XGBoost  (3-fold Max): 17965.4481 	 2.717s
LightGBM (3-fold Avg): 16521.4743
LightGBM (3-fold Max): 16953.2851 	 2.304s
CatBoost (3-fold Avg): 14192.0821
CatBoost (3-fold Max): 14613.0434 	 12.765s


[32m[I 2021-09-07 19:26:15,254][0m Trial 19 finished with value: 16007.237433333334 and parameters: {'math': True, 'count': True, 'group': False, 'cluster_dist': True, 'pca': False, 'target': True, 'drop_last': False}. Best is trial 6 with value: 15881.147233333331.[0m


XGBoost  (3-fold Avg): 17436.0585
XGBoost  (3-fold Max): 17701.7344 	 2.389s
LightGBM (3-fold Avg): 16560.8989
LightGBM (3-fold Max): 17080.6774 	 2.124s
CatBoost (3-fold Avg): 14336.9254
CatBoost (3-fold Max): 15127.1264 	 11.67s


[32m[I 2021-09-07 19:26:32,458][0m Trial 20 finished with value: 16111.294266666666 and parameters: {'math': True, 'count': True, 'group': True, 'cluster_dist': True, 'pca': False, 'target': False, 'drop_last': False}. Best is trial 6 with value: 15881.147233333331.[0m


XGBoost  (3-fold Avg): 17204.5251
XGBoost  (3-fold Max): 18187.4663 	 2.209s
LightGBM (3-fold Avg): 16780.85
LightGBM (3-fold Max): 17176.2614 	 1.938s
CatBoost (3-fold Avg): 14606.401
CatBoost (3-fold Max): 15139.3525 	 10.8s


[32m[I 2021-09-07 19:26:48,418][0m Trial 21 finished with value: 16197.258699999998 and parameters: {'math': False, 'count': False, 'group': True, 'cluster_dist': False, 'pca': False, 'target': True, 'drop_last': False}. Best is trial 6 with value: 15881.147233333331.[0m


XGBoost  (3-fold Avg): 16696.2021
XGBoost  (3-fold Max): 17852.4777 	 4.498s
LightGBM (3-fold Avg): 16550.6314
LightGBM (3-fold Max): 17132.4875 	 4.313s
CatBoost (3-fold Avg): 14228.0493
CatBoost (3-fold Max): 14704.6873 	 13.779s


[32m[I 2021-09-07 19:27:12,021][0m Trial 22 finished with value: 15824.96093333333 and parameters: {'math': True, 'count': False, 'group': False, 'cluster_dist': False, 'pca': True, 'target': True, 'drop_last': True}. Best is trial 22 with value: 15824.96093333333.[0m


XGBoost  (3-fold Avg): 17123.518
XGBoost  (3-fold Max): 17770.2867 	 4.816s
LightGBM (3-fold Avg): 16038.2068
LightGBM (3-fold Max): 16608.4459 	 4.605s
CatBoost (3-fold Avg): 14311.3116
CatBoost (3-fold Max): 14800.0146 	 11.535s


[32m[I 2021-09-07 19:27:33,996][0m Trial 23 finished with value: 15824.345466666666 and parameters: {'math': False, 'count': False, 'group': True, 'cluster_dist': True, 'pca': True, 'target': True, 'drop_last': True}. Best is trial 23 with value: 15824.345466666666.[0m


XGBoost  (3-fold Avg): 16844.5129
XGBoost  (3-fold Max): 17989.9744 	 4.226s
LightGBM (3-fold Avg): 16267.5819
LightGBM (3-fold Max): 16975.1168 	 3.94s
CatBoost (3-fold Avg): 14573.0813
CatBoost (3-fold Max): 14977.2447 	 15.836s


[32m[I 2021-09-07 19:27:59,022][0m Trial 24 finished with value: 15895.0587 and parameters: {'math': False, 'count': False, 'group': True, 'cluster_dist': False, 'pca': True, 'target': False, 'drop_last': True}. Best is trial 23 with value: 15824.345466666666.[0m


XGBoost  (3-fold Avg): 17457.4707
XGBoost  (3-fold Max): 18292.309 	 2.021s
LightGBM (3-fold Avg): 16597.4962
LightGBM (3-fold Max): 17185.1397 	 1.832s
CatBoost (3-fold Avg): 14639.1022
CatBoost (3-fold Max): 14970.7286 	 10.61s


[32m[I 2021-09-07 19:28:14,506][0m Trial 25 finished with value: 16231.356366666667 and parameters: {'math': False, 'count': False, 'group': False, 'cluster_dist': False, 'pca': False, 'target': True, 'drop_last': False}. Best is trial 23 with value: 15824.345466666666.[0m


XGBoost  (3-fold Avg): 16773.0617
XGBoost  (3-fold Max): 17507.9376 	 2.509s
LightGBM (3-fold Avg): 16249.3667
LightGBM (3-fold Max): 16743.4692 	 2.251s
CatBoost (3-fold Avg): 14308.1904
CatBoost (3-fold Max): 14682.2043 	 8.361s


[32m[I 2021-09-07 19:28:28,646][0m Trial 26 finished with value: 15776.872933333332 and parameters: {'math': False, 'count': False, 'group': False, 'cluster_dist': True, 'pca': True, 'target': True, 'drop_last': False}. Best is trial 26 with value: 15776.872933333332.[0m


XGBoost  (3-fold Avg): 17348.8617
XGBoost  (3-fold Max): 18004.5614 	 4.858s
LightGBM (3-fold Avg): 16729.4079
LightGBM (3-fold Max): 17390.8261 	 4.387s
CatBoost (3-fold Avg): 14427.1479
CatBoost (3-fold Max): 14972.6141 	 18.163s


[32m[I 2021-09-07 19:28:57,073][0m Trial 27 finished with value: 16168.472499999998 and parameters: {'math': True, 'count': True, 'group': True, 'cluster_dist': True, 'pca': False, 'target': False, 'drop_last': True}. Best is trial 26 with value: 15776.872933333332.[0m


XGBoost  (3-fold Avg): 17770.3514
XGBoost  (3-fold Max): 18389.5722 	 2.008s
LightGBM (3-fold Avg): 16577.4662
LightGBM (3-fold Max): 17139.9331 	 1.629s
CatBoost (3-fold Avg): 15150.8127
CatBoost (3-fold Max): 15669.522 	 11.176s


[32m[I 2021-09-07 19:29:12,899][0m Trial 28 finished with value: 16499.543433333332 and parameters: {'math': False, 'count': True, 'group': False, 'cluster_dist': False, 'pca': False, 'target': False, 'drop_last': False}. Best is trial 26 with value: 15776.872933333332.[0m


XGBoost  (3-fold Avg): 17349.6126
XGBoost  (3-fold Max): 17585.8226 	 4.456s
LightGBM (3-fold Avg): 16279.2098
LightGBM (3-fold Max): 16949.6037 	 4.439s
CatBoost (3-fold Avg): 14783.8951
CatBoost (3-fold Max): 15222.7436 	 17.018s


[32m[I 2021-09-07 19:29:39,835][0m Trial 29 finished with value: 16137.572500000002 and parameters: {'math': True, 'count': False, 'group': False, 'cluster_dist': True, 'pca': False, 'target': False, 'drop_last': True}. Best is trial 26 with value: 15776.872933333332.[0m


XGBoost  (3-fold Avg): 17094.9463
XGBoost  (3-fold Max): 18207.4611 	 2.425s
LightGBM (3-fold Avg): 16112.5476
LightGBM (3-fold Max): 16698.8386 	 2.316s
CatBoost (3-fold Avg): 14291.6821
CatBoost (3-fold Max): 14759.7798 	 8.783s


[32m[I 2021-09-07 19:29:54,381][0m Trial 30 finished with value: 15833.058666666666 and parameters: {'math': False, 'count': True, 'group': False, 'cluster_dist': True, 'pca': True, 'target': True, 'drop_last': False}. Best is trial 26 with value: 15776.872933333332.[0m


XGBoost  (3-fold Avg): 16890.7517
XGBoost  (3-fold Max): 17606.9722 	 4.526s
LightGBM (3-fold Avg): 16360.034
LightGBM (3-fold Max): 16967.7201 	 4.338s
CatBoost (3-fold Avg): 14345.5187
CatBoost (3-fold Max): 14784.6784 	 14.123s


[32m[I 2021-09-07 19:30:18,389][0m Trial 31 finished with value: 15865.4348 and parameters: {'math': True, 'count': True, 'group': True, 'cluster_dist': False, 'pca': True, 'target': True, 'drop_last': True}. Best is trial 26 with value: 15776.872933333332.[0m


XGBoost  (3-fold Avg): 17056.0035
XGBoost  (3-fold Max): 18095.5161 	 2.142s
LightGBM (3-fold Avg): 16349.294
LightGBM (3-fold Max): 16993.6694 	 1.815s
CatBoost (3-fold Avg): 14606.2999
CatBoost (3-fold Max): 15290.6571 	 8.968s


[32m[I 2021-09-07 19:30:32,337][0m Trial 32 finished with value: 16003.8658 and parameters: {'math': False, 'count': False, 'group': True, 'cluster_dist': False, 'pca': True, 'target': False, 'drop_last': False}. Best is trial 26 with value: 15776.872933333332.[0m


XGBoost  (3-fold Avg): 17308.7527
XGBoost  (3-fold Max): 18196.8029 	 4.576s
LightGBM (3-fold Avg): 16307.9684
LightGBM (3-fold Max): 16936.1134 	 4.315s
CatBoost (3-fold Avg): 14285.5229
CatBoost (3-fold Max): 15193.0086 	 11.826s


[32m[I 2021-09-07 19:30:54,073][0m Trial 33 finished with value: 15967.41466666667 and parameters: {'math': False, 'count': False, 'group': True, 'cluster_dist': True, 'pca': True, 'target': False, 'drop_last': True}. Best is trial 26 with value: 15776.872933333332.[0m


XGBoost  (3-fold Avg): 17988.7028
XGBoost  (3-fold Max): 18843.7464 	 3.785s
LightGBM (3-fold Avg): 16711.3912
LightGBM (3-fold Max): 17465.1916 	 3.663s
CatBoost (3-fold Avg): 15136.9236
CatBoost (3-fold Max): 15734.5019 	 13.924s


[32m[I 2021-09-07 19:31:16,456][0m Trial 34 finished with value: 16612.3392 and parameters: {'math': False, 'count': False, 'group': False, 'cluster_dist': False, 'pca': False, 'target': False, 'drop_last': True}. Best is trial 26 with value: 15776.872933333332.[0m


XGBoost  (3-fold Avg): 17058.4484
XGBoost  (3-fold Max): 17900.641 	 2.806s
LightGBM (3-fold Avg): 16339.3749
LightGBM (3-fold Max): 16911.3193 	 2.335s
CatBoost (3-fold Avg): 14125.5255
CatBoost (3-fold Max): 14583.9688 	 10.643s


[32m[I 2021-09-07 19:31:33,260][0m Trial 35 finished with value: 15841.11626666667 and parameters: {'math': True, 'count': False, 'group': True, 'cluster_dist': True, 'pca': True, 'target': True, 'drop_last': False}. Best is trial 26 with value: 15776.872933333332.[0m


XGBoost  (3-fold Avg): 18215.8929
XGBoost  (3-fold Max): 18958.6211 	 4.052s
LightGBM (3-fold Avg): 16506.4281
LightGBM (3-fold Max): 17132.1997 	 3.68s
CatBoost (3-fold Avg): 15055.7246
CatBoost (3-fold Max): 15682.681 	 11.349s


[32m[I 2021-09-07 19:31:53,365][0m Trial 36 finished with value: 16592.681866666666 and parameters: {'math': False, 'count': True, 'group': False, 'cluster_dist': False, 'pca': False, 'target': False, 'drop_last': True}. Best is trial 26 with value: 15776.872933333332.[0m


XGBoost  (3-fold Avg): 17173.8727
XGBoost  (3-fold Max): 18136.1141 	 1.895s
LightGBM (3-fold Avg): 16096.8338
LightGBM (3-fold Max): 16434.0747 	 1.785s
CatBoost (3-fold Avg): 14811.3066
CatBoost (3-fold Max): 15259.5257 	 11.726s


[32m[I 2021-09-07 19:32:09,792][0m Trial 37 finished with value: 16027.337699999998 and parameters: {'math': False, 'count': False, 'group': False, 'cluster_dist': False, 'pca': True, 'target': False, 'drop_last': False}. Best is trial 26 with value: 15776.872933333332.[0m


XGBoost  (3-fold Avg): 16928.2419
XGBoost  (3-fold Max): 18177.5403 	 2.399s
LightGBM (3-fold Avg): 16370.0481
LightGBM (3-fold Max): 16840.1842 	 2.248s
CatBoost (3-fold Avg): 14329.538
CatBoost (3-fold Max): 14984.4568 	 10.362s


[32m[I 2021-09-07 19:32:25,819][0m Trial 38 finished with value: 15875.942666666668 and parameters: {'math': False, 'count': False, 'group': True, 'cluster_dist': True, 'pca': True, 'target': True, 'drop_last': False}. Best is trial 26 with value: 15776.872933333332.[0m


XGBoost  (3-fold Avg): 17338.3016
XGBoost  (3-fold Max): 17895.2025 	 4.805s
LightGBM (3-fold Avg): 16544.3854
LightGBM (3-fold Max): 17051.9374 	 4.627s
CatBoost (3-fold Avg): 14051.2608
CatBoost (3-fold Max): 14319.1562 	 21.113s


[32m[I 2021-09-07 19:32:57,372][0m Trial 39 finished with value: 15977.982599999998 and parameters: {'math': True, 'count': False, 'group': False, 'cluster_dist': True, 'pca': False, 'target': True, 'drop_last': True}. Best is trial 26 with value: 15776.872933333332.[0m


XGBoost  (3-fold Avg): 17365.2033
XGBoost  (3-fold Max): 17642.3651 	 4.571s
LightGBM (3-fold Avg): 16579.1509
LightGBM (3-fold Max): 17061.3409 	 4.246s
CatBoost (3-fold Avg): 14198.7344
CatBoost (3-fold Max): 14714.9294 	 13.021s


[32m[I 2021-09-07 19:33:20,224][0m Trial 40 finished with value: 16047.6962 and parameters: {'math': True, 'count': True, 'group': True, 'cluster_dist': False, 'pca': False, 'target': True, 'drop_last': True}. Best is trial 26 with value: 15776.872933333332.[0m


XGBoost  (3-fold Avg): 17071.0656
XGBoost  (3-fold Max): 18199.5171 	 4.499s
LightGBM (3-fold Avg): 16167.1582
LightGBM (3-fold Max): 16852.5731 	 4.333s
CatBoost (3-fold Avg): 14849.2623
CatBoost (3-fold Max): 15546.6583 	 11.118s


[32m[I 2021-09-07 19:33:41,185][0m Trial 41 finished with value: 16029.162033333334 and parameters: {'math': False, 'count': True, 'group': False, 'cluster_dist': True, 'pca': True, 'target': False, 'drop_last': True}. Best is trial 26 with value: 15776.872933333332.[0m


XGBoost  (3-fold Avg): 17273.0578
XGBoost  (3-fold Max): 18220.548 	 1.933s
LightGBM (3-fold Avg): 16383.8305
LightGBM (3-fold Max): 17170.7174 	 1.853s
CatBoost (3-fold Avg): 14419.6065
CatBoost (3-fold Max): 14997.785 	 9.521s


[32m[I 2021-09-07 19:33:55,510][0m Trial 42 finished with value: 16025.498266666667 and parameters: {'math': False, 'count': True, 'group': True, 'cluster_dist': False, 'pca': True, 'target': False, 'drop_last': False}. Best is trial 26 with value: 15776.872933333332.[0m


XGBoost  (3-fold Avg): 17662.9724
XGBoost  (3-fold Max): 18090.8123 	 4.698s
LightGBM (3-fold Avg): 16378.6119
LightGBM (3-fold Max): 16845.7532 	 4.535s
CatBoost (3-fold Avg): 14401.8919
CatBoost (3-fold Max): 14786.9886 	 13.353s


[32m[I 2021-09-07 19:34:19,109][0m Trial 43 finished with value: 16147.825400000002 and parameters: {'math': False, 'count': False, 'group': False, 'cluster_dist': True, 'pca': False, 'target': True, 'drop_last': True}. Best is trial 26 with value: 15776.872933333332.[0m


XGBoost  (3-fold Avg): 16725.8029
XGBoost  (3-fold Max): 17769.7487 	 2.345s
LightGBM (3-fold Avg): 16311.5051
LightGBM (3-fold Max): 16928.9896 	 2.038s
CatBoost (3-fold Avg): 14320.0606
CatBoost (3-fold Max): 14981.3695 	 9.991s


[32m[I 2021-09-07 19:34:34,491][0m Trial 44 finished with value: 15785.789533333334 and parameters: {'math': True, 'count': False, 'group': False, 'cluster_dist': False, 'pca': True, 'target': True, 'drop_last': False}. Best is trial 26 with value: 15776.872933333332.[0m


XGBoost  (3-fold Avg): 17318.4996
XGBoost  (3-fold Max): 18040.65 	 2.462s
LightGBM (3-fold Avg): 16539.976
LightGBM (3-fold Max): 16997.2477 	 2.29s
CatBoost (3-fold Avg): 14138.7162
CatBoost (3-fold Max): 14579.8257 	 12.043s


[32m[I 2021-09-07 19:34:52,309][0m Trial 45 finished with value: 15999.063933333333 and parameters: {'math': True, 'count': False, 'group': False, 'cluster_dist': True, 'pca': False, 'target': True, 'drop_last': False}. Best is trial 26 with value: 15776.872933333332.[0m


XGBoost  (3-fold Avg): 17028.9274
XGBoost  (3-fold Max): 17381.2864 	 2.291s
LightGBM (3-fold Avg): 16358.4543
LightGBM (3-fold Max): 16795.0215 	 2.024s
CatBoost (3-fold Avg): 14405.5069
CatBoost (3-fold Max): 14922.0275 	 10.532s


[32m[I 2021-09-07 19:35:08,169][0m Trial 46 finished with value: 15930.962866666667 and parameters: {'math': False, 'count': True, 'group': True, 'cluster_dist': False, 'pca': True, 'target': True, 'drop_last': False}. Best is trial 26 with value: 15776.872933333332.[0m


XGBoost  (3-fold Avg): 17314.1981
XGBoost  (3-fold Max): 17569.3577 	 4.782s
LightGBM (3-fold Avg): 16277.4563
LightGBM (3-fold Max): 16564.0817 	 4.589s
CatBoost (3-fold Avg): 14450.2334
CatBoost (3-fold Max): 14780.3352 	 15.704s


[32m[I 2021-09-07 19:35:34,267][0m Trial 47 finished with value: 16013.962599999999 and parameters: {'math': False, 'count': False, 'group': True, 'cluster_dist': True, 'pca': False, 'target': True, 'drop_last': True}. Best is trial 26 with value: 15776.872933333332.[0m


XGBoost  (3-fold Avg): 17946.6165
XGBoost  (3-fold Max): 18877.2923 	 2.267s
LightGBM (3-fold Avg): 16468.4455
LightGBM (3-fold Max): 16666.6208 	 2.012s
CatBoost (3-fold Avg): 14826.5159
CatBoost (3-fold Max): 15173.4145 	 10.922s


[32m[I 2021-09-07 19:35:50,494][0m Trial 48 finished with value: 16413.8593 and parameters: {'math': False, 'count': True, 'group': False, 'cluster_dist': True, 'pca': False, 'target': False, 'drop_last': False}. Best is trial 26 with value: 15776.872933333332.[0m


XGBoost  (3-fold Avg): 16878.9873
XGBoost  (3-fold Max): 17480.7286 	 4.417s
LightGBM (3-fold Avg): 16349.9693
LightGBM (3-fold Max): 16644.9701 	 4.104s
CatBoost (3-fold Avg): 14201.8839
CatBoost (3-fold Max): 14623.2064 	 12.022s


[32m[I 2021-09-07 19:36:12,054][0m Trial 49 finished with value: 15810.28016666667 and parameters: {'math': False, 'count': True, 'group': False, 'cluster_dist': False, 'pca': True, 'target': True, 'drop_last': True}. Best is trial 26 with value: 15776.872933333332.[0m


XGBoost  (3-fold Avg): 17825.7893
XGBoost  (3-fold Max): 18494.1449 	 4.555s
LightGBM (3-fold Avg): 16111.3156
LightGBM (3-fold Max): 16751.3647 	 4.44s
CatBoost (3-fold Avg): 14645.8743
CatBoost (3-fold Max): 15353.5991 	 13.13s


[32m[I 2021-09-07 19:36:35,195][0m Trial 50 finished with value: 16194.3264 and parameters: {'math': True, 'count': False, 'group': False, 'cluster_dist': True, 'pca': True, 'target': False, 'drop_last': True}. Best is trial 26 with value: 15776.872933333332.[0m


XGBoost  (3-fold Avg): 16879.9692
XGBoost  (3-fold Max): 17571.6641 	 5.374s
LightGBM (3-fold Avg): 16232.4525
LightGBM (3-fold Max): 16822.8103 	 4.704s
CatBoost (3-fold Avg): 14138.275
CatBoost (3-fold Max): 14689.2325 	 13.582s


[32m[I 2021-09-07 19:36:59,865][0m Trial 51 finished with value: 15750.232233333334 and parameters: {'math': True, 'count': False, 'group': False, 'cluster_dist': True, 'pca': True, 'target': True, 'drop_last': True}. Best is trial 51 with value: 15750.232233333334.[0m


XGBoost  (3-fold Avg): 17382.1029
XGBoost  (3-fold Max): 18017.0621 	 4.464s
LightGBM (3-fold Avg): 16677.7533
LightGBM (3-fold Max): 17187.8399 	 4.161s
CatBoost (3-fold Avg): 14313.4535
CatBoost (3-fold Max): 14761.1792 	 12.774s


[32m[I 2021-09-07 19:37:22,285][0m Trial 52 finished with value: 16124.436566666665 and parameters: {'math': True, 'count': False, 'group': False, 'cluster_dist': False, 'pca': False, 'target': True, 'drop_last': True}. Best is trial 51 with value: 15750.232233333334.[0m


XGBoost  (3-fold Avg): 17506.8896
XGBoost  (3-fold Max): 17984.5947 	 4.933s
LightGBM (3-fold Avg): 16435.3372
LightGBM (3-fold Max): 16949.1936 	 4.698s
CatBoost (3-fold Avg): 14191.8232
CatBoost (3-fold Max): 14897.5492 	 15.11s


[32m[I 2021-09-07 19:37:48,042][0m Trial 53 finished with value: 16044.683333333334 and parameters: {'math': True, 'count': True, 'group': False, 'cluster_dist': True, 'pca': False, 'target': True, 'drop_last': True}. Best is trial 51 with value: 15750.232233333334.[0m


XGBoost  (3-fold Avg): 17631.3335
XGBoost  (3-fold Max): 18363.6619 	 4.094s
LightGBM (3-fold Avg): 16411.7568
LightGBM (3-fold Max): 17028.7741 	 3.883s
CatBoost (3-fold Avg): 14439.0694
CatBoost (3-fold Max): 14918.4347 	 10.669s


[32m[I 2021-09-07 19:38:07,704][0m Trial 54 finished with value: 16160.719899999998 and parameters: {'math': True, 'count': False, 'group': True, 'cluster_dist': False, 'pca': False, 'target': False, 'drop_last': True}. Best is trial 51 with value: 15750.232233333334.[0m


XGBoost  (3-fold Avg): 17320.6705
XGBoost  (3-fold Max): 17994.6646 	 4.868s
LightGBM (3-fold Avg): 16670.0052
LightGBM (3-fold Max): 17667.3914 	 4.384s
CatBoost (3-fold Avg): 14389.8017
CatBoost (3-fold Max): 14866.6296 	 20.867s


[32m[I 2021-09-07 19:38:38,839][0m Trial 55 finished with value: 16126.8258 and parameters: {'math': True, 'count': False, 'group': True, 'cluster_dist': True, 'pca': False, 'target': False, 'drop_last': True}. Best is trial 51 with value: 15750.232233333334.[0m


XGBoost  (3-fold Avg): 17022.0785
XGBoost  (3-fold Max): 18043.177 	 4.916s
LightGBM (3-fold Avg): 16254.2192
LightGBM (3-fold Max): 16702.057 	 4.514s
CatBoost (3-fold Avg): 14309.1624
CatBoost (3-fold Max): 14782.428 	 11.197s


[32m[I 2021-09-07 19:39:00,475][0m Trial 56 finished with value: 15861.820033333332 and parameters: {'math': False, 'count': True, 'group': True, 'cluster_dist': True, 'pca': True, 'target': True, 'drop_last': True}. Best is trial 51 with value: 15750.232233333334.[0m


XGBoost  (3-fold Avg): 17438.4748
XGBoost  (3-fold Max): 17639.327 	 2.136s
LightGBM (3-fold Avg): 16341.4648
LightGBM (3-fold Max): 16934.9977 	 2.041s
CatBoost (3-fold Avg): 14525.3257
CatBoost (3-fold Max): 15037.321 	 13.019s


[32m[I 2021-09-07 19:39:18,692][0m Trial 57 finished with value: 16101.7551 and parameters: {'math': True, 'count': False, 'group': False, 'cluster_dist': True, 'pca': False, 'target': False, 'drop_last': False}. Best is trial 51 with value: 15750.232233333334.[0m


XGBoost  (3-fold Avg): 17673.338
XGBoost  (3-fold Max): 18377.4757 	 2.136s
LightGBM (3-fold Avg): 16207.1803
LightGBM (3-fold Max): 16902.8738 	 2.124s
CatBoost (3-fold Avg): 14702.2727
CatBoost (3-fold Max): 15527.0414 	 10.302s


[32m[I 2021-09-07 19:39:34,269][0m Trial 58 finished with value: 16194.263666666666 and parameters: {'math': True, 'count': False, 'group': False, 'cluster_dist': True, 'pca': True, 'target': False, 'drop_last': False}. Best is trial 51 with value: 15750.232233333334.[0m


XGBoost  (3-fold Avg): 16711.598
XGBoost  (3-fold Max): 17494.9791 	 4.837s
LightGBM (3-fold Avg): 16179.6177
LightGBM (3-fold Max): 16877.8646 	 4.73s
CatBoost (3-fold Avg): 14246.5644
CatBoost (3-fold Max): 14985.5779 	 12.046s


[32m[I 2021-09-07 19:39:56,902][0m Trial 59 finished with value: 15712.593366666668 and parameters: {'math': True, 'count': False, 'group': True, 'cluster_dist': True, 'pca': True, 'target': True, 'drop_last': True}. Best is trial 59 with value: 15712.593366666668.[0m


XGBoost  (3-fold Avg): 16652.362
XGBoost  (3-fold Max): 17067.7064 	 5.033s
LightGBM (3-fold Avg): 16646.9589
LightGBM (3-fold Max): 17341.8605 	 4.668s
CatBoost (3-fold Avg): 14243.9112
CatBoost (3-fold Max): 14669.2189 	 15.634s


[32m[I 2021-09-07 19:40:23,254][0m Trial 60 finished with value: 15847.744033333336 and parameters: {'math': True, 'count': True, 'group': True, 'cluster_dist': True, 'pca': False, 'target': True, 'drop_last': True}. Best is trial 59 with value: 15712.593366666668.[0m


XGBoost  (3-fold Avg): 17740.5268
XGBoost  (3-fold Max): 18746.192 	 3.839s
LightGBM (3-fold Avg): 16658.6334
LightGBM (3-fold Max): 17485.6168 	 3.711s
CatBoost (3-fold Avg): 14637.1969
CatBoost (3-fold Max): 15140.7877 	 14.78s


[32m[I 2021-09-07 19:40:46,601][0m Trial 61 finished with value: 16345.452366666666 and parameters: {'math': False, 'count': False, 'group': True, 'cluster_dist': False, 'pca': False, 'target': False, 'drop_last': True}. Best is trial 59 with value: 15712.593366666668.[0m


XGBoost  (3-fold Avg): 17535.3169
XGBoost  (3-fold Max): 18092.1251 	 2.166s
LightGBM (3-fold Avg): 16657.2253
LightGBM (3-fold Max): 17131.5159 	 1.924s
CatBoost (3-fold Avg): 14478.1503
CatBoost (3-fold Max): 14892.9031 	 10.47s


[32m[I 2021-09-07 19:41:02,181][0m Trial 62 finished with value: 16223.564166666665 and parameters: {'math': True, 'count': True, 'group': False, 'cluster_dist': False, 'pca': False, 'target': True, 'drop_last': False}. Best is trial 59 with value: 15712.593366666668.[0m


XGBoost  (3-fold Avg): 17125.5865
XGBoost  (3-fold Max): 17272.739 	 4.593s
LightGBM (3-fold Avg): 16239.5547
LightGBM (3-fold Max): 16640.3209 	 4.539s
CatBoost (3-fold Avg): 14181.4355
CatBoost (3-fold Max): 14424.0443 	 16.884s


[32m[I 2021-09-07 19:41:29,223][0m Trial 63 finished with value: 15848.8589 and parameters: {'math': False, 'count': True, 'group': False, 'cluster_dist': True, 'pca': False, 'target': True, 'drop_last': True}. Best is trial 59 with value: 15712.593366666668.[0m


XGBoost  (3-fold Avg): 17488.5638
XGBoost  (3-fold Max): 18539.7984 	 4.211s
LightGBM (3-fold Avg): 16593.5197
LightGBM (3-fold Max): 17055.5459 	 4.061s
CatBoost (3-fold Avg): 14384.9756
CatBoost (3-fold Max): 14854.7518 	 13.782s


[32m[I 2021-09-07 19:41:52,304][0m Trial 64 finished with value: 16155.686366666667 and parameters: {'math': True, 'count': False, 'group': True, 'cluster_dist': False, 'pca': True, 'target': False, 'drop_last': True}. Best is trial 59 with value: 15712.593366666668.[0m


XGBoost  (3-fold Avg): 17210.553
XGBoost  (3-fold Max): 18308.6802 	 2.399s
LightGBM (3-fold Avg): 16216.1029
LightGBM (3-fold Max): 16691.606 	 2.18s
CatBoost (3-fold Avg): 14258.0211
CatBoost (3-fold Max): 14447.6137 	 13.442s


[32m[I 2021-09-07 19:42:11,342][0m Trial 65 finished with value: 15894.892333333331 and parameters: {'math': False, 'count': False, 'group': False, 'cluster_dist': True, 'pca': False, 'target': True, 'drop_last': False}. Best is trial 59 with value: 15712.593366666668.[0m


XGBoost  (3-fold Avg): 16943.4798
XGBoost  (3-fold Max): 17673.5783 	 2.56s
LightGBM (3-fold Avg): 16245.5415
LightGBM (3-fold Max): 16694.7707 	 2.321s
CatBoost (3-fold Avg): 14028.0062
CatBoost (3-fold Max): 14597.1475 	 10.341s


[32m[I 2021-09-07 19:42:27,579][0m Trial 66 finished with value: 15739.009166666665 and parameters: {'math': True, 'count': True, 'group': True, 'cluster_dist': True, 'pca': True, 'target': True, 'drop_last': False}. Best is trial 59 with value: 15712.593366666668.[0m


XGBoost  (3-fold Avg): 17335.1191
XGBoost  (3-fold Max): 17898.417 	 4.033s
LightGBM (3-fold Avg): 16552.9468
LightGBM (3-fold Max): 17399.6621 	 3.9s
CatBoost (3-fold Avg): 14414.6906
CatBoost (3-fold Max): 14919.5979 	 11.456s


[32m[I 2021-09-07 19:42:47,981][0m Trial 67 finished with value: 16100.918833333335 and parameters: {'math': True, 'count': True, 'group': True, 'cluster_dist': False, 'pca': False, 'target': False, 'drop_last': True}. Best is trial 59 with value: 15712.593366666668.[0m


XGBoost  (3-fold Avg): 17080.6032
XGBoost  (3-fold Max): 18591.4403 	 2.54s
LightGBM (3-fold Avg): 16369.0712
LightGBM (3-fold Max): 16823.3003 	 2.253s
CatBoost (3-fold Avg): 14138.5484
CatBoost (3-fold Max): 14704.8089 	 10.492s


[32m[I 2021-09-07 19:43:04,285][0m Trial 68 finished with value: 15862.740933333334 and parameters: {'math': False, 'count': True, 'group': True, 'cluster_dist': True, 'pca': True, 'target': True, 'drop_last': False}. Best is trial 59 with value: 15712.593366666668.[0m


XGBoost  (3-fold Avg): 17234.9114
XGBoost  (3-fold Max): 18047.4469 	 4.456s
LightGBM (3-fold Avg): 16458.3799
LightGBM (3-fold Max): 17282.0778 	 4.041s
CatBoost (3-fold Avg): 14340.6645
CatBoost (3-fold Max): 15070.8143 	 11.945s


[32m[I 2021-09-07 19:43:25,743][0m Trial 69 finished with value: 16011.318599999999 and parameters: {'math': True, 'count': True, 'group': True, 'cluster_dist': False, 'pca': True, 'target': False, 'drop_last': True}. Best is trial 59 with value: 15712.593366666668.[0m


XGBoost  (3-fold Avg): 17710.0494
XGBoost  (3-fold Max): 18857.7952 	 4.344s
LightGBM (3-fold Avg): 16465.6986
LightGBM (3-fold Max): 17062.4281 	 4.012s
CatBoost (3-fold Avg): 15037.2321
CatBoost (3-fold Max): 15401.4144 	 10.498s


[32m[I 2021-09-07 19:43:45,618][0m Trial 70 finished with value: 16404.3267 and parameters: {'math': True, 'count': False, 'group': False, 'cluster_dist': False, 'pca': True, 'target': False, 'drop_last': True}. Best is trial 59 with value: 15712.593366666668.[0m


XGBoost  (3-fold Avg): 17400.3329
XGBoost  (3-fold Max): 18616.4546 	 2.27s
LightGBM (3-fold Avg): 16188.4166
LightGBM (3-fold Max): 16827.2462 	 2.03s
CatBoost (3-fold Avg): 14410.6344
CatBoost (3-fold Max): 15016.2067 	 10.872s


[32m[I 2021-09-07 19:44:01,799][0m Trial 71 finished with value: 15999.794633333337 and parameters: {'math': False, 'count': False, 'group': True, 'cluster_dist': True, 'pca': True, 'target': False, 'drop_last': False}. Best is trial 59 with value: 15712.593366666668.[0m


XGBoost  (3-fold Avg): 16928.963
XGBoost  (3-fold Max): 17788.1364 	 2.441s
LightGBM (3-fold Avg): 16134.8192
LightGBM (3-fold Max): 16604.6019 	 2.331s
CatBoost (3-fold Avg): 14062.0705
CatBoost (3-fold Max): 14534.122 	 8.787s


[32m[I 2021-09-07 19:44:16,376][0m Trial 72 finished with value: 15708.617566666668 and parameters: {'math': True, 'count': True, 'group': False, 'cluster_dist': True, 'pca': True, 'target': True, 'drop_last': False}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17171.481
XGBoost  (3-fold Max): 17738.5653 	 4.301s
LightGBM (3-fold Avg): 16397.1936
LightGBM (3-fold Max): 16701.7372 	 4.114s
CatBoost (3-fold Avg): 14227.4542
CatBoost (3-fold Max): 14636.9881 	 14.105s


[32m[I 2021-09-07 19:44:39,919][0m Trial 73 finished with value: 15932.042933333332 and parameters: {'math': False, 'count': False, 'group': True, 'cluster_dist': False, 'pca': True, 'target': True, 'drop_last': True}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 16810.3026
XGBoost  (3-fold Max): 17332.9741 	 2.262s
LightGBM (3-fold Avg): 16407.9763
LightGBM (3-fold Max): 16866.3034 	 2.018s
CatBoost (3-fold Avg): 14260.1466
CatBoost (3-fold Max): 14856.8532 	 10.841s


[32m[I 2021-09-07 19:44:56,052][0m Trial 74 finished with value: 15826.141833333333 and parameters: {'math': False, 'count': False, 'group': True, 'cluster_dist': False, 'pca': True, 'target': True, 'drop_last': False}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 16832.385
XGBoost  (3-fold Max): 17831.9736 	 2.126s
LightGBM (3-fold Avg): 16386.0991
LightGBM (3-fold Max): 16832.9148 	 2.033s
CatBoost (3-fold Avg): 14129.5616
CatBoost (3-fold Max): 14747.5907 	 9.833s


[32m[I 2021-09-07 19:45:11,061][0m Trial 75 finished with value: 15782.681900000001 and parameters: {'math': True, 'count': True, 'group': False, 'cluster_dist': False, 'pca': True, 'target': True, 'drop_last': False}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17368.5527
XGBoost  (3-fold Max): 18057.6611 	 2.119s
LightGBM (3-fold Avg): 16627.5963
LightGBM (3-fold Max): 17130.4014 	 1.853s
CatBoost (3-fold Avg): 14535.4082
CatBoost (3-fold Max): 14884.9241 	 9.799s


[32m[I 2021-09-07 19:45:25,857][0m Trial 76 finished with value: 16177.185733333334 and parameters: {'math': False, 'count': True, 'group': True, 'cluster_dist': False, 'pca': False, 'target': True, 'drop_last': False}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 16944.4337
XGBoost  (3-fold Max): 17478.8711 	 4.837s
LightGBM (3-fold Avg): 16182.1387
LightGBM (3-fold Max): 16686.1526 	 4.709s
CatBoost (3-fold Avg): 14098.8127
CatBoost (3-fold Max): 14642.8118 	 12.217s


[32m[I 2021-09-07 19:45:48,640][0m Trial 77 finished with value: 15741.795033333336 and parameters: {'math': True, 'count': True, 'group': False, 'cluster_dist': True, 'pca': True, 'target': True, 'drop_last': True}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17576.2621
XGBoost  (3-fold Max): 17824.5422 	 4.541s
LightGBM (3-fold Avg): 16313.8333
LightGBM (3-fold Max): 16949.6037 	 4.367s
CatBoost (3-fold Avg): 14571.3374
CatBoost (3-fold Max): 15014.0315 	 16.536s


[32m[I 2021-09-07 19:46:15,115][0m Trial 78 finished with value: 16153.810933333332 and parameters: {'math': True, 'count': True, 'group': False, 'cluster_dist': True, 'pca': False, 'target': False, 'drop_last': True}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17159.1246
XGBoost  (3-fold Max): 17901.6871 	 4.681s
LightGBM (3-fold Avg): 16604.586
LightGBM (3-fold Max): 17193.8065 	 4.62s
CatBoost (3-fold Avg): 14020.5052
CatBoost (3-fold Max): 14576.1987 	 18.986s


[32m[I 2021-09-07 19:46:44,427][0m Trial 79 finished with value: 15928.071933333333 and parameters: {'math': True, 'count': False, 'group': True, 'cluster_dist': True, 'pca': False, 'target': True, 'drop_last': True}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17530.9741
XGBoost  (3-fold Max): 18251.2869 	 3.952s
LightGBM (3-fold Avg): 16035.9742
LightGBM (3-fold Max): 16355.0132 	 3.868s
CatBoost (3-fold Avg): 14744.2609
CatBoost (3-fold Max): 15200.3591 	 13.852s


[32m[I 2021-09-07 19:47:07,122][0m Trial 80 finished with value: 16103.736400000002 and parameters: {'math': False, 'count': False, 'group': False, 'cluster_dist': False, 'pca': True, 'target': False, 'drop_last': True}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17075.0567
XGBoost  (3-fold Max): 17469.0162 	 2.442s
LightGBM (3-fold Avg): 16447.8901
LightGBM (3-fold Max): 16784.9124 	 2.251s
CatBoost (3-fold Avg): 14279.2482
CatBoost (3-fold Max): 14633.2249 	 10.261s


[32m[I 2021-09-07 19:47:23,097][0m Trial 81 finished with value: 15934.065000000002 and parameters: {'math': False, 'count': False, 'group': True, 'cluster_dist': True, 'pca': False, 'target': True, 'drop_last': False}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17934.8906
XGBoost  (3-fold Max): 19299.0719 	 2.072s
LightGBM (3-fold Avg): 16264.9304
LightGBM (3-fold Max): 17022.9423 	 1.774s
CatBoost (3-fold Avg): 14909.7338
CatBoost (3-fold Max): 15527.7372 	 7.71s


[32m[I 2021-09-07 19:47:35,672][0m Trial 82 finished with value: 16369.8516 and parameters: {'math': True, 'count': False, 'group': False, 'cluster_dist': False, 'pca': True, 'target': False, 'drop_last': False}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17359.2306
XGBoost  (3-fold Max): 17669.4883 	 2.137s
LightGBM (3-fold Avg): 16693.1629
LightGBM (3-fold Max): 17486.3793 	 1.92s
CatBoost (3-fold Avg): 14306.4255
CatBoost (3-fold Max): 14849.9813 	 11.747s


[32m[I 2021-09-07 19:47:52,496][0m Trial 83 finished with value: 16119.606333333331 and parameters: {'math': True, 'count': True, 'group': True, 'cluster_dist': False, 'pca': False, 'target': True, 'drop_last': False}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17054.2799
XGBoost  (3-fold Max): 17930.9265 	 2.089s
LightGBM (3-fold Avg): 16417.2865
LightGBM (3-fold Max): 16754.2994 	 1.973s
CatBoost (3-fold Avg): 14349.1329
CatBoost (3-fold Max): 14767.2994 	 12.502s


[32m[I 2021-09-07 19:48:10,088][0m Trial 84 finished with value: 15940.233099999998 and parameters: {'math': False, 'count': False, 'group': False, 'cluster_dist': False, 'pca': True, 'target': True, 'drop_last': False}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 16897.2044
XGBoost  (3-fold Max): 17443.7883 	 4.731s
LightGBM (3-fold Avg): 16342.6114
LightGBM (3-fold Max): 17016.5727 	 4.492s
CatBoost (3-fold Avg): 14368.4664
CatBoost (3-fold Max): 14811.2953 	 14.308s


[32m[I 2021-09-07 19:48:34,637][0m Trial 85 finished with value: 15869.427399999999 and parameters: {'math': False, 'count': True, 'group': False, 'cluster_dist': True, 'pca': True, 'target': True, 'drop_last': True}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 16916.3419
XGBoost  (3-fold Max): 17908.6549 	 2.197s
LightGBM (3-fold Avg): 16433.6678
LightGBM (3-fold Max): 17112.2073 	 2.073s
CatBoost (3-fold Avg): 14223.7417
CatBoost (3-fold Max): 14808.4507 	 9.929s


[32m[I 2021-09-07 19:48:49,854][0m Trial 86 finished with value: 15857.91713333333 and parameters: {'math': True, 'count': True, 'group': True, 'cluster_dist': False, 'pca': True, 'target': True, 'drop_last': False}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17456.1308
XGBoost  (3-fold Max): 18083.8381 	 2.129s
LightGBM (3-fold Avg): 16587.17
LightGBM (3-fold Max): 16962.1198 	 1.854s
CatBoost (3-fold Avg): 14347.5895
CatBoost (3-fold Max): 14794.2965 	 10.714s


[32m[I 2021-09-07 19:49:05,565][0m Trial 87 finished with value: 16130.296766666666 and parameters: {'math': True, 'count': False, 'group': False, 'cluster_dist': False, 'pca': False, 'target': True, 'drop_last': False}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17508.8002
XGBoost  (3-fold Max): 18435.034 	 4.34s
LightGBM (3-fold Avg): 16556.4326
LightGBM (3-fold Max): 16953.3585 	 4.141s
CatBoost (3-fold Avg): 14419.8642
CatBoost (3-fold Max): 14875.1703 	 12.456s


[32m[I 2021-09-07 19:49:27,516][0m Trial 88 finished with value: 16161.698999999999 and parameters: {'math': True, 'count': True, 'group': False, 'cluster_dist': False, 'pca': False, 'target': True, 'drop_last': True}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17535.8035
XGBoost  (3-fold Max): 17770.6328 	 2.103s
LightGBM (3-fold Avg): 16303.4528
LightGBM (3-fold Max): 16940.4123 	 2.038s
CatBoost (3-fold Avg): 14597.5723
CatBoost (3-fold Max): 15127.9664 	 14.097s


[32m[I 2021-09-07 19:49:46,771][0m Trial 89 finished with value: 16145.609533333334 and parameters: {'math': True, 'count': True, 'group': False, 'cluster_dist': True, 'pca': False, 'target': False, 'drop_last': False}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17200.1828
XGBoost  (3-fold Max): 17864.3198 	 1.786s
LightGBM (3-fold Avg): 16542.7044
LightGBM (3-fold Max): 17393.1229 	 1.626s
CatBoost (3-fold Avg): 14545.244
CatBoost (3-fold Max): 15090.6301 	 10.777s


[32m[I 2021-09-07 19:50:01,980][0m Trial 90 finished with value: 16096.043733333332 and parameters: {'math': True, 'count': True, 'group': True, 'cluster_dist': False, 'pca': False, 'target': False, 'drop_last': False}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17627.3654
XGBoost  (3-fold Max): 18175.3109 	 2.052s
LightGBM (3-fold Avg): 16191.3244
LightGBM (3-fold Max): 16722.4206 	 2.029s
CatBoost (3-fold Avg): 14694.349
CatBoost (3-fold Max): 15056.2126 	 9.194s


[32m[I 2021-09-07 19:50:16,274][0m Trial 91 finished with value: 16171.012933333333 and parameters: {'math': False, 'count': False, 'group': True, 'cluster_dist': True, 'pca': False, 'target': False, 'drop_last': False}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17818.5785
XGBoost  (3-fold Max): 18420.5786 	 1.892s
LightGBM (3-fold Avg): 16481.3275
LightGBM (3-fold Max): 17172.4876 	 1.544s
CatBoost (3-fold Avg): 15152.8277
CatBoost (3-fold Max): 16049.8435 	 10.495s


[32m[I 2021-09-07 19:50:31,227][0m Trial 92 finished with value: 16484.244566666668 and parameters: {'math': False, 'count': False, 'group': False, 'cluster_dist': False, 'pca': False, 'target': False, 'drop_last': False}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 16948.3154
XGBoost  (3-fold Max): 17668.3076 	 2.677s
LightGBM (3-fold Avg): 16120.2164
LightGBM (3-fold Max): 16650.5071 	 2.351s
CatBoost (3-fold Avg): 14121.2045
CatBoost (3-fold Max): 14707.7824 	 10.622s


[32m[I 2021-09-07 19:50:47,896][0m Trial 93 finished with value: 15729.9121 and parameters: {'math': True, 'count': False, 'group': False, 'cluster_dist': True, 'pca': True, 'target': True, 'drop_last': False}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17733.9145
XGBoost  (3-fold Max): 18469.4847 	 2.162s
LightGBM (3-fold Avg): 16060.5895
LightGBM (3-fold Max): 16565.9142 	 2.131s
CatBoost (3-fold Avg): 14632.0682
CatBoost (3-fold Max): 15403.6722 	 9.895s


[32m[I 2021-09-07 19:51:03,104][0m Trial 94 finished with value: 16142.190733333335 and parameters: {'math': True, 'count': True, 'group': False, 'cluster_dist': True, 'pca': True, 'target': False, 'drop_last': False}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17349.1182
XGBoost  (3-fold Max): 17597.0513 	 2.493s
LightGBM (3-fold Avg): 16557.9348
LightGBM (3-fold Max): 17084.3455 	 1.858s
CatBoost (3-fold Avg): 14332.5403
CatBoost (3-fold Max): 14737.3373 	 10.548s


[32m[I 2021-09-07 19:51:19,022][0m Trial 95 finished with value: 16079.864433333334 and parameters: {'math': True, 'count': False, 'group': True, 'cluster_dist': False, 'pca': False, 'target': True, 'drop_last': False}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17417.9476
XGBoost  (3-fold Max): 18195.0552 	 4.294s
LightGBM (3-fold Avg): 16291.0696
LightGBM (3-fold Max): 16626.4596 	 4.105s
CatBoost (3-fold Avg): 14150.8541
CatBoost (3-fold Max): 14641.8787 	 12.767s


[32m[I 2021-09-07 19:51:41,212][0m Trial 96 finished with value: 15953.290433333334 and parameters: {'math': False, 'count': True, 'group': True, 'cluster_dist': False, 'pca': True, 'target': True, 'drop_last': True}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17500.0948
XGBoost  (3-fold Max): 18275.8696 	 2.017s
LightGBM (3-fold Avg): 16675.5946
LightGBM (3-fold Max): 17219.8985 	 1.857s
CatBoost (3-fold Avg): 14699.5154
CatBoost (3-fold Max): 15139.8092 	 9.761s


[32m[I 2021-09-07 19:51:55,873][0m Trial 97 finished with value: 16291.734933333335 and parameters: {'math': False, 'count': True, 'group': False, 'cluster_dist': False, 'pca': False, 'target': True, 'drop_last': False}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17902.8628
XGBoost  (3-fold Max): 18883.7932 	 2.213s
LightGBM (3-fold Avg): 16404.7344
LightGBM (3-fold Max): 16685.1865 	 1.974s
CatBoost (3-fold Avg): 14736.1507
CatBoost (3-fold Max): 15103.5523 	 11.614s


[32m[I 2021-09-07 19:52:12,703][0m Trial 98 finished with value: 16347.915966666667 and parameters: {'math': False, 'count': False, 'group': False, 'cluster_dist': True, 'pca': False, 'target': False, 'drop_last': False}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17570.4149
XGBoost  (3-fold Max): 17865.0856 	 4.617s
LightGBM (3-fold Avg): 16360.2974
LightGBM (3-fold Max): 16826.7624 	 4.217s
CatBoost (3-fold Avg): 14346.9612
CatBoost (3-fold Max): 14818.2268 	 14.541s


[32m[I 2021-09-07 19:52:37,086][0m Trial 99 finished with value: 16092.557833333332 and parameters: {'math': False, 'count': False, 'group': True, 'cluster_dist': True, 'pca': False, 'target': False, 'drop_last': True}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17482.4809
XGBoost  (3-fold Max): 18212.429 	 4.244s
LightGBM (3-fold Avg): 16387.0639
LightGBM (3-fold Max): 17036.3195 	 3.865s
CatBoost (3-fold Avg): 14874.4772
CatBoost (3-fold Max): 15495.2482 	 13.813s


[32m[I 2021-09-07 19:53:00,023][0m Trial 100 finished with value: 16248.007333333335 and parameters: {'math': True, 'count': True, 'group': False, 'cluster_dist': False, 'pca': False, 'target': False, 'drop_last': True}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17124.7725
XGBoost  (3-fold Max): 17886.2197 	 2.549s
LightGBM (3-fold Avg): 16378.1735
LightGBM (3-fold Max): 16857.1011 	 2.315s
CatBoost (3-fold Avg): 14153.5017
CatBoost (3-fold Max): 14594.4042 	 11.647s


[32m[I 2021-09-07 19:53:17,556][0m Trial 101 finished with value: 15885.482566666666 and parameters: {'math': True, 'count': False, 'group': True, 'cluster_dist': True, 'pca': False, 'target': True, 'drop_last': False}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17904.3209
XGBoost  (3-fold Max): 19005.0376 	 1.657s
LightGBM (3-fold Avg): 16662.3771
LightGBM (3-fold Max): 17680.7477 	 1.56s
CatBoost (3-fold Avg): 14713.3128
CatBoost (3-fold Max): 15112.7124 	 9.647s


[32m[I 2021-09-07 19:53:31,443][0m Trial 102 finished with value: 16426.670266666668 and parameters: {'math': False, 'count': False, 'group': True, 'cluster_dist': False, 'pca': False, 'target': False, 'drop_last': False}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17673.5925
XGBoost  (3-fold Max): 18932.0684 	 3.919s
LightGBM (3-fold Avg): 16749.1506
LightGBM (3-fold Max): 17578.4473 	 3.714s
CatBoost (3-fold Avg): 14599.9345
CatBoost (3-fold Max): 14953.3887 	 12.467s


[32m[I 2021-09-07 19:53:52,566][0m Trial 103 finished with value: 16340.892533333332 and parameters: {'math': False, 'count': True, 'group': True, 'cluster_dist': False, 'pca': False, 'target': False, 'drop_last': True}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17072.4692
XGBoost  (3-fold Max): 17672.5719 	 4.617s
LightGBM (3-fold Avg): 16339.0667
LightGBM (3-fold Max): 17204.5608 	 4.446s
CatBoost (3-fold Avg): 14304.6945
CatBoost (3-fold Max): 15045.8853 	 13.01s


[32m[I 2021-09-07 19:54:15,657][0m Trial 104 finished with value: 15905.410133333333 and parameters: {'math': True, 'count': True, 'group': True, 'cluster_dist': True, 'pca': True, 'target': False, 'drop_last': True}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17340.0973
XGBoost  (3-fold Max): 17618.9126 	 4.24s
LightGBM (3-fold Avg): 16405.0558
LightGBM (3-fold Max): 16978.3922 	 3.833s
CatBoost (3-fold Avg): 14960.8278
CatBoost (3-fold Max): 15263.1148 	 11.19s


[32m[I 2021-09-07 19:54:35,934][0m Trial 105 finished with value: 16235.326966666666 and parameters: {'math': True, 'count': False, 'group': False, 'cluster_dist': False, 'pca': False, 'target': False, 'drop_last': True}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17371.4393
XGBoost  (3-fold Max): 18774.8195 	 4.299s
LightGBM (3-fold Avg): 16700.2789
LightGBM (3-fold Max): 17221.5364 	 4.007s
CatBoost (3-fold Avg): 14521.4747
CatBoost (3-fold Max): 14585.7064 	 15.659s


[32m[I 2021-09-07 19:55:00,913][0m Trial 106 finished with value: 16197.730966666668 and parameters: {'math': False, 'count': True, 'group': False, 'cluster_dist': False, 'pca': False, 'target': True, 'drop_last': True}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 18033.8925
XGBoost  (3-fold Max): 19377.9257 	 1.96s
LightGBM (3-fold Avg): 16182.0684
LightGBM (3-fold Max): 16791.8051 	 1.808s
CatBoost (3-fold Avg): 14654.7057
CatBoost (3-fold Max): 15180.7881 	 11.016s


[32m[I 2021-09-07 19:55:16,710][0m Trial 107 finished with value: 16290.222200000002 and parameters: {'math': True, 'count': True, 'group': False, 'cluster_dist': False, 'pca': True, 'target': False, 'drop_last': False}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17188.8119
XGBoost  (3-fold Max): 18250.7931 	 4.928s
LightGBM (3-fold Avg): 16269.2014
LightGBM (3-fold Max): 16839.3336 	 4.689s
CatBoost (3-fold Avg): 14237.8341
CatBoost (3-fold Max): 14877.4647 	 13.443s


[32m[I 2021-09-07 19:55:40,791][0m Trial 108 finished with value: 15898.6158 and parameters: {'math': True, 'count': True, 'group': True, 'cluster_dist': True, 'pca': True, 'target': True, 'drop_last': True}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17883.0034
XGBoost  (3-fold Max): 19346.9066 	 4.122s
LightGBM (3-fold Avg): 16171.8578
LightGBM (3-fold Max): 16770.5064 	 4.0s
CatBoost (3-fold Avg): 14625.8974
CatBoost (3-fold Max): 15279.5412 	 13.413s


[32m[I 2021-09-07 19:56:03,339][0m Trial 109 finished with value: 16226.919533333334 and parameters: {'math': True, 'count': True, 'group': False, 'cluster_dist': False, 'pca': True, 'target': False, 'drop_last': True}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17354.3313
XGBoost  (3-fold Max): 17982.9869 	 2.152s
LightGBM (3-fold Avg): 16209.6819
LightGBM (3-fold Max): 16709.7738 	 2.015s
CatBoost (3-fold Avg): 14733.7256
CatBoost (3-fold Max): 15396.244 	 9.69s


[32m[I 2021-09-07 19:56:18,209][0m Trial 110 finished with value: 16099.246266666667 and parameters: {'math': False, 'count': False, 'group': False, 'cluster_dist': True, 'pca': True, 'target': False, 'drop_last': False}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17587.6517
XGBoost  (3-fold Max): 18774.4732 	 1.83s
LightGBM (3-fold Avg): 16687.6169
LightGBM (3-fold Max): 17603.5464 	 1.572s
CatBoost (3-fold Avg): 14803.0809
CatBoost (3-fold Max): 15240.1072 	 9.555s


[32m[I 2021-09-07 19:56:32,186][0m Trial 111 finished with value: 16359.449833333332 and parameters: {'math': False, 'count': True, 'group': True, 'cluster_dist': False, 'pca': False, 'target': False, 'drop_last': False}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17060.2245
XGBoost  (3-fold Max): 17822.6785 	 4.496s
LightGBM (3-fold Avg): 16514.0729
LightGBM (3-fold Max): 17097.886 	 4.305s
CatBoost (3-fold Avg): 14357.4897
CatBoost (3-fold Max): 14913.7573 	 11.625s


[32m[I 2021-09-07 19:56:53,625][0m Trial 112 finished with value: 15977.262366666664 and parameters: {'math': True, 'count': False, 'group': True, 'cluster_dist': False, 'pca': True, 'target': True, 'drop_last': True}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17238.8075
XGBoost  (3-fold Max): 17584.7709 	 1.852s
LightGBM (3-fold Avg): 16471.4805
LightGBM (3-fold Max): 17043.6677 	 1.605s
CatBoost (3-fold Avg): 14850.5287
CatBoost (3-fold Max): 15603.5834 	 11.483s


[32m[I 2021-09-07 19:57:09,588][0m Trial 113 finished with value: 16186.938900000001 and parameters: {'math': True, 'count': False, 'group': False, 'cluster_dist': False, 'pca': False, 'target': False, 'drop_last': False}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 16514.6291
XGBoost  (3-fold Max): 17376.607 	 4.633s
LightGBM (3-fold Avg): 16379.5343
LightGBM (3-fold Max): 17039.2585 	 4.257s
CatBoost (3-fold Avg): 14412.8662
CatBoost (3-fold Max): 14912.5064 	 11.216s


[32m[I 2021-09-07 19:57:30,712][0m Trial 114 finished with value: 15769.009866666665 and parameters: {'math': True, 'count': True, 'group': False, 'cluster_dist': False, 'pca': True, 'target': True, 'drop_last': True}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17604.0204
XGBoost  (3-fold Max): 17668.1628 	 2.285s
LightGBM (3-fold Avg): 16615.9779
LightGBM (3-fold Max): 17568.7743 	 2.081s
CatBoost (3-fold Avg): 14192.2149
CatBoost (3-fold Max): 14680.7106 	 12.231s


[32m[I 2021-09-07 19:57:48,324][0m Trial 115 finished with value: 16137.404400000001 and parameters: {'math': True, 'count': False, 'group': True, 'cluster_dist': True, 'pca': False, 'target': False, 'drop_last': False}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17469.7368
XGBoost  (3-fold Max): 17951.9736 	 4.738s
LightGBM (3-fold Avg): 16152.6052
LightGBM (3-fold Max): 16762.1861 	 4.274s
CatBoost (3-fold Avg): 14354.2666
CatBoost (3-fold Max): 14909.4569 	 13.492s


[32m[I 2021-09-07 19:58:11,855][0m Trial 116 finished with value: 15992.202866666667 and parameters: {'math': False, 'count': True, 'group': True, 'cluster_dist': True, 'pca': True, 'target': False, 'drop_last': True}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17660.7968
XGBoost  (3-fold Max): 18394.9077 	 2.35s
LightGBM (3-fold Avg): 16195.3947
LightGBM (3-fold Max): 16829.5358 	 2.081s
CatBoost (3-fold Avg): 14597.918
CatBoost (3-fold Max): 15235.7651 	 10.567s


[32m[I 2021-09-07 19:58:27,869][0m Trial 117 finished with value: 16151.369833333332 and parameters: {'math': False, 'count': True, 'group': True, 'cluster_dist': True, 'pca': True, 'target': False, 'drop_last': False}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17253.264
XGBoost  (3-fold Max): 17716.7425 	 4.357s
LightGBM (3-fold Avg): 16437.861
LightGBM (3-fold Max): 16909.552 	 4.165s
CatBoost (3-fold Avg): 14489.5982
CatBoost (3-fold Max): 14833.2755 	 12.309s


[32m[I 2021-09-07 19:58:49,725][0m Trial 118 finished with value: 16060.241066666667 and parameters: {'math': True, 'count': False, 'group': True, 'cluster_dist': False, 'pca': False, 'target': True, 'drop_last': True}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 16703.2036
XGBoost  (3-fold Max): 17262.8041 	 4.23s
LightGBM (3-fold Avg): 16267.467
LightGBM (3-fold Max): 16585.465 	 4.089s
CatBoost (3-fold Avg): 14403.2365
CatBoost (3-fold Max): 14830.3984 	 11.727s


[32m[I 2021-09-07 19:59:10,781][0m Trial 119 finished with value: 15791.302366666665 and parameters: {'math': False, 'count': False, 'group': False, 'cluster_dist': False, 'pca': True, 'target': True, 'drop_last': True}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17269.8727
XGBoost  (3-fold Max): 17584.7709 	 1.89s
LightGBM (3-fold Avg): 16408.5278
LightGBM (3-fold Max): 17043.6677 	 1.624s
CatBoost (3-fold Avg): 14811.2806
CatBoost (3-fold Max): 15448.6134 	 11.55s


[32m[I 2021-09-07 19:59:26,858][0m Trial 120 finished with value: 16163.227033333334 and parameters: {'math': True, 'count': True, 'group': False, 'cluster_dist': False, 'pca': False, 'target': False, 'drop_last': False}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17412.1894
XGBoost  (3-fold Max): 18573.3236 	 4.183s
LightGBM (3-fold Avg): 16606.2921
LightGBM (3-fold Max): 17143.5554 	 4.011s
CatBoost (3-fold Avg): 14541.8018
CatBoost (3-fold Max): 14689.075 	 12.293s


[32m[I 2021-09-07 19:59:48,361][0m Trial 121 finished with value: 16186.761099999998 and parameters: {'math': False, 'count': True, 'group': True, 'cluster_dist': False, 'pca': False, 'target': True, 'drop_last': True}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17002.8327
XGBoost  (3-fold Max): 17812.5624 	 4.748s
LightGBM (3-fold Avg): 16120.9587
LightGBM (3-fold Max): 16468.6503 	 4.476s
CatBoost (3-fold Avg): 14392.1254
CatBoost (3-fold Max): 14906.8061 	 11.954s


[32m[I 2021-09-07 20:00:10,558][0m Trial 122 finished with value: 15838.638933333335 and parameters: {'math': False, 'count': False, 'group': False, 'cluster_dist': True, 'pca': True, 'target': True, 'drop_last': True}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17344.2631
XGBoost  (3-fold Max): 18383.0898 	 2.276s
LightGBM (3-fold Avg): 16571.1219
LightGBM (3-fold Max): 17238.2061 	 2.065s
CatBoost (3-fold Avg): 14345.1069
CatBoost (3-fold Max): 15100.1826 	 10.787s


[32m[I 2021-09-07 20:00:26,700][0m Trial 123 finished with value: 16086.830633333331 and parameters: {'math': True, 'count': False, 'group': True, 'cluster_dist': True, 'pca': True, 'target': False, 'drop_last': False}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17165.7622
XGBoost  (3-fold Max): 18124.6084 	 2.128s
LightGBM (3-fold Avg): 16659.4611
LightGBM (3-fold Max): 17279.2576 	 1.81s
CatBoost (3-fold Avg): 14479.8172
CatBoost (3-fold Max): 15107.9904 	 10.405s


[32m[I 2021-09-07 20:00:42,072][0m Trial 124 finished with value: 16101.680166666665 and parameters: {'math': True, 'count': False, 'group': True, 'cluster_dist': False, 'pca': True, 'target': False, 'drop_last': False}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 16951.8199
XGBoost  (3-fold Max): 18002.5969 	 2.042s
LightGBM (3-fold Avg): 16080.4732
LightGBM (3-fold Max): 16718.3609 	 1.767s
CatBoost (3-fold Avg): 14821.4812
CatBoost (3-fold Max): 15516.0122 	 9.473s


[32m[I 2021-09-07 20:00:56,387][0m Trial 125 finished with value: 15951.2581 and parameters: {'math': False, 'count': True, 'group': False, 'cluster_dist': False, 'pca': True, 'target': False, 'drop_last': False}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 17037.394
XGBoost  (3-fold Max): 17500.7787 	 2.543s
LightGBM (3-fold Avg): 16584.762
LightGBM (3-fold Max): 17185.5498 	 2.298s
CatBoost (3-fold Avg): 14066.064
CatBoost (3-fold Max): 14719.2971 	 11.044s


[32m[I 2021-09-07 20:01:13,291][0m Trial 126 finished with value: 15896.073333333334 and parameters: {'math': True, 'count': True, 'group': True, 'cluster_dist': True, 'pca': False, 'target': True, 'drop_last': False}. Best is trial 72 with value: 15708.617566666668.[0m


XGBoost  (3-fold Avg): 16885.8223
XGBoost  (3-fold Max): 17680.4788 	 2.206s
LightGBM (3-fold Avg): 16455.192
LightGBM (3-fold Max): 16680.2114 	 2.065s
CatBoost (3-fold Avg): 14348.006
CatBoost (3-fold Max): 14823.4012 	 11.12s


[32m[I 2021-09-07 20:01:29,706][0m Trial 127 finished with value: 15896.3401 and parameters: {'math': False, 'count': True, 'group': False, 'cluster_dist': False, 'pca': True, 'target': True, 'drop_last': False}. Best is trial 72 with value: 15708.617566666668.[0m


In [22]:
# Report the parameter combination of the study's best-scoring trial
best_trial_options = study.best_params
print("Best Options:", best_trial_options)

Best Options: {'math': True, 'count': True, 'group': False, 'cluster_dist': True, 'pca': True, 'target': True, 'drop_last': False}


In [24]:
# Visualize the study's trials with Optuna's parallel-coordinate plot.
# Kept as the bare last expression of the cell so the notebook renders the result inline.
plot_parallel_coordinate(study)