environment: skopt

# Scikit-Optimize

Probably you'll have to reload the notebook for the changes being in place. Scikit-Optimize works only with sklearn 0.23.2<br>
works with env skopt

In [1]:
import skopt
import importlib
import sklearn
display(skopt.__version__)
importlib.reload(sklearn)
display(sklearn.__version__)

# Since BayesSearchCV can not deal with missing values, we have to impute them before:

from sklearn.impute import SimpleImputer
from sklearn.pipeline import make_pipeline
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
import numpy as np
import pandas as pd


from sklearn.model_selection import train_test_split
train = pd.read_csv('../data/train.csv', sep=",")
test = pd.read_csv('../data/test.csv')

import sklearn
y = train['SalePrice']
X = train.drop('SalePrice', axis=1)
categorical = [var for var in X.columns if X[var].dtype=='O']
numerical = [var for var in X.columns if X[var].dtype!='O']
X[categorical] = X[categorical].fillna('None')

# auto-sklearn can not deal with categorical variables
X= pd.concat([pd.get_dummies(X[categorical], dummy_na=True), X[numerical]], axis=1)

y = np.log1p(y)
X_train, X_test, y_train, y_test = \
    train_test_split(X, y, random_state=42, test_size=0.2)

'0.9.0'

'1.0.2'

In [4]:
numeric_transformer = SimpleImputer(missing_values=np.nan, strategy='mean')

X_train.loc[:, numerical] = numeric_transformer.fit_transform(X_train.loc[:, numerical])
X_test.loc[:, numerical] = numeric_transformer.fit_transform(X_test.loc[:, numerical])

from skopt.space import Real, Categorical, Integer
from skopt import BayesSearchCV
from sklearn.ensemble import GradientBoostingRegressor

regressor = BayesSearchCV(
    estimator = GradientBoostingRegressor(),
      search_spaces ={
         'learning_rate': Real(0.01,0.3),
         'loss': Categorical(['squared_error']),
         'max_depth': Integer(3,6),
         'n_estimators': Integer(400, 1000),
         'subsample': Real(0.6, 1.0),
         'max_features': Real(0.6, 1.0) 
      },
    n_iter=64,
    random_state=0,
    verbose=1, iid=True,
    cv=5, n_jobs=-1
  )
regressor.fit(X_train,y_train)



Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fi

BayesSearchCV(cv=5, estimator=GradientBoostingRegressor(), iid=True, n_iter=64,
              n_jobs=-1, random_state=0,
              search_spaces={'learning_rate': Real(low=0.01, high=0.3, prior='uniform', transform='normalize'),
                             'loss': Categorical(categories=('squared_error',), prior=None),
                             'max_depth': Integer(low=3, high=6, prior='uniform', transform='normalize'),
                             'max_features': Real(low=0.6, high=1.0, prior='uniform', transform='normalize'),
                             'n_estimators': Integer(low=400, high=1000, prior='uniform', transform='normalize'),
                             'subsample': Real(low=0.6, high=1.0, prior='uniform', transform='normalize')},
              verbose=1)

In [5]:
predictions = regressor.predict(X_test)
print("mean-squared-error:", sklearn.metrics.mean_squared_error(y_test, predictions, squared=False))

mean-squared-error: 0.133699999611186


# OPTUNA

In [None]:
!pip install optuna

In [6]:
import lightgbm
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import numpy as np
import optuna
from sklearn.model_selection import cross_val_score

train = pd.read_csv('../data/train.csv', sep=",")
categorical = [var for var in train.columns if train[var].dtype=='O']

for cat_feat in categorical:
    train[cat_feat] = train[cat_feat].astype('category')
    
y = train['SalePrice']
X = train.drop('SalePrice', axis=1)
y = np.log1p(y)
X_train, X_test, y_train, y_test = \
        train_test_split(X, y, random_state=42, test_size=0.2)

def objective(trial):

    x_tr, x_te, y_tr, y_te = \
        train_test_split(X_train, y_train, random_state=42, test_size=0.2)
    
    model = lightgbm.LGBMRegressor()

    param = {
        "objective": "regression",
        "metric": "'neg_mean_squared_error'",
        "verbosity": -1,
        "boosting_type": "gbdt",
        "learning_rate": trial.suggest_float("learning_rate", 1e-4, 0.1, log=True),
        "reg_lambda": trial.suggest_float("reg_lambda", 1e-8, 1.0, log=True),
        "n_estimators" : trial.suggest_int("n_estimators", 600, 1200),
        "num_leaves": trial.suggest_int("num_leaves", 4, 30),
        "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
        "subsample_freq": trial.suggest_int("subsample_freq",0, 8),
        "colsample_bytree": trial.suggest_float("colsample_bytree",0.5,1.0),
        "subsample": trial.suggest_float("subsample", 0.5, 1.0)
    }
    model.set_params(**param)
    clf = cross_val_score(
    model,
         x_tr, y_tr, scoring = 'neg_mean_squared_error'
    )

    return np.mean(np.sqrt(-clf))

study = optuna.create_study()
study.optimize(objective , n_trials =70)
trial = study.best_trial
model = lightgbm.LGBMRegressor()
model.set_params(**trial.params)
for k,v in trial.params.items():
    print(f'{k}: {v}')
model.fit(X_train, y_train)
print(f'result on hold-out set after HPO: {np.sqrt(mean_squared_error(y_test, model.predict(X_test)))}')

[I 2024-04-06 14:21:12,904] A new study created in memory with name: no-name-32b65a43-2bbf-45a5-9637-9fd3fabc9e59
[I 2024-04-06 14:21:14,228] Trial 0 finished with value: 0.13180292881418804 and parameters: {'learning_rate': 0.040634652600699865, 'reg_lambda': 1.7644120835714382e-06, 'n_estimators': 980, 'num_leaves': 6, 'bagging_freq': 6, 'min_child_samples': 61, 'subsample_freq': 2, 'colsample_bytree': 0.502318225885592, 'subsample': 0.825573308608514}. Best is trial 0 with value: 0.13180292881418804.
[I 2024-04-06 14:21:17,056] Trial 1 finished with value: 0.13298950862484893 and parameters: {'learning_rate': 0.0038482042196899555, 'reg_lambda': 5.917825012149067e-05, 'n_estimators': 998, 'num_leaves': 25, 'bagging_freq': 7, 'min_child_samples': 18, 'subsample_freq': 7, 'colsample_bytree': 0.5891849855217228, 'subsample': 0.7137351350364128}. Best is trial 0 with value: 0.13180292881418804.
[I 2024-04-06 14:21:18,760] Trial 2 finished with value: 0.15753499409228872 and parameters: 

[I 2024-04-06 14:21:47,448] Trial 21 finished with value: 0.12775640870844837 and parameters: {'learning_rate': 0.014764575885820641, 'reg_lambda': 0.0019849242115293976, 'n_estimators': 1072, 'num_leaves': 8, 'bagging_freq': 2, 'min_child_samples': 27, 'subsample_freq': 3, 'colsample_bytree': 0.9931936715253107, 'subsample': 0.8980935494872238}. Best is trial 10 with value: 0.1270305153288948.
[I 2024-04-06 14:21:48,782] Trial 22 finished with value: 0.12950010888761415 and parameters: {'learning_rate': 0.017446301509077975, 'reg_lambda': 0.05580486731560558, 'n_estimators': 1038, 'num_leaves': 6, 'bagging_freq': 1, 'min_child_samples': 20, 'subsample_freq': 5, 'colsample_bytree': 0.994330239395611, 'subsample': 0.9989670494935069}. Best is trial 10 with value: 0.1270305153288948.
[I 2024-04-06 14:21:50,794] Trial 23 finished with value: 0.1283753342620691 and parameters: {'learning_rate': 0.00907148698630641, 'reg_lambda': 0.002791819164872382, 'n_estimators': 1181, 'num_leaves': 7, 

[I 2024-04-06 14:22:27,313] Trial 42 finished with value: 0.1283423803955424 and parameters: {'learning_rate': 0.051760177520479594, 'reg_lambda': 7.276422834650084e-05, 'n_estimators': 1140, 'num_leaves': 5, 'bagging_freq': 3, 'min_child_samples': 22, 'subsample_freq': 0, 'colsample_bytree': 0.6565289041889135, 'subsample': 0.8760972646675143}. Best is trial 25 with value: 0.12657976796036413.
[I 2024-04-06 14:22:31,376] Trial 43 finished with value: 0.12736856760205137 and parameters: {'learning_rate': 0.011718808870426289, 'reg_lambda': 2.7096775136870017e-05, 'n_estimators': 1166, 'num_leaves': 22, 'bagging_freq': 2, 'min_child_samples': 15, 'subsample_freq': 2, 'colsample_bytree': 0.508799801224848, 'subsample': 0.9578331539987619}. Best is trial 25 with value: 0.12657976796036413.
[I 2024-04-06 14:22:32,837] Trial 44 finished with value: 0.1314591829577632 and parameters: {'learning_rate': 0.023106637556503832, 'reg_lambda': 3.2221944286101394e-07, 'n_estimators': 1130, 'num_leav

[I 2024-04-06 14:23:17,038] Trial 63 finished with value: 0.12672237553645865 and parameters: {'learning_rate': 0.016679872191643057, 'reg_lambda': 1.0832115276618648e-06, 'n_estimators': 1035, 'num_leaves': 6, 'bagging_freq': 3, 'min_child_samples': 8, 'subsample_freq': 1, 'colsample_bytree': 0.5945358680550409, 'subsample': 0.9442747254709616}. Best is trial 25 with value: 0.12657976796036413.
[I 2024-04-06 14:23:18,603] Trial 64 finished with value: 0.12947074949187837 and parameters: {'learning_rate': 0.007632647380323216, 'reg_lambda': 2.8180792422583147e-07, 'n_estimators': 1023, 'num_leaves': 6, 'bagging_freq': 3, 'min_child_samples': 5, 'subsample_freq': 1, 'colsample_bytree': 0.5940486075189773, 'subsample': 0.9424282021309835}. Best is trial 25 with value: 0.12657976796036413.
[I 2024-04-06 14:23:20,373] Trial 65 finished with value: 0.12755957666170206 and parameters: {'learning_rate': 0.013517391924224553, 'reg_lambda': 8.098849736205568e-08, 'n_estimators': 1116, 'num_leav

learning_rate: 0.052226551130774554
reg_lambda: 3.8990531711968316e-05
n_estimators: 1106
num_leaves: 4
bagging_freq: 3
min_child_samples: 12
subsample_freq: 1
colsample_bytree: 0.8282327226851848
subsample: 0.9566485616199342
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000258 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3407
[LightGBM] [Info] Number of data points in the train set: 1168, number of used features: 77
[LightGBM] [Info] Start training from score 12.030658
result on hold-out set after HPO: 0.12973232038555924
