In [None]:
#!pip install optuna

In [1]:
import pandas as pd
from sklearn.datasets import fetch_california_housing
import numpy as np

# Machine learning models that we'll use
import xgboost as xgb
import lightgbm as lgbm
#from sklearn.linear_model import Ridge
import catboost as cat
# optuna
import warnings
warnings.filterwarnings(action='ignore', category=FutureWarning)

  from pandas import MultiIndex, Int64Index
  _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)
  _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)
  _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)


In [2]:
# We set as_frame parameter to True and access the return object's "frame"
# attribute to get the dataset as pandas dataframe.

df = fetch_california_housing(as_frame=True)["frame"]
print(df.shape)
df.head()

(20640, 9)


Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedHouseVal
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422


In [3]:
from sklearn.model_selection import train_test_split

trainX, testX, trainy, testy = train_test_split(df.drop(columns=['MedHouseVal']), df.MedHouseVal, test_size=0.15, shuffle=True, random_state=42)

In [4]:
from sklearn.metrics import mean_squared_error

# instantiating the model
cat_model = cat.CatBoostRegressor(verbose=False)
cat_model.fit(trainX, trainy)
y_pred_cat = cat_model.predict(testX)

xgb_model = xgb.XGBRegressor()
xgb_model.fit(trainX, trainy, verbose=False)
y_pred_xgb = xgb_model.predict(testX)

lgbm_model = lgbm.LGBMRegressor()
lgbm_model.fit(trainX, trainy, verbose=-1)
y_pred_lgbm = lgbm_model.predict(testX)

# combining predictions by taking simple average using numpy
y_pred_final = np.mean([y_pred_cat, y_pred_xgb, y_pred_lgbm], axis=0)

# let's calculate mse
mse = mean_squared_error(testy, y_pred_lgbm)

print(f"Simple Average Ensemble's MSE: {mse}") 
#Simple Average Ensemble's MSE: 0.20581131648521198



Simple Average Ensemble's MSE: 0.21066428139566207


In [9]:
np.zeros(5).mean()

0.0

In [10]:
from sklearn.model_selection import KFold
import optuna

def objective(trial):
  STEP_SIZE = 1

  weights = []
  all_models_predictions = []

  # we'll use a variable for setting upper limit for suggested value
  # since we intend to update it after each weight suggestion
  upper_limit = 100

  w_cat = trial.suggest_int("w_cat", 0, upper_limit, step=STEP_SIZE)
  weights.append(w_cat)

  # Update upper limit to 100 - all the previous weights combined, which in this case is just w_ridge
  # WHY? well because we want to keep our sum of all weights equal to 100
  # and this is one way of ensuring that!
  upper_limit -= sum(weights)
  upper_limit = upper_limit

  w_xgb = trial.suggest_int("w_xgb", 0, upper_limit, step=STEP_SIZE)
  weights.append(w_xgb)

  # for the final weight we won't use optuna, rather we'll manually set it equal
  # to whatever value remains after subtracting the sum of suggested weight values from 100
  # This will also make sure that the sum of all weights remains equal to 100.
  w_lgbm = 100 - sum(weights)
  weights.append(w_lgbm)

  # Just as a sanity check, we'll check that the sum of all weights is equal to 100
  weights_sum = sum(weights)

  if weights_sum != 100:
    raise Exception(f"Weights sum must be equal to 100. Instead {weights_sum} was encountered!")
  
  # We'll use the default parameter values for all our models
  cat_model = cat.CatBoostRegressor()
  cat_model.fit(trainX, trainy)
  y_pred_cat = cat_model.predict(testX)
  all_models_predictions.append(y_pred_cat)

  xgb_model = xgb.XGBRegressor()
  xgb_model.fit(trainX, trainy)
  y_pred_xgb = xgb_model.predict(testX)
  all_models_predictions.append(y_pred_xgb)

  lgbm_model = lgbm.LGBMRegressor()
  lgbm_model.fit(trainX, trainy, verbose=-1)
  y_pred_lgbm = lgbm_model.predict(testX)
  all_models_predictions.append(y_pred_lgbm)

  # let's take the weighted average of the predictions using numpy
  y_pred_final = np.average(all_models_predictions, weights=weights, axis=0)
  # computing our metric i.e. MSE
  mse = mean_squared_error(testy, y_pred_final)

  return mse

In [11]:
study = optuna.create_study(study_name="optimizing weights", direction="minimize")
study.optimize(objective, n_trials=20)
print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.params)

[I 2023-08-02 01:26:10,666] A new study created in memory with name: optimizing weights


Learning rate set to 0.064991
0:	learn: 1.1134528	total: 26.9ms	remaining: 26.9s
1:	learn: 1.0764030	total: 55ms	remaining: 27.5s
2:	learn: 1.0409692	total: 74.5ms	remaining: 24.8s
3:	learn: 1.0083718	total: 85ms	remaining: 21.2s


4:	learn: 0.9788923	total: 192ms	remaining: 38.3s
5:	learn: 0.9515839	total: 215ms	remaining: 35.6s
6:	learn: 0.9247004	total: 221ms	remaining: 31.4s
7:	learn: 0.9001281	total: 227ms	remaining: 28.2s
8:	learn: 0.8778985	total: 236ms	remaining: 26s
9:	learn: 0.8559244	total: 246ms	remaining: 24.4s
10:	learn: 0.8370810	total: 276ms	remaining: 24.8s
11:	learn: 0.8193220	total: 283ms	remaining: 23.3s
12:	learn: 0.8039389	total: 304ms	remaining: 23.1s
13:	learn: 0.7893027	total: 349ms	remaining: 24.5s
14:	learn: 0.7759102	total: 361ms	remaining: 23.7s
15:	learn: 0.7622046	total: 374ms	remaining: 23s
16:	learn: 0.7511464	total: 389ms	remaining: 22.5s
17:	learn: 0.7402253	total: 404ms	remaining: 22s
18:	learn: 0.7304534	total: 415ms	remaining: 21.4s
19:	learn: 0.7192693	total: 427ms	remaining: 20.9s
20:	learn: 0.7104842	total: 445ms	remaining: 20.8s
21:	learn: 0.7020815	total: 455ms	remaining: 20.2s
22:	learn: 0.6950149	total: 475ms	remaining: 20.2s
23:	learn: 0.6869116	total: 495ms	remaining

[I 2023-08-02 01:26:25,624] Trial 0 finished with value: 0.19715595834169064 and parameters: {'w_cat': 75, 'w_xgb': 10}. Best is trial 0 with value: 0.19715595834169064.


Learning rate set to 0.064991
0:	learn: 1.1134528	total: 11.7ms	remaining: 11.7s
1:	learn: 1.0764030	total: 16.9ms	remaining: 8.43s
2:	learn: 1.0409692	total: 27.7ms	remaining: 9.2s
3:	learn: 1.0083718	total: 39.6ms	remaining: 9.87s
4:	learn: 0.9788923	total: 48.1ms	remaining: 9.58s
5:	learn: 0.9515839	total: 52.8ms	remaining: 8.75s
6:	learn: 0.9247004	total: 61.1ms	remaining: 8.67s
7:	learn: 0.9001281	total: 68.4ms	remaining: 8.49s
8:	learn: 0.8778985	total: 74.6ms	remaining: 8.21s
9:	learn: 0.8559244	total: 83.8ms	remaining: 8.29s
10:	learn: 0.8370810	total: 88.7ms	remaining: 7.97s
11:	learn: 0.8193220	total: 97.4ms	remaining: 8.02s
12:	learn: 0.8039389	total: 103ms	remaining: 7.84s
13:	learn: 0.7893027	total: 108ms	remaining: 7.61s
14:	learn: 0.7759102	total: 113ms	remaining: 7.42s
15:	learn: 0.7622046	total: 118ms	remaining: 7.24s
16:	learn: 0.7511464	total: 122ms	remaining: 7.07s
17:	learn: 0.7402253	total: 127ms	remaining: 6.95s
18:	learn: 0.7304534	total: 133ms	remaining: 6.85s


[I 2023-08-02 01:26:40,681] Trial 1 finished with value: 0.20181813164108237 and parameters: {'w_cat': 13, 'w_xgb': 22}. Best is trial 0 with value: 0.19715595834169064.


Learning rate set to 0.064991
0:	learn: 1.1134528	total: 11.9ms	remaining: 11.9s
1:	learn: 1.0764030	total: 22.8ms	remaining: 11.4s
2:	learn: 1.0409692	total: 30ms	remaining: 9.96s
3:	learn: 1.0083718	total: 36.9ms	remaining: 9.19s
4:	learn: 0.9788923	total: 41.9ms	remaining: 8.34s
5:	learn: 0.9515839	total: 47.2ms	remaining: 7.81s
6:	learn: 0.9247004	total: 51.5ms	remaining: 7.31s
7:	learn: 0.9001281	total: 57ms	remaining: 7.07s
8:	learn: 0.8778985	total: 62.9ms	remaining: 6.92s
9:	learn: 0.8559244	total: 67.6ms	remaining: 6.69s
10:	learn: 0.8370810	total: 73.6ms	remaining: 6.62s
11:	learn: 0.8193220	total: 82.8ms	remaining: 6.81s
12:	learn: 0.8039389	total: 89ms	remaining: 6.76s
13:	learn: 0.7893027	total: 93.7ms	remaining: 6.6s
14:	learn: 0.7759102	total: 98.7ms	remaining: 6.48s
15:	learn: 0.7622046	total: 104ms	remaining: 6.4s
16:	learn: 0.7511464	total: 109ms	remaining: 6.29s
17:	learn: 0.7402253	total: 113ms	remaining: 6.19s
18:	learn: 0.7304534	total: 119ms	remaining: 6.14s
19:	

[I 2023-08-02 01:26:56,726] Trial 2 finished with value: 0.20598280864692864 and parameters: {'w_cat': 15, 'w_xgb': 0}. Best is trial 0 with value: 0.19715595834169064.


Learning rate set to 0.064991
0:	learn: 1.1134528	total: 6.99ms	remaining: 6.99s
1:	learn: 1.0764030	total: 12.8ms	remaining: 6.39s
2:	learn: 1.0409692	total: 18.7ms	remaining: 6.2s
3:	learn: 1.0083718	total: 24.2ms	remaining: 6.02s
4:	learn: 0.9788923	total: 28.9ms	remaining: 5.76s
5:	learn: 0.9515839	total: 34.6ms	remaining: 5.74s
6:	learn: 0.9247004	total: 40.3ms	remaining: 5.71s
7:	learn: 0.9001281	total: 45.1ms	remaining: 5.59s
8:	learn: 0.8778985	total: 51.2ms	remaining: 5.64s
9:	learn: 0.8559244	total: 58.8ms	remaining: 5.82s
10:	learn: 0.8370810	total: 67.7ms	remaining: 6.09s
11:	learn: 0.8193220	total: 75.8ms	remaining: 6.24s
12:	learn: 0.8039389	total: 93ms	remaining: 7.06s
13:	learn: 0.7893027	total: 104ms	remaining: 7.34s
14:	learn: 0.7759102	total: 110ms	remaining: 7.21s
15:	learn: 0.7622046	total: 115ms	remaining: 7.09s
16:	learn: 0.7511464	total: 122ms	remaining: 7.03s
17:	learn: 0.7402253	total: 127ms	remaining: 6.94s
18:	learn: 0.7304534	total: 132ms	remaining: 6.82s
1

[I 2023-08-02 01:27:10,054] Trial 3 finished with value: 0.20667093800721376 and parameters: {'w_cat': 25, 'w_xgb': 74}. Best is trial 0 with value: 0.19715595834169064.


Learning rate set to 0.064991
0:	learn: 1.1134528	total: 7.16ms	remaining: 7.16s
1:	learn: 1.0764030	total: 13.6ms	remaining: 6.78s
2:	learn: 1.0409692	total: 18.8ms	remaining: 6.26s
3:	learn: 1.0083718	total: 23.7ms	remaining: 5.9s
4:	learn: 0.9788923	total: 29.5ms	remaining: 5.87s
5:	learn: 0.9515839	total: 35ms	remaining: 5.8s
6:	learn: 0.9247004	total: 40ms	remaining: 5.67s
7:	learn: 0.9001281	total: 46.2ms	remaining: 5.72s
8:	learn: 0.8778985	total: 51.7ms	remaining: 5.69s
9:	learn: 0.8559244	total: 57.3ms	remaining: 5.67s
10:	learn: 0.8370810	total: 62.3ms	remaining: 5.6s
11:	learn: 0.8193220	total: 68.1ms	remaining: 5.61s
12:	learn: 0.8039389	total: 73.5ms	remaining: 5.58s
13:	learn: 0.7893027	total: 78.1ms	remaining: 5.5s
14:	learn: 0.7759102	total: 86.5ms	remaining: 5.68s
15:	learn: 0.7622046	total: 92.3ms	remaining: 5.68s
16:	learn: 0.7511464	total: 97.6ms	remaining: 5.64s
17:	learn: 0.7402253	total: 103ms	remaining: 5.64s
18:	learn: 0.7304534	total: 108ms	remaining: 5.6s
19:

[I 2023-08-02 01:27:19,778] Trial 4 finished with value: 0.1970082649223974 and parameters: {'w_cat': 76, 'w_xgb': 17}. Best is trial 4 with value: 0.1970082649223974.


Learning rate set to 0.064991
0:	learn: 1.1134528	total: 8.14ms	remaining: 8.13s
1:	learn: 1.0764030	total: 14.1ms	remaining: 7.02s
2:	learn: 1.0409692	total: 19ms	remaining: 6.33s
3:	learn: 1.0083718	total: 24.5ms	remaining: 6.1s
4:	learn: 0.9788923	total: 29.7ms	remaining: 5.91s
5:	learn: 0.9515839	total: 34.5ms	remaining: 5.71s
6:	learn: 0.9247004	total: 40.1ms	remaining: 5.69s
7:	learn: 0.9001281	total: 45.6ms	remaining: 5.65s
8:	learn: 0.8778985	total: 50.4ms	remaining: 5.55s
9:	learn: 0.8559244	total: 57.9ms	remaining: 5.73s
10:	learn: 0.8370810	total: 64.7ms	remaining: 5.81s
11:	learn: 0.8193220	total: 70.2ms	remaining: 5.78s
12:	learn: 0.8039389	total: 74.4ms	remaining: 5.65s
13:	learn: 0.7893027	total: 81.3ms	remaining: 5.72s
14:	learn: 0.7759102	total: 86.6ms	remaining: 5.68s
15:	learn: 0.7622046	total: 91.4ms	remaining: 5.62s
16:	learn: 0.7511464	total: 95.9ms	remaining: 5.55s
17:	learn: 0.7402253	total: 101ms	remaining: 5.52s
18:	learn: 0.7304534	total: 106ms	remaining: 5.4

[I 2023-08-02 01:27:28,035] Trial 5 finished with value: 0.2027072661482089 and parameters: {'w_cat': 5, 'w_xgb': 41}. Best is trial 4 with value: 0.1970082649223974.


Learning rate set to 0.064991
0:	learn: 1.1134528	total: 7.39ms	remaining: 7.39s
1:	learn: 1.0764030	total: 12.7ms	remaining: 6.34s
2:	learn: 1.0409692	total: 17.4ms	remaining: 5.78s
3:	learn: 1.0083718	total: 22.3ms	remaining: 5.55s
4:	learn: 0.9788923	total: 27.4ms	remaining: 5.46s
5:	learn: 0.9515839	total: 32.2ms	remaining: 5.33s
6:	learn: 0.9247004	total: 36.9ms	remaining: 5.23s
7:	learn: 0.9001281	total: 41.9ms	remaining: 5.19s
8:	learn: 0.8778985	total: 46.5ms	remaining: 5.12s
9:	learn: 0.8559244	total: 51.8ms	remaining: 5.13s
10:	learn: 0.8370810	total: 56.5ms	remaining: 5.08s
11:	learn: 0.8193220	total: 61.5ms	remaining: 5.07s
12:	learn: 0.8039389	total: 66.8ms	remaining: 5.07s
13:	learn: 0.7893027	total: 71.5ms	remaining: 5.03s
14:	learn: 0.7759102	total: 76.5ms	remaining: 5.02s
15:	learn: 0.7622046	total: 82.5ms	remaining: 5.07s
16:	learn: 0.7511464	total: 86.8ms	remaining: 5.02s
17:	learn: 0.7402253	total: 91.3ms	remaining: 4.98s
18:	learn: 0.7304534	total: 97.1ms	remaining

[I 2023-08-02 01:27:35,846] Trial 6 finished with value: 0.2036303230739042 and parameters: {'w_cat': 27, 'w_xgb': 65}. Best is trial 4 with value: 0.1970082649223974.


Learning rate set to 0.064991
0:	learn: 1.1134528	total: 6.63ms	remaining: 6.62s
1:	learn: 1.0764030	total: 13.4ms	remaining: 6.67s
2:	learn: 1.0409692	total: 18.9ms	remaining: 6.27s
3:	learn: 1.0083718	total: 23.5ms	remaining: 5.86s
4:	learn: 0.9788923	total: 28.6ms	remaining: 5.7s
5:	learn: 0.9515839	total: 33.7ms	remaining: 5.58s
6:	learn: 0.9247004	total: 38.1ms	remaining: 5.41s
7:	learn: 0.9001281	total: 43.4ms	remaining: 5.38s
8:	learn: 0.8778985	total: 48.3ms	remaining: 5.31s
9:	learn: 0.8559244	total: 52.7ms	remaining: 5.21s
10:	learn: 0.8370810	total: 57.9ms	remaining: 5.21s
11:	learn: 0.8193220	total: 62.5ms	remaining: 5.14s
12:	learn: 0.8039389	total: 67.4ms	remaining: 5.12s
13:	learn: 0.7893027	total: 72.8ms	remaining: 5.13s
14:	learn: 0.7759102	total: 77.2ms	remaining: 5.07s
15:	learn: 0.7622046	total: 84.6ms	remaining: 5.2s
16:	learn: 0.7511464	total: 89.1ms	remaining: 5.15s
17:	learn: 0.7402253	total: 93.2ms	remaining: 5.09s
18:	learn: 0.7304534	total: 98ms	remaining: 5.

[I 2023-08-02 01:27:44,248] Trial 7 finished with value: 0.19705063178439117 and parameters: {'w_cat': 72, 'w_xgb': 10}. Best is trial 4 with value: 0.1970082649223974.


Learning rate set to 0.064991
0:	learn: 1.1134528	total: 15.8ms	remaining: 15.8s
1:	learn: 1.0764030	total: 20.2ms	remaining: 10.1s
2:	learn: 1.0409692	total: 25.5ms	remaining: 8.47s
3:	learn: 1.0083718	total: 30.3ms	remaining: 7.55s
4:	learn: 0.9788923	total: 36.8ms	remaining: 7.33s
5:	learn: 0.9515839	total: 42.6ms	remaining: 7.06s
6:	learn: 0.9247004	total: 47.3ms	remaining: 6.71s
7:	learn: 0.9001281	total: 51.6ms	remaining: 6.39s
8:	learn: 0.8778985	total: 57.2ms	remaining: 6.3s
9:	learn: 0.8559244	total: 61.6ms	remaining: 6.09s
10:	learn: 0.8370810	total: 70.3ms	remaining: 6.32s
11:	learn: 0.8193220	total: 75.9ms	remaining: 6.25s
12:	learn: 0.8039389	total: 81.6ms	remaining: 6.2s
13:	learn: 0.7893027	total: 86.7ms	remaining: 6.11s
14:	learn: 0.7759102	total: 91ms	remaining: 5.97s
15:	learn: 0.7622046	total: 95.5ms	remaining: 5.87s
16:	learn: 0.7511464	total: 100ms	remaining: 5.79s
17:	learn: 0.7402253	total: 104ms	remaining: 5.7s
18:	learn: 0.7304534	total: 110ms	remaining: 5.66s


[I 2023-08-02 01:27:52,604] Trial 8 finished with value: 0.208978976772808 and parameters: {'w_cat': 4, 'w_xgb': 1}. Best is trial 4 with value: 0.1970082649223974.


Learning rate set to 0.064991
0:	learn: 1.1134528	total: 8.13ms	remaining: 8.13s
1:	learn: 1.0764030	total: 15.7ms	remaining: 7.84s
2:	learn: 1.0409692	total: 20.3ms	remaining: 6.75s
3:	learn: 1.0083718	total: 25.9ms	remaining: 6.45s
4:	learn: 0.9788923	total: 30.9ms	remaining: 6.15s
5:	learn: 0.9515839	total: 35.6ms	remaining: 5.89s
6:	learn: 0.9247004	total: 40.7ms	remaining: 5.77s
7:	learn: 0.9001281	total: 46.3ms	remaining: 5.74s
8:	learn: 0.8778985	total: 50.9ms	remaining: 5.6s
9:	learn: 0.8559244	total: 56.2ms	remaining: 5.56s
10:	learn: 0.8370810	total: 60.8ms	remaining: 5.47s
11:	learn: 0.8193220	total: 65.2ms	remaining: 5.37s
12:	learn: 0.8039389	total: 70.5ms	remaining: 5.35s
13:	learn: 0.7893027	total: 76ms	remaining: 5.35s
14:	learn: 0.7759102	total: 83.2ms	remaining: 5.46s
15:	learn: 0.7622046	total: 88.4ms	remaining: 5.44s
16:	learn: 0.7511464	total: 94ms	remaining: 5.43s
17:	learn: 0.7402253	total: 98.1ms	remaining: 5.35s
18:	learn: 0.7304534	total: 103ms	remaining: 5.34

[I 2023-08-02 01:28:03,139] Trial 9 finished with value: 0.20194144851111648 and parameters: {'w_cat': 13, 'w_xgb': 21}. Best is trial 4 with value: 0.1970082649223974.


Learning rate set to 0.064991
0:	learn: 1.1134528	total: 9.15ms	remaining: 9.14s
1:	learn: 1.0764030	total: 15.9ms	remaining: 7.92s
2:	learn: 1.0409692	total: 22.2ms	remaining: 7.38s
3:	learn: 1.0083718	total: 29.6ms	remaining: 7.37s
4:	learn: 0.9788923	total: 35.6ms	remaining: 7.09s
5:	learn: 0.9515839	total: 41.8ms	remaining: 6.92s
6:	learn: 0.9247004	total: 47.7ms	remaining: 6.77s
7:	learn: 0.9001281	total: 53.8ms	remaining: 6.67s
8:	learn: 0.8778985	total: 61ms	remaining: 6.71s
9:	learn: 0.8559244	total: 67.2ms	remaining: 6.65s
10:	learn: 0.8370810	total: 73.6ms	remaining: 6.62s
11:	learn: 0.8193220	total: 80.5ms	remaining: 6.62s
12:	learn: 0.8039389	total: 87.5ms	remaining: 6.64s
13:	learn: 0.7893027	total: 94.7ms	remaining: 6.67s
14:	learn: 0.7759102	total: 101ms	remaining: 6.64s
15:	learn: 0.7622046	total: 108ms	remaining: 6.67s
16:	learn: 0.7511464	total: 115ms	remaining: 6.67s
17:	learn: 0.7402253	total: 122ms	remaining: 6.63s
18:	learn: 0.7304534	total: 129ms	remaining: 6.65s

[I 2023-08-02 01:28:18,507] Trial 10 finished with value: 0.199985440425712 and parameters: {'w_cat': 99, 'w_xgb': 0}. Best is trial 4 with value: 0.1970082649223974.


Learning rate set to 0.064991
0:	learn: 1.1134528	total: 7.3ms	remaining: 7.3s
1:	learn: 1.0764030	total: 13.9ms	remaining: 6.92s
2:	learn: 1.0409692	total: 18.7ms	remaining: 6.2s
3:	learn: 1.0083718	total: 24ms	remaining: 5.98s
4:	learn: 0.9788923	total: 29.1ms	remaining: 5.79s
5:	learn: 0.9515839	total: 33.7ms	remaining: 5.58s
6:	learn: 0.9247004	total: 39.2ms	remaining: 5.56s
7:	learn: 0.9001281	total: 44.4ms	remaining: 5.5s
8:	learn: 0.8778985	total: 49ms	remaining: 5.39s
9:	learn: 0.8559244	total: 54.4ms	remaining: 5.38s
10:	learn: 0.8370810	total: 59.5ms	remaining: 5.35s
11:	learn: 0.8193220	total: 63.9ms	remaining: 5.26s
12:	learn: 0.8039389	total: 68.2ms	remaining: 5.18s
13:	learn: 0.7893027	total: 72.7ms	remaining: 5.12s
14:	learn: 0.7759102	total: 78.1ms	remaining: 5.13s
15:	learn: 0.7622046	total: 83.3ms	remaining: 5.13s
16:	learn: 0.7511464	total: 88.2ms	remaining: 5.1s
17:	learn: 0.7402253	total: 93.9ms	remaining: 5.12s
18:	learn: 0.7304534	total: 99.6ms	remaining: 5.14s
1

[I 2023-08-02 01:28:30,104] Trial 11 finished with value: 0.19680606647462606 and parameters: {'w_cat': 60, 'w_xgb': 13}. Best is trial 11 with value: 0.19680606647462606.


Learning rate set to 0.064991
0:	learn: 1.1134528	total: 5.95ms	remaining: 5.94s
1:	learn: 1.0764030	total: 12.6ms	remaining: 6.28s
2:	learn: 1.0409692	total: 19.4ms	remaining: 6.44s
3:	learn: 1.0083718	total: 25.6ms	remaining: 6.38s
4:	learn: 0.9788923	total: 29.9ms	remaining: 5.94s
5:	learn: 0.9515839	total: 35ms	remaining: 5.79s
6:	learn: 0.9247004	total: 39.5ms	remaining: 5.6s
7:	learn: 0.9001281	total: 44.8ms	remaining: 5.56s
8:	learn: 0.8778985	total: 51.7ms	remaining: 5.69s
9:	learn: 0.8559244	total: 63ms	remaining: 6.24s
10:	learn: 0.8370810	total: 70ms	remaining: 6.29s
11:	learn: 0.8193220	total: 74.7ms	remaining: 6.15s
12:	learn: 0.8039389	total: 82.8ms	remaining: 6.29s
13:	learn: 0.7893027	total: 88ms	remaining: 6.2s
14:	learn: 0.7759102	total: 92.3ms	remaining: 6.06s
15:	learn: 0.7622046	total: 97.2ms	remaining: 5.98s
16:	learn: 0.7511464	total: 102ms	remaining: 5.87s
17:	learn: 0.7402253	total: 107ms	remaining: 5.82s
18:	learn: 0.7304534	total: 112ms	remaining: 5.79s
19:	l

[I 2023-08-02 01:28:38,953] Trial 12 finished with value: 0.19656207826532104 and parameters: {'w_cat': 57, 'w_xgb': 19}. Best is trial 12 with value: 0.19656207826532104.


Learning rate set to 0.064991
0:	learn: 1.1134528	total: 6.83ms	remaining: 6.82s
1:	learn: 1.0764030	total: 13.8ms	remaining: 6.89s
2:	learn: 1.0409692	total: 22.8ms	remaining: 7.57s
3:	learn: 1.0083718	total: 34ms	remaining: 8.48s
4:	learn: 0.9788923	total: 38ms	remaining: 7.56s
5:	learn: 0.9515839	total: 43.7ms	remaining: 7.24s
6:	learn: 0.9247004	total: 48.5ms	remaining: 6.88s
7:	learn: 0.9001281	total: 53.6ms	remaining: 6.65s
8:	learn: 0.8778985	total: 59.3ms	remaining: 6.53s
9:	learn: 0.8559244	total: 66.1ms	remaining: 6.54s
10:	learn: 0.8370810	total: 71.4ms	remaining: 6.42s
11:	learn: 0.8193220	total: 75.8ms	remaining: 6.24s
12:	learn: 0.8039389	total: 83.2ms	remaining: 6.32s
13:	learn: 0.7893027	total: 89ms	remaining: 6.27s
14:	learn: 0.7759102	total: 94.1ms	remaining: 6.18s
15:	learn: 0.7622046	total: 99.4ms	remaining: 6.11s
16:	learn: 0.7511464	total: 106ms	remaining: 6.11s
17:	learn: 0.7402253	total: 110ms	remaining: 6.02s
18:	learn: 0.7304534	total: 115ms	remaining: 5.95s
1

[I 2023-08-02 01:28:49,645] Trial 13 finished with value: 0.19701496160378373 and parameters: {'w_cat': 50, 'w_xgb': 33}. Best is trial 12 with value: 0.19656207826532104.


Learning rate set to 0.064991
0:	learn: 1.1134528	total: 7.94ms	remaining: 7.93s
1:	learn: 1.0764030	total: 13.1ms	remaining: 6.53s
2:	learn: 1.0409692	total: 18.6ms	remaining: 6.17s
3:	learn: 1.0083718	total: 26.6ms	remaining: 6.62s
4:	learn: 0.9788923	total: 31.9ms	remaining: 6.36s
5:	learn: 0.9515839	total: 36.7ms	remaining: 6.08s
6:	learn: 0.9247004	total: 41ms	remaining: 5.81s
7:	learn: 0.9001281	total: 47.6ms	remaining: 5.9s
8:	learn: 0.8778985	total: 53.1ms	remaining: 5.85s
9:	learn: 0.8559244	total: 57.6ms	remaining: 5.7s
10:	learn: 0.8370810	total: 64.1ms	remaining: 5.76s
11:	learn: 0.8193220	total: 69ms	remaining: 5.68s
12:	learn: 0.8039389	total: 73.4ms	remaining: 5.58s
13:	learn: 0.7893027	total: 78.2ms	remaining: 5.51s
14:	learn: 0.7759102	total: 87ms	remaining: 5.71s
15:	learn: 0.7622046	total: 92.2ms	remaining: 5.67s
16:	learn: 0.7511464	total: 96.4ms	remaining: 5.57s
17:	learn: 0.7402253	total: 100ms	remaining: 5.45s
18:	learn: 0.7304534	total: 104ms	remaining: 5.38s
19

[I 2023-08-02 01:28:58,338] Trial 14 finished with value: 0.1967224502891469 and parameters: {'w_cat': 53, 'w_xgb': 29}. Best is trial 12 with value: 0.19656207826532104.


Learning rate set to 0.064991
0:	learn: 1.1134528	total: 13.9ms	remaining: 13.9s
1:	learn: 1.0764030	total: 18.2ms	remaining: 9.07s
2:	learn: 1.0409692	total: 24.7ms	remaining: 8.22s
3:	learn: 1.0083718	total: 30.5ms	remaining: 7.59s
4:	learn: 0.9788923	total: 36.7ms	remaining: 7.31s
5:	learn: 0.9515839	total: 40.9ms	remaining: 6.78s
6:	learn: 0.9247004	total: 46.4ms	remaining: 6.58s
7:	learn: 0.9001281	total: 52.2ms	remaining: 6.47s
8:	learn: 0.8778985	total: 59.7ms	remaining: 6.58s
9:	learn: 0.8559244	total: 64.2ms	remaining: 6.36s
10:	learn: 0.8370810	total: 69.8ms	remaining: 6.28s
11:	learn: 0.8193220	total: 74.4ms	remaining: 6.13s
12:	learn: 0.8039389	total: 81.3ms	remaining: 6.17s
13:	learn: 0.7893027	total: 86.8ms	remaining: 6.11s
14:	learn: 0.7759102	total: 94.4ms	remaining: 6.2s
15:	learn: 0.7622046	total: 100ms	remaining: 6.15s
16:	learn: 0.7511464	total: 104ms	remaining: 6.02s
17:	learn: 0.7402253	total: 109ms	remaining: 5.95s
18:	learn: 0.7304534	total: 114ms	remaining: 5.8

[I 2023-08-02 01:29:07,411] Trial 15 finished with value: 0.1973778881859894 and parameters: {'w_cat': 42, 'w_xgb': 34}. Best is trial 12 with value: 0.19656207826532104.


Learning rate set to 0.064991
0:	learn: 1.1134528	total: 7.6ms	remaining: 7.59s
1:	learn: 1.0764030	total: 12.4ms	remaining: 6.17s
2:	learn: 1.0409692	total: 19.3ms	remaining: 6.4s
3:	learn: 1.0083718	total: 37.4ms	remaining: 9.3s
4:	learn: 0.9788923	total: 42.2ms	remaining: 8.4s
5:	learn: 0.9515839	total: 47.7ms	remaining: 7.89s
6:	learn: 0.9247004	total: 55.4ms	remaining: 7.86s
7:	learn: 0.9001281	total: 60.5ms	remaining: 7.5s
8:	learn: 0.8778985	total: 65.7ms	remaining: 7.23s
9:	learn: 0.8559244	total: 70.6ms	remaining: 6.99s
10:	learn: 0.8370810	total: 78.5ms	remaining: 7.06s
11:	learn: 0.8193220	total: 83.7ms	remaining: 6.89s
12:	learn: 0.8039389	total: 87.7ms	remaining: 6.66s
13:	learn: 0.7893027	total: 92.8ms	remaining: 6.53s
14:	learn: 0.7759102	total: 98.5ms	remaining: 6.46s
15:	learn: 0.7622046	total: 103ms	remaining: 6.34s
16:	learn: 0.7511464	total: 108ms	remaining: 6.25s
17:	learn: 0.7402253	total: 112ms	remaining: 6.13s
18:	learn: 0.7304534	total: 119ms	remaining: 6.15s
1

[I 2023-08-02 01:29:16,351] Trial 16 finished with value: 0.198727999461757 and parameters: {'w_cat': 38, 'w_xgb': 45}. Best is trial 12 with value: 0.19656207826532104.


Learning rate set to 0.064991
0:	learn: 1.1134528	total: 11.4ms	remaining: 11.4s
1:	learn: 1.0764030	total: 18.5ms	remaining: 9.25s
2:	learn: 1.0409692	total: 28.1ms	remaining: 9.33s
3:	learn: 1.0083718	total: 32.9ms	remaining: 8.2s
4:	learn: 0.9788923	total: 39.4ms	remaining: 7.85s
5:	learn: 0.9515839	total: 45ms	remaining: 7.45s
6:	learn: 0.9247004	total: 50.5ms	remaining: 7.17s
7:	learn: 0.9001281	total: 55ms	remaining: 6.82s
8:	learn: 0.8778985	total: 61.3ms	remaining: 6.75s
9:	learn: 0.8559244	total: 65.7ms	remaining: 6.5s
10:	learn: 0.8370810	total: 71ms	remaining: 6.38s
11:	learn: 0.8193220	total: 76.3ms	remaining: 6.28s
12:	learn: 0.8039389	total: 80.9ms	remaining: 6.14s
13:	learn: 0.7893027	total: 87.5ms	remaining: 6.16s
14:	learn: 0.7759102	total: 92.4ms	remaining: 6.07s
15:	learn: 0.7622046	total: 97.2ms	remaining: 5.98s
16:	learn: 0.7511464	total: 101ms	remaining: 5.87s
17:	learn: 0.7402253	total: 108ms	remaining: 5.87s
18:	learn: 0.7304534	total: 113ms	remaining: 5.81s
19:

[I 2023-08-02 01:29:28,611] Trial 17 finished with value: 0.19664971381741556 and parameters: {'w_cat': 56, 'w_xgb': 28}. Best is trial 12 with value: 0.19656207826532104.


Learning rate set to 0.064991
0:	learn: 1.1134528	total: 9.31ms	remaining: 9.3s
1:	learn: 1.0764030	total: 15.9ms	remaining: 7.94s
2:	learn: 1.0409692	total: 22.6ms	remaining: 7.52s
3:	learn: 1.0083718	total: 29.6ms	remaining: 7.38s
4:	learn: 0.9788923	total: 36.6ms	remaining: 7.29s
5:	learn: 0.9515839	total: 42ms	remaining: 6.96s
6:	learn: 0.9247004	total: 49.1ms	remaining: 6.96s
7:	learn: 0.9001281	total: 60.4ms	remaining: 7.49s
8:	learn: 0.8778985	total: 67.8ms	remaining: 7.46s
9:	learn: 0.8559244	total: 76.2ms	remaining: 7.55s
10:	learn: 0.8370810	total: 83.4ms	remaining: 7.5s
11:	learn: 0.8193220	total: 89.5ms	remaining: 7.37s
12:	learn: 0.8039389	total: 95.4ms	remaining: 7.24s
13:	learn: 0.7893027	total: 102ms	remaining: 7.19s
14:	learn: 0.7759102	total: 110ms	remaining: 7.25s
15:	learn: 0.7622046	total: 117ms	remaining: 7.21s
16:	learn: 0.7511464	total: 126ms	remaining: 7.31s
17:	learn: 0.7402253	total: 134ms	remaining: 7.33s
18:	learn: 0.7304534	total: 142ms	remaining: 7.35s
19

[I 2023-08-02 01:29:41,293] Trial 18 finished with value: 0.19826377279871282 and parameters: {'w_cat': 86, 'w_xgb': 4}. Best is trial 12 with value: 0.19656207826532104.


Learning rate set to 0.064991
0:	learn: 1.1134528	total: 9.43ms	remaining: 9.42s
1:	learn: 1.0764030	total: 19.2ms	remaining: 9.56s
2:	learn: 1.0409692	total: 25.6ms	remaining: 8.52s
3:	learn: 1.0083718	total: 32.3ms	remaining: 8.05s
4:	learn: 0.9788923	total: 40.8ms	remaining: 8.11s
5:	learn: 0.9515839	total: 47.6ms	remaining: 7.88s
6:	learn: 0.9247004	total: 54.6ms	remaining: 7.75s
7:	learn: 0.9001281	total: 62.1ms	remaining: 7.7s
8:	learn: 0.8778985	total: 68.5ms	remaining: 7.54s
9:	learn: 0.8559244	total: 75.9ms	remaining: 7.52s
10:	learn: 0.8370810	total: 83.7ms	remaining: 7.52s
11:	learn: 0.8193220	total: 90ms	remaining: 7.41s
12:	learn: 0.8039389	total: 97.1ms	remaining: 7.38s
13:	learn: 0.7893027	total: 113ms	remaining: 7.94s
14:	learn: 0.7759102	total: 120ms	remaining: 7.87s
15:	learn: 0.7622046	total: 126ms	remaining: 7.76s
16:	learn: 0.7511464	total: 133ms	remaining: 7.67s
17:	learn: 0.7402253	total: 139ms	remaining: 7.6s
18:	learn: 0.7304534	total: 145ms	remaining: 7.49s
19

[I 2023-08-02 01:29:52,685] Trial 19 finished with value: 0.1965951392694841 and parameters: {'w_cat': 63, 'w_xgb': 25}. Best is trial 12 with value: 0.19656207826532104.


Number of finished trials: 20
Best trial: {'w_cat': 57, 'w_xgb': 19}


In [22]:
cat_model = cat.CatBoostRegressor()
cat_model.fit(trainX, trainy)
y_pred_cat = cat_model.predict(testX)

xgb_model = xgb.XGBRegressor()
xgb_model.fit(trainX, trainy)
y_pred_xgb = xgb_model.predict(testX)

lgbm_model = lgbm.LGBMRegressor()
lgbm_model.fit(trainX, trainy, verbose=-1)
y_pred_lgbm = lgbm_model.predict(testX)

params = study.best_trial.params
y_pred = (params['w_cat']*y_pred_cat+params['w_xgb']*y_pred_xgb+(100-params['w_cat']-params['w_xgb'])*y_pred_lgbm)/100

Learning rate set to 0.064991
0:	learn: 1.1134528	total: 7.25ms	remaining: 7.24s
1:	learn: 1.0764030	total: 18.5ms	remaining: 9.23s
2:	learn: 1.0409692	total: 23.8ms	remaining: 7.9s
3:	learn: 1.0083718	total: 28.9ms	remaining: 7.21s
4:	learn: 0.9788923	total: 35.4ms	remaining: 7.04s
5:	learn: 0.9515839	total: 51.2ms	remaining: 8.49s
6:	learn: 0.9247004	total: 67.1ms	remaining: 9.52s
7:	learn: 0.9001281	total: 73.1ms	remaining: 9.06s
8:	learn: 0.8778985	total: 80ms	remaining: 8.81s


9:	learn: 0.8559244	total: 189ms	remaining: 18.7s
10:	learn: 0.8370810	total: 195ms	remaining: 17.6s
11:	learn: 0.8193220	total: 215ms	remaining: 17.7s
12:	learn: 0.8039389	total: 222ms	remaining: 16.8s
13:	learn: 0.7893027	total: 252ms	remaining: 17.8s
14:	learn: 0.7759102	total: 259ms	remaining: 17s
15:	learn: 0.7622046	total: 268ms	remaining: 16.5s
16:	learn: 0.7511464	total: 273ms	remaining: 15.8s
17:	learn: 0.7402253	total: 281ms	remaining: 15.3s
18:	learn: 0.7304534	total: 290ms	remaining: 15s
19:	learn: 0.7192693	total: 303ms	remaining: 14.9s
20:	learn: 0.7104842	total: 322ms	remaining: 15s
21:	learn: 0.7020815	total: 330ms	remaining: 14.7s
22:	learn: 0.6950149	total: 342ms	remaining: 14.5s
23:	learn: 0.6869116	total: 349ms	remaining: 14.2s
24:	learn: 0.6779583	total: 365ms	remaining: 14.2s
25:	learn: 0.6717269	total: 379ms	remaining: 14.2s
26:	learn: 0.6635544	total: 387ms	remaining: 13.9s
27:	learn: 0.6579794	total: 394ms	remaining: 13.7s
28:	learn: 0.6513783	total: 440ms	rema



In [24]:
# let's calculate mse
mse = mean_squared_error(testy, y_pred)
print(f"Simple Average Ensemble's MSE: {mse}") 

Simple Average Ensemble's MSE: 0.19656207805707848


In [30]:
#pd.DataFrame({'predy':y_pred, 'testy':pd.Series(testy), 'avg_predy': y_pred_final}).head(20)