In [59]:
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_squared_error
df = pd.read_csv(('../data/feature_engineered.csv'))
df.set_index('Timestamp', inplace=True)

train_test_split = 0.2
train_size = int((1 - train_test_split) * len(df))
df_train = df.iloc[:train_size]
df_test  = df.iloc[train_size:]

tune_split = int(0.8 * len(df_train))
train_tune = df_train.iloc[:tune_split]
val_tune   = df_train.iloc[tune_split:]


In [60]:
from skopt import BayesSearchCV
from skopt.space import Real, Integer
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit
from sklearn.ensemble import RandomForestRegressor


In [61]:
X_train_tune = train_tune.drop(['Power Consumption (kW)', 'load_da', 'load_d2'], axis=1)
y_train_tune = train_tune['load_da']

X_val_tune = val_tune.drop(['Power Consumption (kW)', 'load_da', 'load_d2'], axis=1)
y_val_tune = val_tune['load_da']


In [62]:
rf = RandomForestRegressor(random_state=42, n_jobs=-1, oob_score=True)

param_space = {
    'n_estimators': Integer(250, 600),
    'max_depth': Integer(3, 8),
    'min_samples_split': Integer(4, 12),
    'min_samples_leaf': Integer(12, 16),
    'max_features': Real(0.2, 0.6),
    'bootstrap': [True],
}

tscv = TimeSeriesSplit(n_splits=6)

opt = BayesSearchCV(
    estimator=rf,
    search_spaces=param_space,
    n_iter=30,
    cv=tscv,
    scoring='neg_root_mean_squared_error',
    n_jobs=-1,
    random_state=42,
    verbose=0
)

opt.fit(X_train_tune, y_train_tune)
print("Best Parameters:", opt.best_params_)
print("Best CV Score:", -opt.best_score_)


Best Parameters: OrderedDict([('bootstrap', True), ('max_depth', 3), ('max_features', 0.20367038946206986), ('min_samples_leaf', 14), ('min_samples_split', 9), ('n_estimators', 482)])
Best CV Score: 0.3069941496967017


In [84]:
xgb = XGBRegressor(
    objective='reg:squarederror',
    random_state=42,
    n_jobs=-1,
    tree_method='hist',        # fast histogram algorithm
    eval_metric='rmse'
)

param_space = {
    'n_estimators': Integer(300, 700),          # more boosting rounds
    'max_depth': Integer(2, 8),                 # deeper trees
    'learning_rate': Real(0.001, 0.01, prior='log-uniform'),  # bigger steps
    'subsample': Real(0.1, 0.7),                 # less stochasticity
    'colsample_bytree': Real(0.4, 1.0),          # use more features per tree
    'min_child_weight': Integer(3, 7),           # allow finer splits
    'gamma': Real(0.0, 1.0),                     # weaker split penalty
    'reg_lambda': Real(0.3, 2.0, prior='log-uniform'),  # reduce L2
    'reg_alpha': Real(0.3, 2.0, prior='log-uniform'),   # reduce L1
}


tscv = TimeSeriesSplit(n_splits=6)

opt = BayesSearchCV(
    estimator=xgb,
    search_spaces=param_space,
    n_iter=30,
    cv=tscv,
    scoring='neg_root_mean_squared_error',
    n_jobs=-1,
    random_state=42,
    verbose=1
)

opt.fit(X_train_tune, y_train_tune)
print("Best Parameters:", opt.best_params_)
print("Best CV Score:", -opt.best_score_)  # convert negative RMSE to positive


Fitting 6 folds for each of 1 candidates, totalling 6 fits
Fitting 6 folds for each of 1 candidates, totalling 6 fits
Fitting 6 folds for each of 1 candidates, totalling 6 fits
Fitting 6 folds for each of 1 candidates, totalling 6 fits
Fitting 6 folds for each of 1 candidates, totalling 6 fits
Fitting 6 folds for each of 1 candidates, totalling 6 fits
Fitting 6 folds for each of 1 candidates, totalling 6 fits
Fitting 6 folds for each of 1 candidates, totalling 6 fits
Fitting 6 folds for each of 1 candidates, totalling 6 fits
Fitting 6 folds for each of 1 candidates, totalling 6 fits
Fitting 6 folds for each of 1 candidates, totalling 6 fits
Fitting 6 folds for each of 1 candidates, totalling 6 fits
Fitting 6 folds for each of 1 candidates, totalling 6 fits
Fitting 6 folds for each of 1 candidates, totalling 6 fits
Fitting 6 folds for each of 1 candidates, totalling 6 fits
Fitting 6 folds for each of 1 candidates, totalling 6 fits
Fitting 6 folds for each of 1 candidates, totalling 6 fi

In [None]:
best_xgb = opt.best_estimator_
best_xgb.fit(X_train_tune, y_train_tune)

best_rf = opt.best_estimator_
best_xgb.fit(X_train_tune, y_train_tune)

In [None]:
import numpy as np 

rf_train_pred = best_rf.predict(X_train_tune)
xgb_train_pred = best_xgb.predict(X_train_tune)

# Combine as meta-features
meta_X_train = np.column_stack((rf_train_pred, xgb_train_pred))
meta_y_train = y_train_tune

In [71]:
from sklearn.svm import SVR
meta_param_space_svr = {
    'C': Real(1, 50, prior='log-uniform'),
    #'gamma': Real(1e-4, 1, prior='log-uniform'),
    'epsilon': Real(1e-3, 0.1, prior='log-uniform')
}

meta_svr = SVR(kernel='rbf', gamma='scale')

meta_opt_svr = BayesSearchCV(
    estimator=meta_svr,
    search_spaces=meta_param_space_svr,
    n_iter=40,
    cv=tscv,
    scoring='neg_root_mean_squared_error',
    n_jobs=-1,
    random_state=42,
    verbose=0
)

meta_opt_svr.fit(meta_X_train, meta_y_train)


In [85]:
from lightgbm import LGBMRegressor


meta_param_space = {
    'n_estimators': Integer(300, 700),
    'learning_rate': Real(0.001, 0.1, prior='log-uniform'),
    'max_depth': Integer(3, 10),
    'num_leaves': Integer(15, 30),
    'min_child_samples': Integer(5, 20),
    'subsample': Real(0.6, 0.8),
    'colsample_bytree': Real(0.6, 1.0),
    'reg_lambda': Real(0.1, 2.0, prior='log-uniform'),  # reduce L2
    'reg_alpha': Real(0.1, 2.0, prior='log-uniform'),   # reduce L1
}

lgb_meta_base = LGBMRegressor(
    objective='rmse',
    random_state=42,
    boosting_type='gbdt',
    n_jobs=-1
)

meta_opt = BayesSearchCV(
    estimator=lgb_meta_base,
    search_spaces=meta_param_space,
    n_iter=30,
    cv=tscv,
    scoring='neg_root_mean_squared_error',
    n_jobs=-1,
    random_state=42,
    verbose=0
)

meta_opt.fit(meta_X_train, meta_y_train)
print("Best LightGBM Meta Params:", meta_opt.best_params_)
print("Best Meta CV Score:", -meta_opt.best_score_)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000398 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000418 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 66
[LightGBM] [Info] Total Bins 34
[LightGBM] [Info] Number of data points in the train set: 94, number of used features: 2
[LightGBM] [Info] Number of data points in the train set: 48, number of used features: 2
[LightGBM] [Info] Start training from score 6.320407
[LightGBM] [Info] Start training from score 6.329723
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000223 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 96
[LightGBM] [Info] Number of data points in the train set: 140, number of used features: 2
[LightGBM] [Info] Start training from score 

In [86]:
X_test_tune = df_test.drop(['Power Consumption (kW)', 'load_da', 'load_d2'], axis=1)
y_test_tune = df_test['load_da']

In [93]:
rf_forecast = best_rf.predict(X_test_tune)
xgb_forecast = best_xgb.predict(X_test_tune)

meta_features = np.column_stack((rf_forecast, xgb_forecast))
final_forecast = meta_opt.predict(meta_features)


In [94]:
print(mean_absolute_error(y_true=y_test_tune, y_pred=final_forecast))

0.25236129263863977


In [95]:
print(mean_squared_error(y_true=y_test_tune, y_pred=final_forecast, squared=False))

0.3187638998303389




In [None]:
from sklearn.neural_network import MLPRegressor
from skopt.space import Real, Integer, Categorical

from sklearn.model_selection import GridSearchCV

param_space = {
    'hidden_layer_sizes': [(32, 16), (64, 32), (128, 64, 32)],
    'alpha': [1e-5, 1e-4, 1e-3],
    'learning_rate_init': [0.0005, 0.001, 0.005, 0.002, 0.0002, 0.0001],
    'activation': ['relu', 'tanh'],
}

meta_opt_mlp = GridSearchCV(
    estimator=MLPRegressor(solver='adam', max_iter=3000, random_state=42),
    param_grid=param_space,
    cv=tscv,
    scoring='neg_root_mean_squared_error',
    n_jobs=-1,
    verbose=1
)

meta_opt_mlp.fit(meta_X_train, meta_y_train)
print("Best MLP Meta-Params:", meta_opt_mlp.best_params_)
print("Best Meta CV RMSE:", -meta_opt_mlp.best_score_)


Fitting 6 folds for each of 108 candidates, totalling 648 fits
[CV] END activation=relu, alpha=1e-05, hidden_layer_sizes=(32, 16), learning_rate_init=0.0005; total time=   0.1s
[CV] END activation=relu, alpha=1e-05, hidden_layer_sizes=(32, 16), learning_rate_init=0.0005; total time=   0.1s
[CV] END activation=relu, alpha=1e-05, hidden_layer_sizes=(32, 16), learning_rate_init=0.0005; total time=   0.1s
[CV] END activation=relu, alpha=1e-05, hidden_layer_sizes=(32, 16), learning_rate_init=0.001; total time=   0.0s
[CV] END activation=relu, alpha=1e-05, hidden_layer_sizes=(32, 16), learning_rate_init=0.001; total time=   0.0s
[CV] END activation=relu, alpha=1e-05, hidden_layer_sizes=(32, 16), learning_rate_init=0.005; total time=   0.0s
[CV] END activation=relu, alpha=1e-05, hidden_layer_sizes=(32, 16), learning_rate_init=0.001; total time=   0.0s
[CV] END activation=relu, alpha=1e-05, hidden_layer_sizes=(32, 16), learning_rate_init=0.001; total time=   0.0s
[CV] END activation=relu, alph



[CV] END activation=tanh, alpha=1e-05, hidden_layer_sizes=(64, 32), learning_rate_init=0.0001; total time=   0.2s
[CV] END activation=tanh, alpha=1e-05, hidden_layer_sizes=(128, 64, 32), learning_rate_init=0.001; total time=   0.1s
[CV] END activation=tanh, alpha=1e-05, hidden_layer_sizes=(128, 64, 32), learning_rate_init=0.002; total time=   0.1s
[CV] END activation=tanh, alpha=1e-05, hidden_layer_sizes=(32, 16), learning_rate_init=0.0002; total time=   0.4s
[CV] END activation=tanh, alpha=1e-05, hidden_layer_sizes=(128, 64, 32), learning_rate_init=0.001; total time=   0.1s
[CV] END activation=tanh, alpha=1e-05, hidden_layer_sizes=(128, 64, 32), learning_rate_init=0.0002; total time=   0.2s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(32, 16), learning_rate_init=0.0005; total time=   0.1s
[CV] END activation=tanh, alpha=1e-05, hidden_layer_sizes=(128, 64, 32), learning_rate_init=0.0002; total time=   0.3s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(32, 1



[CV] END activation=tanh, alpha=1e-05, hidden_layer_sizes=(32, 16), learning_rate_init=0.0001; total time=   0.7s
[CV] END activation=tanh, alpha=1e-05, hidden_layer_sizes=(32, 16), learning_rate_init=0.0001; total time=   0.7s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(32, 16), learning_rate_init=0.0005; total time=   0.2s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(32, 16), learning_rate_init=0.0005; total time=   0.2s
[CV] END activation=tanh, alpha=1e-05, hidden_layer_sizes=(128, 64, 32), learning_rate_init=0.0001; total time=   0.6s
[CV] END activation=tanh, alpha=1e-05, hidden_layer_sizes=(128, 64, 32), learning_rate_init=0.0002; total time=   0.4s
[CV] END activation=tanh, alpha=1e-05, hidden_layer_sizes=(32, 16), learning_rate_init=0.0002; total time=   0.5s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(32, 16), learning_rate_init=0.001; total time=   0.1s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(32, 16), learnin



[CV] END activation=tanh, alpha=1e-05, hidden_layer_sizes=(32, 16), learning_rate_init=0.0001; total time=   0.6s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(64, 32), learning_rate_init=0.0005; total time=   0.0s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(64, 32), learning_rate_init=0.0005; total time=   0.0s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(64, 32), learning_rate_init=0.0005; total time=   0.0s
[CV] END activation=tanh, alpha=1e-05, hidden_layer_sizes=(32, 16), learning_rate_init=0.0001; total time=   0.7s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(64, 32), learning_rate_init=0.0005; total time=   0.0s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(64, 32), learning_rate_init=0.0005; total time=   0.0s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(64, 32), learning_rate_init=0.001; total time=   0.0s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(64, 32), learning_rate_



[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(128, 64, 32), learning_rate_init=0.002; total time=   0.0s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(128, 64, 32), learning_rate_init=0.002; total time=   0.1s
[CV] END activation=tanh, alpha=1e-05, hidden_layer_sizes=(128, 64, 32), learning_rate_init=0.0001; total time=   0.8s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(128, 64, 32), learning_rate_init=0.0002; total time=   0.2s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(32, 16), learning_rate_init=0.0001; total time=   0.9s




[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(32, 16), learning_rate_init=0.0005; total time=   0.2s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(128, 64, 32), learning_rate_init=0.0002; total time=   0.3s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(32, 16), learning_rate_init=0.0005; total time=   0.2s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(32, 16), learning_rate_init=0.0002; total time=   0.5s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(32, 16), learning_rate_init=0.0005; total time=   0.2s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(32, 16), learning_rate_init=0.001; total time=   0.1s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(128, 64, 32), learning_rate_init=0.0002; total time=   0.4s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(32, 16), learning_rate_init=0.0005; total time=   0.2s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(32, 16), learning



[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(32, 16), learning_rate_init=0.005; total time=   0.0s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(32, 16), learning_rate_init=0.005; total time=   0.0s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(128, 64, 32), learning_rate_init=0.0002; total time=   0.4s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(32, 16), learning_rate_init=0.005; total time=   0.0s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(32, 16), learning_rate_init=0.0001; total time=   0.8s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(32, 16), learning_rate_init=0.002; total time=   0.1s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(32, 16), learning_rate_init=0.002; total time=   0.1s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(32, 16), learning_rate_init=0.0005; total time=   0.3s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(32, 16), learning_rate_init



[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(32, 16), learning_rate_init=0.002; total time=   0.1s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(32, 16), learning_rate_init=0.002; total time=   0.0s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(64, 32), learning_rate_init=0.0005; total time=   0.0s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(64, 32), learning_rate_init=0.0005; total time=   0.0s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(64, 32), learning_rate_init=0.0005; total time=   0.0s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(64, 32), learning_rate_init=0.0005; total time=   0.0s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(128, 64, 32), learning_rate_init=0.0002; total time=   0.4s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(64, 32), learning_rate_init=0.0005; total time=   0.0s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(64, 32), learning_rate_in



[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(32, 16), learning_rate_init=0.0001; total time=   0.8s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(64, 32), learning_rate_init=0.0002; total time=   0.1s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(64, 32), learning_rate_init=0.0001; total time=   0.1s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(64, 32), learning_rate_init=0.0001; total time=   0.3s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(32, 16), learning_rate_init=0.0002; total time=   0.4s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(128, 64, 32), learning_rate_init=0.0005; total time=   0.1s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(64, 32), learning_rate_init=0.0001; total time=   0.2s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(64, 32), learning_rate_init=0.0001; total time=   0.2s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(128, 64, 32), learning_r



[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(32, 16), learning_rate_init=0.0001; total time=   0.9s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(128, 64, 32), learning_rate_init=0.001; total time=   0.1s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(128, 64, 32), learning_rate_init=0.0002; total time=   0.2s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(128, 64, 32), learning_rate_init=0.001; total time=   0.1s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(32, 16), learning_rate_init=0.0002; total time=   0.7s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(128, 64, 32), learning_rate_init=0.0002; total time=   0.3s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(128, 64, 32), learning_rate_init=0.0002; total time=   0.4s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(128, 64, 32), learning_rate_init=0.0001; total time=   0.9s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(



[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(32, 16), learning_rate_init=0.0001; total time=   0.7s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(128, 64, 32), learning_rate_init=0.0002; total time=   0.5s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(128, 64, 32), learning_rate_init=0.0001; total time=   0.4s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(128, 64, 32), learning_rate_init=0.0002; total time=   0.5s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(32, 16), learning_rate_init=0.0001; total time=   0.8s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(128, 64, 32), learning_rate_init=0.0001; total time=   0.5s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(128, 64, 32), learning_rate_init=0.0001; total time=   0.6s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(128, 64, 32), learning_rate_init=0.0001; total time=   0.5s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=

In [107]:
rf_forecast = best_rf.predict(X_test_tune)
xgb_forecast = best_xgb.predict(X_test_tune)

meta_features = np.column_stack((rf_forecast, xgb_forecast))
final_forecast = meta_opt_mlp.predict(meta_features)

In [109]:
print(mean_squared_error(y_true=y_test_tune, y_pred=final_forecast, squared=False))

0.29937603019770803


