In [1]:
import pandas as pd
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
from sklearn.linear_model import Lasso 

from src.paths import TRAINING_DATA
from src.hyperparameter_tuning import optimise_hyperparams

  from .autonotebook import tqdm as notebook_tqdm
[1;38;5;39mCOMET INFO:[0m Experiment is live on comet.com https://www.comet.com/kobina/divvy-bikes-demand-predictor-public/34428ae078294518b4797548c73ed10d



### Starts

In [None]:
# Read the training table for the "start" scenario from the training-data directory.
start_table = pd.read_parquet(path=TRAINING_DATA / "start_table.parquet")

In [None]:
# Hyperparameter search for Lasso on the "start" table (5 trials).
# Features are every column except the target, "trips_next_hour".
best_lasso_hyperparams_for_starts = optimise_hyperparams(
    model_fn=Lasso,
    hyperparam_trials=5,
    scenario="start",
    X=start_table.drop(columns="trips_next_hour"),
    y=start_table["trips_next_hour"],
)

### Stops

In [2]:
# Read the training table for the "stop" scenario from the training-data directory.
stop_table = pd.read_parquet(path=TRAINING_DATA / "stop_table.parquet")

#### Lasso

In [None]:
# Hyperparameter search for Lasso on the "stop" table (5 trials).
# Features are every column except the target, "trips_next_hour".
best_lasso_hyperparams_for_stops = optimise_hyperparams(
    model_fn=Lasso,
    hyperparam_trials=5,
    scenario="stop",
    X=stop_table.drop(columns="trips_next_hour"),
    y=stop_table["trips_next_hour"],
)

#### LGBMRegressor

In [3]:
# Hyperparameter search for LGBMRegressor on the "stop" table (5 trials).
# Features are every column except the target, "trips_next_hour".
best_lgb_hyperparams_for_stops = optimise_hyperparams(
    model_fn=LGBMRegressor,
    hyperparam_trials=5,
    scenario="stop",
    X=stop_table.drop(columns="trips_next_hour"),
    y=stop_table["trips_next_hour"],
)

divvy_trips - INFO - Beginning hyperparameter search
[I 2024-02-06 00:57:34,441] A new study created in memory with name: no-name-aa30187f-a3d6-42d1-b3f2-db02a5ec7ce1
divvy_trips - INFO - Start Trial 0
divvy_trips - INFO - Performing split number 0






divvy_trips - INFO - Performing split number 1
divvy_trips - INFO - Performing split number 2
divvy_trips - INFO - Performing split number 3
divvy_trips - INFO - Performing split number 4
[I 2024-02-06 00:58:10,871] Trial 0 finished with value: 0.20496896371584833 and parameters: {'num_leaves': 50, 'max_depth': 7, 'n_estimators': 52, 'learning_rate': 0.6653855922654952, 'importance_type': 'split', 'subsample': 0, 'feature_fraction': 0.8074421168630138, 'colsample_by_tree': 0.36967134549956615, 'colsample_by_level': 0.24751151073675703, 'colsample_by_node': 0.3286460741051326, 'bagging_fraction': 0.2842090588611806}. Best is trial 0 with value: 0.20496896371584833.
divvy_trips - INFO - Start Trial 1
divvy_trips - INFO - Performing split number 0






divvy_trips - INFO - Performing split number 1
divvy_trips - INFO - Performing split number 2
divvy_trips - INFO - Performing split number 3
divvy_trips - INFO - Performing split number 4
[I 2024-02-06 00:59:55,709] Trial 1 finished with value: 0.20672365820234306 and parameters: {'num_leaves': 58, 'max_depth': 8, 'n_estimators': 109, 'learning_rate': 0.5263831308238435, 'importance_type': 'gain', 'subsample': 0, 'feature_fraction': 0.5552746530779602, 'colsample_by_tree': 0.19430008653634756, 'colsample_by_level': 0.8855665348277181, 'colsample_by_node': 0.7546481091920707, 'bagging_fraction': 0.614286172764395}. Best is trial 0 with value: 0.20496896371584833.
divvy_trips - INFO - Start Trial 2
divvy_trips - INFO - Performing split number 0






divvy_trips - INFO - Performing split number 1
divvy_trips - INFO - Performing split number 2
divvy_trips - INFO - Performing split number 3
divvy_trips - INFO - Performing split number 4
[I 2024-02-06 01:02:03,510] Trial 2 finished with value: 0.2044856218795681 and parameters: {'num_leaves': 39, 'max_depth': 9, 'n_estimators': 138, 'learning_rate': 0.5090172649844095, 'importance_type': 'split', 'subsample': 0, 'feature_fraction': 0.5029190019298146, 'colsample_by_tree': 0.2622408641705394, 'colsample_by_level': 0.8916317127138547, 'colsample_by_node': 0.3928637682806727, 'bagging_fraction': 0.5270861842813163}. Best is trial 2 with value: 0.2044856218795681.
divvy_trips - INFO - Start Trial 3
divvy_trips - INFO - Performing split number 0






divvy_trips - INFO - Performing split number 1
divvy_trips - INFO - Performing split number 2
divvy_trips - INFO - Performing split number 3
divvy_trips - INFO - Performing split number 4
[I 2024-02-06 01:03:23,400] Trial 3 finished with value: 0.208052642421611 and parameters: {'num_leaves': 42, 'max_depth': 9, 'n_estimators': 100, 'learning_rate': 0.6255044339206903, 'importance_type': 'gain', 'subsample': 1, 'feature_fraction': 0.2720517991921456, 'colsample_by_tree': 0.798037114605478, 'colsample_by_level': 0.8809231215865033, 'colsample_by_node': 0.16955840508982306, 'bagging_fraction': 0.5512501653352012}. Best is trial 2 with value: 0.2044856218795681.
divvy_trips - INFO - Start Trial 4
divvy_trips - INFO - Performing split number 0






divvy_trips - INFO - Performing split number 1
divvy_trips - INFO - Performing split number 2
divvy_trips - INFO - Performing split number 3
divvy_trips - INFO - Performing split number 4
[I 2024-02-06 01:03:56,827] Trial 4 finished with value: 0.19505482761109055 and parameters: {'num_leaves': 9, 'max_depth': 8, 'n_estimators': 35, 'learning_rate': 0.37843940900146955, 'importance_type': 'split', 'subsample': 1, 'feature_fraction': 0.6605753400233634, 'colsample_by_tree': 0.507000070832764, 'colsample_by_level': 0.132989203577089, 'colsample_by_node': 0.7605783005531188, 'bagging_fraction': 0.9012817362748851}. Best is trial 4 with value: 0.19505482761109055.
divvy_trips - INFO - The best hyperparameters for <class 'lightgbm.sklearn.LGBMRegressor'> are:
divvy_trips - INFO - num_leaves:9
divvy_trips - INFO - max_depth:8
divvy_trips - INFO - n_estimators:35
divvy_trips - INFO - learning_rate:0.37843940900146955
divvy_trips - INFO - importance_type:split
divvy_trips - INFO - subsample:1


#### XGBRegressor

In [3]:
# Hyperparameter search for XGBRegressor on the "stop" table (5 trials).
# Features are every column except the target, "trips_next_hour".
best_xgb_hyperparams_for_stops = optimise_hyperparams(
    model_fn=XGBRegressor,
    hyperparam_trials=5,
    scenario="stop",
    X=stop_table.drop(columns="trips_next_hour"),
    y=stop_table["trips_next_hour"],
)

divvy_trips - INFO - Beginning hyperparameter search
[I 2024-02-06 01:12:40,371] A new study created in memory with name: no-name-c5e3eea9-5666-4df8-8940-fc34d71b4ed1
divvy_trips - INFO - Start Trial 0
divvy_trips - INFO - Performing split number 0






Parameters: { "colsample_by_level", "colsample_by_node", "colsample_by_tree" } are not used.

divvy_trips - INFO - Performing split number 1
Parameters: { "colsample_by_level", "colsample_by_node", "colsample_by_tree" } are not used.

divvy_trips - INFO - Performing split number 2
Parameters: { "colsample_by_level", "colsample_by_node", "colsample_by_tree" } are not used.

divvy_trips - INFO - Performing split number 3
Parameters: { "colsample_by_level", "colsample_by_node", "colsample_by_tree" } are not used.

divvy_trips - INFO - Performing split number 4
Parameters: { "colsample_by_level", "colsample_by_node", "colsample_by_tree" } are not used.

[I 2024-02-06 01:15:14,185] Trial 0 finished with value: 0.17087715864181519 and parameters: {'eta': 0.8090089669184578, 'max_depth': 8, 'alpha': 1.627575571737482, 'subsample': 1, 'colsample_by_tree': 0.9728813300435879, 'colsample_by_level': 0.8013045345189957, 'colsample_by_node': 0.5068040975074096}. Best is trial 0 with value: 0.170877





Parameters: { "colsample_by_level", "colsample_by_node", "colsample_by_tree" } are not used.

divvy_trips - INFO - Performing split number 1
Parameters: { "colsample_by_level", "colsample_by_node", "colsample_by_tree" } are not used.

divvy_trips - INFO - Performing split number 2
Parameters: { "colsample_by_level", "colsample_by_node", "colsample_by_tree" } are not used.

divvy_trips - INFO - Performing split number 3
Parameters: { "colsample_by_level", "colsample_by_node", "colsample_by_tree" } are not used.

divvy_trips - INFO - Performing split number 4
Parameters: { "colsample_by_level", "colsample_by_node", "colsample_by_tree" } are not used.

[I 2024-02-06 01:17:09,993] Trial 1 finished with value: 0.1558344066143036 and parameters: {'eta': 0.7948236468284198, 'max_depth': 4, 'alpha': 1.240654350931024, 'subsample': 1, 'colsample_by_tree': 0.2058691741415387, 'colsample_by_level': 0.38265257582732504, 'colsample_by_node': 0.395338425887634}. Best is trial 1 with value: 0.1558344





Parameters: { "colsample_by_level", "colsample_by_node", "colsample_by_tree" } are not used.

divvy_trips - INFO - Performing split number 1
Parameters: { "colsample_by_level", "colsample_by_node", "colsample_by_tree" } are not used.

divvy_trips - INFO - Performing split number 2
Parameters: { "colsample_by_level", "colsample_by_node", "colsample_by_tree" } are not used.

divvy_trips - INFO - Performing split number 3
Parameters: { "colsample_by_level", "colsample_by_node", "colsample_by_tree" } are not used.

divvy_trips - INFO - Performing split number 4
Parameters: { "colsample_by_level", "colsample_by_node", "colsample_by_tree" } are not used.

[I 2024-02-06 01:18:18,535] Trial 2 finished with value: 0.1762741357088089 and parameters: {'eta': 0.8569231169435146, 'max_depth': 6, 'alpha': 0.04772564765031295, 'subsample': 0, 'colsample_by_tree': 0.138729639052558, 'colsample_by_level': 0.7628129154827453, 'colsample_by_node': 0.3672454381320174}. Best is trial 1 with value: 0.155834





Parameters: { "colsample_by_level", "colsample_by_node", "colsample_by_tree" } are not used.

divvy_trips - INFO - Performing split number 1
Parameters: { "colsample_by_level", "colsample_by_node", "colsample_by_tree" } are not used.

divvy_trips - INFO - Performing split number 2
Parameters: { "colsample_by_level", "colsample_by_node", "colsample_by_tree" } are not used.

divvy_trips - INFO - Performing split number 3
Parameters: { "colsample_by_level", "colsample_by_node", "colsample_by_tree" } are not used.

divvy_trips - INFO - Performing split number 4
Parameters: { "colsample_by_level", "colsample_by_node", "colsample_by_tree" } are not used.

[I 2024-02-06 01:19:27,263] Trial 3 finished with value: 0.1762741357088089 and parameters: {'eta': 0.8021214104370077, 'max_depth': 10, 'alpha': 0.11111679360871562, 'subsample': 0, 'colsample_by_tree': 0.4374348684623708, 'colsample_by_level': 0.17139307366727918, 'colsample_by_node': 0.17002948767301163}. Best is trial 1 with value: 0.15





Parameters: { "colsample_by_level", "colsample_by_node", "colsample_by_tree" } are not used.

divvy_trips - INFO - Performing split number 1
Parameters: { "colsample_by_level", "colsample_by_node", "colsample_by_tree" } are not used.

divvy_trips - INFO - Performing split number 2
Parameters: { "colsample_by_level", "colsample_by_node", "colsample_by_tree" } are not used.

divvy_trips - INFO - Performing split number 3
Parameters: { "colsample_by_level", "colsample_by_node", "colsample_by_tree" } are not used.

divvy_trips - INFO - Performing split number 4
Parameters: { "colsample_by_level", "colsample_by_node", "colsample_by_tree" } are not used.

[I 2024-02-06 01:20:37,736] Trial 4 finished with value: 0.1762741357088089 and parameters: {'eta': 0.4218364621112167, 'max_depth': 10, 'alpha': 0.17023133295124593, 'subsample': 0, 'colsample_by_tree': 0.2648404355933356, 'colsample_by_level': 0.5936462168148562, 'colsample_by_node': 0.8741963261124106}. Best is trial 1 with value: 0.1558