You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
I have some code that uses a callback to stop a Ray Tune trial if the complexity of the model (total leaves in the model) exceeds a given threshold. This works fine with a normal lightgbm model but fails when I use a lightgbm_ray model.
In the below code, "use_distributed" can be toggled to True to reproduce the error.
I presume the error is because the correct way of passing the metrics back to tune is with the TuneReportCheckpointCallback() from ray.tune.integration.lightgbm. I've played around with this, but it seems like I can only access the metrics reported by the lightgbm model. I can't add the "total_leaves" as a metric because it relies on accessing the model itself, not just the data and predictions.
Is it possible to report total_leaves to ray tune with lightgbm_ray?
#%%
# set up and load boston data
import numpy as np
import pandas as pd
import os
import lightgbm
from lightgbm_ray import RayLGBMRegressor, RayParams, RayDMatrix
from sklearn.datasets import load_boston
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import ray
from ray.air import session
from ray import tune
from ray.tune.search.optuna import OptunaSearch
# --- Ray cluster setup, data loading, and shared configuration ---
ray.shutdown()
## Initialise ray (connect to an existing cluster via the Ray client):
if not ray.is_initialized():
    service_host = os.environ['RAY_HEAD_SERVICE_HOST']
    service_port = os.environ['RAY_HEAD_SERVICE_PORT']
    ray.init(
        f'ray://{service_host}:{service_port}'
    )

# Toggle to True to reproduce the distributed (lightgbm_ray) failure.
use_distributed = False
# FIX: original line was garbled ("out_dir =< '...' >", a syntax error);
# restored as a plain placeholder path.
out_dir = '/path/to/output_folder'

# NOTE(review): load_boston is deprecated and removed in scikit-learn >= 1.2;
# swap in fetch_california_housing (or any regression set) on newer versions.
boston = load_boston()
x, y = boston.data, boston.target
df = pd.DataFrame(x, columns=boston.feature_names)

# Build the training data: a RayDMatrix for the distributed path,
# plain in-memory arrays otherwise.
if use_distributed:
    actors = 2
    ray_params = RayParams(
        num_actors=actors,
        cpus_per_actor=2,
    )
    train_df_with_target = df.copy()
    train_df_with_target['target'] = y
    train_set = RayDMatrix(
        data=train_df_with_target,
        label='target',
    )
else:
    actors = 1

# Model parameters / search space shared by both code paths.
params = {
    'boosting_type': 'goss',
    'objective': 'regression',
    'metric': 'rmse',
    'n_estimators': 100,
    'num_leaves': 6,
    'max_depth': 3,
    'learning_rate': tune.quniform(0.05, 0.1, 0.01),
    'verbose': 1,
}
#%% define function to count total leaves in model
def leaves_callback(env):
    """LightGBM callback: report model complexity and train RMSE to Ray Tune.

    ``env`` is the ``lightgbm.callback.CallbackEnv`` namedtuple LightGBM
    passes to callbacks once per boosting iteration; ``env.model`` is the
    current booster.

    NOTE(review): ``session.report`` requires a Tune session in the calling
    process. With lightgbm_ray this callback executes inside the remote
    training actors, where no session exists — hence the observed
    ``AttributeError: 'NoneType' object has no attribute 'report'``. A
    Tune-aware callback (e.g. from ``ray.tune.integration.lightgbm``) is
    needed on the distributed path — TODO confirm against lightgbm_ray docs.
    """
    mod_dump = env.model.dump_model()
    tree_info = mod_dump['tree_info']
    # One dict per tree; summing num_leaves gives total model complexity.
    num_leaves = sum(tree['num_leaves'] for tree in tree_info)
    session.report({
        'total_leaves': num_leaves,
        # evaluation_result_list entries look like
        # (eval_name, metric_name, value, is_higher_better); index 2 is the value.
        'rmse_train': env.evaluation_result_list[0][2],
        'num_iterations': len(tree_info),
    })
# define trainable
def trainable(params):
    """Tune trainable: fit a LightGBM regressor, local or distributed.

    Reads the module-level ``use_distributed`` flag plus the prepared data
    (``train_set``/``ray_params`` or ``x``/``y``) and attaches
    ``leaves_callback`` so each boosting round reports back to Tune.
    """
    if not use_distributed:
        # Single-process LightGBM on the in-memory arrays.
        model = lightgbm.LGBMRegressor(
            random_state=100,
            **params
        )
        model.fit(
            X=x,
            y=y,
            eval_set=[(x, y)],
            eval_names=["train"],
            callbacks=[leaves_callback],
        )
        return

    # Distributed path: train across Ray actors on the shared RayDMatrix.
    model = RayLGBMRegressor(
        random_state=100,
        **params
    )
    model.fit(
        train_set,
        y='target',
        eval_set=[(train_set, 'target')],
        eval_names=["train"],
        ray_params=ray_params,
        callbacks=[leaves_callback],
    )
#%% RUN TUNING
# Placement group: one 2-CPU bundle per training actor plus one for the
# trainable itself, and a final 1-CPU bundle.
resources = [{'CPU': 2.0}] * (actors + 1)
resources.append({'CPU': 1.0})

analysis = tune.Tuner(
    tune.with_resources(
        trainable,
        tune.PlacementGroupFactory(resources, strategy='PACK'),
    ),
    tune_config=tune.TuneConfig(
        metric="rmse_train",
        mode="min",
        search_alg=OptunaSearch(),
        num_samples=5,
    ),
    # Stop a trial once the reported leaf count crosses the threshold.
    run_config=ray.air.RunConfig(
        local_dir=out_dir,
        name='test_callback',
        stop={'total_leaves': 300},
    ),
    param_space=params,
)
results = analysis.fit()
If I toggle use_distributed to True
(_RemoteRayLightGBMActor pid=585, ip=10.99.15.76) File "/opt/conda/lib/python3.9/site-packages/ray/air/session.py", line 61, in report
(_RemoteRayLightGBMActor pid=585, ip=10.99.15.76) _get_session().report(metrics, checkpoint=checkpoint)
(_RemoteRayLightGBMActor pid=585, ip=10.99.15.76) AttributeError: 'NoneType' object has no attribute 'report'
If I toggle use_distributed to False, I get the expected result:
I have some code that uses a callback to stop a Ray Tune trial if the complexity of the model (total leaves in the model) exceeds a given threshold). This works fine with a normal
lightgbm
model but fails when I use a lightgbm_ray
model.In the below code, "use_distributed" can be toggled to True to reproduce the error.
I presume the error is because the correct way of passing the metrics back to tune is with the
TuneReportCheckpointCallback()
from ray.tune.integration.lightgbm
. I've played around with this, but it seems like I can only access the metrics reported by the lightgbm model. I can't add the "total_leaves" as a metric because it relies on accessing the model itself, not just the data and predictions.Is it possible to report
total_leaves
to ray tune with lightgbm_ray?If I toggle use_distributed to True
If I toggle use_distributed to False, I get the expected result:
The text was updated successfully, but these errors were encountered: