Ray Tune: set epochs equal to max_t and disable early stopping #1109

Merged 1 commit on Mar 8, 2021
35 changes: 35 additions & 0 deletions ludwig/utils/defaults.py
@@ -182,10 +182,45 @@ def _set_proc_column(config: dict) -> None:
         feature[PROC_COLUMN] = compute_feature_hash(feature)
 
 
+def _merge_hyperopt_with_training(config: dict) -> None:
+    if 'hyperopt' not in config or TRAINING not in config:
+        return
+
+    scheduler = config['hyperopt'].get('sampler', {}).get('scheduler')
+    if not scheduler:
+        return
+
+    # Disable early stopping when using a scheduler. We achieve this by setting the parameter
+    # to -1, which ensures the condition to apply early stopping is never met.
+    training = config[TRAINING]
+    early_stop = training.get('early_stop')
+    if early_stop is not None and early_stop != -1:
+        raise ValueError(
+            'Cannot set training parameter `early_stop` when using a hyperopt scheduler. '
+            'Unset this parameter in your config.'
+        )
+    training['early_stop'] = -1
+
+    # At most one of max_t and epochs may be specified by the user, and we set them to be equal to
+    # ensure that Ludwig does not stop training before the scheduler has finished the trial.
+    max_t = scheduler.get('max_t')
+    epochs = training.get('epochs')
+    if max_t is not None and epochs is not None and max_t != epochs:
+        raise ValueError(
+            'Cannot set training parameter `epochs` when using a hyperopt scheduler with `max_t`. '
+            'Unset one of these parameters in your config.'
+        )
+    elif max_t is not None:
+        training['epochs'] = max_t
+    elif epochs is not None:
+        scheduler['max_t'] = epochs
+
+
 def merge_with_defaults(config):
     _perform_sanity_checks(config)
     _set_feature_column(config)
     _set_proc_column(config)
+    _merge_hyperopt_with_training(config)
 
     # ===== Preprocessing =====
     config['preprocessing'] = merge_dict(
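To illustrate the intent of the new helper (this sketch is not part of the diff): given a config that sets only `max_t` on the scheduler, `_merge_hyperopt_with_training` copies that value into `training.epochs` and disables early stopping. The config below is illustrative, and it assumes the `TRAINING` constant resolves to the `"training"` key used elsewhere in Ludwig configs.

from ludwig.utils.defaults import _merge_hyperopt_with_training

# Illustrative config: only max_t is set; epochs and early_stop are left out.
config = {
    "training": {"learning_rate": 0.001},
    "hyperopt": {
        "sampler": {
            "type": "ray",
            "scheduler": {
                "type": "hb_bohb",
                "time_attr": "training_iteration",
                "max_t": 100,
                "reduction_factor": 4,
            },
        },
    },
}

_merge_hyperopt_with_training(config)

assert config["training"]["epochs"] == 100     # epochs copied from max_t
assert config["training"]["early_stop"] == -1  # early stopping disabled

Calling `merge_with_defaults` on a full config, as the updated test below does, applies the same logic after the usual sanity checks.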
10 changes: 7 additions & 3 deletions tests/integration_tests/test_hyperopt_ray.py
@@ -79,7 +79,6 @@
"scheduler": {
"type": "hb_bohb",
"time_attr": "training_iteration",
"max_t": 100,
"reduction_factor": 4,
},
"num_samples": 3
@@ -116,12 +115,17 @@ def run_hyperopt_executor(sampler, executor, csv_filename,
"input_features": input_features,
"output_features": output_features,
"combiner": {"type": "concat", "num_fc_layers": 2},
"training": {"epochs": 2, "learning_rate": 0.001}
"training": {"epochs": 2, "learning_rate": 0.001},
"hyperopt": {
**HYPEROPT_CONFIG,
"executor": executor,
"sampler": sampler,
},
}

config = merge_with_defaults(config)

hyperopt_config = HYPEROPT_CONFIG.copy()
hyperopt_config = config["hyperopt"]

if validate_output_feature:
hyperopt_config['output_feature'] = output_features[0]['name']
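For completeness, a rough sketch of the error path introduced above (illustrative values, not part of the diff): when a config sets both `training.epochs` and a conflicting scheduler `max_t`, the new check raises. As before, the `"training"` key is assumed to match the `TRAINING` constant.

import pytest

from ludwig.utils.defaults import _merge_hyperopt_with_training

# Illustrative config exercising only the new helper.
config = {
    "training": {"epochs": 10},
    "hyperopt": {
        "sampler": {
            "scheduler": {"type": "hb_bohb", "max_t": 100},
        },
    },
}

# epochs (10) conflicts with the scheduler's max_t (100), so a ValueError is raised.
with pytest.raises(ValueError):
    _merge_hyperopt_with_training(config)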