diff --git a/optuna/integration/botorch.py b/optuna/integration/botorch.py
index 7581c1579a..7295584e75 100644
--- a/optuna/integration/botorch.py
+++ b/optuna/integration/botorch.py
@@ -69,6 +69,7 @@ def qei_candidates_func(
     train_obj: "torch.Tensor",
     train_con: Optional["torch.Tensor"],
     bounds: "torch.Tensor",
+    pending_x: Optional["torch.Tensor"],
 ) -> "torch.Tensor":
     """Quasi MC-based batch Expected Improvement (qEI).
 
@@ -96,7 +97,11 @@ def qei_candidates_func(
             Search space bounds. A ``torch.Tensor`` of shape ``(2, n_params)``. ``n_params`` is
             identical to that of ``train_x``. The first and the second rows correspond to the
             lower and upper bounds for each parameter respectively.
-
+        pending_x:
+            Pending parameter configurations. A ``torch.Tensor`` of shape
+            ``(n_pending, n_params)``. ``n_pending`` is the number of trials whose parameters
+            have all been suggested but whose evaluation has not completed, and ``n_params``
+            is identical to that of ``train_x``.
     Returns:
         Next set of candidates. Usually the return value of BoTorch's ``optimize_acqf``.
 
@@ -134,6 +139,8 @@ def qei_candidates_func(
         objective = None  # Using the default identity objective.
 
     train_x = normalize(train_x, bounds=bounds)
+    if pending_x is not None:
+        pending_x = normalize(pending_x, bounds=bounds)
 
     model = SingleTaskGP(train_x, train_y, outcome_transform=Standardize(m=train_y.size(-1)))
     mll = ExactMarginalLogLikelihood(model.likelihood, model)
@@ -144,6 +151,7 @@ def qei_candidates_func(
         best_f=best_f,
         sampler=_get_sobol_qmc_normal_sampler(256),
         objective=objective,
+        X_pending=pending_x,
     )
 
     standard_bounds = torch.zeros_like(bounds)
@@ -170,6 +178,7 @@ def qehvi_candidates_func(
     train_obj: "torch.Tensor",
     train_con: Optional["torch.Tensor"],
     bounds: "torch.Tensor",
+    pending_x: Optional["torch.Tensor"],
 ) -> "torch.Tensor":
     """Quasi MC-based batch Expected Hypervolume Improvement (qEHVI).
 
@@ -204,6 +213,8 @@ def qehvi_candidates_func(
         additional_qehvi_kwargs = {}
 
     train_x = normalize(train_x, bounds=bounds)
+    if pending_x is not None:
+        pending_x = normalize(pending_x, bounds=bounds)
 
     model = SingleTaskGP(train_x, train_y, outcome_transform=Standardize(m=train_y.shape[-1]))
     mll = ExactMarginalLogLikelihood(model.likelihood, model)
@@ -227,6 +238,7 @@ def qehvi_candidates_func(
         ref_point=ref_point_list,
         partitioning=partitioning,
         sampler=_get_sobol_qmc_normal_sampler(256),
+        X_pending=pending_x,
         **additional_qehvi_kwargs,
     )
     standard_bounds = torch.zeros_like(bounds)
@@ -253,6 +265,7 @@ def qnehvi_candidates_func(
     train_obj: "torch.Tensor",
     train_con: Optional["torch.Tensor"],
     bounds: "torch.Tensor",
+    pending_x: Optional["torch.Tensor"],
 ) -> "torch.Tensor":
     """Quasi MC-based batch Expected Noisy Hypervolume Improvement (qNEHVI).
 
@@ -283,6 +296,8 @@ def qnehvi_candidates_func(
         additional_qnehvi_kwargs = {}
 
     train_x = normalize(train_x, bounds=bounds)
+    if pending_x is not None:
+        pending_x = normalize(pending_x, bounds=bounds)
 
     model = SingleTaskGP(train_x, train_y, outcome_transform=Standardize(m=train_y.shape[-1]))
     mll = ExactMarginalLogLikelihood(model.likelihood, model)
@@ -308,6 +323,7 @@ def qnehvi_candidates_func(
         alpha=alpha,
         prune_baseline=True,
         sampler=_get_sobol_qmc_normal_sampler(256),
+        X_pending=pending_x,
         **additional_qnehvi_kwargs,
     )
 
@@ -335,6 +351,7 @@ def qparego_candidates_func(
     train_obj: "torch.Tensor",
     train_con: Optional["torch.Tensor"],
     bounds: "torch.Tensor",
+    pending_x: Optional["torch.Tensor"],
 ) -> "torch.Tensor":
     """Quasi MC-based extended ParEGO (qParEGO) for constrained multi-objective optimization.
 
@@ -366,6 +383,8 @@ def qparego_candidates_func(
     objective = GenericMCObjective(scalarization)
 
     train_x = normalize(train_x, bounds=bounds)
+    if pending_x is not None:
+        pending_x = normalize(pending_x, bounds=bounds)
 
     model = SingleTaskGP(train_x, train_y, outcome_transform=Standardize(m=train_y.size(-1)))
     mll = ExactMarginalLogLikelihood(model.likelihood, model)
@@ -376,6 +395,7 @@ def qparego_candidates_func(
         best_f=objective(train_y).max(),
         sampler=_get_sobol_qmc_normal_sampler(256),
         objective=objective,
+        X_pending=pending_x,
     )
 
     standard_bounds = torch.zeros_like(bounds)
@@ -404,6 +424,7 @@ def _get_default_candidates_func(
         "torch.Tensor",
         Optional["torch.Tensor"],
         "torch.Tensor",
+        Optional["torch.Tensor"],
     ],
     "torch.Tensor",
 ]:
@@ -466,6 +487,13 @@ class BoTorchSampler(BaseSampler):
         n_startup_trials:
             Number of initial trials, that is the number of trials to resort to independent
             sampling.
+        consider_running_trials:
+            If :obj:`True`, the acquisition function takes running trials into account, that
+            is, trials whose parameters have all been suggested but whose evaluation has not
+            completed. Enabling this option may improve the performance of parallel optimization.
+
+            .. note::
+                Added in v3.2.0 as an experimental argument.
         independent_sampler:
             An independent sampler to use for the initial trials and for parameters that are
             conditional.
@@ -486,12 +514,14 @@ def __init__(
                     "torch.Tensor",
                     Optional["torch.Tensor"],
                     "torch.Tensor",
+                    Optional["torch.Tensor"],
                 ],
                 "torch.Tensor",
             ]
         ] = None,
         constraints_func: Optional[Callable[[FrozenTrial], Sequence[float]]] = None,
         n_startup_trials: int = 10,
+        consider_running_trials: bool = True,
         independent_sampler: Optional[BaseSampler] = None,
         seed: Optional[int] = None,
         device: Optional["torch.device"] = None,
@@ -500,6 +530,7 @@ def __init__(
 
         self._candidates_func = candidates_func
         self._constraints_func = constraints_func
+        self._consider_running_trials = consider_running_trials
         self._independent_sampler = independent_sampler or RandomSampler(seed=seed)
         self._n_startup_trials = n_startup_trials
         self._seed = seed
@@ -542,9 +573,14 @@ def sample_relative(
         if len(search_space) == 0:
             return {}
 
-        trials = study.get_trials(deepcopy=False, states=(TrialState.COMPLETE,))
+        completed_trials = study.get_trials(deepcopy=False, states=(TrialState.COMPLETE,))
+        running_trials = [
+            t for t in study.get_trials(deepcopy=False, states=(TrialState.RUNNING,)) if t != trial
+        ]
+        trials = completed_trials + running_trials
         n_trials = len(trials)
+        n_completed_trials = len(completed_trials)
 
         if n_trials < self._n_startup_trials:
             return {}
 
@@ -558,30 +594,40 @@ def sample_relative(
         bounds: Union[numpy.ndarray, torch.Tensor] = trans.bounds
         params = numpy.empty((n_trials, trans.bounds.shape[0]), dtype=numpy.float64)
         for trial_idx, trial in enumerate(trials):
-            params[trial_idx] = trans.transform(trial.params)
-            assert len(study.directions) == len(trial.values)
-
-            for obj_idx, (direction, value) in enumerate(zip(study.directions, trial.values)):
-                assert value is not None
-                if direction == StudyDirection.MINIMIZE:  # BoTorch always assumes maximization.
-                    value *= -1
-                values[trial_idx, obj_idx] = value
-
-            if self._constraints_func is not None:
-                constraints = study._storage.get_trial_system_attrs(trial._trial_id).get(
-                    _CONSTRAINTS_KEY
-                )
-                if constraints is not None:
-                    n_constraints = len(constraints)
-
-                    if con is None:
-                        con = numpy.full((n_trials, n_constraints), numpy.nan, dtype=numpy.float64)
-                    elif n_constraints != con.shape[1]:
-                        raise RuntimeError(
-                            f"Expected {con.shape[1]} constraints but received {n_constraints}."
-                        )
-
-                    con[trial_idx] = constraints
+            if trial.state == TrialState.COMPLETE:
+                params[trial_idx] = trans.transform(trial.params)
+                assert len(study.directions) == len(trial.values)
+                for obj_idx, (direction, value) in enumerate(zip(study.directions, trial.values)):
+                    assert value is not None
+                    if (
+                        direction == StudyDirection.MINIMIZE
+                    ):  # BoTorch always assumes maximization.
+                        value *= -1
+                    values[trial_idx, obj_idx] = value
+                if self._constraints_func is not None:
+                    constraints = study._storage.get_trial_system_attrs(trial._trial_id).get(
+                        _CONSTRAINTS_KEY
+                    )
+                    if constraints is not None:
+                        n_constraints = len(constraints)
+
+                        if con is None:
+                            con = numpy.full(
+                                (n_completed_trials, n_constraints), numpy.nan, dtype=numpy.float64
+                            )
+                        elif n_constraints != con.shape[1]:
+                            raise RuntimeError(
+                                f"Expected {con.shape[1]} constraints "
+                                f"but received {n_constraints}."
+                            )
+                        con[trial_idx] = constraints
+            elif trial.state == TrialState.RUNNING:
+                if all(p in trial.params for p in search_space):
+                    params[trial_idx] = trans.transform(trial.params)
+                else:
+                    params[trial_idx] = numpy.nan
+            else:
+                assert False, "trial.state must be TrialState.COMPLETE or TrialState.RUNNING."
 
         if self._constraints_func is not None:
             if con is None:
@@ -609,11 +655,21 @@ def sample_relative(
         if self._candidates_func is None:
             self._candidates_func = _get_default_candidates_func(n_objectives=n_objectives)
 
+        completed_values = values[:n_completed_trials]
+        completed_params = params[:n_completed_trials]
+        if self._consider_running_trials:
+            running_params = params[n_completed_trials:]
+            running_params = running_params[~torch.isnan(running_params).any(dim=1)]
+        else:
+            running_params = None
+
         with manual_seed(self._seed):
             # `manual_seed` makes the default candidates functions reproducible.
             # `SobolQMCNormalSampler`'s constructor has a `seed` argument, but its behavior is
             # deterministic when the BoTorch's seed is fixed.
-            candidates = self._candidates_func(params, values, con, bounds)
+            candidates = self._candidates_func(
+                completed_params, completed_values, con, bounds, running_params
+            )
             if self._seed is not None:
                 self._seed += 1
 
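With the sampler changes above in place, enabling the feature from user code is a one-liner. The following is a minimal usage sketch, assuming this patch is applied; the quadratic `objective` and the `n_jobs=4` thread-based parallelism are illustrative choices, not part of the patch.

```python
import optuna
from optuna.integration import BoTorchSampler


def objective(trial: optuna.Trial) -> float:
    # Toy objective; any expensive black-box function fits here.
    x = trial.suggest_float("x", -10, 10)
    y = trial.suggest_float("y", -10, 10)
    return (x - 2) ** 2 + (y + 3) ** 2


# With `consider_running_trials=True`, the candidates function receives the
# parameters of still-running trials as `pending_x`, so concurrent workers
# are discouraged from re-suggesting points that are already being evaluated.
sampler = BoTorchSampler(n_startup_trials=10, consider_running_trials=True)
study = optuna.create_study(sampler=sampler)
study.optimize(objective, n_trials=32, n_jobs=4)
```
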
diff --git a/tests/integration_tests/test_botorch.py b/tests/integration_tests/test_botorch.py
index 43b6b0b207..1290e483da 100644
--- a/tests/integration_tests/test_botorch.py
+++ b/tests/integration_tests/test_botorch.py
@@ -16,6 +16,7 @@
 from optuna.storages import RDBStorage
 from optuna.trial import FrozenTrial
 from optuna.trial import Trial
+from optuna.trial import TrialState
 
 
 with try_import() as _imports:
@@ -62,6 +63,7 @@ def candidates_func(
         train_obj: torch.Tensor,
         train_con: Optional[torch.Tensor],
         bounds: torch.Tensor,
+        running_x: Optional[torch.Tensor],
     ) -> torch.Tensor:
         assert train_con is None
 
@@ -157,6 +159,7 @@ def candidates_func(
         train_obj: torch.Tensor,
         train_con: Optional[torch.Tensor],
         bounds: torch.Tensor,
+        running_x: Optional[torch.Tensor],
     ) -> torch.Tensor:
         return torch.rand(2, 1)  # Must have the batch size one, not two.
 
@@ -174,6 +177,7 @@ def candidates_func(
         train_obj: torch.Tensor,
         train_con: Optional[torch.Tensor],
         bounds: torch.Tensor,
+        running_x: Optional[torch.Tensor],
     ) -> torch.Tensor:
         return torch.rand(1, 1, 1)  # Must have one or two dimensions, not three.
 
@@ -193,6 +197,7 @@ def candidates_func(
         train_obj: torch.Tensor,
         train_con: Optional[torch.Tensor],
         bounds: torch.Tensor,
+        running_x: Optional[torch.Tensor],
     ) -> torch.Tensor:
         return torch.rand(n_params - 1)  # Must return candidates for all parameters.
 
@@ -262,6 +267,7 @@ def candidates_func(
         train_obj: torch.Tensor,
         train_con: Optional[torch.Tensor],
         bounds: torch.Tensor,
+        running_x: Optional[torch.Tensor],
     ) -> torch.Tensor:
         trial_number = train_x.size(0)
 
@@ -312,6 +318,7 @@ def candidates_func(
         train_obj: torch.Tensor,
         train_con: Optional[torch.Tensor],
         bounds: torch.Tensor,
+        running_x: Optional[torch.Tensor],
     ) -> torch.Tensor:
         # `train_con` should be `None` if `constraints_func` always fails.
         assert train_con is None
 
@@ -354,6 +361,7 @@ def candidates_func(
         train_obj: torch.Tensor,
         train_con: Optional[torch.Tensor],
         bounds: torch.Tensor,
+        running_x: Optional[torch.Tensor],
     ) -> torch.Tensor:
         trial_number = train_x.size(0)
 
@@ -480,3 +488,56 @@ def constraints_func(trial: FrozenTrial) -> Sequence[float]:
     sampler = BoTorchSampler(constraints_func=constraints_func, n_startup_trials=1)
     study = optuna.create_study(sampler=sampler)
     study.optimize(objective, n_trials=3)
+
+
+@pytest.mark.parametrize(
+    "candidates_func, n_objectives",
+    [
+        (integration.botorch.qei_candidates_func, 1),
+        (integration.botorch.qehvi_candidates_func, 2),
+        (integration.botorch.qparego_candidates_func, 4),
+        (integration.botorch.qnehvi_candidates_func, 2),
+        (integration.botorch.qnehvi_candidates_func, 3),  # alpha > 0
+    ],
+)
+def test_botorch_consider_running_trials(candidates_func: Any, n_objectives: int) -> None:
+    sampler = BoTorchSampler(
+        candidates_func=candidates_func,
+        n_startup_trials=1,
+        consider_running_trials=True,
+    )
+
+    def objective(trial: Trial) -> Sequence[float]:
+        ret = []
+        for i in range(n_objectives):
+            val = sum(trial.suggest_float(f"x{i}_{j}", 0, 1) for j in range(2))
+            ret.append(val)
+        return ret
+
+    study = optuna.create_study(directions=["minimize"] * n_objectives, sampler=sampler)
+    study.optimize(objective, n_trials=2)
+    assert len(study.trials) == 2
+
+    # fully suggested running trial
+    running_trial_full = study.ask()
+    _ = objective(running_trial_full)
+    study.optimize(objective, n_trials=1)
+    assert len(study.trials) == 4
+    assert sum(t.state == TrialState.RUNNING for t in study.trials) == 1
+    assert sum(t.state == TrialState.COMPLETE for t in study.trials) == 3
+
+    # partially suggested running trial
+    running_trial_partial = study.ask()
+    for i in range(n_objectives):
+        running_trial_partial.suggest_float(f"x{i}_0", 0, 1)
+    study.optimize(objective, n_trials=1)
+    assert len(study.trials) == 6
+    assert sum(t.state == TrialState.RUNNING for t in study.trials) == 2
+    assert sum(t.state == TrialState.COMPLETE for t in study.trials) == 4
+
+    # not suggested running trial
+    _ = study.ask()
+    study.optimize(objective, n_trials=1)
+    assert len(study.trials) == 8
+    assert sum(t.state == TrialState.RUNNING for t in study.trials) == 3
+    assert sum(t.state == TrialState.COMPLETE for t in study.trials) == 5
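For authors of custom candidates functions, the only required change is accepting the new fifth argument, as the test helpers above do. Below is a minimal, hypothetical sketch of a single-objective, unconstrained candidates function that forwards the pending points to BoTorch's `qExpectedImprovement` via `X_pending`. It mirrors the structure of the built-in `qei_candidates_func` rather than reproducing it, and assumes a recent BoTorch release that provides `fit_gpytorch_mll`; `my_candidates_func` and the `optimize_acqf` settings are illustrative, not part of the patch.

```python
from typing import Optional

import torch
from botorch.acquisition.monte_carlo import qExpectedImprovement
from botorch.fit import fit_gpytorch_mll
from botorch.models import SingleTaskGP
from botorch.optim import optimize_acqf
from botorch.utils.transforms import normalize, unnormalize
from gpytorch.mlls import ExactMarginalLogLikelihood


def my_candidates_func(
    train_x: torch.Tensor,
    train_obj: torch.Tensor,
    train_con: Optional[torch.Tensor],
    bounds: torch.Tensor,
    pending_x: Optional[torch.Tensor],  # The new fifth argument added by this patch.
) -> torch.Tensor:
    # Work in the unit cube, mirroring the built-in candidates functions.
    train_x = normalize(train_x, bounds=bounds)
    if pending_x is not None:
        pending_x = normalize(pending_x, bounds=bounds)

    model = SingleTaskGP(train_x, train_obj)
    fit_gpytorch_mll(ExactMarginalLogLikelihood(model.likelihood, model))

    # `X_pending` makes the acquisition function integrate over the model's
    # predictions at the running points instead of re-suggesting them.
    acqf = qExpectedImprovement(
        model=model,
        best_f=train_obj.max(),
        X_pending=pending_x,
    )

    # Optimize over the unit cube and map the winner back to the search space.
    standard_bounds = torch.zeros_like(bounds)
    standard_bounds[1] = 1
    candidates, _ = optimize_acqf(
        acq_function=acqf,
        bounds=standard_bounds,
        q=1,
        num_restarts=10,
        raw_samples=512,
    )
    return unnormalize(candidates.detach(), bounds=bounds)
```

Passing this as `BoTorchSampler(candidates_func=my_candidates_func, consider_running_trials=True)` should then behave like the single-objective default, except that constraints are ignored in this sketch.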