Support batched sampling with BoTorch #4591

Merged: 17 commits, May 12, 2023
110 changes: 83 additions & 27 deletions optuna/integration/botorch.py
@@ -69,6 +69,7 @@ def qei_candidates_func(
train_obj: "torch.Tensor",
train_con: Optional["torch.Tensor"],
bounds: "torch.Tensor",
pending_x: Optional["torch.Tensor"],
) -> "torch.Tensor":
"""Quasi MC-based batch Expected Improvement (qEI).

@@ -96,7 +97,11 @@ def qei_candidates_func(
Search space bounds. A ``torch.Tensor`` of shape ``(2, n_params)``. ``n_params`` is
identical to that of ``train_x``. The first and the second rows correspond to the
lower and upper bounds for each parameter respectively.

pending_x:
Pending parameter configurations. A ``torch.Tensor`` of shape
``(n_pending, n_params)``. ``n_pending`` is the number of trials whose parameters have
all been suggested but whose evaluation has not yet completed, and ``n_params`` is
identical to that of ``train_x``.
Returns:
Next set of candidates. Usually the return value of BoTorch's ``optimize_acqf``.
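
For any user-supplied ``candidates_func``, this change means the callable must now accept a fifth argument. Below is a minimal conforming sketch, assuming only ``torch`` is installed; ``my_candidates_func`` and its uniform-random logic are illustrative placeholders, not part of this PR:

```python
from typing import Optional

import torch


def my_candidates_func(
    train_x: torch.Tensor,
    train_obj: torch.Tensor,
    train_con: Optional[torch.Tensor],
    bounds: torch.Tensor,
    pending_x: Optional[torch.Tensor],  # New argument: running trials' parameters, or None.
) -> torch.Tensor:
    # A custom function may ignore ``pending_x``; the built-in functions
    # forward it to their acquisition function as ``X_pending``.
    lower, upper = bounds[0], bounds[1]
    return lower + (upper - lower) * torch.rand(bounds.shape[1], dtype=bounds.dtype)
```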

@@ -134,6 +139,8 @@ def qei_candidates_func(
objective = None # Using the default identity objective.

train_x = normalize(train_x, bounds=bounds)
if pending_x is not None:
pending_x = normalize(pending_x, bounds=bounds)

model = SingleTaskGP(train_x, train_y, outcome_transform=Standardize(m=train_y.size(-1)))
mll = ExactMarginalLogLikelihood(model.likelihood, model)
@@ -144,6 +151,7 @@
best_f=best_f,
sampler=_get_sobol_qmc_normal_sampler(256),
objective=objective,
X_pending=pending_x,
)

standard_bounds = torch.zeros_like(bounds)
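
As background on what ``X_pending`` does here (a sketch against BoTorch's public API, not part of the diff): the MC acquisition functions append the pending points to every candidate batch they evaluate, so the sampled improvement already accounts for the running trials. Hyperparameter fitting is omitted for brevity:

```python
import torch
from botorch.acquisition.monte_carlo import qExpectedImprovement
from botorch.models import SingleTaskGP

train_x = torch.rand(8, 2, dtype=torch.double)
train_y = train_x.sum(dim=-1, keepdim=True)
model = SingleTaskGP(train_x, train_y)  # Fitting via fit_gpytorch_mll omitted.

pending_x = torch.rand(3, 2, dtype=torch.double)  # Running trials' parameters.
acqf = qExpectedImprovement(model, best_f=train_y.max(), X_pending=pending_x)

# Evaluating a (batch, q, d)-shaped candidate set; the pending points are
# jointly sampled with it, steering new candidates away from running trials.
value = acqf(torch.rand(1, 1, 2, dtype=torch.double))
```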
@@ -170,6 +178,7 @@ def qehvi_candidates_func(
train_obj: "torch.Tensor",
train_con: Optional["torch.Tensor"],
bounds: "torch.Tensor",
pending_x: Optional["torch.Tensor"],
) -> "torch.Tensor":
"""Quasi MC-based batch Expected Hypervolume Improvement (qEHVI).

@@ -204,6 +213,8 @@
additional_qehvi_kwargs = {}

train_x = normalize(train_x, bounds=bounds)
if pending_x is not None:
pending_x = normalize(pending_x, bounds=bounds)

model = SingleTaskGP(train_x, train_y, outcome_transform=Standardize(m=train_y.shape[-1]))
mll = ExactMarginalLogLikelihood(model.likelihood, model)
@@ -227,6 +238,7 @@
ref_point=ref_point_list,
partitioning=partitioning,
sampler=_get_sobol_qmc_normal_sampler(256),
X_pending=pending_x,
**additional_qehvi_kwargs,
)
standard_bounds = torch.zeros_like(bounds)
@@ -253,6 +265,7 @@ def qnehvi_candidates_func(
train_obj: "torch.Tensor",
train_con: Optional["torch.Tensor"],
bounds: "torch.Tensor",
pending_x: Optional["torch.Tensor"],
) -> "torch.Tensor":
"""Quasi MC-based batch Expected Noisy Hypervolume Improvement (qNEHVI).

@@ -283,6 +296,8 @@ def qnehvi_candidates_func(
additional_qnehvi_kwargs = {}

train_x = normalize(train_x, bounds=bounds)
if pending_x is not None:
pending_x = normalize(pending_x, bounds=bounds)

model = SingleTaskGP(train_x, train_y, outcome_transform=Standardize(m=train_y.shape[-1]))
mll = ExactMarginalLogLikelihood(model.likelihood, model)
@@ -308,6 +323,7 @@
alpha=alpha,
prune_baseline=True,
sampler=_get_sobol_qmc_normal_sampler(256),
X_pending=pending_x,
**additional_qnehvi_kwargs,
)

@@ -335,6 +351,7 @@ def qparego_candidates_func(
train_obj: "torch.Tensor",
train_con: Optional["torch.Tensor"],
bounds: "torch.Tensor",
pending_x: Optional["torch.Tensor"],
) -> "torch.Tensor":
"""Quasi MC-based extended ParEGO (qParEGO) for constrained multi-objective optimization.

@@ -366,6 +383,8 @@ def qparego_candidates_func(
objective = GenericMCObjective(scalarization)

train_x = normalize(train_x, bounds=bounds)
if pending_x is not None:
pending_x = normalize(pending_x, bounds=bounds)

model = SingleTaskGP(train_x, train_y, outcome_transform=Standardize(m=train_y.size(-1)))
mll = ExactMarginalLogLikelihood(model.likelihood, model)
@@ -376,6 +395,7 @@
best_f=objective(train_y).max(),
sampler=_get_sobol_qmc_normal_sampler(256),
objective=objective,
X_pending=pending_x,
)

standard_bounds = torch.zeros_like(bounds)
@@ -404,6 +424,7 @@ def _get_default_candidates_func(
"torch.Tensor",
Optional["torch.Tensor"],
"torch.Tensor",
Optional["torch.Tensor"],
],
"torch.Tensor",
]:
@@ -466,6 +487,13 @@ class BoTorchSampler(BaseSampler):
n_startup_trials:
Number of initial trials, that is, the number of trials that fall back to
independent sampling.
consider_running_trials:
If True, the acquisition function considers running trials whose evaluation has
not yet completed. Enabling this option is expected to improve the performance of
parallel optimization.

.. note::
Added in v3.2.0 as an experimental argument.
independent_sampler:
An independent sampler to use for the initial trials and for parameters that are
conditional.
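
A usage sketch for the new ``consider_running_trials`` flag documented above, assuming ``optuna`` with BoTorch installed; the objective is illustrative:

```python
import optuna
from optuna.integration import BoTorchSampler


def objective(trial: optuna.Trial) -> float:
    x = trial.suggest_float("x", -10.0, 10.0)
    return x**2


sampler = BoTorchSampler(
    n_startup_trials=5,
    consider_running_trials=True,  # The default; pass False to ignore running trials.
)
study = optuna.create_study(sampler=sampler)
# With n_jobs > 1, several trials run concurrently; their already-suggested
# parameters reach the candidates function through ``pending_x``.
study.optimize(objective, n_trials=20, n_jobs=2)
```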
@@ -486,12 +514,14 @@ def __init__(
"torch.Tensor",
Optional["torch.Tensor"],
"torch.Tensor",
Optional["torch.Tensor"],
],
"torch.Tensor",
]
] = None,
constraints_func: Optional[Callable[[FrozenTrial], Sequence[float]]] = None,
n_startup_trials: int = 10,
consider_running_trials: bool = True,
independent_sampler: Optional[BaseSampler] = None,
seed: Optional[int] = None,
device: Optional["torch.device"] = None,
@@ -500,6 +530,7 @@ def __init__(

self._candidates_func = candidates_func
self._constraints_func = constraints_func
self._consider_running_trials = consider_running_trials
self._independent_sampler = independent_sampler or RandomSampler(seed=seed)
self._n_startup_trials = n_startup_trials
self._seed = seed
@@ -542,9 +573,14 @@ def sample_relative(
if len(search_space) == 0:
return {}

trials = study.get_trials(deepcopy=False, states=(TrialState.COMPLETE,))
completed_trials = study.get_trials(deepcopy=False, states=(TrialState.COMPLETE,))
running_trials = [
t for t in study.get_trials(deepcopy=False, states=(TrialState.RUNNING,)) if t != trial
]
trials = completed_trials + running_trials

n_trials = len(trials)
n_completed_trials = len(completed_trials)
if n_trials < self._n_startup_trials:
return {}

@@ -558,30 +594,40 @@ def sample_relative(
bounds: Union[numpy.ndarray, torch.Tensor] = trans.bounds
params = numpy.empty((n_trials, trans.bounds.shape[0]), dtype=numpy.float64)
for trial_idx, trial in enumerate(trials):
params[trial_idx] = trans.transform(trial.params)
assert len(study.directions) == len(trial.values)

for obj_idx, (direction, value) in enumerate(zip(study.directions, trial.values)):
assert value is not None
if direction == StudyDirection.MINIMIZE: # BoTorch always assumes maximization.
value *= -1
values[trial_idx, obj_idx] = value

if self._constraints_func is not None:
constraints = study._storage.get_trial_system_attrs(trial._trial_id).get(
_CONSTRAINTS_KEY
)
if constraints is not None:
n_constraints = len(constraints)

if con is None:
con = numpy.full((n_trials, n_constraints), numpy.nan, dtype=numpy.float64)
elif n_constraints != con.shape[1]:
raise RuntimeError(
f"Expected {con.shape[1]} constraints but received {n_constraints}."
)

con[trial_idx] = constraints
if trial.state == TrialState.COMPLETE:
params[trial_idx] = trans.transform(trial.params)
assert len(study.directions) == len(trial.values)
for obj_idx, (direction, value) in enumerate(zip(study.directions, trial.values)):
assert value is not None
if (
direction == StudyDirection.MINIMIZE
): # BoTorch always assumes maximization.
value *= -1
values[trial_idx, obj_idx] = value
if self._constraints_func is not None:
constraints = study._storage.get_trial_system_attrs(trial._trial_id).get(
_CONSTRAINTS_KEY
)
if constraints is not None:
n_constraints = len(constraints)

if con is None:
con = numpy.full(
(n_completed_trials, n_constraints), numpy.nan, dtype=numpy.float64
)
elif n_constraints != con.shape[1]:
raise RuntimeError(
f"Expected {con.shape[1]} constraints "
f"but received {n_constraints}."
)
con[trial_idx] = constraints
elif trial.state == TrialState.RUNNING:
if all(p in trial.params for p in search_space):
params[trial_idx] = trans.transform(trial.params)
else:
params[trial_idx] = numpy.nan
else:
assert False, "trial.state must be TrialState.COMPLETE or TrialState.RUNNING."

if self._constraints_func is not None:
if con is None:
@@ -609,11 +655,21 @@ def sample_relative(
if self._candidates_func is None:
self._candidates_func = _get_default_candidates_func(n_objectives=n_objectives)

completed_values = values[:n_completed_trials]
completed_params = params[:n_completed_trials]
if self._consider_running_trials:
running_params = params[n_completed_trials:]
running_params = running_params[~torch.isnan(running_params).any(dim=1)]
else:
running_params = None

with manual_seed(self._seed):
# `manual_seed` makes the default candidates functions reproducible.
# `SobolQMCNormalSampler`'s constructor has a `seed` argument, but its behavior is
# deterministic when the BoTorch's seed is fixed.
candidates = self._candidates_func(params, values, con, bounds)
candidates = self._candidates_func(
completed_params, completed_values, con, bounds, running_params
)
if self._seed is not None:
self._seed += 1

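A note on the row filtering above (illustrative, plain ``torch``): a running trial that has not suggested every search-space parameter contributes a ``NaN`` row to ``params``, and the mask keeps only the fully-suggested rows:

```python
import torch

# Two running trials: the first fully suggested, the second only partially.
running_params = torch.tensor([[0.2, 0.7], [0.5, float("nan")]])

mask = ~torch.isnan(running_params).any(dim=1)  # tensor([True, False])
running_params = running_params[mask]  # Only the fully-suggested row survives.
```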
61 changes: 61 additions & 0 deletions tests/integration_tests/test_botorch.py
@@ -16,6 +16,7 @@
from optuna.storages import RDBStorage
from optuna.trial import FrozenTrial
from optuna.trial import Trial
from optuna.trial import TrialState


with try_import() as _imports:
@@ -62,6 +63,7 @@ def candidates_func(
train_obj: torch.Tensor,
train_con: Optional[torch.Tensor],
bounds: torch.Tensor,
running_x: Optional[torch.Tensor],
) -> torch.Tensor:
assert train_con is None

@@ -157,6 +159,7 @@ def candidates_func(
train_obj: torch.Tensor,
train_con: Optional[torch.Tensor],
bounds: torch.Tensor,
running_x: Optional[torch.Tensor],
) -> torch.Tensor:
return torch.rand(2, 1)  # Must have a batch size of one, not two.

@@ -174,6 +177,7 @@ def candidates_func(
train_obj: torch.Tensor,
train_con: Optional[torch.Tensor],
bounds: torch.Tensor,
running_x: Optional[torch.Tensor],
) -> torch.Tensor:
return torch.rand(1, 1, 1) # Must have one or two dimensions, not three.

@@ -193,6 +197,7 @@ def candidates_func(
train_obj: torch.Tensor,
train_con: Optional[torch.Tensor],
bounds: torch.Tensor,
running_x: Optional[torch.Tensor],
) -> torch.Tensor:
return torch.rand(n_params - 1) # Must return candidates for all parameters.

@@ -262,6 +267,7 @@ def candidates_func(
train_obj: torch.Tensor,
train_con: Optional[torch.Tensor],
bounds: torch.Tensor,
running_x: Optional[torch.Tensor],
) -> torch.Tensor:
trial_number = train_x.size(0)

@@ -312,6 +318,7 @@ def candidates_func(
train_obj: torch.Tensor,
train_con: Optional[torch.Tensor],
bounds: torch.Tensor,
running_x: Optional[torch.Tensor],
) -> torch.Tensor:
# `train_con` should be `None` if `constraints_func` always fails.
assert train_con is None
@@ -354,6 +361,7 @@ def candidates_func(
train_obj: torch.Tensor,
train_con: Optional[torch.Tensor],
bounds: torch.Tensor,
running_x: Optional[torch.Tensor],
) -> torch.Tensor:
trial_number = train_x.size(0)

@@ -480,3 +488,56 @@ def constraints_func(trial: FrozenTrial) -> Sequence[float]:
sampler = BoTorchSampler(constraints_func=constraints_func, n_startup_trials=1)
study = optuna.create_study(sampler=sampler)
study.optimize(objective, n_trials=3)


@pytest.mark.parametrize(
"candidates_func, n_objectives",
[
(integration.botorch.qei_candidates_func, 1),
(integration.botorch.qehvi_candidates_func, 2),
(integration.botorch.qparego_candidates_func, 4),
(integration.botorch.qnehvi_candidates_func, 2),
(integration.botorch.qnehvi_candidates_func, 3), # alpha > 0
],
)
def test_botorch_consider_running_trials(candidates_func: Any, n_objectives: int) -> None:
sampler = BoTorchSampler(
candidates_func=candidates_func,
n_startup_trials=1,
consider_running_trials=True,
)

def objective(trial: Trial) -> Sequence[float]:
ret = []
for i in range(n_objectives):
val = sum(trial.suggest_float(f"x{i}_{j}", 0, 1) for j in range(2))
ret.append(val)
return ret

study = optuna.create_study(directions=["minimize"] * n_objectives, sampler=sampler)
study.optimize(objective, n_trials=2)
assert len(study.trials) == 2

# fully suggested running trial
running_trial_full = study.ask()
_ = objective(running_trial_full)
study.optimize(objective, n_trials=1)
assert len(study.trials) == 4
assert sum(t.state == TrialState.RUNNING for t in study.trials) == 1
assert sum(t.state == TrialState.COMPLETE for t in study.trials) == 3

# partially suggested running trial
running_trial_partial = study.ask()
for i in range(n_objectives):
running_trial_partial.suggest_float(f"x{i}_0", 0, 1)
study.optimize(objective, n_trials=1)
assert len(study.trials) == 6
assert sum(t.state == TrialState.RUNNING for t in study.trials) == 2
assert sum(t.state == TrialState.COMPLETE for t in study.trials) == 4

# not suggested running trial
_ = study.ask()
study.optimize(objective, n_trials=1)
assert len(study.trials) == 8
assert sum(t.state == TrialState.RUNNING for t in study.trials) == 3
assert sum(t.state == TrialState.COMPLETE for t in study.trials) == 5