Support batched sampling with BoTorch #4591

Merged: 17 commits, May 12, 2023
110 changes: 83 additions & 27 deletions optuna/integration/botorch.py
@@ -69,6 +69,7 @@ def qei_candidates_func(
train_obj: "torch.Tensor",
train_con: Optional["torch.Tensor"],
bounds: "torch.Tensor",
pending_x: Optional["torch.Tensor"],
) -> "torch.Tensor":
"""Quasi MC-based batch Expected Improvement (qEI).

@@ -96,7 +97,11 @@ def qei_candidates_func(
Search space bounds. A ``torch.Tensor`` of shape ``(2, n_params)``. ``n_params`` is
identical to that of ``train_x``. The first and the second rows correspond to the
lower and upper bounds for each parameter respectively.

pending_x:
Pending parameter configurations. A ``torch.Tensor`` of shape
``(n_pending, n_params)``. ``n_pending`` is the number of trials whose parameters have
all been suggested but whose evaluation has not yet completed, and ``n_params`` is
identical to that of ``train_x``.
Returns:
Next set of candidates. Usually the return value of BoTorch's ``optimize_acqf``.
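
For any user-supplied ``candidates_func``, this change means the callable must now accept a fifth argument. Below is a minimal conforming sketch, assuming only ``torch`` is installed; ``my_candidates_func`` and its uniform-random logic are illustrative placeholders, not part of this PR:

```python
from typing import Optional

import torch


def my_candidates_func(
    train_x: torch.Tensor,
    train_obj: torch.Tensor,
    train_con: Optional[torch.Tensor],
    bounds: torch.Tensor,
    pending_x: Optional[torch.Tensor],  # New argument: running trials' parameters, or None.
) -> torch.Tensor:
    # A custom function may ignore ``pending_x``; the built-in functions
    # forward it to their acquisition function as ``X_pending``.
    lower, upper = bounds[0], bounds[1]
    return lower + (upper - lower) * torch.rand(bounds.shape[1], dtype=bounds.dtype)
```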

@@ -134,6 +139,8 @@ def qei_candidates_func(
objective = None # Using the default identity objective.

train_x = normalize(train_x, bounds=bounds)
if pending_x is not None:
pending_x = normalize(pending_x, bounds=bounds)

model = SingleTaskGP(train_x, train_y, outcome_transform=Standardize(m=train_y.size(-1)))
mll = ExactMarginalLogLikelihood(model.likelihood, model)
@@ -144,6 +151,7 @@
best_f=best_f,
sampler=_get_sobol_qmc_normal_sampler(256),
objective=objective,
X_pending=pending_x,
)

standard_bounds = torch.zeros_like(bounds)
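
As background on what ``X_pending`` does here (a sketch against BoTorch's public API, not part of the diff): the MC acquisition functions append the pending points to every candidate batch they evaluate, so the sampled improvement already accounts for the running trials. Hyperparameter fitting is omitted for brevity:

```python
import torch
from botorch.acquisition.monte_carlo import qExpectedImprovement
from botorch.models import SingleTaskGP

train_x = torch.rand(8, 2, dtype=torch.double)
train_y = train_x.sum(dim=-1, keepdim=True)
model = SingleTaskGP(train_x, train_y)  # Fitting via fit_gpytorch_mll omitted.

pending_x = torch.rand(3, 2, dtype=torch.double)  # Running trials' parameters.
acqf = qExpectedImprovement(model, best_f=train_y.max(), X_pending=pending_x)

# Evaluating a (batch, q, d)-shaped candidate set; the pending points are
# jointly sampled with it, steering new candidates away from running trials.
value = acqf(torch.rand(1, 1, 2, dtype=torch.double))
```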
@@ -170,6 +178,7 @@ def qehvi_candidates_func(
train_obj: "torch.Tensor",
train_con: Optional["torch.Tensor"],
bounds: "torch.Tensor",
pending_x: Optional["torch.Tensor"],
) -> "torch.Tensor":
"""Quasi MC-based batch Expected Hypervolume Improvement (qEHVI).

@@ -204,6 +213,8 @@
additional_qehvi_kwargs = {}

train_x = normalize(train_x, bounds=bounds)
if pending_x is not None:
pending_x = normalize(pending_x, bounds=bounds)

model = SingleTaskGP(train_x, train_y, outcome_transform=Standardize(m=train_y.shape[-1]))
mll = ExactMarginalLogLikelihood(model.likelihood, model)
@@ -227,6 +238,7 @@
ref_point=ref_point_list,
partitioning=partitioning,
sampler=_get_sobol_qmc_normal_sampler(256),
X_pending=pending_x,
**additional_qehvi_kwargs,
)
standard_bounds = torch.zeros_like(bounds)
@@ -253,6 +265,7 @@ def qnehvi_candidates_func(
train_obj: "torch.Tensor",
train_con: Optional["torch.Tensor"],
bounds: "torch.Tensor",
pending_x: Optional["torch.Tensor"],
) -> "torch.Tensor":
"""Quasi MC-based batch Expected Noisy Hypervolume Improvement (qNEHVI).

@@ -283,6 +296,8 @@ def qnehvi_candidates_func(
additional_qnehvi_kwargs = {}

train_x = normalize(train_x, bounds=bounds)
if pending_x is not None:
pending_x = normalize(pending_x, bounds=bounds)

model = SingleTaskGP(train_x, train_y, outcome_transform=Standardize(m=train_y.shape[-1]))
mll = ExactMarginalLogLikelihood(model.likelihood, model)
@@ -308,6 +323,7 @@
alpha=alpha,
prune_baseline=True,
sampler=_get_sobol_qmc_normal_sampler(256),
X_pending=pending_x,
**additional_qnehvi_kwargs,
)

@@ -335,6 +351,7 @@ def qparego_candidates_func(
train_obj: "torch.Tensor",
train_con: Optional["torch.Tensor"],
bounds: "torch.Tensor",
pending_x: Optional["torch.Tensor"],
) -> "torch.Tensor":
"""Quasi MC-based extended ParEGO (qParEGO) for constrained multi-objective optimization.

@@ -366,6 +383,8 @@ def qparego_candidates_func(
objective = GenericMCObjective(scalarization)

train_x = normalize(train_x, bounds=bounds)
if pending_x is not None:
pending_x = normalize(pending_x, bounds=bounds)

model = SingleTaskGP(train_x, train_y, outcome_transform=Standardize(m=train_y.size(-1)))
mll = ExactMarginalLogLikelihood(model.likelihood, model)
@@ -376,6 +395,7 @@
best_f=objective(train_y).max(),
sampler=_get_sobol_qmc_normal_sampler(256),
objective=objective,
X_pending=pending_x,
)

standard_bounds = torch.zeros_like(bounds)
@@ -404,6 +424,7 @@ def _get_default_candidates_func(
"torch.Tensor",
Optional["torch.Tensor"],
"torch.Tensor",
Optional["torch.Tensor"],
],
"torch.Tensor",
]:
@@ -466,6 +487,13 @@ class BoTorchSampler(BaseSampler):
n_startup_trials:
Number of initial trials, that is, the number of trials that fall back to
independent sampling.
consider_running_trials:
If True, the acquisition function considers running trials whose evaluation has
not yet completed. Enabling this option is expected to improve the performance of
parallel optimization.

.. note::
Added in v3.2.0 as an experimental argument.
independent_sampler:
An independent sampler to use for the initial trials and for parameters that are
conditional.
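
A usage sketch for the new ``consider_running_trials`` flag documented above, assuming ``optuna`` with BoTorch installed; the objective is illustrative:

```python
import optuna
from optuna.integration import BoTorchSampler


def objective(trial: optuna.Trial) -> float:
    x = trial.suggest_float("x", -10.0, 10.0)
    return x**2


sampler = BoTorchSampler(
    n_startup_trials=5,
    consider_running_trials=True,  # The default; pass False to ignore running trials.
)
study = optuna.create_study(sampler=sampler)
# With n_jobs > 1, several trials run concurrently; their already-suggested
# parameters reach the candidates function through ``pending_x``.
study.optimize(objective, n_trials=20, n_jobs=2)
```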
@@ -486,12 +514,14 @@ def __init__(
"torch.Tensor",
Optional["torch.Tensor"],
"torch.Tensor",
Optional["torch.Tensor"],
],
"torch.Tensor",
]
] = None,
constraints_func: Optional[Callable[[FrozenTrial], Sequence[float]]] = None,
n_startup_trials: int = 10,
consider_running_trials: bool = True,
independent_sampler: Optional[BaseSampler] = None,
seed: Optional[int] = None,
device: Optional["torch.device"] = None,
@@ -500,6 +530,7 @@ def __init__(

self._candidates_func = candidates_func
self._constraints_func = constraints_func
self._consider_running_trials = consider_running_trials
self._independent_sampler = independent_sampler or RandomSampler(seed=seed)
self._n_startup_trials = n_startup_trials
self._seed = seed
@@ -542,9 +573,14 @@ def sample_relative(
if len(search_space) == 0:
return {}

trials = study.get_trials(deepcopy=False, states=(TrialState.COMPLETE,))
completed_trials = study.get_trials(deepcopy=False, states=(TrialState.COMPLETE,))
running_trials = [
t for t in study.get_trials(deepcopy=False, states=(TrialState.RUNNING,)) if t != trial
]
trials = completed_trials + running_trials

n_trials = len(trials)
n_completed_trials = len(completed_trials)
if n_trials < self._n_startup_trials:
return {}

@@ -558,30 +594,40 @@ def sample_relative(
bounds: Union[numpy.ndarray, torch.Tensor] = trans.bounds
params = numpy.empty((n_trials, trans.bounds.shape[0]), dtype=numpy.float64)
for trial_idx, trial in enumerate(trials):
params[trial_idx] = trans.transform(trial.params)
assert len(study.directions) == len(trial.values)

for obj_idx, (direction, value) in enumerate(zip(study.directions, trial.values)):
assert value is not None
if direction == StudyDirection.MINIMIZE: # BoTorch always assumes maximization.
value *= -1
values[trial_idx, obj_idx] = value

if self._constraints_func is not None:
constraints = study._storage.get_trial_system_attrs(trial._trial_id).get(
_CONSTRAINTS_KEY
)
if constraints is not None:
n_constraints = len(constraints)

if con is None:
con = numpy.full((n_trials, n_constraints), numpy.nan, dtype=numpy.float64)
elif n_constraints != con.shape[1]:
raise RuntimeError(
f"Expected {con.shape[1]} constraints but received {n_constraints}."
)

con[trial_idx] = constraints
if trial.state == TrialState.COMPLETE:
params[trial_idx] = trans.transform(trial.params)
assert len(study.directions) == len(trial.values)
for obj_idx, (direction, value) in enumerate(zip(study.directions, trial.values)):
assert value is not None
if (
direction == StudyDirection.MINIMIZE
): # BoTorch always assumes maximization.
value *= -1
values[trial_idx, obj_idx] = value
if self._constraints_func is not None:
constraints = study._storage.get_trial_system_attrs(trial._trial_id).get(
_CONSTRAINTS_KEY
)
if constraints is not None:
n_constraints = len(constraints)

if con is None:
con = numpy.full(
(n_completed_trials, n_constraints), numpy.nan, dtype=numpy.float64
)
elif n_constraints != con.shape[1]:
raise RuntimeError(
f"Expected {con.shape[1]} constraints "
f"but received {n_constraints}."
)
con[trial_idx] = constraints
elif trial.state == TrialState.RUNNING:
if all(p in trial.params for p in search_space):
params[trial_idx] = trans.transform(trial.params)
else:
params[trial_idx] = numpy.nan
else:
assert False, "trial.state must be TrialState.COMPLETE or TrialState.RUNNING."

if self._constraints_func is not None:
if con is None:
@@ -609,11 +655,21 @@ def sample_relative(
if self._candidates_func is None:
self._candidates_func = _get_default_candidates_func(n_objectives=n_objectives)

completed_values = values[:n_completed_trials]
completed_params = params[:n_completed_trials]
if self._consider_running_trials:
running_params = params[n_completed_trials:]
running_params = running_params[~torch.isnan(running_params).any(dim=1)]
else:
running_params = None

with manual_seed(self._seed):
# `manual_seed` makes the default candidates functions reproducible.
# `SobolQMCNormalSampler`'s constructor has a `seed` argument, but its behavior is
# deterministic when the BoTorch's seed is fixed.
candidates = self._candidates_func(params, values, con, bounds)
candidates = self._candidates_func(
completed_params, completed_values, con, bounds, running_params
)
if self._seed is not None:
self._seed += 1

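A note on the row filtering above (illustrative, plain ``torch``): a running trial that has not suggested every search-space parameter contributes a ``NaN`` row to ``params``, and the mask keeps only the fully-suggested rows:

```python
import torch

# Two running trials: the first fully suggested, the second only partially.
running_params = torch.tensor([[0.2, 0.7], [0.5, float("nan")]])

mask = ~torch.isnan(running_params).any(dim=1)  # tensor([True, False])
running_params = running_params[mask]  # Only the fully-suggested row survives.
```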
61 changes: 61 additions & 0 deletions tests/integration_tests/test_botorch.py
@@ -16,6 +16,7 @@
from optuna.storages import RDBStorage
from optuna.trial import FrozenTrial
from optuna.trial import Trial
from optuna.trial import TrialState


with try_import() as _imports:
@@ -62,6 +63,7 @@ def candidates_func(
train_obj: torch.Tensor,
train_con: Optional[torch.Tensor],
bounds: torch.Tensor,
running_x: Optional[torch.Tensor],
) -> torch.Tensor:
assert train_con is None

@@ -157,6 +159,7 @@ def candidates_func(
train_obj: torch.Tensor,
train_con: Optional[torch.Tensor],
bounds: torch.Tensor,
running_x: Optional[torch.Tensor],
) -> torch.Tensor:
return torch.rand(2, 1)  # Must have a batch size of one, not two.

@@ -174,6 +177,7 @@ def candidates_func(
train_obj: torch.Tensor,
train_con: Optional[torch.Tensor],
bounds: torch.Tensor,
running_x: Optional[torch.Tensor],
) -> torch.Tensor:
return torch.rand(1, 1, 1) # Must have one or two dimensions, not three.

@@ -193,6 +197,7 @@ def candidates_func(
train_obj: torch.Tensor,
train_con: Optional[torch.Tensor],
bounds: torch.Tensor,
running_x: Optional[torch.Tensor],
) -> torch.Tensor:
return torch.rand(n_params - 1) # Must return candidates for all parameters.

@@ -262,6 +267,7 @@ def candidates_func(
train_obj: torch.Tensor,
train_con: Optional[torch.Tensor],
bounds: torch.Tensor,
running_x: Optional[torch.Tensor],
) -> torch.Tensor:
trial_number = train_x.size(0)

@@ -312,6 +318,7 @@ def candidates_func(
train_obj: torch.Tensor,
train_con: Optional[torch.Tensor],
bounds: torch.Tensor,
running_x: Optional[torch.Tensor],
) -> torch.Tensor:
# `train_con` should be `None` if `constraints_func` always fails.
assert train_con is None
@@ -354,6 +361,7 @@ def candidates_func(
train_obj: torch.Tensor,
train_con: Optional[torch.Tensor],
bounds: torch.Tensor,
running_x: Optional[torch.Tensor],
) -> torch.Tensor:
trial_number = train_x.size(0)

@@ -480,3 +488,56 @@ def constraints_func(trial: FrozenTrial) -> Sequence[float]:
sampler = BoTorchSampler(constraints_func=constraints_func, n_startup_trials=1)
study = optuna.create_study(sampler=sampler)
study.optimize(objective, n_trials=3)


@pytest.mark.parametrize(
"candidates_func, n_objectives",
[
(integration.botorch.qei_candidates_func, 1),
(integration.botorch.qehvi_candidates_func, 2),
(integration.botorch.qparego_candidates_func, 4),
(integration.botorch.qnehvi_candidates_func, 2),
(integration.botorch.qnehvi_candidates_func, 3), # alpha > 0
],
)
def test_botorch_consider_running_trials(candidates_func: Any, n_objectives: int) -> None:
sampler = BoTorchSampler(
candidates_func=candidates_func,
n_startup_trials=1,
consider_running_trials=True,
)

def objective(trial: Trial) -> Sequence[float]:
ret = []
for i in range(n_objectives):
val = sum(trial.suggest_float(f"x{i}_{j}", 0, 1) for j in range(2))
ret.append(val)
return ret

study = optuna.create_study(directions=["minimize"] * n_objectives, sampler=sampler)
study.optimize(objective, n_trials=2)
assert len(study.trials) == 2

# fully suggested running trial
running_trial_full = study.ask()
_ = objective(running_trial_full)
study.optimize(objective, n_trials=1)
assert len(study.trials) == 4
assert sum(t.state == TrialState.RUNNING for t in study.trials) == 1
assert sum(t.state == TrialState.COMPLETE for t in study.trials) == 3

# partially suggested running trial
running_trial_partial = study.ask()
for i in range(n_objectives):
running_trial_partial.suggest_float(f"x{i}_0", 0, 1)
study.optimize(objective, n_trials=1)
assert len(study.trials) == 6
assert sum(t.state == TrialState.RUNNING for t in study.trials) == 2
assert sum(t.state == TrialState.COMPLETE for t in study.trials) == 4

# not suggested running trial
_ = study.ask()
study.optimize(objective, n_trials=1)
assert len(study.trials) == 8
assert sum(t.state == TrialState.RUNNING for t in study.trials) == 3
assert sum(t.state == TrialState.COMPLETE for t in study.trials) == 5