Merge pull request #4016 from knshnb/cma-with-margin

Support CMA-ES with margin in `CmaEsSampler`
optuna · Nov 17, 2022 · db1837c · db1837c
2 parents f085672 + ca8adcb
commit db1837c
Show file tree

Hide file tree

Showing 3 changed files with 172 additions and 34 deletions.
diff --git a/optuna/samplers/_cmaes.py b/optuna/samplers/_cmaes.py
@@ -12,6 +12,7 @@
 import warnings
 
 from cmaes import CMA
+from cmaes import CMAwM
 from cmaes import get_warm_start_mgd
 from cmaes import SepCMA
 import numpy as np
@@ -20,6 +21,8 @@
 from optuna import logging
 from optuna._transform import _SearchSpaceTransform
 from optuna.distributions import BaseDistribution
+from optuna.distributions import FloatDistribution
+from optuna.distributions import IntDistribution
 from optuna.exceptions import ExperimentalWarning
 from optuna.samplers import BaseSampler
 from optuna.study._study_direction import StudyDirection
@@ -34,7 +37,7 @@
 _SYSTEM_ATTR_MAX_LENGTH = 2045
 
 
-CmaClass = Union[CMA, SepCMA]
+CmaClass = Union[CMA, SepCMA, CMAwM]
 
 
 class CmaEsSampler(BaseSampler):
@@ -86,6 +89,9 @@ def objective(trial):
     - `Masahiro Nomura, Shuhei Watanabe, Youhei Akimoto, Yoshihiko Ozaki, Masaki Onishi.
       Warm Starting CMA-ES for Hyperparameter Optimization, AAAI. 2021.
       <https://arxiv.org/abs/2012.06932>`_
+    - `R. Hamano, S. Saito, M. Nomura, S. Shirakawa. CMA-ES with Margin: Lower-Bounding Marginal
+      Probability for Mixed-Integer Black-Box Optimization, GECCO. 2022.
+      <https://arxiv.org/abs/2205.13482>`_
 
     .. seealso::
         You can also use :class:`optuna.integration.PyCmaSampler` which is a sampler using cma
@@ -177,6 +183,18 @@ def objective(trial):
                 versions without prior notice. See
                 https://github.com/optuna/optuna/releases/tag/v2.6.0.
 
+        with_margin:
+            If this is :obj:`True`, CMA-ES with margin is used. This algorithm prevents samples in
+            each discrete distribution (:class:`~optuna.distributions.FloatDistribution` with
+            `step` and :class:`~optuna.distributions.IntDistribution`) from being fixed to a single
+            point.
+            Currently, this option cannot be used with ``use_separable_cma=True``.
+
+            .. note::
+                Added in v3.1.0 as an experimental feature. The interface may change in newer
+                versions without prior notice. See
+                https://github.com/optuna/optuna/releases/tag/v3.1.0.
+
         source_trials:
             This option is for Warm Starting CMA-ES, a method to transfer prior knowledge on
             similar HPO tasks through the initialization of CMA-ES. This method estimates a
@@ -205,6 +223,7 @@ def __init__(
         popsize: Optional[int] = None,
         inc_popsize: int = 2,
         use_separable_cma: bool = False,
+        with_margin: bool = False,
         source_trials: Optional[List[FrozenTrial]] = None,
     ) -> None:
         self._x0 = x0
@@ -219,6 +238,7 @@ def __init__(
         self._popsize = popsize
         self._inc_popsize = inc_popsize
         self._use_separable_cma = use_separable_cma
+        self._with_margin = with_margin
         self._source_trials = source_trials
 
         if self._restart_strategy:
@@ -249,6 +269,13 @@ def __init__(
                 ExperimentalWarning,
             )
 
+        if self._with_margin:
+            warnings.warn(
+                "`with_margin` option is an experimental feature."
+                " The interface can change in the future.",
+                ExperimentalWarning,
+            )
+
         if source_trials is not None and (x0 is not None or sigma0 is not None):
             raise ValueError(
                 "It is prohibited to pass `source_trials` argument when "
@@ -272,6 +299,12 @@ def __init__(
                 )
             )
 
+        # TODO(knshnb): Support sep-CMA-ES with margin.
+        if self._use_separable_cma and self._with_margin:
+            raise ValueError(
+                "Currently, we do not support `use_separable_cma=True` and `with_margin=True`."
+            )
+
     def reseed_rng(self) -> None:
         # _cma_rng doesn't require reseeding because the relative sampling reseeds in each trial.
         self._independent_sampler.reseed_rng()
@@ -287,13 +320,7 @@ def infer_relative_search_space(
                 # `Trial`.
                 continue
 
-            if not isinstance(
-                distribution,
-                (
-                    optuna.distributions.FloatDistribution,
-                    optuna.distributions.IntDistribution,
-                ),
-            ):
+            if not isinstance(distribution, (FloatDistribution, IntDistribution)):
                 # Categorical distribution is unsupported.
                 continue
             search_space[name] = distribution
@@ -326,7 +353,8 @@ def sample_relative(
             self._warn_independent_sampling = False
             return {}
 
-        trans = _SearchSpaceTransform(search_space)
+        # When `with_margin=True`, bounds in discrete dimensions are handled inside `CMAwM`.
+        trans = _SearchSpaceTransform(search_space, transform_step=not self._with_margin)
 
         optimizer, n_restarts = self._restore_optimizer(completed_trials)
         if optimizer is None:
@@ -359,7 +387,10 @@ def sample_relative(
             solutions: List[Tuple[np.ndarray, float]] = []
             for t in solution_trials[: optimizer.population_size]:
                 assert t.value is not None, "completed trials must have a value"
-                x = trans.transform(t.params)
+                if isinstance(optimizer, CMAwM):
+                    x = t.system_attrs["x_for_tell"]
+                else:
+                    x = trans.transform(t.params)
                 y = t.value if study.direction == StudyDirection.MINIMIZE else -t.value
                 solutions.append((x, y))
 
@@ -382,7 +413,11 @@ def sample_relative(
         # Caution: optimizer should update its seed value.
         seed = self._cma_rng.randint(1, 2**16) + trial.number
         optimizer._rng.seed(seed)
-        params = optimizer.ask()
+        if isinstance(optimizer, CMAwM):
+            params, x_for_tell = optimizer.ask()
+            study._storage.set_trial_system_attr(trial._trial_id, "x_for_tell", x_for_tell)
+        else:
+            params = optimizer.ask()
 
         study._storage.set_trial_system_attr(
             trial._trial_id, generation_attr_key, optimizer.generation
@@ -484,6 +519,26 @@ def _init_optimizer(
                 population_size=population_size,
             )
 
+        if self._with_margin:
+            steps = np.empty(len(trans._search_space), dtype=float)
+            for i, dist in enumerate(trans._search_space.values()):
+                assert isinstance(dist, (IntDistribution, FloatDistribution))
+                # Set step 0.0 for continuous search space.
+                steps[i] = dist.step or 0.0
+
+            # If there is no discrete search space, we use `CMA` because `CMAwM` throws an error.
+            if np.any(steps > 0.0):
+                return CMAwM(
+                    mean=mean,
+                    sigma=sigma0,
+                    bounds=trans.bounds,
+                    steps=steps,
+                    cov=cov,
+                    seed=self._cma_rng.randint(1, 2**31 - 2),
+                    n_max_resampling=10 * n_dimension,
+                    population_size=population_size,
+                )
+
         return CMA(
             mean=mean,
             sigma=sigma0,

diff --git a/setup.py b/setup.py
@@ -28,7 +28,7 @@ def get_install_requires() -> List[str]:
     requirements = [
         "alembic>=1.5.0",
         "cliff",
-        "cmaes>=0.8.2",
+        "cmaes>=0.9.0",
         "colorlog",
         # TODO(HideakiImamura): remove this after the fix by `cliff` or `stevedore`
         "importlib-metadata<5.0.0",

diff --git a/tests/samplers_tests/test_cmaes.py b/tests/samplers_tests/test_cmaes.py
@@ -26,6 +26,11 @@ def test_consider_pruned_trials_experimental_warning() -> None:
         optuna.samplers.CmaEsSampler(consider_pruned_trials=True)
 
 
+def test_with_margin_experimental_warning() -> None:
+    with pytest.warns(optuna.exceptions.ExperimentalWarning):
+        optuna.samplers.CmaEsSampler(with_margin=True)
+
+
 @pytest.mark.filterwarnings("ignore::optuna.exceptions.ExperimentalWarning")
 @pytest.mark.parametrize(
     "use_separable_cma, cma_class_str",
@@ -66,46 +71,86 @@ def test_init_cmaes_opts(
 
 
 @pytest.mark.filterwarnings("ignore::optuna.exceptions.ExperimentalWarning")
-@patch("optuna.samplers._cmaes.get_warm_start_mgd")
-def test_warm_starting_cmaes(mock_func_ws: MagicMock) -> None:
+@pytest.mark.parametrize("popsize", [None, 8])
+def test_init_cmaes_opts_with_margin(popsize: Optional[int]) -> None:
+    sampler = optuna.samplers.CmaEsSampler(
+        x0={"x": 0, "y": 0},
+        sigma0=0.1,
+        seed=1,
+        n_startup_trials=1,
+        popsize=popsize,
+        with_margin=True,
+    )
+    study = optuna.create_study(sampler=sampler)
+
+    with patch("optuna.samplers._cmaes.CMAwM") as cma_class:
+        cma_obj = MagicMock()
+        cma_obj.ask.return_value = np.array((-1, -1))
+        cma_obj.generation = 0
+        cma_class.return_value = cma_obj
+        study.optimize(
+            lambda t: t.suggest_float("x", -1, 1) + t.suggest_int("y", -1, 1), n_trials=2
+        )
+
+        assert cma_class.call_count == 1
+
+        _, actual_kwargs = cma_class.call_args
+        assert np.array_equal(actual_kwargs["mean"], np.array([0, 0]))
+        assert actual_kwargs["sigma"] == 0.1
+        assert np.allclose(actual_kwargs["bounds"], np.array([(-1, 1), (-1, 1)]))
+        assert np.allclose(actual_kwargs["steps"], np.array([0.0, 1.0]))
+        assert actual_kwargs["seed"] == np.random.RandomState(1).randint(1, 2**32)
+        assert actual_kwargs["n_max_resampling"] == 10 * 2
+        assert actual_kwargs["population_size"] == popsize
+
+
+@pytest.mark.filterwarnings("ignore::optuna.exceptions.ExperimentalWarning")
+@pytest.mark.parametrize("with_margin", [False, True])
+def test_warm_starting_cmaes(with_margin: bool) -> None:
     def objective(trial: optuna.Trial) -> float:
         x = trial.suggest_float("x", -10, 10)
-        y = trial.suggest_float("y", -10, 10)
+        y = trial.suggest_int("y", -10, 10)
         return x**2 + y
 
     source_study = optuna.create_study()
     source_study.optimize(objective, 20)
     source_trials = source_study.get_trials(deepcopy=False)
 
-    mock_func_ws.return_value = (np.zeros(2), 0.0, np.zeros((2, 2)))
-    sampler = optuna.samplers.CmaEsSampler(seed=1, n_startup_trials=1, source_trials=source_trials)
-    study = optuna.create_study(sampler=sampler)
-    study.optimize(objective, 2)
-    assert mock_func_ws.call_count == 1
+    with patch("optuna.samplers._cmaes.get_warm_start_mgd") as mock_func_ws:
+        mock_func_ws.return_value = (np.zeros(2), 0.0, np.zeros((2, 2)))
+        sampler = optuna.samplers.CmaEsSampler(
+            seed=1, n_startup_trials=1, with_margin=with_margin, source_trials=source_trials
+        )
+        study = optuna.create_study(sampler=sampler)
+        study.optimize(objective, 2)
+        assert mock_func_ws.call_count == 1
 
 
 @pytest.mark.filterwarnings("ignore::optuna.exceptions.ExperimentalWarning")
-@patch("optuna.samplers._cmaes.get_warm_start_mgd")
-def test_warm_starting_cmaes_maximize(mock_func_ws: MagicMock) -> None:
+@pytest.mark.parametrize("with_margin", [False, True])
+def test_warm_starting_cmaes_maximize(with_margin: bool) -> None:
     def objective(trial: optuna.Trial) -> float:
         x = trial.suggest_float("x", -10, 10)
-        y = trial.suggest_float("y", -10, 10)
+        y = trial.suggest_int("y", -10, 10)
         # Objective values are negative.
         return -(x**2) - (y - 5) ** 2
 
     source_study = optuna.create_study(direction="maximize")
     source_study.optimize(objective, 20)
     source_trials = source_study.get_trials(deepcopy=False)
 
-    mock_func_ws.return_value = (np.zeros(2), 0.0, np.zeros((2, 2)))
-    sampler = optuna.samplers.CmaEsSampler(seed=1, n_startup_trials=1, source_trials=source_trials)
-    study = optuna.create_study(sampler=sampler, direction="maximize")
-    study.optimize(objective, 2)
-    assert mock_func_ws.call_count == 1
+    with patch("optuna.samplers._cmaes.get_warm_start_mgd") as mock_func_ws:
+        mock_func_ws.return_value = (np.zeros(2), 0.0, np.zeros((2, 2)))
+        sampler = optuna.samplers.CmaEsSampler(
+            seed=1, n_startup_trials=1, with_margin=with_margin, source_trials=source_trials
+        )
+        study = optuna.create_study(sampler=sampler, direction="maximize")
+        study.optimize(objective, 2)
+        assert mock_func_ws.call_count == 1
 
-    solutions_arg = mock_func_ws.call_args[0][0]
-    is_positive = [x[1] >= 0 for x in solutions_arg]
-    assert all(is_positive)
+        solutions_arg = mock_func_ws.call_args[0][0]
+        is_positive = [x[1] >= 0 for x in solutions_arg]
+        assert all(is_positive)
 
 
 @pytest.mark.filterwarnings("ignore::optuna.exceptions.ExperimentalWarning")
@@ -135,9 +180,13 @@ def test_should_raise_exception() -> None:
             restart_strategy="invalid-restart-strategy",
         )
 
+    with pytest.raises(ValueError):
+        optuna.samplers.CmaEsSampler(use_separable_cma=True, with_margin=True)
+
 
 @pytest.mark.filterwarnings("ignore::optuna.exceptions.ExperimentalWarning")
-def test_incompatible_search_space() -> None:
+@pytest.mark.parametrize("with_margin", [False, True])
+def test_incompatible_search_space(with_margin: bool) -> None:
     def objective1(trial: optuna.Trial) -> float:
         x0 = trial.suggest_float("x0", 2, 3)
         x1 = trial.suggest_float("x1", 1e-2, 1e2, log=True)
@@ -147,7 +196,9 @@ def objective1(trial: optuna.Trial) -> float:
     source_study.optimize(objective1, 20)
 
     # Should not raise an exception.
-    sampler = optuna.samplers.CmaEsSampler(source_trials=source_study.trials)
+    sampler = optuna.samplers.CmaEsSampler(
+        with_margin=with_margin, source_trials=source_study.trials
+    )
     target_study1 = optuna.create_study(sampler=sampler)
     target_study1.optimize(objective1, 20)
 
@@ -158,7 +209,9 @@ def objective2(trial: optuna.Trial) -> float:
         return x0 + x1 + x2
 
     # Should raise an exception.
-    sampler = optuna.samplers.CmaEsSampler(source_trials=source_study.trials)
+    sampler = optuna.samplers.CmaEsSampler(
+        with_margin=with_margin, source_trials=source_study.trials
+    )
     target_study2 = optuna.create_study(sampler=sampler)
     with pytest.raises(ValueError):
         target_study2.optimize(objective2, 20)
@@ -420,3 +473,33 @@ def test_is_compatible_search_space() -> None:
             "x1": optuna.distributions.CategoricalDistribution(["foo", "bar", "baz", "qux"]),
         },
     )
+
+
+def test_internal_optimizer_with_margin() -> None:
+    def objective_discrete(trial: optuna.Trial) -> float:
+        x = trial.suggest_int("x", -10, 10)
+        y = trial.suggest_int("y", -10, 10)
+        return x**2 + y
+
+    def objective_mixed(trial: optuna.Trial) -> float:
+        x = trial.suggest_float("x", -10, 10)
+        y = trial.suggest_int("y", -10, 10)
+        return x**2 + y
+
+    def objective_continuous(trial: optuna.Trial) -> float:
+        x = trial.suggest_float("x", -10, 10)
+        y = trial.suggest_float("y", -10, 10)
+        return x**2 + y
+
+    objectives = [objective_discrete, objective_mixed, objective_continuous]
+    # When all the search spaces are continuous, `CMA` is used.
+    expected_calls = [(0, 1), (0, 1), (1, 0)]
+    for objective, (cma_call, cmawm_call) in zip(objectives, expected_calls):
+        with patch("optuna.samplers._cmaes.CMA") as cma_class_mock, patch(
+            "optuna.samplers._cmaes.CMAwM"
+        ) as cmawm_class_mock:
+            sampler = optuna.samplers.CmaEsSampler(with_margin=True)
+            study = optuna.create_study(sampler=sampler)
+            study.optimize(objective, n_trials=2)
+            assert cma_class_mock.call_count == cma_call
+            assert cmawm_class_mock.call_count == cmawm_call