Remove most of advanced setups

optuna · Feb 1, 2024 · 3d17362 · 3d17362
1 parent dd9711f
commit 3d17362
Show file tree

Hide file tree

Showing 2 changed files with 60 additions and 151 deletions.
diff --git a/optuna/importance/_ped_anova/_evaluator.py b/optuna/importance/_ped_anova/_evaluator.py
@@ -6,14 +6,14 @@
 import numpy as np
 
 from optuna.distributions import BaseDistribution
-from optuna.distributions import CategoricalChoiceType
 from optuna.importance._base import _get_distributions
 from optuna.importance._base import _get_filtered_trials
 from optuna.importance._base import _sort_dict_by_importance
 from optuna.importance._base import BaseImportanceEvaluator
 from optuna.importance._ped_anova._scott_parzen_estimator import _build_parzen_estimator
 from optuna.importance.filters import get_trial_filter
 from optuna.study import Study
+from optuna.study import StudyDirection
 from optuna.trial import FrozenTrial
 
 
@@ -25,11 +25,9 @@ class PedAnovaImportanceEvaluator(BaseImportanceEvaluator):
       <https://arxiv.org/abs/2304.10255>`_.
 
     PED-ANOVA fits Parzen estimators of :class:`~optuna.trial.TrialState.COMPLETE` trials better
-    than a user-specified baseline. Users can specify the baseline either by a quantile or a value.
+    than a user-specified baseline. Users can specify the baseline by a quantile.
     The importance can be interpreted as how important each hyperparameter is to get
     the performance better than baseline.
-    Users can also remove trials worse than `cutoff` so that the interpretation removes the bias
-    caused by the initial trials.
 
     For further information about PED-ANOVA algorithm, please refer to the following paper:
 
@@ -46,34 +44,11 @@ class PedAnovaImportanceEvaluator(BaseImportanceEvaluator):
         Please refer to the original work available at https://github.com/nabenabe0928/local-anova.
 
     Args:
-        is_lower_better:
-            Whether `target_value` is better when it is lower.
-        n_steps:
-            The number of grids in continuous domains.
-            For example, if one of the parameters has the domain of [`low`, `high`],
-            we discretize it as `np.linspace(low, high, n_steps)`.
         baseline_quantile:
             Compute the importance of achieving top-`baseline_quantile` quantile `target_value`.
             For example, `baseline_quantile=0.1` means that the importances give the information
             of which parameters were important to achieve the top-10% performance during
             the specified `study`.
-        min_n_top_trials:
-            How many `trials` must be included in `top_trials`.
-        consider_prior:
-            Whether we use non-informative prior to regularize the Parzen estimators.
-            This might be helpful to avoid overfitting.
-        prior_weight:
-            How much we regularize the Parzen estimator fitting.
-            The larger `prior_weight` becomes, the more we regularize the fitting.
-            All the observations receive `weight=1.0`, so the default value is `prior_weight=1.0`.
-        categorical_distance_func:
-            A dictionary of distance functions for categorical parameters. The key is the name of
-            the categorical parameter and the value is a distance function that takes two
-            :class:`~optuna.distributions.CategoricalChoiceType` s and returns a :obj:`float`
-            value. The distance function must return a non-negative value.
-
-            While categorical choices are handled equally by default, this option allows users to
-            specify prior knowledge on the structure of categorical parameters.
         evaluate_on_local:
             Whether we measure the importance in the local or global space.
             If `True`, the importances imply how important each parameter is during `study`.
@@ -82,58 +57,49 @@ class PedAnovaImportanceEvaluator(BaseImportanceEvaluator):
             space during the specified `study`.
     """
 
-    def __init__(
-        self,
-        is_lower_better: bool,
-        *,
-        n_steps: int = 50,
-        baseline_quantile: float = 0.1,
-        consider_prior: bool = False,
-        prior_weight: float = 1.0,
-        categorical_distance_func: dict[
-            str, Callable[[CategoricalChoiceType, CategoricalChoiceType], float]
-        ]
-        | None = None,
-        evaluate_on_local: bool = True,
-        min_n_top_trials: int = 2,
-    ):
-        if n_steps <= 1:
-            raise ValueError(f"`n_steps` must be larger than 1, but got {n_steps}.")
-
-        if min_n_top_trials < 2:
-            raise ValueError(
-                f"min_n_top_trials must be larger than 1, but got {min_n_top_trials}."
-            )
-
-        self._n_steps = n_steps
-        self._categorical_distance_func = (
-            categorical_distance_func if categorical_distance_func is not None else {}
-        )
-        self._consider_prior = consider_prior
-        self._prior_weight = prior_weight
-        self._is_lower_better = is_lower_better
-        self._min_n_top_trials = min_n_top_trials
+    def __init__(self, *, baseline_quantile: float = 0.1, evaluate_on_local: bool = True):
         self._baseline_quantile = baseline_quantile
         self._evaluate_on_local = evaluate_on_local
 
+        # Advanced Setups.
+        # Discretize a domain [low, high] as `np.linspace(low, high, n_steps)`.
+        self._n_steps: int = 50
+        # Prior is used for regularization.
+        self._consider_prior = True
+        # Control the regularization effect.
+        self._prior_weight = 1.0
+        # How many `trials` must be included in `top_trials`.
+        self._min_n_top_trials = 2
+
     def _get_top_trials(
         self,
+        study: Study,
         trials: list[FrozenTrial],
         params: list[str],
         target: Callable[[FrozenTrial], float] | None,
     ) -> list[FrozenTrial]:
+        if target is None and study._is_multi_objective():
+            raise ValueError(
+                "If the `study` is being used for multi-objective optimization, "
+                "please specify the `target`. For example, use "
+                "`target=lambda t: t.values[0]` for the first objective value."
+            )
+
+        is_lower_better = study.directions[0] == StudyDirection.MINIMIZE
+        if target is not None:
+            warnings.warn(
+                f"{self.__class__.__name__} computes the importances of params to achieve "
+                "low `target` values. If this is not what you want, please multiply target by -1."
+            )
+            is_lower_better = True
+
         trial_filter = get_trial_filter(
-            quantile=self._baseline_quantile,
-            is_lower_better=self._is_lower_better,
-            min_n_top_trials=self._min_n_top_trials,
-            target=target,
+            self._baseline_quantile, is_lower_better, self._min_n_top_trials, target
         )
         top_trials = trial_filter(trials)
 
         if len(trials) == len(top_trials):
-            warnings.warn(
-                "All the trials were considered to be in top and it gives equal importances."
-            )
+            warnings.warn("All trials are in top trials, which gives equal importances.")
 
         return top_trials
 
@@ -144,37 +110,20 @@ def _compute_pearson_divergence(
         top_trials: list[FrozenTrial],
         all_trials: list[FrozenTrial],
     ) -> float:
-        cat_dist_func = self._categorical_distance_func.get(param_name, None)
+        consider_prior, prior_weight = self._consider_prior, self._prior_weight
         pe_top = _build_parzen_estimator(
-            param_name=param_name,
-            dist=dist,
-            trials=top_trials,
-            n_steps=self._n_steps,
-            consider_prior=self._consider_prior,
-            prior_weight=self._prior_weight,
-            categorical_distance_func=cat_dist_func,
+            param_name, dist, top_trials, self._n_steps, consider_prior, prior_weight
         )
-        n_grids = pe_top.n_grids
-        grids = np.arange(n_grids)
+        grids = np.arange(pe_top.n_steps)
         pdf_top = pe_top.pdf(grids) + 1e-12
 
-        if self._evaluate_on_local:
-            # Compute the integral on the local space.
-            # It gives us the importances of hyperparameters during the search.
+        if self._evaluate_on_local:  # The importance of param during the study.
             pe_local = _build_parzen_estimator(
-                param_name=param_name,
-                dist=dist,
-                trials=all_trials,
-                n_steps=self._n_steps,
-                consider_prior=self._consider_prior,
-                prior_weight=self._prior_weight,
-                categorical_distance_func=cat_dist_func,
+                param_name, dist, all_trials, self._n_steps, consider_prior, prior_weight
             )
             pdf_local = pe_local.pdf(grids) + 1e-12
-        else:
-            # Compute the integral on the global space.
-            # It gives us the importances of hyperparameters in the search space.
-            pdf_local = np.full(n_grids, 1.0 / n_grids)
+        else:  # The importance of param in the search space.
+            pdf_local = np.full(pe_top.n_steps, 1.0 / pe_top.n_steps)
 
         return float(pdf_local @ ((pdf_top / pdf_local - 1) ** 2))
 
@@ -185,19 +134,11 @@ def evaluate(
         *,
         target: Callable[[FrozenTrial], float] | None = None,
     ) -> dict[str, float]:
-        if target is None and study._is_multi_objective():
-            raise ValueError(
-                "If the `study` is being used for multi-objective optimization, "
-                "please specify the `target`. For example, use "
-                "`target=lambda t: t.values[0]` for the first objective value."
-            )
-
         distributions = _get_distributions(study, params=params)
         if params is None:
             params = list(distributions.keys())
 
         assert params is not None
-
         # PED-ANOVA does not support parameter distributions with a single value,
         # because the importance of such params becomes zero.
         non_single_distributions = {
@@ -210,15 +151,12 @@ def evaluate(
             return {}
 
         trials = _get_filtered_trials(study, params=params, target=target)
-        top_trials = self._get_top_trials(trials, params, target)
+        top_trials = self._get_top_trials(study, trials, params, target)
         importance_sum = 0.0
         param_importances = {}
         for param_name, dist in non_single_distributions.items():
             param_importances[param_name] = self._compute_pearson_divergence(
-                param_name,
-                dist,
-                top_trials=top_trials,
-                all_trials=trials,
+                param_name, dist, top_trials=top_trials, all_trials=trials
             )
             importance_sum += param_importances[param_name]