
Add a super quick f-ANOVA algorithm named PED-ANOVA #5212

Merged

merged 41 commits on Feb 20, 2024 (changes shown from 35 commits)

Commits
dd9711f  [feat] Add the first draft of PED-ANOVA (nabenabe0928, Dec 8, 2023)
3d17362  Remove most of advanced setups (nabenabe0928, Feb 1, 2024)
c60cf66  Refactor get grids and their indices (nabenabe0928, Feb 1, 2024)
ee372ca  Remove filter directory (nabenabe0928, Feb 1, 2024)
3b7c665  Refactor quantile filter (nabenabe0928, Feb 1, 2024)
8f35675  Add custom filter (nabenabe0928, Feb 1, 2024)
74b71b9  Add tests for PED-ANOVA (nabenabe0928, Feb 2, 2024)
6a0987f  Bundle tests (nabenabe0928, Feb 2, 2024)
96e6fba  Add tests for the PED-ANOVA arguments (nabenabe0928, Feb 2, 2024)
925b6dc  Invert common tests for fanova and mean decrease (nabenabe0928, Feb 2, 2024)
56e878b  Fix the argument of np.quantile (nabenabe0928, Feb 2, 2024)
40662e1  Add TODO comments np.quantile (nabenabe0928, Feb 2, 2024)
2c2ab29  Remove an edgecase of init in quantile filter (nabenabe0928, Feb 5, 2024)
50344a0  Add some tests for quantile filters (nabenabe0928, Feb 8, 2024)
633fff2  Fix formatting errors (nabenabe0928, Feb 9, 2024)
a822d21  Rename files (nabenabe0928, Feb 9, 2024)
598ffcb  Add quantile filter (nabenabe0928, Feb 9, 2024)
1f100a3  Apply mamu's reviews (nabenabe0928, Feb 9, 2024)
681e25f  Fix tests (nabenabe0928, Feb 9, 2024)
e13e838  Fix ScottParzenEstimator (nabenabe0928, Feb 9, 2024)
120b86b  Bundle the tests (nabenabe0928, Feb 9, 2024)
ff9a3e2  Fix mypy error (nabenabe0928, Feb 9, 2024)
2df39f7  Add a test for scott pe init (nabenabe0928, Feb 13, 2024)
964054e  Add tests for building scott pe (nabenabe0928, Feb 13, 2024)
fb2a1fc  Add tests for count for pes (nabenabe0928, Feb 13, 2024)
5f3f866  Apply formatter (nabenabe0928, Feb 13, 2024)
44deb27  Remove empty test file (nabenabe0928, Feb 14, 2024)
144ae9d  Cover all the forking (nabenabe0928, Feb 14, 2024)
446966b  Remove mean decrease impurity test for non single (nabenabe0928, Feb 14, 2024)
901946a  Add the rename suggestion by takizawa (nabenabe0928, Feb 15, 2024)
abc071c  Rename test_quantile_filter to test_evaluator (nabenabe0928, Feb 15, 2024)
12eee7e  Bundle create_trials for single and multi obj (nabenabe0928, Feb 15, 2024)
e429b44  Move ped-anova tests to test_evaluator.py (nabenabe0928, Feb 15, 2024)
67d4251  Use parametrize for evaluator tests (nabenabe0928, Feb 15, 2024)
b690702  Remove match in pytest.raises (nabenabe0928, Feb 15, 2024)
1acdc12  Add tests for distributions with step (nabenabe0928, Feb 15, 2024)
57a2a1a  Rename test_evaluator to test_pedanova_evaluator to avoid conflict (nabenabe0928, Feb 15, 2024)
dbe7ad8  Apply mamu's suggestion (nabenabe0928, Feb 16, 2024)
2c12f7a  Add PED-ANOVA in docs/source (nabenabe0928, Feb 19, 2024)
da1fe66  Add experimental warning to PED-ANOVA class (nabenabe0928, Feb 19, 2024)
17006ee  Fix examples in doc (nabenabe0928, Feb 19, 2024)
2 changes: 2 additions & 0 deletions optuna/importance/__init__.py
@@ -8,6 +8,7 @@
from optuna.importance._base import BaseImportanceEvaluator
from optuna.importance._fanova import FanovaImportanceEvaluator
from optuna.importance._mean_decrease_impurity import MeanDecreaseImpurityImportanceEvaluator
from optuna.importance._ped_anova import PedAnovaImportanceEvaluator
from optuna.study import Study
from optuna.trial import FrozenTrial

@@ -16,6 +17,7 @@
"BaseImportanceEvaluator",
"FanovaImportanceEvaluator",
"MeanDecreaseImpurityImportanceEvaluator",
"PedAnovaImportanceEvaluator",
"get_param_importances",
]

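Since the new evaluator is exported from `optuna.importance`, it plugs straight into the existing `get_param_importances` API. A minimal usage sketch based on Optuna's public API (the objective function is made up for illustration):

import optuna
from optuna.importance import PedAnovaImportanceEvaluator
from optuna.importance import get_param_importances


def objective(trial):
    x = trial.suggest_float("x", -10, 10)
    return x**2


study = optuna.create_study()
study.optimize(objective, n_trials=50)
# Evaluate importances with the newly exported evaluator.
importances = get_param_importances(study, evaluator=PedAnovaImportanceEvaluator())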
4 changes: 4 additions & 0 deletions optuna/importance/_ped_anova/__init__.py
@@ -0,0 +1,4 @@
from optuna.importance._ped_anova.evaluator import PedAnovaImportanceEvaluator


__all__ = ["PedAnovaImportanceEvaluator"]
233 changes: 233 additions & 0 deletions optuna/importance/_ped_anova/evaluator.py
@@ -0,0 +1,233 @@
from __future__ import annotations

from collections.abc import Callable
import warnings

import numpy as np

from optuna.distributions import BaseDistribution
from optuna.importance._base import _get_distributions
from optuna.importance._base import _get_filtered_trials
from optuna.importance._base import _sort_dict_by_importance
from optuna.importance._base import BaseImportanceEvaluator
from optuna.importance._ped_anova.scott_parzen_estimator import _build_parzen_estimator
from optuna.logging import get_logger
from optuna.study import Study
from optuna.study import StudyDirection
from optuna.trial import FrozenTrial


_logger = get_logger(__name__)


class _QuantileFilter:
def __init__(
self,
quantile: float,
is_lower_better: bool,
min_n_top_trials: int,
target: Callable[[FrozenTrial], float] | None,
):
assert 0 <= quantile <= 1, "quantile must be in [0, 1]."
assert min_n_top_trials > 0, "min_n_top_trials must be positive."

self._quantile = quantile
self._is_lower_better = is_lower_better
self._min_n_top_trials = min_n_top_trials
self._target = target

def filter(self, trials: list[FrozenTrial]) -> list[FrozenTrial]:
target, min_n_top_trials = self._target, self._min_n_top_trials
sign = 1.0 if self._is_lower_better else -1.0
loss_values = sign * np.asarray([t.value if target is None else target(t) for t in trials])
err_msg = "len(trials) must be larger than or equal to min_n_top_trials"
assert min_n_top_trials <= loss_values.size, err_msg

def _quantile(v: np.ndarray, q: float) -> float:
cutoff_index = int(np.ceil(q * v.size)) - 1
return float(np.partition(v, cutoff_index)[cutoff_index])

cutoff_val = max(
np.partition(loss_values, min_n_top_trials - 1)[min_n_top_trials - 1],
# TODO(nabenabe0928): After dropping Python3.7, replace below with
# np.quantile(loss_values, self._quantile, method="inverted_cdf")
_quantile(loss_values, self._quantile),
)
should_keep_trials = loss_values <= cutoff_val
return [t for t, should_keep in zip(trials, should_keep_trials) if should_keep]
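
# A worked illustration of the cutoff rule above (toy values, not part of the
# PR): for losses [0.3, 0.1, 0.4, 0.2] with quantile=0.25 and
# min_n_top_trials=2, the quantile cutoff is the ceil(0.25 * 4) - 1 = 0th
# smallest loss (0.1), while the min_n_top_trials cutoff is the 2nd smallest
# loss (0.2). Taking the max of the two keeps the trials with losses 0.1 and
# 0.2, so at least min_n_top_trials trials always survive the filter.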


class PedAnovaImportanceEvaluator(BaseImportanceEvaluator):
"""PED-ANOVA importance evaluator.

Implements the PED-ANOVA hyperparameter importance evaluation algorithm.

PED-ANOVA fits Parzen estimators to the :class:`~optuna.trial.TrialState.COMPLETE` trials
that perform better than a user-specified baseline. Users specify the baseline as a
quantile of the objective values. The importance can be interpreted as how important each
hyperparameter is for achieving performance better than the baseline.

For further information about PED-ANOVA algorithm, please refer to the following paper:

- `PED-ANOVA: Efficiently Quantifying Hyperparameter Importance in Arbitrary Subspaces
<https://arxiv.org/abs/2304.10255>`_

.. note::

The performance of PED-ANOVA depends on how many trials are better than the baseline.
To stabilize the analysis, it is preferable to have at least 5 trials better than the
baseline.

.. note::

Please refer to `the original work <https://github.com/nabenabe0928/local-anova>`_.

Args:
baseline_quantile:
Compute the importance of achieving a top-`baseline_quantile` objective value.
For example, `baseline_quantile=0.1` means that the importances tell which
parameters were important for achieving the top-10% performance during
optimization.
evaluate_on_local:
Whether to measure the importance in the local or global space.
If :obj:`True`, the importances reflect how important each parameter was
during optimization. Meanwhile, `evaluate_on_local=False` gives the
importances in the specified search_space. `evaluate_on_local=True` is
especially useful when users modify the search space during optimization.

Example:

> Review comment (Member): Could you please reduce the indentation by one level? In the generated document, the example code is included in the arguments section.
> (Screenshot: 2024-02-19 at 16 58 06)

An example of using PED-ANOVA is as follows:

.. testcode::

import optuna
from optuna.importance import PedAnovaImportanceEvaluator


def objective(trial):
x1 = trial.suggest_float("x1", -10, 10)
x2 = trial.suggest_float("x2", -10, 10)
return x1 + x2 / 1000


study = optuna.create_study()
study.optimize(objective, n_trials=100)
evaluator = PedAnovaImportanceEvaluator()
evaluator.evaluate(study)
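# The constructor arguments added in this PR can be combined; e.g., a
# stricter baseline (top-5%) measured against the global search space
# (illustrative variation, not from the original docstring):
global_evaluator = PedAnovaImportanceEvaluator(baseline_quantile=0.05, evaluate_on_local=False)
global_evaluator.evaluate(study)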

"""

def __init__(
self,
*,
baseline_quantile: float = 0.1,
evaluate_on_local: bool = True,
):
assert 0.0 <= baseline_quantile <= 1.0, "baseline_quantile must be in [0, 1]."
self._baseline_quantile = baseline_quantile
self._evaluate_on_local = evaluate_on_local

# Advanced Setups.
# Discretize a domain [low, high] as `np.linspace(low, high, n_steps)`.
self._n_steps: int = 50
# Prior is used for regularization.
self._consider_prior = True
# Control the regularization effect.
self._prior_weight = 1.0
# How many `trials` must be included in `top_trials`.
self._min_n_top_trials = 2

def _get_top_trials(
self,
study: Study,
trials: list[FrozenTrial],
params: list[str],
target: Callable[[FrozenTrial], float] | None,
) -> list[FrozenTrial]:
is_lower_better = study.directions[0] == StudyDirection.MINIMIZE
if target is not None:
warnings.warn(
f"{self.__class__.__name__} computes the importances of params to achieve "
"low `target` values. If this is not what you want, "
"please modify target, e.g., by multiplying the output by -1."
)
is_lower_better = True

top_trials = _QuantileFilter(
self._baseline_quantile, is_lower_better, self._min_n_top_trials, target
).filter(trials)

if len(trials) == len(top_trials):
_logger.warning("All trials are in top trials, which gives equal importances.")

return top_trials
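
# Usage note (illustrative, not part of the diff): `target` scalarizes each
# trial, which also covers multi-objective studies, e.g.
#     PedAnovaImportanceEvaluator().evaluate(study, target=lambda t: t.values[0])
# As the warning above states, lower `target` values are treated as better.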

def _compute_pearson_divergence(
self,
param_name: str,
dist: BaseDistribution,
top_trials: list[FrozenTrial],
all_trials: list[FrozenTrial],
) -> float:
consider_prior, prior_weight = self._consider_prior, self._prior_weight
pe_top = _build_parzen_estimator(
param_name, dist, top_trials, self._n_steps, consider_prior, prior_weight
)
# NOTE: pe_top.n_steps could be different from self._n_steps.
grids = np.arange(pe_top.n_steps)
pdf_top = pe_top.pdf(grids) + 1e-12

if self._evaluate_on_local: # The importance of param during the study.
pe_local = _build_parzen_estimator(
param_name, dist, all_trials, self._n_steps, consider_prior, prior_weight
)
pdf_local = pe_local.pdf(grids) + 1e-12
else: # The importance of param in the search space.
pdf_local = np.full(pe_top.n_steps, 1.0 / pe_top.n_steps)

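# Illustration (notation assumed): the return value below is the Pearson
# chi-square divergence between the two discretized densities,
#     sum_x pdf_local[x] * (pdf_top[x] / pdf_local[x] - 1) ** 2,
# and the 1e-12 added to each pdf above guards against division by zero.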
return float(pdf_local @ ((pdf_top / pdf_local - 1) ** 2))

def evaluate(
self,
study: Study,
params: list[str] | None = None,
*,
target: Callable[[FrozenTrial], float] | None = None,
) -> dict[str, float]:
dists = _get_distributions(study, params=params)
if params is None:
params = list(dists.keys())

assert params is not None
# PED-ANOVA does not support parameter distributions with a single value,
# because the importance of such params becomes zero.
non_single_dists = {name: dist for name, dist in dists.items() if not dist.single()}
single_dists = {name: dist for name, dist in dists.items() if dist.single()}
if len(non_single_dists) == 0:
return {}

trials = _get_filtered_trials(study, params=params, target=target)
n_params = len(non_single_dists)
# The following should be tested at _get_filtered_trials.
assert target is not None or max([len(t.values) for t in trials], default=1) == 1
if len(trials) <= self._min_n_top_trials:
param_importances = {k: 1.0 / n_params for k in non_single_dists}
param_importances.update({k: 0.0 for k in single_dists})
return _sort_dict_by_importance(param_importances)

top_trials = self._get_top_trials(study, trials, params, target)
importance_sum = 0.0
param_importances = {}
for param_name, dist in non_single_dists.items():
param_importances[param_name] = self._compute_pearson_divergence(
param_name, dist, top_trials=top_trials, all_trials=trials
)
importance_sum += param_importances[param_name]

if importance_sum > 0.0:
param_importances = {k: v / importance_sum for k, v in param_importances.items()}
else:
# It happens when pdf_local == pdf_top for all params.
param_importances = {k: 1.0 / n_params for k in non_single_dists}

param_importances.update({k: 0.0 for k in single_dists})
return _sort_dict_by_importance(param_importances)