Skip to content

Commit

Permalink
Remove most of advanced setups
Browse files Browse the repository at this point in the history
  • Loading branch information
nabenabe0928 committed Feb 1, 2024
1 parent dd9711f commit 3d17362
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 151 deletions.
142 changes: 40 additions & 102 deletions optuna/importance/_ped_anova/_evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@
import numpy as np

from optuna.distributions import BaseDistribution
from optuna.distributions import CategoricalChoiceType
from optuna.importance._base import _get_distributions
from optuna.importance._base import _get_filtered_trials
from optuna.importance._base import _sort_dict_by_importance
from optuna.importance._base import BaseImportanceEvaluator
from optuna.importance._ped_anova._scott_parzen_estimator import _build_parzen_estimator
from optuna.importance.filters import get_trial_filter
from optuna.study import Study
from optuna.study import StudyDirection
from optuna.trial import FrozenTrial


Expand All @@ -25,11 +25,9 @@ class PedAnovaImportanceEvaluator(BaseImportanceEvaluator):
<https://arxiv.org/abs/2304.10255>`_.
PED-ANOVA fits Parzen estimators of :class:`~optuna.trial.TrialState.COMPLETE` trials better
than a user-specified baseline. Users can specify the baseline either by a quantile or a value.
than a user-specified baseline. Users can specify the baseline by a quantile.
The importance can be interpreted as how important each hyperparameter is to get
the performance better than baseline.
Users can also remove trials worse than `cutoff` so that the interpretation removes the bias
caused by the initial trials.
For further information about PED-ANOVA algorithm, please refer to the following paper:
Expand All @@ -46,34 +44,11 @@ class PedAnovaImportanceEvaluator(BaseImportanceEvaluator):
Please refer to the original work available at https://github.com/nabenabe0928/local-anova.
Args:
is_lower_better:
Whether `target_value` is better when it is lower.
n_steps:
The number of grids in continuous domains.
For example, if one of the parameters has the domain of [`low`, `high`],
we discretize it as `np.linspace(low, high, n_steps)`.
baseline_quantile:
Compute the importance of achieving top-`baseline_quantile` quantile `target_value`.
For example, `baseline_quantile=0.1` means that the importances give the information
of which parameters were important to achieve the top-10% performance during
the specified `study`.
min_n_top_trials:
How many `trials` must be included in `top_trials`.
consider_prior:
Whether we use non-informative prior to regularize the Parzen estimators.
This might be helpful to avoid overfitting.
prior_weight:
How much we regularize the Parzen estimator fitting.
The larger `prior_weight` becomes, the more we regularize the fitting.
All the observations receive `weight=1.0`, so the default value is `prior_weight=1.0`.
categorical_distance_func:
A dictionary of distance functions for categorical parameters. The key is the name of
the categorical parameter and the value is a distance function that takes two
:class:`~optuna.distributions.CategoricalChoiceType` s and returns a :obj:`float`
value. The distance function must return a non-negative value.
While categorical choices are handled equally by default, this option allows users to
specify prior knowledge on the structure of categorical parameters.
evaluate_on_local:
Whether we measure the importance in the local or global space.
If `True`, the importances imply how important each parameter is during `study`.
Expand All @@ -82,58 +57,49 @@ class PedAnovaImportanceEvaluator(BaseImportanceEvaluator):
space during the specified `study`.
"""

def __init__(
self,
is_lower_better: bool,
*,
n_steps: int = 50,
baseline_quantile: float = 0.1,
consider_prior: bool = False,
prior_weight: float = 1.0,
categorical_distance_func: dict[
str, Callable[[CategoricalChoiceType, CategoricalChoiceType], float]
]
| None = None,
evaluate_on_local: bool = True,
min_n_top_trials: int = 2,
):
if n_steps <= 1:
raise ValueError(f"`n_steps` must be larger than 1, but got {n_steps}.")

if min_n_top_trials < 2:
raise ValueError(
f"min_n_top_trials must be larger than 1, but got {min_n_top_trials}."
)

self._n_steps = n_steps
self._categorical_distance_func = (
categorical_distance_func if categorical_distance_func is not None else {}
)
self._consider_prior = consider_prior
self._prior_weight = prior_weight
self._is_lower_better = is_lower_better
self._min_n_top_trials = min_n_top_trials
def __init__(self, *, baseline_quantile: float = 0.1, evaluate_on_local: bool = True):
self._baseline_quantile = baseline_quantile
self._evaluate_on_local = evaluate_on_local

Check warning on line 62 in optuna/importance/_ped_anova/_evaluator.py

View check run for this annotation

Codecov / codecov/patch

optuna/importance/_ped_anova/_evaluator.py#L61-L62

Added lines #L61 - L62 were not covered by tests

# Advanced Setups.
# Discretize a domain [low, high] as `np.linspace(low, high, n_steps)`.
self._n_steps: int = 50

Check warning on line 66 in optuna/importance/_ped_anova/_evaluator.py

View check run for this annotation

Codecov / codecov/patch

optuna/importance/_ped_anova/_evaluator.py#L66

Added line #L66 was not covered by tests
# Prior is used for regularization.
self._consider_prior = True

Check warning on line 68 in optuna/importance/_ped_anova/_evaluator.py

View check run for this annotation

Codecov / codecov/patch

optuna/importance/_ped_anova/_evaluator.py#L68

Added line #L68 was not covered by tests
# Control the regularization effect.
self._prior_weight = 1.0

Check warning on line 70 in optuna/importance/_ped_anova/_evaluator.py

View check run for this annotation

Codecov / codecov/patch

optuna/importance/_ped_anova/_evaluator.py#L70

Added line #L70 was not covered by tests
# How many `trials` must be included in `top_trials`.
self._min_n_top_trials = 2

Check warning on line 72 in optuna/importance/_ped_anova/_evaluator.py

View check run for this annotation

Codecov / codecov/patch

optuna/importance/_ped_anova/_evaluator.py#L72

Added line #L72 was not covered by tests

def _get_top_trials(
self,
study: Study,
trials: list[FrozenTrial],
params: list[str],
target: Callable[[FrozenTrial], float] | None,
) -> list[FrozenTrial]:
if target is None and study._is_multi_objective():
raise ValueError(

Check warning on line 82 in optuna/importance/_ped_anova/_evaluator.py

View check run for this annotation

Codecov / codecov/patch

optuna/importance/_ped_anova/_evaluator.py#L81-L82

Added lines #L81 - L82 were not covered by tests
"If the `study` is being used for multi-objective optimization, "
"please specify the `target`. For example, use "
"`target=lambda t: t.values[0]` for the first objective value."
)

is_lower_better = study.directions[0] == StudyDirection.MINIMIZE
if target is not None:
warnings.warn(

Check warning on line 90 in optuna/importance/_ped_anova/_evaluator.py

View check run for this annotation

Codecov / codecov/patch

optuna/importance/_ped_anova/_evaluator.py#L88-L90

Added lines #L88 - L90 were not covered by tests
f"{self.__class__.__name__} computes the importances of params to achieve "
"low `target` values. If this is not what you want, please multiply target by -1."
)
is_lower_better = True

Check warning on line 94 in optuna/importance/_ped_anova/_evaluator.py

View check run for this annotation

Codecov / codecov/patch

optuna/importance/_ped_anova/_evaluator.py#L94

Added line #L94 was not covered by tests

trial_filter = get_trial_filter(

Check warning on line 96 in optuna/importance/_ped_anova/_evaluator.py

View check run for this annotation

Codecov / codecov/patch

optuna/importance/_ped_anova/_evaluator.py#L96

Added line #L96 was not covered by tests
quantile=self._baseline_quantile,
is_lower_better=self._is_lower_better,
min_n_top_trials=self._min_n_top_trials,
target=target,
self._baseline_quantile, is_lower_better, self._min_n_top_trials, target
)
top_trials = trial_filter(trials)

Check warning on line 99 in optuna/importance/_ped_anova/_evaluator.py

View check run for this annotation

Codecov / codecov/patch

optuna/importance/_ped_anova/_evaluator.py#L99

Added line #L99 was not covered by tests

if len(trials) == len(top_trials):
warnings.warn(
"All the trials were considered to be in top and it gives equal importances."
)
warnings.warn("All trials are in top trials, which gives equal importances.")

Check warning on line 102 in optuna/importance/_ped_anova/_evaluator.py

View check run for this annotation

Codecov / codecov/patch

optuna/importance/_ped_anova/_evaluator.py#L101-L102

Added lines #L101 - L102 were not covered by tests

return top_trials

Check warning on line 104 in optuna/importance/_ped_anova/_evaluator.py

View check run for this annotation

Codecov / codecov/patch

optuna/importance/_ped_anova/_evaluator.py#L104

Added line #L104 was not covered by tests

Expand All @@ -144,37 +110,20 @@ def _compute_pearson_divergence(
top_trials: list[FrozenTrial],
all_trials: list[FrozenTrial],
) -> float:
cat_dist_func = self._categorical_distance_func.get(param_name, None)
consider_prior, prior_weight = self._consider_prior, self._prior_weight
pe_top = _build_parzen_estimator(

Check warning on line 114 in optuna/importance/_ped_anova/_evaluator.py

View check run for this annotation

Codecov / codecov/patch

optuna/importance/_ped_anova/_evaluator.py#L113-L114

Added lines #L113 - L114 were not covered by tests
param_name=param_name,
dist=dist,
trials=top_trials,
n_steps=self._n_steps,
consider_prior=self._consider_prior,
prior_weight=self._prior_weight,
categorical_distance_func=cat_dist_func,
param_name, dist, top_trials, self._n_steps, consider_prior, prior_weight
)
n_grids = pe_top.n_grids
grids = np.arange(n_grids)
grids = np.arange(pe_top.n_steps)
pdf_top = pe_top.pdf(grids) + 1e-12

Check warning on line 118 in optuna/importance/_ped_anova/_evaluator.py

View check run for this annotation

Codecov / codecov/patch

optuna/importance/_ped_anova/_evaluator.py#L117-L118

Added lines #L117 - L118 were not covered by tests

if self._evaluate_on_local:
# Compute the integral on the local space.
# It gives us the importances of hyperparameters during the search.
if self._evaluate_on_local: # The importance of param during the study.
pe_local = _build_parzen_estimator(

Check warning on line 121 in optuna/importance/_ped_anova/_evaluator.py

View check run for this annotation

Codecov / codecov/patch

optuna/importance/_ped_anova/_evaluator.py#L120-L121

Added lines #L120 - L121 were not covered by tests
param_name=param_name,
dist=dist,
trials=all_trials,
n_steps=self._n_steps,
consider_prior=self._consider_prior,
prior_weight=self._prior_weight,
categorical_distance_func=cat_dist_func,
param_name, dist, all_trials, self._n_steps, consider_prior, prior_weight
)
pdf_local = pe_local.pdf(grids) + 1e-12

Check warning on line 124 in optuna/importance/_ped_anova/_evaluator.py

View check run for this annotation

Codecov / codecov/patch

optuna/importance/_ped_anova/_evaluator.py#L124

Added line #L124 was not covered by tests
else:
# Compute the integral on the global space.
# It gives us the importances of hyperparameters in the search space.
pdf_local = np.full(n_grids, 1.0 / n_grids)
else: # The importance of param in the search space.
pdf_local = np.full(pe_top.n_steps, 1.0 / pe_top.n_steps)

Check warning on line 126 in optuna/importance/_ped_anova/_evaluator.py

View check run for this annotation

Codecov / codecov/patch

optuna/importance/_ped_anova/_evaluator.py#L126

Added line #L126 was not covered by tests

return float(pdf_local @ ((pdf_top / pdf_local - 1) ** 2))

Check warning on line 128 in optuna/importance/_ped_anova/_evaluator.py

View check run for this annotation

Codecov / codecov/patch

optuna/importance/_ped_anova/_evaluator.py#L128

Added line #L128 was not covered by tests

Expand All @@ -185,19 +134,11 @@ def evaluate(
*,
target: Callable[[FrozenTrial], float] | None = None,
) -> dict[str, float]:
if target is None and study._is_multi_objective():
raise ValueError(
"If the `study` is being used for multi-objective optimization, "
"please specify the `target`. For example, use "
"`target=lambda t: t.values[0]` for the first objective value."
)

distributions = _get_distributions(study, params=params)
if params is None:
params = list(distributions.keys())

Check warning on line 139 in optuna/importance/_ped_anova/_evaluator.py

View check run for this annotation

Codecov / codecov/patch

optuna/importance/_ped_anova/_evaluator.py#L137-L139

Added lines #L137 - L139 were not covered by tests

assert params is not None

Check warning on line 141 in optuna/importance/_ped_anova/_evaluator.py

View check run for this annotation

Codecov / codecov/patch

optuna/importance/_ped_anova/_evaluator.py#L141

Added line #L141 was not covered by tests

# PED-ANOVA does not support parameter distributions with a single value,
# because the importance of such params becomes zero.
non_single_distributions = {

Check warning on line 144 in optuna/importance/_ped_anova/_evaluator.py

View check run for this annotation

Codecov / codecov/patch

optuna/importance/_ped_anova/_evaluator.py#L144

Added line #L144 was not covered by tests
Expand All @@ -210,15 +151,12 @@ def evaluate(
return {}

Check warning on line 151 in optuna/importance/_ped_anova/_evaluator.py

View check run for this annotation

Codecov / codecov/patch

optuna/importance/_ped_anova/_evaluator.py#L150-L151

Added lines #L150 - L151 were not covered by tests

trials = _get_filtered_trials(study, params=params, target=target)
top_trials = self._get_top_trials(trials, params, target)
top_trials = self._get_top_trials(study, trials, params, target)
importance_sum = 0.0
param_importances = {}
for param_name, dist in non_single_distributions.items():
param_importances[param_name] = self._compute_pearson_divergence(

Check warning on line 158 in optuna/importance/_ped_anova/_evaluator.py

View check run for this annotation

Codecov / codecov/patch

optuna/importance/_ped_anova/_evaluator.py#L153-L158

Added lines #L153 - L158 were not covered by tests
param_name,
dist,
top_trials=top_trials,
all_trials=trials,
param_name, dist, top_trials=top_trials, all_trials=trials
)
importance_sum += param_importances[param_name]

Check warning on line 161 in optuna/importance/_ped_anova/_evaluator.py

View check run for this annotation

Codecov / codecov/patch

optuna/importance/_ped_anova/_evaluator.py#L161

Added line #L161 was not covered by tests

Expand Down

0 comments on commit 3d17362

Please sign in to comment.