Remove experimental decorators from hyperparameter importance (HPI) features. #1440

Merged
merged 2 commits on Jun 30, 2020
4 changes: 1 addition & 3 deletions optuna/importance/__init__.py
@@ -2,16 +2,14 @@
from typing import List
from typing import Optional

-from optuna._experimental import experimental
from optuna.importance._base import BaseImportanceEvaluator
from optuna.importance._fanova import FanovaImportanceEvaluator # NOQA
from optuna.importance._mean_decrease_impurity import MeanDecreaseImpurityImportanceEvaluator
from optuna.study import Study


-@experimental("1.3.0")
def get_param_importances(
-    study: Study, evaluator: BaseImportanceEvaluator = None, params: Optional[List[str]] = None
+    study: Study, *, evaluator: BaseImportanceEvaluator = None, params: Optional[List[str]] = None
) -> Dict[str, float]:
    """Evaluate parameter importances based on completed trials in the given study.

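For context, a minimal usage sketch of the updated signature: `evaluator` is now keyword-only, so positional calls raise a `TypeError`. The toy objective below is illustrative, not from this PR.

```python
import optuna
from optuna.importance import FanovaImportanceEvaluator, get_param_importances


def objective(trial: optuna.Trial) -> float:
    # Illustrative two-parameter objective.
    x = trial.suggest_uniform("x", 0.1, 3)
    y = trial.suggest_loguniform("y", 0.1, 3)
    return x + y ** 2


study = optuna.create_study()
study.optimize(objective, n_trials=20)

# After this change, `evaluator` must be passed by keyword.
importances = get_param_importances(study, evaluator=FanovaImportanceEvaluator())
```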
4 changes: 1 addition & 3 deletions optuna/importance/_base.py
@@ -7,22 +7,20 @@

import numpy as np

-from optuna._experimental import experimental
from optuna.distributions import BaseDistribution
from optuna.distributions import CategoricalDistribution
from optuna.samplers import intersection_search_space
from optuna.study import Study
from optuna.trial import TrialState


-@experimental("1.3.0")
class BaseImportanceEvaluator(object, metaclass=abc.ABCMeta):
    """Abstract parameter importance evaluator.

    """

    @abc.abstractmethod
-    def evaluate(self, study: Study, params: Optional[List[str]]) -> Dict[str, float]:
+    def evaluate(self, study: Study, params: Optional[List[str]] = None) -> Dict[str, float]:
        """Evaluate parameter importances based on completed trials in the given study.

        .. note::
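Since `evaluate` now gives `params` a default of `None`, a custom evaluator can be called without naming parameters explicitly. A minimal sketch of what that implies for subclasses — the uniform-importance logic here is purely hypothetical, not part of Optuna:

```python
from typing import Dict, List, Optional

from optuna.importance._base import BaseImportanceEvaluator
from optuna.study import Study


class UniformImportanceEvaluator(BaseImportanceEvaluator):
    """Hypothetical evaluator that assigns equal importance to every parameter."""

    def evaluate(self, study: Study, params: Optional[List[str]] = None) -> Dict[str, float]:
        if params is None:
            # With the new default, fall back to the parameters of the best trial.
            params = list(study.best_trial.params.keys())
        weight = 1.0 / len(params) if params else 0.0
        return {name: weight for name in params}
```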
23 changes: 19 additions & 4 deletions optuna/importance/_fanova.py
@@ -3,7 +3,6 @@
from typing import List
from typing import Optional

from optuna._experimental import experimental
from optuna._imports import try_import
from optuna.distributions import BaseDistribution
from optuna.distributions import CategoricalDistribution
@@ -30,7 +29,6 @@
fANOVA = None # NOQA


-@experimental("1.3.0")
class FanovaImportanceEvaluator(BaseImportanceEvaluator):
"""fANOVA parameter importance evaluator.

@@ -42,20 +40,37 @@ class FanovaImportanceEvaluator(BaseImportanceEvaluator):

    `An Efficient Approach for Assessing Hyperparameter Importance
    <http://proceedings.mlr.press/v32/hutter14.html>`_.

+    Args:
+        n_trees:
+            Number of trees in the random forest.
+        max_depth:
+            The maximum depth of each tree in the random forest.
+        seed:
+            Seed for the random forest.
    """

-    def __init__(self) -> None:
+    def __init__(
+        self, *, n_trees: int = 16, max_depth: int = 64, seed: Optional[int] = None
+    ) -> None:
        _imports.check()

-    def evaluate(self, study: Study, params: Optional[List[str]]) -> Dict[str, float]:
+        self._n_trees = n_trees
+        self._max_depth = max_depth
+        self._seed = seed
+
+    def evaluate(self, study: Study, params: Optional[List[str]] = None) -> Dict[str, float]:
        distributions = _get_distributions(study, params)
        params_data, values_data = _get_study_data(study, distributions)

        evaluator = fANOVA(
            X=params_data,
            Y=values_data,
            config_space=_get_configuration_space(distributions),
+            n_trees=self._n_trees,
+            seed=self._seed,
+            max_features=max(1, int(params_data.shape[1] * 0.7)),
+            max_depth=self._max_depth,
        )

        individual_importances = {}
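A short sketch of how the new constructor is used — every argument is keyword-only, and `evaluate` no longer requires `params`. The objective and argument values below are arbitrary:

```python
import optuna
from optuna.importance import FanovaImportanceEvaluator


def objective(trial: optuna.Trial) -> float:
    x = trial.suggest_uniform("x", 0.1, 3)
    y = trial.suggest_loguniform("y", 0.1, 3)
    return x * y


study = optuna.create_study()
study.optimize(objective, n_trials=10)

# Keyword-only arguments; positional use raises TypeError.
evaluator = FanovaImportanceEvaluator(n_trees=32, max_depth=32, seed=42)
importances = evaluator.evaluate(study)  # `params` may now be omitted.
```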
14 changes: 6 additions & 8 deletions optuna/importance/_mean_decrease_impurity.py
@@ -7,7 +7,6 @@

import numpy

-from optuna._experimental import experimental
from optuna._imports import try_import
from optuna.distributions import BaseDistribution
from optuna.distributions import CategoricalDistribution
@@ -22,7 +21,6 @@
from sklearn.preprocessing import OneHotEncoder


-@experimental("1.5.0")
class MeanDecreaseImpurityImportanceEvaluator(BaseImportanceEvaluator):
"""Mean Decrease Impurity (MDI) parameter importance evaluator.

@@ -36,28 +34,28 @@ class MeanDecreaseImpurityImportanceEvaluator(BaseImportanceEvaluator):
<https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html#sklearn.ensemble.RandomForestClassifier.feature_importances_>`_.

    Args:
-        n_estimators:
+        n_trees:
            Number of trees in the random forest.
        max_depth:
            The maximum depth of each tree in the random forest.
-        random_seed:
+        seed:
            Seed for the random forest.
    """

    def __init__(
-        self, n_estimators: int = 16, max_depth: int = 64, random_state: Optional[int] = None
+        self, *, n_trees: int = 16, max_depth: int = 64, seed: Optional[int] = None
    ) -> None:
        _imports.check()

        self._forest = RandomForestRegressor(
-            n_estimators=n_estimators,
+            n_estimators=n_trees,
            max_depth=max_depth,
            min_samples_split=2,
            min_samples_leaf=1,
-            random_state=random_state,
+            random_state=seed,
        )

-    def evaluate(self, study: Study, params: Optional[List[str]]) -> Dict[str, float]:
+    def evaluate(self, study: Study, params: Optional[List[str]] = None) -> Dict[str, float]:
        distributions = _get_distributions(study, params)
        params_data, values_data = _get_study_data(study, distributions)

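And the analogous sketch for the renamed MDI arguments (`n_estimators` → `n_trees`, `random_state` → `seed`), likewise keyword-only; the objective is again arbitrary:

```python
import optuna
from optuna.importance import MeanDecreaseImpurityImportanceEvaluator


def objective(trial: optuna.Trial) -> float:
    x = trial.suggest_uniform("x", 0.1, 3)
    y = trial.suggest_loguniform("y", 0.1, 3)
    return x + 2 * y


study = optuna.create_study()
study.optimize(objective, n_trials=10)

# `n_estimators` and `random_state` are now `n_trees` and `seed`.
evaluator = MeanDecreaseImpurityImportanceEvaluator(n_trees=16, max_depth=64, seed=0)
importances = evaluator.evaluate(study)
```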
Empty file.
56 changes: 56 additions & 0 deletions tests/importance_tests/test_fanova.py
@@ -0,0 +1,56 @@
from optuna import create_study
from optuna.importance import FanovaImportanceEvaluator
from optuna import Trial


def objective(trial: Trial) -> float:
    x1 = trial.suggest_uniform("x1", 0.1, 3)
    x2 = trial.suggest_loguniform("x2", 0.1, 3)
    x3 = trial.suggest_loguniform("x3", 2, 4)
    return x1 + x2 * x3


def test_fanova_importance_evaluator_n_trees() -> None:
    # Assumes that `seed` can be fixed to reproduce identical results.

    study = create_study()
    study.optimize(objective, n_trials=3)

    evaluator = FanovaImportanceEvaluator(n_trees=10, seed=0)
    param_importance = evaluator.evaluate(study)

    evaluator = FanovaImportanceEvaluator(n_trees=20, seed=0)
    param_importance_different_n_trees = evaluator.evaluate(study)

    assert param_importance != param_importance_different_n_trees


def test_fanova_importance_evaluator_max_depth() -> None:
    # Assumes that `seed` can be fixed to reproduce identical results.

    study = create_study()
    study.optimize(objective, n_trials=3)

    evaluator = FanovaImportanceEvaluator(max_depth=1, seed=0)
    param_importance = evaluator.evaluate(study)

    evaluator = FanovaImportanceEvaluator(max_depth=2, seed=0)
    param_importance_different_max_depth = evaluator.evaluate(study)

    assert param_importance != param_importance_different_max_depth


def test_fanova_importance_evaluator_seed() -> None:
    study = create_study()
    study.optimize(objective, n_trials=3)

    evaluator = FanovaImportanceEvaluator(seed=2)
    param_importance = evaluator.evaluate(study)

    evaluator = FanovaImportanceEvaluator(seed=2)
    param_importance_same_seed = evaluator.evaluate(study)
    assert param_importance == param_importance_same_seed

    evaluator = FanovaImportanceEvaluator(seed=3)
    param_importance_different_seed = evaluator.evaluate(study)
    assert param_importance != param_importance_different_seed
@@ -142,7 +142,7 @@ def objective(trial: Trial) -> float:
    study.optimize(objective, n_trials=3)

    with pytest.raises(TypeError):
-        get_param_importances(study, evaluator={})
+        get_param_importances(study, evaluator={})  # type: ignore


@parametrize_evaluator
@@ -199,7 +199,7 @@ def objective(trial: Trial) -> float:
    study.optimize(objective, n_trials=3)

    with pytest.raises(TypeError):
-        get_param_importances(study, evaluator=evaluator_init_func(), params={})
+        get_param_importances(study, evaluator=evaluator_init_func(), params={})  # type: ignore

    with pytest.raises(TypeError):
-        get_param_importances(study, evaluator=evaluator_init_func(), params=[0])
+        get_param_importances(study, evaluator=evaluator_init_func(), params=[0])  # type: ignore
56 changes: 56 additions & 0 deletions tests/importance_tests/test_mean_decrease_impurity.py
@@ -0,0 +1,56 @@
from optuna import create_study
from optuna.importance import MeanDecreaseImpurityImportanceEvaluator
from optuna import Trial


def objective(trial: Trial) -> float:
    x1 = trial.suggest_uniform("x1", 0.1, 3)
    x2 = trial.suggest_loguniform("x2", 0.1, 3)
    x3 = trial.suggest_loguniform("x3", 2, 4)
    return x1 + x2 * x3


def test_mean_decrease_impurity_importance_evaluator_n_trees() -> None:
    # Assumes that `seed` can be fixed to reproduce identical results.

    study = create_study()
    study.optimize(objective, n_trials=3)

    evaluator = MeanDecreaseImpurityImportanceEvaluator(n_trees=10, seed=0)
    param_importance = evaluator.evaluate(study)

    evaluator = MeanDecreaseImpurityImportanceEvaluator(n_trees=20, seed=0)
    param_importance_different_n_trees = evaluator.evaluate(study)

    assert param_importance != param_importance_different_n_trees


def test_mean_decrease_impurity_importance_evaluator_max_depth() -> None:
    # Assumes that `seed` can be fixed to reproduce identical results.

    study = create_study()
    study.optimize(objective, n_trials=3)

    evaluator = MeanDecreaseImpurityImportanceEvaluator(max_depth=1, seed=0)
    param_importance = evaluator.evaluate(study)

    evaluator = MeanDecreaseImpurityImportanceEvaluator(max_depth=2, seed=0)
    param_importance_different_max_depth = evaluator.evaluate(study)

    assert param_importance != param_importance_different_max_depth


def test_mean_decrease_impurity_importance_evaluator_seed() -> None:
    study = create_study()
    study.optimize(objective, n_trials=3)

    evaluator = MeanDecreaseImpurityImportanceEvaluator(seed=2)
    param_importance = evaluator.evaluate(study)

    evaluator = MeanDecreaseImpurityImportanceEvaluator(seed=2)
    param_importance_same_seed = evaluator.evaluate(study)
    assert param_importance == param_importance_same_seed

    evaluator = MeanDecreaseImpurityImportanceEvaluator(seed=3)
    param_importance_different_seed = evaluator.evaluate(study)
    assert param_importance != param_importance_different_seed