diff --git a/optuna/importance/__init__.py b/optuna/importance/__init__.py index 4076ef5d33..612b023aa9 100644 --- a/optuna/importance/__init__.py +++ b/optuna/importance/__init__.py @@ -2,16 +2,14 @@ from typing import List from typing import Optional -from optuna._experimental import experimental from optuna.importance._base import BaseImportanceEvaluator from optuna.importance._fanova import FanovaImportanceEvaluator # NOQA from optuna.importance._mean_decrease_impurity import MeanDecreaseImpurityImportanceEvaluator from optuna.study import Study -@experimental("1.3.0") def get_param_importances( - study: Study, evaluator: BaseImportanceEvaluator = None, params: Optional[List[str]] = None + study: Study, *, evaluator: BaseImportanceEvaluator = None, params: Optional[List[str]] = None ) -> Dict[str, float]: """Evaluate parameter importances based on completed trials in the given study. diff --git a/optuna/importance/_base.py b/optuna/importance/_base.py index 8904099d6a..e3d104136a 100644 --- a/optuna/importance/_base.py +++ b/optuna/importance/_base.py @@ -7,7 +7,6 @@ import numpy as np -from optuna._experimental import experimental from optuna.distributions import BaseDistribution from optuna.distributions import CategoricalDistribution from optuna.samplers import intersection_search_space @@ -15,14 +14,13 @@ from optuna.trial import TrialState -@experimental("1.3.0") class BaseImportanceEvaluator(object, metaclass=abc.ABCMeta): """Abstract parameter importance evaluator. """ @abc.abstractmethod - def evaluate(self, study: Study, params: Optional[List[str]]) -> Dict[str, float]: + def evaluate(self, study: Study, params: Optional[List[str]] = None) -> Dict[str, float]: """Evaluate parameter importances based on completed trials in the given study. .. note:: diff --git a/optuna/importance/_fanova.py b/optuna/importance/_fanova.py index 41378539c1..5c0445c198 100644 --- a/optuna/importance/_fanova.py +++ b/optuna/importance/_fanova.py @@ -3,7 +3,6 @@ from typing import List from typing import Optional -from optuna._experimental import experimental from optuna._imports import try_import from optuna.distributions import BaseDistribution from optuna.distributions import CategoricalDistribution @@ -30,7 +29,6 @@ fANOVA = None # NOQA -@experimental("1.3.0") class FanovaImportanceEvaluator(BaseImportanceEvaluator): """fANOVA parameter importance evaluator. @@ -42,12 +40,26 @@ class FanovaImportanceEvaluator(BaseImportanceEvaluator): `An Efficient Approach for Assessing Hyperparameter Importance `_. + + Args: + n_trees: + Number of trees in the random forest. + max_depth: + The maximum depth of each tree in the random forest. + seed: + Seed for the random forest. """ - def __init__(self) -> None: + def __init__( + self, *, n_trees: int = 16, max_depth: int = 64, seed: Optional[int] = None + ) -> None: _imports.check() - def evaluate(self, study: Study, params: Optional[List[str]]) -> Dict[str, float]: + self._n_trees = n_trees + self._max_depth = max_depth + self._seed = seed + + def evaluate(self, study: Study, params: Optional[List[str]] = None) -> Dict[str, float]: distributions = _get_distributions(study, params) params_data, values_data = _get_study_data(study, distributions) @@ -55,7 +67,10 @@ def evaluate(self, study: Study, params: Optional[List[str]]) -> Dict[str, float X=params_data, Y=values_data, config_space=_get_configuration_space(distributions), + n_trees=self._n_trees, + seed=self._seed, max_features=max(1, int(params_data.shape[1] * 0.7)), + max_depth=self._max_depth, ) individual_importances = {} diff --git a/optuna/importance/_mean_decrease_impurity.py b/optuna/importance/_mean_decrease_impurity.py index a8ee7feebf..58d909b109 100644 --- a/optuna/importance/_mean_decrease_impurity.py +++ b/optuna/importance/_mean_decrease_impurity.py @@ -7,7 +7,6 @@ import numpy -from optuna._experimental import experimental from optuna._imports import try_import from optuna.distributions import BaseDistribution from optuna.distributions import CategoricalDistribution @@ -22,7 +21,6 @@ from sklearn.preprocessing import OneHotEncoder -@experimental("1.5.0") class MeanDecreaseImpurityImportanceEvaluator(BaseImportanceEvaluator): """Mean Decrease Impurity (MDI) parameter importance evaluator. @@ -36,28 +34,28 @@ class MeanDecreaseImpurityImportanceEvaluator(BaseImportanceEvaluator): `_. Args: - n_estimators: + n_trees: Number of trees in the random forest. max_depth: The maximum depth of each tree in the random forest. - random_seed: + seed: Seed for the random forest. """ def __init__( - self, n_estimators: int = 16, max_depth: int = 64, random_state: Optional[int] = None + self, *, n_trees: int = 16, max_depth: int = 64, seed: Optional[int] = None ) -> None: _imports.check() self._forest = RandomForestRegressor( - n_estimators=n_estimators, + n_estimators=n_trees, max_depth=max_depth, min_samples_split=2, min_samples_leaf=1, - random_state=random_state, + random_state=seed, ) - def evaluate(self, study: Study, params: Optional[List[str]]) -> Dict[str, float]: + def evaluate(self, study: Study, params: Optional[List[str]] = None) -> Dict[str, float]: distributions = _get_distributions(study, params) params_data, values_data = _get_study_data(study, distributions) diff --git a/tests/importance_tests/__init__.py b/tests/importance_tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/importance_tests/test_fanova.py b/tests/importance_tests/test_fanova.py new file mode 100644 index 0000000000..668b22a636 --- /dev/null +++ b/tests/importance_tests/test_fanova.py @@ -0,0 +1,56 @@ +from optuna import create_study +from optuna.importance import FanovaImportanceEvaluator +from optuna import Trial + + +def objective(trial: Trial) -> float: + x1 = trial.suggest_uniform("x1", 0.1, 3) + x2 = trial.suggest_loguniform("x2", 0.1, 3) + x3 = trial.suggest_loguniform("x3", 2, 4) + return x1 + x2 * x3 + + +def test_fanova_importance_evaluator_n_trees() -> None: + # Assumes that `seed` can be fixed to reproduce identical results. + + study = create_study() + study.optimize(objective, n_trials=3) + + evaluator = FanovaImportanceEvaluator(n_trees=10, seed=0) + param_importance = evaluator.evaluate(study) + + evaluator = FanovaImportanceEvaluator(n_trees=20, seed=0) + param_importance_different_n_trees = evaluator.evaluate(study) + + assert param_importance != param_importance_different_n_trees + + +def test_fanova_importance_evaluator_max_depth() -> None: + # Assumes that `seed` can be fixed to reproduce identical results. + + study = create_study() + study.optimize(objective, n_trials=3) + + evaluator = FanovaImportanceEvaluator(max_depth=1, seed=0) + param_importance = evaluator.evaluate(study) + + evaluator = FanovaImportanceEvaluator(max_depth=2, seed=0) + param_importance_different_max_depth = evaluator.evaluate(study) + + assert param_importance != param_importance_different_max_depth + + +def test_fanova_importance_evaluator_seed() -> None: + study = create_study() + study.optimize(objective, n_trials=3) + + evaluator = FanovaImportanceEvaluator(seed=2) + param_importance = evaluator.evaluate(study) + + evaluator = FanovaImportanceEvaluator(seed=2) + param_importance_same_seed = evaluator.evaluate(study) + assert param_importance == param_importance_same_seed + + evaluator = FanovaImportanceEvaluator(seed=3) + param_importance_different_seed = evaluator.evaluate(study) + assert param_importance != param_importance_different_seed diff --git a/tests/test_importance.py b/tests/importance_tests/test_init.py similarity index 98% rename from tests/test_importance.py rename to tests/importance_tests/test_init.py index 4de905c016..5128789253 100644 --- a/tests/test_importance.py +++ b/tests/importance_tests/test_init.py @@ -142,7 +142,7 @@ def objective(trial: Trial) -> float: study.optimize(objective, n_trials=3) with pytest.raises(TypeError): - get_param_importances(study, evaluator={}) + get_param_importances(study, evaluator={}) # type: ignore @parametrize_evaluator @@ -199,7 +199,7 @@ def objective(trial: Trial) -> float: study.optimize(objective, n_trials=3) with pytest.raises(TypeError): - get_param_importances(study, evaluator=evaluator_init_func(), params={}) + get_param_importances(study, evaluator=evaluator_init_func(), params={}) # type: ignore with pytest.raises(TypeError): - get_param_importances(study, evaluator=evaluator_init_func(), params=[0]) + get_param_importances(study, evaluator=evaluator_init_func(), params=[0]) # type: ignore diff --git a/tests/importance_tests/test_mean_decrease_impurity.py b/tests/importance_tests/test_mean_decrease_impurity.py new file mode 100644 index 0000000000..4d0474cd2d --- /dev/null +++ b/tests/importance_tests/test_mean_decrease_impurity.py @@ -0,0 +1,56 @@ +from optuna import create_study +from optuna.importance import MeanDecreaseImpurityImportanceEvaluator +from optuna import Trial + + +def objective(trial: Trial) -> float: + x1 = trial.suggest_uniform("x1", 0.1, 3) + x2 = trial.suggest_loguniform("x2", 0.1, 3) + x3 = trial.suggest_loguniform("x3", 2, 4) + return x1 + x2 * x3 + + +def test_mean_decrease_impurity_importance_evaluator_n_trees() -> None: + # Assumes that `seed` can be fixed to reproduce identical results. + + study = create_study() + study.optimize(objective, n_trials=3) + + evaluator = MeanDecreaseImpurityImportanceEvaluator(n_trees=10, seed=0) + param_importance = evaluator.evaluate(study) + + evaluator = MeanDecreaseImpurityImportanceEvaluator(n_trees=20, seed=0) + param_importance_different_n_trees = evaluator.evaluate(study) + + assert param_importance != param_importance_different_n_trees + + +def test_mean_decrease_impurity_importance_evaluator_max_depth() -> None: + # Assumes that `seed` can be fixed to reproduce identical results. + + study = create_study() + study.optimize(objective, n_trials=3) + + evaluator = MeanDecreaseImpurityImportanceEvaluator(max_depth=1, seed=0) + param_importance = evaluator.evaluate(study) + + evaluator = MeanDecreaseImpurityImportanceEvaluator(max_depth=2, seed=0) + param_importance_different_max_depth = evaluator.evaluate(study) + + assert param_importance != param_importance_different_max_depth + + +def test_mean_decrease_impurity_importance_evaluator_seed() -> None: + study = create_study() + study.optimize(objective, n_trials=3) + + evaluator = MeanDecreaseImpurityImportanceEvaluator(seed=2) + param_importance = evaluator.evaluate(study) + + evaluator = MeanDecreaseImpurityImportanceEvaluator(seed=2) + param_importance_same_seed = evaluator.evaluate(study) + assert param_importance == param_importance_same_seed + + evaluator = MeanDecreaseImpurityImportanceEvaluator(seed=3) + param_importance_different_seed = evaluator.evaluate(study) + assert param_importance != param_importance_different_seed