From ad3ab5e199b0d1377eb985610c2efeec93fff17e Mon Sep 17 00:00:00 2001
From: "d.a.bunin"
Date: Tue, 6 Jun 2023 14:52:07 +0300
Subject: [PATCH 1/2] feature: add params_to_tune into Tune

---
 etna/auto/auto.py            | 18 +++++++++++++++---
 tests/test_auto/test_tune.py | 34 ++++++++++++++++++++++++++++++----
 2 files changed, 45 insertions(+), 7 deletions(-)

diff --git a/etna/auto/auto.py b/etna/auto/auto.py
index c6895c309..188b6e4fb 100644
--- a/etna/auto/auto.py
+++ b/etna/auto/auto.py
@@ -12,6 +12,7 @@
 import optuna
 import pandas as pd
 from hydra_slayer import get_from_params
+from optuna.distributions import BaseDistribution
 from optuna.distributions import CategoricalDistribution
 from optuna.distributions import DiscreteUniformDistribution
 from optuna.distributions import IntLogUniformDistribution
@@ -595,6 +596,7 @@ def __init__(
         storage: Optional[BaseStorage] = None,
         metrics: Optional[List[Metric]] = None,
         sampler: Optional[BaseSampler] = None,
+        params_to_tune: Optional[Dict[str, BaseDistribution]] = None,
     ):
         """
         Initialize Tune class.
@@ -622,6 +624,11 @@ def __init__(
             By default, :py:class:`~etna.metrics.metrics.Sign`, :py:class:`~etna.metrics.metrics.SMAPE`,
             :py:class:`~etna.metrics.metrics.MAE`, :py:class:`~etna.metrics.metrics.MSE`,
             :py:class:`~etna.metrics.metrics.MedAE` metrics are used.
+        sampler:
+            Optuna sampler to use. By default, TPE sampler is used.
+        params_to_tune:
+            Parameters of pipeline that should be tuned with corresponding tuning distributions.
+            By default, `pipeline.params_to_tune()` is used.
         """
         super().__init__(
             target_metric=target_metric,
@@ -638,6 +645,10 @@ def __init__(
             self.sampler: BaseSampler = TPESampler()
         else:
             self.sampler = sampler
+        if params_to_tune is None:
+            self.params_to_tune = pipeline.params_to_tune()
+        else:
+            self.params_to_tune = params_to_tune
         self._optuna: Optional[Optuna] = None
 
     def fit(
@@ -677,6 +688,7 @@ def fit(
             objective=self.objective(
                 ts=ts,
                 pipeline=self.pipeline,
+                params_to_tune=self.params_to_tune,
                 target_metric=self.target_metric,
                 metric_aggregation=self.metric_aggregation,
                 metrics=self.metrics,
@@ -696,6 +708,7 @@ def fit(
     def objective(
         ts: TSDataset,
         pipeline: BasePipeline,
+        params_to_tune: Dict[str, BaseDistribution],
         target_metric: Metric,
         metric_aggregation: MetricAggregationStatistics,
         metrics: List[Metric],
@@ -712,6 +725,8 @@ def objective(
             TSDataset to fit on.
         pipeline:
             Pipeline to tune.
+        params_to_tune:
+            Parameters of pipeline that should be tuned with corresponding tuning distributions.
         target_metric:
             Metric to optimize.
         metric_aggregation:
@@ -746,9 +761,6 @@ def objective(
         }
 
         def _objective(trial: Trial) -> float:
-
-            params_to_tune = pipeline.params_to_tune()
-
             # using received optuna.distribution objects to call corresponding trial.suggest_xxx
             params_suggested = {}
             for param_name, param_distr in params_to_tune.items():
diff --git a/tests/test_auto/test_tune.py b/tests/test_auto/test_tune.py
index 116834877..642312ba0 100644
--- a/tests/test_auto/test_tune.py
+++ b/tests/test_auto/test_tune.py
@@ -35,11 +35,13 @@ def test_objective(
     initializer=MagicMock(spec=_Initializer),
     callback=MagicMock(spec=_Callback),
     pipeline=Pipeline(NaiveModel()),
+    params_to_tune={},
 ):
     trial = MagicMock()
     _objective = Tune.objective(
         ts=example_tsds,
         pipeline=pipeline,
+        params_to_tune=params_to_tune,
         target_metric=target_metric,
         metric_aggregation=metric_aggregation,
         metrics=metrics,
@@ -172,14 +174,38 @@
 )
 def test_tune_run(example_tsds, optuna_storage, pipeline):
     tune = Tune(
-        pipeline,
-        MAE(),
+        pipeline=pipeline,
+        target_metric=MAE(),
+        metric_aggregation="median",
+        horizon=7,
+        storage=optuna_storage,
+    )
+    tune.fit(ts=example_tsds, n_trials=2)
+
+    assert len(tune._optuna.study.trials) == 2
+    assert len(tune.summary()) == 2
+    assert len(tune.top_k()) == 2
+    assert len(tune.top_k(k=1)) == 1
+
+
+@pytest.mark.parametrize(
+    "pipeline, params_to_tune",
+    [
+        (Pipeline(NaiveModel(1), horizon=7), {"model.lag": IntUniformDistribution(low=1, high=5)}),
+    ],
+)
+def test_tune_run_custom_params_to_tune(example_tsds, optuna_storage, pipeline, params_to_tune):
+    tune = Tune(
+        pipeline=pipeline,
+        params_to_tune=params_to_tune,
+        target_metric=MAE(),
         metric_aggregation="median",
         horizon=7,
         storage=optuna_storage,
     )
     tune.fit(ts=example_tsds, n_trials=2)
 
+    assert tune.params_to_tune == params_to_tune
     assert len(tune._optuna.study.trials) == 2
     assert len(tune.summary()) == 2
     assert len(tune.top_k()) == 2
@@ -205,8 +231,8 @@ def test_tune_hierarchical_run(
     pipeline,
 ):
     tune = Tune(
-        pipeline,
-        MAE(),
+        pipeline=pipeline,
+        target_metric=MAE(),
         metric_aggregation="median",
         horizon=7,
         backtest_params={"n_folds": 2},

From 7fbcdeb68afd73e358e5b0cfbf492527d46c8cc4 Mon Sep 17 00:00:00 2001
From: "d.a.bunin"
Date: Tue, 6 Jun 2023 14:53:57 +0300
Subject: [PATCH 2/2] chore: update changelog

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b18dc6a7d..638a23839 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,7 +29,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Add default `params_to_tune` for `ChangePointsSegmentationTransform`, `ChangePointsTrendTransform`, `ChangePointsLevelTransform`, `TrendTransform`, `LinearTrendTransform`, `TheilSenTrendTransform` and `STLTransform` ([#1243](https://github.com/tinkoff-ai/etna/pull/1243))
 - Add default `params_to_tune` for `TreeFeatureSelectionTransform`, `MRMRFeatureSelectionTransform` and `GaleShapleyFeatureSelectionTransform` ([#1250](https://github.com/tinkoff-ai/etna/pull/1250))
 - Add tuning stage into `Auto.fit` ([#1272](https://github.com/tinkoff-ai/etna/pull/1272))
--
+- Add `params_to_tune` into `Tune` init ([#1282](https://github.com/tinkoff-ai/etna/pull/1282))
 ### Fixed
 -
 - Fix `BaseReconciliator` to work on `pandas==1.1.5` ([#1229](https://github.com/tinkoff-ai/etna/pull/1229))
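
A minimal usage sketch of the new `params_to_tune` argument, mirroring `test_tune_run_custom_params_to_tune` from the patch above. The synthetic dataset, the `model.lag` key and its 1-5 search range are illustrative assumptions, not part of the patch:

    from optuna.distributions import IntUniformDistribution

    from etna.auto.auto import Tune  # Tune is defined in etna/auto/auto.py (see the diff above)
    from etna.datasets import TSDataset, generate_ar_df
    from etna.metrics import MAE
    from etna.models import NaiveModel
    from etna.pipeline import Pipeline

    # Synthetic daily data with two segments, just to have something to fit on.
    df = generate_ar_df(periods=100, start_time="2021-01-01", n_segments=2, freq="D")
    ts = TSDataset(df=TSDataset.to_dataset(df), freq="D")

    pipeline = Pipeline(NaiveModel(lag=1), horizon=7)

    # Custom search space passed instead of the default pipeline.params_to_tune().
    params_to_tune = {"model.lag": IntUniformDistribution(low=1, high=5)}

    tune = Tune(
        pipeline=pipeline,
        params_to_tune=params_to_tune,
        target_metric=MAE(),
        metric_aggregation="median",
        horizon=7,
    )
    tune.fit(ts=ts, n_trials=2)
    print(tune.top_k(k=1))

If `params_to_tune` is omitted, the new `__init__` branch falls back to `pipeline.params_to_tune()`, so existing callers keep their current behaviour.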