From ef510173199cd3b8b44ad6d767945511feb7d521 Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Thu, 8 Jun 2023 17:10:27 +0300 Subject: [PATCH 1/9] moved util function --- etna/commands/utils.py | 8 ++++++++ tests/test_commands/test_utils.py | 34 +++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/etna/commands/utils.py b/etna/commands/utils.py index a97daa457..2efa90137 100644 --- a/etna/commands/utils.py +++ b/etna/commands/utils.py @@ -1,13 +1,21 @@ from enum import Enum from math import floor +from typing import Any +from typing import Dict from typing import Literal from typing import Optional +from typing import Set from typing import Union from etna.datasets import TSDataset from etna.pipeline import Pipeline +def remove_params(params: Dict[str, Any], to_remove: Set[str]) -> Dict[str, Any]: + """Select `forecast` arguments from params.""" + return {k: v for k, v in params.items() if k not in to_remove} + + class MethodsWithFolds(str, Enum): """Enum for methods that use `n_folds` argument.""" diff --git a/tests/test_commands/test_utils.py b/tests/test_commands/test_utils.py index 559af4f43..be5be1171 100644 --- a/tests/test_commands/test_utils.py +++ b/tests/test_commands/test_utils.py @@ -2,10 +2,12 @@ import pytest +from etna.commands.forecast_command import ADDITIONAL_FORECAST_PARAMETERS from etna.commands.utils import _estimate_n_folds from etna.commands.utils import _max_n_folds_backtest from etna.commands.utils import _max_n_folds_forecast from etna.commands.utils import estimate_max_n_folds +from etna.commands.utils import remove_params from etna.metrics import MAE from etna.models import HoltWintersModel from etna.models import LinearPerSegmentModel @@ -254,3 +256,35 @@ def test_estimate_max_n_folds_backtest_with_transforms( run_estimate_max_n_folds_backtest_test( pipeline=pipeline_with_transforms, context_size=context_size, ts=ts, stride=stride, expected=expected ) + + +@pytest.mark.parametrize( + "params,to_remove,expected", + 
( + ({"start_timestamp": "2021-09-10"}, ADDITIONAL_FORECAST_PARAMETERS, {}), + ( + {"prediction_interval": True, "n_folds": 3, "start_timestamp": "2021-09-10"}, + ADDITIONAL_FORECAST_PARAMETERS, + {"prediction_interval": True, "n_folds": 3}, + ), + ( + {"prediction_interval": True, "n_folds": 3, "quantiles": [0.025, 0.975]}, + ADDITIONAL_FORECAST_PARAMETERS, + {"prediction_interval": True, "n_folds": 3, "quantiles": [0.025, 0.975]}, + ), + ( + { + "prediction_interval": True, + "n_folds": 3, + "quantiles": [0.025, 0.975], + "estimate_n_folds": True, + "context_size": 3, + }, + ADDITIONAL_FORECAST_PARAMETERS, + {"prediction_interval": True, "n_folds": 3, "quantiles": [0.025, 0.975]}, + ), + ), +) +def test_get_forecast_call_params(params, to_remove, expected): + result = remove_params(params=params, to_remove=to_remove) + assert result == expected From fcc752a211301acaf69e215dfc885049e3582f08 Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Thu, 8 Jun 2023 17:11:11 +0300 Subject: [PATCH 2/9] folds estimation for backtest --- etna/commands/backtest_command.py | 36 +++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/etna/commands/backtest_command.py b/etna/commands/backtest_command.py index 4d49f1992..a8a96c134 100644 --- a/etna/commands/backtest_command.py +++ b/etna/commands/backtest_command.py @@ -5,6 +5,7 @@ from typing import Optional from typing import Sequence from typing import Union +from typing import cast import hydra_slayer import pandas as pd @@ -12,9 +13,14 @@ from omegaconf import OmegaConf from typing_extensions import Literal +from etna.commands.utils import estimate_max_n_folds +from etna.commands.utils import remove_params from etna.datasets import TSDataset from etna.pipeline import Pipeline +ADDITIONAL_BACKTEST_PARAMETERS = {"estimate_n_folds"} +ADDITIONAL_PIPELINE_PARAMETERS = {"context_size"} + def backtest( config_path: Path = typer.Argument(..., help="path to yaml config with desired pipeline"), @@ -63,6 
+69,8 @@ def backtest( ============= =========== =============== =============== """ pipeline_configs = OmegaConf.to_object(OmegaConf.load(config_path)) + pipeline_configs = cast(Dict[str, Any], pipeline_configs) + backtest_configs = OmegaConf.to_object(OmegaConf.load(backtest_config_path)) df_timeseries = pd.read_csv(target_path, parse_dates=["timestamp"]) @@ -78,10 +86,34 @@ def backtest( tsdataset = TSDataset(df=df_timeseries, freq=freq, df_exog=df_exog, known_future=k_f) - pipeline: Pipeline = hydra_slayer.get_from_params(**pipeline_configs) + pipeline_args = remove_params(params=pipeline_configs, to_remove=ADDITIONAL_PIPELINE_PARAMETERS) + pipeline: Pipeline = hydra_slayer.get_from_params(**pipeline_args) backtest_configs_hydra_slayer: Dict[str, Any] = hydra_slayer.get_from_params(**backtest_configs) - metrics, forecast, info = pipeline.backtest(ts=tsdataset, **backtest_configs_hydra_slayer) + # estimate number of folds if parameters set + if backtest_configs_hydra_slayer.get("estimate_n_folds", False): + if "context_size" not in pipeline_configs: + raise ValueError("Parameter `context_size` must be set if number of folds estimation enabled!") + + context_size = pipeline_configs["context_size"] + + max_n_folds = estimate_max_n_folds( + ts=tsdataset, + pipeline=pipeline, + method_name="backtest", + context_size=context_size, + **backtest_configs_hydra_slayer, + ) + + n_folds = min( + max_n_folds, backtest_configs_hydra_slayer.get("n_folds", 5) + ) # use default value of folds if parameter not set + + backtest_configs_hydra_slayer["n_folds"] = n_folds + + backtest_call_args = remove_params(params=backtest_configs_hydra_slayer, to_remove=ADDITIONAL_BACKTEST_PARAMETERS) + + metrics, forecast, info = pipeline.backtest(ts=tsdataset, **backtest_call_args) (metrics.to_csv(output_path / "metrics.csv", index=False)) (TSDataset.to_flatten(forecast).to_csv(output_path / "forecast.csv", index=False)) From 8aeb99f048464bee02a1823d8f7c437eede67e7d Mon Sep 17 00:00:00 2001 
From: brsnw250 Date: Thu, 8 Jun 2023 17:12:07 +0300 Subject: [PATCH 3/9] tests for backtest --- tests/test_commands/conftest.py | 21 +++++++++ tests/test_commands/test_backtest.py | 70 ++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) diff --git a/tests/test_commands/conftest.py b/tests/test_commands/conftest.py index 5917e4150..6a7ef929a 100644 --- a/tests/test_commands/conftest.py +++ b/tests/test_commands/conftest.py @@ -29,6 +29,27 @@ def base_pipeline_yaml_path(): tmp.close() +@pytest.fixture +def base_pipeline_with_context_size_yaml_path(): + tmp = NamedTemporaryFile("w") + tmp.write( + """ + _target_: etna.pipeline.Pipeline + horizon: 4 + model: + _target_: etna.models.CatBoostMultiSegmentModel + transforms: + - _target_: etna.transforms.LinearTrendTransform + in_column: target + - _target_: etna.transforms.SegmentEncoderTransform + context_size: 1 + """ + ) + tmp.flush() + yield Path(tmp.name) + tmp.close() + + @pytest.fixture def elementary_linear_model_pipeline(): tmp = NamedTemporaryFile("w") diff --git a/tests/test_commands/test_backtest.py b/tests/test_commands/test_backtest.py index 5ac45324d..0c253bc6e 100644 --- a/tests/test_commands/test_backtest.py +++ b/tests/test_commands/test_backtest.py @@ -26,6 +26,47 @@ def base_backtest_yaml_path(): tmp.close() +@pytest.fixture +def backtest_with_folds_estimation_yaml_path(): + tmp = NamedTemporaryFile("w") + tmp.write( + """ + n_folds: 200 + n_jobs: 4 + metrics: + - _target_: etna.metrics.MAE + - _target_: etna.metrics.MSE + - _target_: etna.metrics.MAPE + - _target_: etna.metrics.SMAPE + estimate_n_folds: true + """ + ) + tmp.flush() + yield Path(tmp.name) + tmp.close() + + +@pytest.fixture +def backtest_with_stride_yaml_path(): + tmp = NamedTemporaryFile("w") + tmp.write( + """ + n_folds: 3 + n_jobs: 4 + metrics: + - _target_: etna.metrics.MAE + - _target_: etna.metrics.MSE + - _target_: etna.metrics.MAPE + - _target_: etna.metrics.SMAPE + estimate_n_folds: true + stride: 100 + """ + ) + 
tmp.flush() + yield Path(tmp.name) + tmp.close() + + def test_dummy_run(base_pipeline_yaml_path, base_backtest_yaml_path, base_timeseries_path): tmp_output = TemporaryDirectory() tmp_output_path = Path(tmp_output.name) @@ -82,3 +123,32 @@ def test_forecast_format(base_pipeline_yaml_path, base_backtest_yaml_path, base_ forecast_df = pd.read_csv(tmp_output_path / "forecast.csv") assert all(x in forecast_df.columns for x in ["segment", "timestamp", "target"]) assert len(forecast_df) == 24 + + +@pytest.mark.parametrize( + "backtest_config_path_name,expected", + ( + ("backtest_with_folds_estimation_yaml_path", 24), + ("backtest_with_stride_yaml_path", 1), + ), +) +def test_backtest_estimate_n_folds( + base_pipeline_with_context_size_yaml_path, backtest_config_path_name, base_timeseries_path, expected, request +): + backtest_config_path = request.getfixturevalue(backtest_config_path_name) + + tmp_output = TemporaryDirectory() + tmp_output_path = Path(tmp_output.name) + run( + [ + "etna", + "backtest", + str(base_pipeline_with_context_size_yaml_path), + str(backtest_config_path), + str(base_timeseries_path), + "D", + str(tmp_output_path), + ] + ) + forecast_df = pd.read_csv(tmp_output_path / "forecast.csv") + assert forecast_df["fold_number"].nunique() == expected From 72f8320212d321b056144ef56365dc0eac8f4fae Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Thu, 8 Jun 2023 17:12:56 +0300 Subject: [PATCH 4/9] folds estimation for forecast --- etna/commands/forecast_command.py | 42 ++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/etna/commands/forecast_command.py b/etna/commands/forecast_command.py index 4152aecfb..a351171a2 100644 --- a/etna/commands/forecast_command.py +++ b/etna/commands/forecast_command.py @@ -1,3 +1,4 @@ +import warnings from pathlib import Path from typing import Any from typing import Dict @@ -5,6 +6,7 @@ from typing import Optional from typing import Sequence from typing import Union +from typing import cast 
import hydra_slayer import pandas as pd @@ -12,16 +14,14 @@ from omegaconf import OmegaConf from typing_extensions import Literal +from etna.commands.utils import estimate_max_n_folds +from etna.commands.utils import remove_params from etna.datasets import TSDataset from etna.models.utils import determine_num_steps from etna.pipeline import Pipeline -ADDITIONAL_FORECAST_PARAMETERS = {"start_timestamp"} - - -def get_forecast_call_params(forecast_params: Dict[str, Any]) -> Dict[str, Any]: - """Select `forecast` arguments from params.""" - return {k: v for k, v in forecast_params.items() if k not in ADDITIONAL_FORECAST_PARAMETERS} +ADDITIONAL_FORECAST_PARAMETERS = {"start_timestamp", "estimate_n_folds"} +ADDITIONAL_PIPELINE_PARAMETERS = {"context_size"} def compute_horizon(horizon: int, forecast_params: Dict[str, Any], tsdataset: TSDataset) -> int: @@ -101,6 +101,8 @@ def forecast( ============= =========== =============== =============== """ pipeline_configs = OmegaConf.to_object(OmegaConf.load(config_path)) + pipeline_configs = cast(Dict[str, Any], pipeline_configs) + if forecast_config_path: forecast_params_config = OmegaConf.to_object(OmegaConf.load(forecast_config_path)) else: @@ -124,10 +126,32 @@ def forecast( horizon = compute_horizon(horizon=horizon, forecast_params=forecast_params, tsdataset=tsdataset) pipeline_configs["horizon"] = horizon # type: ignore - forecast_call_args = get_forecast_call_params(forecast_params) - - pipeline: Pipeline = hydra_slayer.get_from_params(**pipeline_configs) + pipeline_args = remove_params(params=pipeline_configs, to_remove=ADDITIONAL_PIPELINE_PARAMETERS) + pipeline: Pipeline = hydra_slayer.get_from_params(**pipeline_args) pipeline.fit(tsdataset) + + # estimate number of folds if parameters set + if forecast_params.get("estimate_n_folds", False): + if forecast_params.get("prediction_interval", False): + if "context_size" not in pipeline_configs: + raise ValueError("Parameter `context_size` must be set if number of folds 
estimation enabled!") + + context_size = pipeline_configs["context_size"] + + max_n_folds = estimate_max_n_folds( + pipeline=pipeline, method_name="forecast", context_size=context_size, **forecast_params + ) + + n_folds = min( + max_n_folds, forecast_params.get("n_folds", 5) + ) # use default value of folds if parameter not set + forecast_params["n_folds"] = n_folds + + else: + warnings.warn("Number of folds estimation would be ignored as the current forecast call doesn't use folds!") + + forecast_call_args = remove_params(params=forecast_params, to_remove=ADDITIONAL_FORECAST_PARAMETERS) + forecast = pipeline.forecast(**forecast_call_args) forecast = filter_forecast(forecast_ts=forecast, forecast_params=forecast_params) From f8a7d2d70fb655ed3ca62b03513fa22896337f9b Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Thu, 8 Jun 2023 17:13:14 +0300 Subject: [PATCH 5/9] added tests for forecast --- tests/test_commands/conftest.py | 31 --------- tests/test_commands/test_forecast.py | 96 ++++++++++++++++++++++------ 2 files changed, 76 insertions(+), 51 deletions(-) diff --git a/tests/test_commands/conftest.py b/tests/test_commands/conftest.py index 6a7ef929a..e1c48f13f 100644 --- a/tests/test_commands/conftest.py +++ b/tests/test_commands/conftest.py @@ -164,37 +164,6 @@ def base_timeseries_exog_path(): tmp.close() -@pytest.fixture -def base_forecast_omegaconf_path(): - tmp = NamedTemporaryFile("w") - tmp.write( - """ - prediction_interval: true - quantiles: [0.025, 0.975] - n_folds: 3 - """ - ) - tmp.flush() - yield Path(tmp.name) - tmp.close() - - -@pytest.fixture -def start_timestamp_forecast_omegaconf_path(): - tmp = NamedTemporaryFile("w") - tmp.write( - """ - prediction_interval: true - quantiles: [0.025, 0.975] - n_folds: 3 - start_timestamp: "2021-09-10" - """ - ) - tmp.flush() - yield Path(tmp.name) - tmp.close() - - @pytest.fixture def empty_ts(): df = pd.DataFrame({"segment": [], "timestamp": [], "target": []}) diff --git a/tests/test_commands/test_forecast.py 
b/tests/test_commands/test_forecast.py index f55b5a5de..ab670df7e 100644 --- a/tests/test_commands/test_forecast.py +++ b/tests/test_commands/test_forecast.py @@ -8,10 +8,57 @@ from etna.commands.forecast_command import compute_horizon from etna.commands.forecast_command import filter_forecast -from etna.commands.forecast_command import get_forecast_call_params from etna.datasets import TSDataset +@pytest.fixture +def base_forecast_omegaconf_path(): + tmp = NamedTemporaryFile("w") + tmp.write( + """ + prediction_interval: true + quantiles: [0.025, 0.975] + n_folds: 3 + """ + ) + tmp.flush() + yield Path(tmp.name) + tmp.close() + + +@pytest.fixture +def start_timestamp_forecast_omegaconf_path(): + tmp = NamedTemporaryFile("w") + tmp.write( + """ + prediction_interval: true + quantiles: [0.025, 0.975] + n_folds: 3 + start_timestamp: "2021-09-10" + """ + ) + tmp.flush() + yield Path(tmp.name) + tmp.close() + + +@pytest.fixture +def base_forecast_with_folds_estimation_omegaconf_path(): + tmp = NamedTemporaryFile("w") + tmp.write( + """ + prediction_interval: true + quantiles: [0.025, 0.975] + n_folds: 200 + start_timestamp: "2021-09-10" + estimate_n_folds: true + """ + ) + tmp.flush() + yield Path(tmp.name) + tmp.close() + + def test_dummy_run_with_exog(base_pipeline_yaml_path, base_timeseries_path, base_timeseries_exog_path): tmp_output = NamedTemporaryFile("w") tmp_output_path = Path(tmp_output.name) @@ -128,25 +175,6 @@ def pipeline_dummy_config(): return {"horizon": 3} -@pytest.mark.parametrize( - "params,expected", - ( - ({"start_timestamp": "2021-09-10"}, {}), - ( - {"prediction_interval": True, "n_folds": 3, "start_timestamp": "2021-09-10"}, - {"prediction_interval": True, "n_folds": 3}, - ), - ( - {"prediction_interval": True, "n_folds": 3, "quantiles": [0.025, 0.975]}, - {"prediction_interval": True, "n_folds": 3, "quantiles": [0.025, 0.975]}, - ), - ), -) -def test_get_forecast_call_params(params, expected): - result = 
get_forecast_call_params(forecast_params=params) - assert result == expected - - @pytest.mark.parametrize("forecast_params", ({"start_timestamp": "2020-04-09"}, {"start_timestamp": "2019-04-10"})) def test_compute_horizon_error(example_tsds, forecast_params, pipeline_dummy_config): with pytest.raises(ValueError, match="Parameter `start_timestamp` should greater than end of training dataset!"): @@ -215,3 +243,31 @@ def test_forecast_start_timestamp( assert len(df_output) == 3 * 2 # 3 predictions for 2 segments assert df_output["timestamp"].min() == "2021-09-10" # start_timestamp assert not np.any(df_output.isna().values) + + +def test_forecast_estimate_n_folds( + base_pipeline_with_context_size_yaml_path, + base_forecast_with_folds_estimation_omegaconf_path, + base_timeseries_path, + base_timeseries_exog_path, +): + tmp_output = NamedTemporaryFile("w") + tmp_output_path = Path(tmp_output.name) + run( + [ + "etna", + "forecast", + str(base_pipeline_with_context_size_yaml_path), + str(base_timeseries_path), + "D", + str(tmp_output_path), + str(base_timeseries_exog_path), + str(base_forecast_with_folds_estimation_omegaconf_path), + ] + ) + df_output = pd.read_csv(tmp_output_path) + + print(df_output) + + assert all(x in df_output.columns for x in ["target_0.025", "target_0.975"]) + assert len(df_output) == 4 * 2 # 4 predictions for 2 segments From d6068b38dbb6b4a6987de01d5bedf9f4600f0adc Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Fri, 9 Jun 2023 11:16:45 +0300 Subject: [PATCH 6/9] added docs --- docs/source/commands.rst | 65 ++++++++++++++++++++++++++++ etna/commands/forecast_command.py | 2 +- tests/test_commands/test_forecast.py | 2 - tests/test_commands/test_utils.py | 12 ++--- 4 files changed, 69 insertions(+), 12 deletions(-) diff --git a/docs/source/commands.rst b/docs/source/commands.rst index 69ead83d0..d0e98efa6 100644 --- a/docs/source/commands.rst +++ b/docs/source/commands.rst @@ -28,6 +28,10 @@ Basic ``forecast`` usage: * :code:`n_folds` - number of 
folds to use in the backtest for prediction interval estimation. By default equals to 3. * :code:`return_components` - whether to estimate forecast components * :code:`start_timestamp` - timestamp with the starting point of forecast. +* :code:`estimate_n_folds` - whether to estimate the number of folds from data. Works only when prediction intervals are enabled. Requires :code:`context_size` parameter set in pipeline config. + +:code:`context_size` is the top level field in pipeline config, +determines minimum number of points in the history that is required by pipeline to produce a forecast. Setting these parameters is optional. Further information on arguments could be found in the documentation of :meth:`~etna.pipeline.pipeline.Pipeline.forecast` method. @@ -63,6 +67,26 @@ Parameter :code:`start_timestamp` could be set similarly: quantiles: [0.025, 0.975] start_timestamp: "2020-01-12" +Example of a pair of configs for number of folds estimation: + +.. code-block:: yaml + + _target_: etna.pipeline.Pipeline + horizon: 4 + model: + _target_: etna.models.CatBoostMultiSegmentModel + transforms: + - _target_: etna.transforms.LinearTrendTransform + in_column: target + - _target_: etna.transforms.SegmentEncoderTransform + context_size: 1 + +.. code-block:: yaml + + prediction_interval: true + quantiles: [0.025, 0.975] + estimate_n_folds: true + **How to prepare data?** Example of dataset with data to forecast: @@ -114,6 +138,24 @@ Basic ``backtest`` usage: [EXOG_PATH] path to csv with exog data [KNOWN_FUTURE] list of all known_future columns (regressor columns). If not specified then all exog_columns considered known_future [default: None] +**Backtest config parameters** + +* :code:`metrics` - list of metrics to compute for each fold. +* :code:`n_folds` - number of folds to use in the backtest for prediction interval estimation. By default equals to 5. +* :code:`mode` - train generation policy: :code:`expand` or :code:`constant`. 
+* :code:`aggregate_metrics` - if :code:`True` aggregate metrics above folds, return raw metrics otherwise. +* :code:`n_jobs` - number of jobs to run in parallel. +* :code:`refit` - determines how often pipeline should be retrained during iteration over folds. +* :code:`stride` - number of points between folds. +* :code:`joblib_params` - additional parameters for :py:class:`joblib.Parallel` +* :code:`forecast_params` - additional parameters for :meth:`~etna.pipeline.base.BasePipeline.forecast` +* :code:`estimate_n_folds` - whether to estimate the number of folds from data. Requires :code:`context_size` parameter set in pipeline config. + +:code:`context_size` is the top level field in pipeline config, +determines minimum number of points in the history that is required by pipeline to produce a forecast. + +Setting these parameters is optional. +Further information on arguments could be found in the documentation of :meth:`~etna.pipeline.base.BasePipeline.backtest` method. **How to create configs?** @@ -142,6 +184,29 @@ Example of backtest run config: - _target_: etna.metrics.MAPE - _target_: etna.metrics.SMAPE +Example of a pair of configs for number of folds estimation for backtest: + +.. code-block:: yaml + + _target_: etna.pipeline.Pipeline + horizon: 4 + model: + _target_: etna.models.CatBoostMultiSegmentModel + transforms: + - _target_: etna.transforms.LinearTrendTransform + in_column: target + - _target_: etna.transforms.SegmentEncoderTransform + context_size: 1 + +.. 
code-block:: yaml + + n_folds: 200 + n_jobs: 4 + metrics: + - _target_: etna.metrics.MAE + - _target_: etna.metrics.SMAPE + estimate_n_folds: true + **How to prepare data?** diff --git a/etna/commands/forecast_command.py b/etna/commands/forecast_command.py index a351171a2..d1b863ed3 100644 --- a/etna/commands/forecast_command.py +++ b/etna/commands/forecast_command.py @@ -143,7 +143,7 @@ def forecast( ) n_folds = min( - max_n_folds, forecast_params.get("n_folds", 5) + max_n_folds, forecast_params.get("n_folds", 3) ) # use default value of folds if parameter not set forecast_params["n_folds"] = n_folds diff --git a/tests/test_commands/test_forecast.py b/tests/test_commands/test_forecast.py index ab670df7e..cf0ddaa1e 100644 --- a/tests/test_commands/test_forecast.py +++ b/tests/test_commands/test_forecast.py @@ -267,7 +267,5 @@ def test_forecast_estimate_n_folds( ) df_output = pd.read_csv(tmp_output_path) - print(df_output) - assert all(x in df_output.columns for x in ["target_0.025", "target_0.975"]) assert len(df_output) == 4 * 2 # 4 predictions for 2 segments diff --git a/tests/test_commands/test_utils.py b/tests/test_commands/test_utils.py index be5be1171..53ead7d25 100644 --- a/tests/test_commands/test_utils.py +++ b/tests/test_commands/test_utils.py @@ -273,18 +273,12 @@ def test_estimate_max_n_folds_backtest_with_transforms( {"prediction_interval": True, "n_folds": 3, "quantiles": [0.025, 0.975]}, ), ( - { - "prediction_interval": True, - "n_folds": 3, - "quantiles": [0.025, 0.975], - "estimate_n_folds": True, - "context_size": 3, - }, + {"prediction_interval": True, "estimate_n_folds": True, "start_timestamp": "2021-09-10"}, ADDITIONAL_FORECAST_PARAMETERS, - {"prediction_interval": True, "n_folds": 3, "quantiles": [0.025, 0.975]}, + {"prediction_interval": True}, ), ), ) -def test_get_forecast_call_params(params, to_remove, expected): +def test_remove_params(params, to_remove, expected): result = remove_params(params=params, to_remove=to_remove) assert result 
== expected From 3222cf92bf759dcf691215b851ea2424cd48761d Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Fri, 9 Jun 2023 11:16:59 +0300 Subject: [PATCH 7/9] updated changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2c3cec891..2bd4bfe91 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Exogenous variables shift transform `ExogShiftTransform`([#1254](https://github.com/tinkoff-ai/etna/pull/1254)) - Parameter `start_timestamp` to forecast CLI command ([#1265](https://github.com/tinkoff-ai/etna/pull/1265)) - Function `estimate_max_n_folds` for folds number estimation ([#1279](https://github.com/tinkoff-ai/etna/pull/1279)) +- Parameters `estimate_n_folds` and `context_size` to forecast and backtest CLI commands ([#1284](https://github.com/tinkoff-ai/etna/pull/1284)) - ### Changed - Set the default value of `final_model` to `LinearRegression(positive=True)` in the constructor of `StackingEnsemble` ([#1238](https://github.com/tinkoff-ai/etna/pull/1238)) From 7e8834c63611aa2f9e65e9ecb6b11f3a16146f6c Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Tue, 13 Jun 2023 11:54:28 +0300 Subject: [PATCH 8/9] added tests --- tests/test_commands/test_utils.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/test_commands/test_utils.py b/tests/test_commands/test_utils.py index 53ead7d25..66df69cab 100644 --- a/tests/test_commands/test_utils.py +++ b/tests/test_commands/test_utils.py @@ -2,7 +2,9 @@ import pytest +from etna.commands.backtest_command import ADDITIONAL_BACKTEST_PARAMETERS from etna.commands.forecast_command import ADDITIONAL_FORECAST_PARAMETERS +from etna.commands.forecast_command import ADDITIONAL_PIPELINE_PARAMETERS from etna.commands.utils import _estimate_n_folds from etna.commands.utils import _max_n_folds_backtest from etna.commands.utils import _max_n_folds_forecast @@ -277,6 +279,16 @@ def 
test_estimate_max_n_folds_backtest_with_transforms( ADDITIONAL_FORECAST_PARAMETERS, {"prediction_interval": True}, ), + ( + {"n_folds": 2, "n_jobs": 4, "estimate_n_folds": True}, + ADDITIONAL_BACKTEST_PARAMETERS, + {"n_folds": 2, "n_jobs": 4}, + ), + ( + {"_target_": "etna.pipeline.Pipeline", "horizon": 4, "context_size": 1}, + ADDITIONAL_PIPELINE_PARAMETERS, + {"_target_": "etna.pipeline.Pipeline", "horizon": 4}, + ), ), ) def test_remove_params(params, to_remove, expected): From 414ac7122606eb4268934b95e692dc0105404907 Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Tue, 13 Jun 2023 13:11:19 +0300 Subject: [PATCH 9/9] updated doc --- docs/source/commands.rst | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/docs/source/commands.rst b/docs/source/commands.rst index d0e98efa6..194a9261c 100644 --- a/docs/source/commands.rst +++ b/docs/source/commands.rst @@ -23,18 +23,20 @@ Basic ``forecast`` usage: **Forecast config parameters** -* :code:`prediction_interval` - whether to estimate prediction interval for forecast. -* :code:`quantiles` - levels of prediction distribution. By default 2.5% and 97.5% are taken to form a 95% prediction interval. -* :code:`n_folds` - number of folds to use in the backtest for prediction interval estimation. By default equals to 3. -* :code:`return_components` - whether to estimate forecast components * :code:`start_timestamp` - timestamp with the starting point of forecast. * :code:`estimate_n_folds` - whether to estimate the number of folds from data. Works only when prediction intervals are enabled. Requires :code:`context_size` parameter set in pipeline config. -:code:`context_size` is the top level field in pipeline config, -determines minimum number of points in the history that is required by pipeline to produce a forecast. +Other parameters that could be set in the configuration file could be found in :meth:`~etna.pipeline.pipeline.Pipeline.forecast` method documentation. 
Setting these parameters is optional. -Further information on arguments could be found in the documentation of :meth:`~etna.pipeline.pipeline.Pipeline.forecast` method. + + +**Pipeline config parameters** + +* :code:`context_size` - minimum number of points in the history that is required by pipeline to produce a forecast. + +Further information on pipeline parameters could be found in :class:`~etna.pipeline.pipeline.Pipeline` class documentation. + **How to create config?** @@ -140,22 +142,18 @@ Basic ``backtest`` usage: **Backtest config parameters** -* :code:`metrics` - list of metrics to compute for each fold. -* :code:`n_folds` - number of folds to use in the backtest for prediction interval estimation. By default equals to 5. -* :code:`mode` - train generation policy: :code:`expand` or :code:`constant`. -* :code:`aggregate_metrics` - if :code:`True` aggregate metrics above folds, return raw metrics otherwise. -* :code:`n_jobs` - number of jobs to run in parallel. -* :code:`refit` - determines how often pipeline should be retrained during iteration over folds. -* :code:`stride` - number of points between folds. -* :code:`joblib_params` - additional parameters for :py:class:`joblib.Parallel` -* :code:`forecast_params` - additional parameters for :meth:`~etna.pipeline.base.BasePipeline.forecast` * :code:`estimate_n_folds` - whether to estimate the number of folds from data. Requires :code:`context_size` parameter set in pipeline config. -:code:`context_size` is the top level field in pipeline config, -determines minimum number of points in the history that is required by pipeline to produce a forecast. +Other parameters that could be set in the configuration file could be found in :meth:`~etna.pipeline.base.BasePipeline.backtest` method documentation. Setting these parameters is optional. -Further information on arguments could be found in the documentation of :meth:`~etna.pipeline.base.BasePipeline.backtest` method.
+ + +**Pipeline config parameters** + +* :code:`context_size` - minimum number of points in the history that is required by pipeline to produce a forecast. + +Further information on pipeline parameters could be found in :class:`~etna.pipeline.pipeline.Pipeline` class documentation. **How to create configs?**