From 1b7b639886ffadd782710eb186cb3df12bc6cda2 Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Thu, 1 Jun 2023 15:09:18 +0300 Subject: [PATCH 01/10] implemented `estimate_max_n_folds` --- etna/commands/utils.py | 112 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 etna/commands/utils.py diff --git a/etna/commands/utils.py b/etna/commands/utils.py new file mode 100644 index 000000000..472b2b7dc --- /dev/null +++ b/etna/commands/utils.py @@ -0,0 +1,112 @@ +from enum import Enum +from math import floor +from typing import Literal +from typing import Optional +from typing import Union + +from etna.datasets import TSDataset +from etna.pipeline import Pipeline + + +class MethodsWithFolds(str, Enum): + """Enum for methods that use `n_folds` argument.""" + + forecast = "forecast" + backtest = "backtest" + + @classmethod + def _missing_(cls, value): + raise ValueError( + f"{value} is not a valid method name. Only {', '.join([repr(m.value) for m in cls])} are allowed" + ) + + +def _estimate_n_folds(num_points: int, horizon: int, stride: int, context_size: int) -> int: + """Estimate number of folds.""" + if num_points < horizon + context_size: + raise ValueError("Not enough data points!") + + res = (num_points - horizon + stride - context_size) / stride + return floor(res) + + +def _max_n_folds_forecast(pipeline: Pipeline, context_size: int, ts: Optional[TSDataset] = None) -> int: + """Estimate max n_folds for forecast method.""" + if ts is None: + if pipeline.ts is None: + raise ValueError( + "There is no ts for forecast method! Pass ts into function or make sure that pipeline is fitted." 
+ ) + + else: + ts = pipeline.ts + + num_points = len(ts.index) + horizon = pipeline.horizon + + return _estimate_n_folds(num_points=num_points, horizon=horizon, stride=horizon, context_size=context_size) + + +def _max_n_folds_backtest(pipeline: Pipeline, context_size: int, ts: TSDataset, **method_kwargs) -> int: + """Estimate max n_folds for backtest method.""" + # process backtest with intervals case + backtest_with_intervals = "forecast_params" in method_kwargs and method_kwargs["forecast_params"].get( + "prediction_interval", False + ) + + if backtest_with_intervals: + raise NotImplementedError("Number of folds estimation for backtest with intervals is not implemented!") + + num_points = len(ts.index) + + horizon = pipeline.horizon + stride = method_kwargs.get("stride", horizon) + + return _estimate_n_folds(num_points=num_points, horizon=horizon, stride=stride, context_size=context_size) + + +def estimate_max_n_folds( + pipeline: Pipeline, + method_name: Union[Literal["forecast"], Literal["backtest"]], + context_size: int, + ts: Optional[TSDataset] = None, + **method_kwargs, +) -> int: + """Estimate number of folds using provided data and pipeline configuration. + + Parameters + ---------- + pipeline: + pipeline for which to estimate number of folds. + method_name: + method name for which to estimate number of folds. + context_size: + minimum number of points for pipeline to be estimated. + ts: + dataset which will be used for estimation. + method_kwargs: + additional arguments for methods that impact number of folds. + + Returns + ------- + : + Number of folds. 
+ """ + if context_size < 1: + raise ValueError("Pipeline `context_size` parameter must be positive integer!") + + if ts is None and method_name != MethodsWithFolds.forecast: + raise ValueError("Parameter `ts` is required when estimating for backtest method") + + if ts is not None and len(ts.index) == 0: + raise ValueError("Empty ts is passed!") + + method = MethodsWithFolds(method_name) + + if method == MethodsWithFolds.forecast: + n_folds = _max_n_folds_forecast(pipeline=pipeline, context_size=context_size, ts=ts) + + else: + n_folds = _max_n_folds_backtest(pipeline=pipeline, context_size=context_size, ts=ts, **method_kwargs) # type: ignore + + return n_folds From 2794967e9cf6783aa8241c6b3e37f66e738a53bf Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Thu, 1 Jun 2023 15:15:01 +0300 Subject: [PATCH 02/10] added tests --- tests/test_commands/conftest.py | 8 ++ tests/test_commands/test_utils.py | 232 ++++++++++++++++++++++++++++++ 2 files changed, 240 insertions(+) create mode 100644 tests/test_commands/test_utils.py diff --git a/tests/test_commands/conftest.py b/tests/test_commands/conftest.py index 356be12d5..5917e4150 100644 --- a/tests/test_commands/conftest.py +++ b/tests/test_commands/conftest.py @@ -5,6 +5,7 @@ import pandas as pd import pytest +from etna.datasets import TSDataset from etna.datasets import generate_ar_df @@ -171,3 +172,10 @@ def start_timestamp_forecast_omegaconf_path(): tmp.flush() yield Path(tmp.name) tmp.close() + + +@pytest.fixture +def empty_ts(): + df = pd.DataFrame({"segment": [], "timestamp": [], "target": []}) + df = TSDataset.to_dataset(df=df) + return TSDataset(df=df, freq="D") diff --git a/tests/test_commands/test_utils.py b/tests/test_commands/test_utils.py new file mode 100644 index 000000000..f0177cf5f --- /dev/null +++ b/tests/test_commands/test_utils.py @@ -0,0 +1,232 @@ +import pytest + +from etna.commands.utils import _estimate_n_folds +from etna.commands.utils import _max_n_folds_backtest +from etna.commands.utils import 
_max_n_folds_forecast +from etna.commands.utils import estimate_max_n_folds +from etna.metrics import MAE +from etna.models import HoltWintersModel +from etna.models import LinearPerSegmentModel +from etna.models import SeasonalMovingAverageModel +from etna.pipeline import Pipeline +from etna.transforms import DensityOutliersTransform +from etna.transforms import DifferencingTransform +from etna.transforms import LagTransform +from etna.transforms import MeanTransform + + +def run_forecast_test(pipeline, context_size, ts, expected): + pipeline.fit(ts=ts) + + n_folds = estimate_max_n_folds(pipeline=pipeline, method_name="forecast", context_size=context_size) + + assert n_folds == expected + pipeline.forecast(prediction_interval=True, n_folds=n_folds) + + +def run_backtest_test(pipeline, context_size, ts, stride, expected): + n_folds = estimate_max_n_folds( + pipeline=pipeline, ts=ts, method_name="backtest", stride=stride, context_size=context_size + ) + + assert n_folds == expected + pipeline.backtest(ts=ts, metrics=[MAE()], n_folds=n_folds, stride=stride) + + +@pytest.fixture +def pipeline_with_context(request): + if hasattr(request, "param"): + horizon = request.param["horizon"] + window = request.param["window"] + else: + horizon = 1 + window = 1 + + pipeline = Pipeline(transforms=[], model=SeasonalMovingAverageModel(seasonality=1, window=window), horizon=horizon) + return pipeline + + +@pytest.fixture +def pipeline_without_context(request): + horizon = request.param if hasattr(request, "param") else 1 + pipeline = Pipeline(transforms=[], model=HoltWintersModel(), horizon=horizon) + return pipeline + + +@pytest.fixture +def pipeline_with_transforms(): + transforms = [ + LagTransform(in_column="target", lags=[14, 17]), + DifferencingTransform(in_column="target"), + MeanTransform(in_column="target", window=7), + DensityOutliersTransform(in_column="target"), + ] + + pipeline = Pipeline(transforms=transforms, model=LinearPerSegmentModel(), horizon=14) + return 
pipeline + + +@pytest.mark.parametrize( + "num_points, horizon, stride, context_size, expected", + ( + (13, 2, 2, 2, 5), + (13, 2, 1, 2, 10), + (13, 2, 2, 1, 6), + (13, 2, 1, 1, 11), + (13, 1, 1, 1, 12), + (13, 4, 4, 6, 1), + (13, 4, 1, 6, 4), + (10, 5, 1, 5, 1), + (10, 5, 5, 5, 1), + ), +) +def test_estimate_n_folds(num_points, horizon, stride, context_size, expected): + res = _estimate_n_folds(num_points=num_points, horizon=horizon, stride=stride, context_size=context_size) + assert res == expected + + +def test_estimate_n_folds_not_enough_points(num_points=10, horizon=7, stride=1, context_size=5): + with pytest.raises(ValueError, match="Not enough data points!"): + _ = _estimate_n_folds(num_points=num_points, horizon=horizon, stride=stride, context_size=context_size) + + +def test_estimate_n_folds_forecast_no_ts(pipeline_without_context): + with pytest.raises(ValueError, match="There is no ts for forecast method!"): + _ = _max_n_folds_forecast(pipeline=pipeline_without_context, ts=None, context_size=1) + + +def test_estimate_n_folds_backtest_no_ts(pipeline_without_context): + with pytest.raises(ValueError, match="Parameter `ts` is required when estimating for backtest method"): + _ = estimate_max_n_folds(pipeline=pipeline_without_context, method_name="backtest", context_size=1) + + +def test_estimate_n_folds_backtest_intervals_error(pipeline_without_context, example_tsds): + with pytest.raises( + NotImplementedError, match="Number of folds estimation for backtest with intervals is not implemented!" 
+ ): + _ = _max_n_folds_backtest( + pipeline=pipeline_without_context, + ts=example_tsds, + forecast_params={"prediction_interval": True}, + context_size=1, + ) + + +def test_estimate_max_n_folds_invalid_method_name(pipeline_without_context, example_tsds, method_name="fit"): + with pytest.raises(ValueError, match="fit is not a valid method name."): + _ = estimate_max_n_folds( + pipeline=pipeline_without_context, ts=example_tsds, method_name=method_name, context_size=1 + ) + + +def test_estimate_max_n_folds_empty_ts(pipeline_without_context, empty_ts): + with pytest.raises(ValueError, match="Empty ts is passed!"): + _ = estimate_max_n_folds(pipeline=pipeline_without_context, ts=empty_ts, method_name="forecast", context_size=1) + + +def test_estimate_max_n_folds_negative_context(pipeline_without_context, example_tsds): + with pytest.raises(ValueError, match="Pipeline `context_size` parameter must be positive integer!"): + _ = estimate_max_n_folds( + pipeline=pipeline_without_context, ts=example_tsds, method_name="forecast", context_size=-1 + ) + + +@pytest.mark.parametrize( + "pipeline_without_context,context_size,ts_name,expected", + ( + (1, 3, "example_tsds", 97), + (4, 3, "example_tsds", 24), + (13, 3, "example_tsds", 7), + (97, 3, "example_tsds", 1), + (40, 3, "ts_with_different_series_length", 18), + ), + indirect=["pipeline_without_context"], +) +def test_estimate_max_n_folds_forecast_no_context(pipeline_without_context, context_size, ts_name, expected, request): + ts = request.getfixturevalue(ts_name) + run_forecast_test(pipeline=pipeline_without_context, ts=ts, expected=expected, context_size=context_size) + + +@pytest.mark.parametrize( + "pipeline_with_context,context_size,ts_name,expected", + ( + ({"horizon": 1, "window": 1}, 1, "example_tsds", 99), + ({"horizon": 1, "window": 2}, 2, "example_tsds", 98), + ({"horizon": 13, "window": 10}, 10, "example_tsds", 6), + ({"horizon": 10, "window": 1}, 1, "ts_with_different_series_length", 74), + ), + 
indirect=["pipeline_with_context"], +) +def test_estimate_max_n_folds_forecast_with_context(pipeline_with_context, context_size, ts_name, expected, request): + ts = request.getfixturevalue(ts_name) + run_forecast_test(pipeline=pipeline_with_context, context_size=context_size, ts=ts, expected=expected) + + +@pytest.mark.parametrize( + "context_size,ts_name,expected", + ( + (18, "example_tsds", 5), + (18, "ts_with_different_series_length", 51), + ), +) +def test_estimate_max_n_folds_forecast_with_transforms( + pipeline_with_transforms, context_size, ts_name, expected, request +): + ts = request.getfixturevalue(ts_name) + run_forecast_test(pipeline=pipeline_with_transforms, ts=ts, expected=expected, context_size=context_size) + + +@pytest.mark.parametrize( + "pipeline_without_context,context_size,stride,ts_name,expected", + ( + (4, 3, 8, "example_tsds", 12), + (13, 3, 13, "example_tsds", 7), + (13, 3, 3, "example_tsds", 29), + (97, 3, 3, "example_tsds", 1), + (40, 3, 60, "ts_with_different_series_length", 12), + ), + indirect=["pipeline_without_context"], +) +def test_estimate_max_n_folds_backtest_no_context( + pipeline_without_context, context_size, stride, ts_name, expected, request +): + ts = request.getfixturevalue(ts_name) + run_backtest_test( + pipeline=pipeline_without_context, context_size=context_size, ts=ts, stride=stride, expected=expected + ) + + +@pytest.mark.parametrize( + "pipeline_with_context,context_size,stride,ts_name,expected", + ( + ({"horizon": 1, "window": 1}, 1, 8, "example_tsds", 13), + ({"horizon": 5, "window": 8}, 8, 13, "example_tsds", 7), + ({"horizon": 13, "window": 7}, 7, 3, "example_tsds", 27), + ({"horizon": 13, "window": 60}, 60, 40, "ts_with_different_series_length", 17), + ), + indirect=["pipeline_with_context"], +) +def test_estimate_max_n_folds_backtest_with_context( + pipeline_with_context, context_size, stride, ts_name, expected, request +): + ts = request.getfixturevalue(ts_name) + run_backtest_test( + 
pipeline=pipeline_with_context, context_size=context_size, ts=ts, stride=stride, expected=expected + ) + + +@pytest.mark.parametrize( + "context_size,stride,ts_name,expected", + ( + (18, 1, "example_tsds", 69), + (18, 14, "example_tsds", 5), + (18, 60, "ts_with_different_series_length", 12), + ), +) +def test_estimate_max_n_folds_backtest_with_transforms( + pipeline_with_transforms, context_size, stride, ts_name, expected, request +): + ts = request.getfixturevalue(ts_name) + run_backtest_test( + pipeline=pipeline_with_transforms, context_size=context_size, ts=ts, stride=stride, expected=expected + ) From 5e673d102b83706bfd03f13d4194545f1c17eacd Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Fri, 2 Jun 2023 16:16:10 +0300 Subject: [PATCH 03/10] updated doc --- etna/commands/utils.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/etna/commands/utils.py b/etna/commands/utils.py index 472b2b7dc..6e7f07a56 100644 --- a/etna/commands/utils.py +++ b/etna/commands/utils.py @@ -73,19 +73,21 @@ def estimate_max_n_folds( **method_kwargs, ) -> int: """Estimate number of folds using provided data and pipeline configuration. + This function helps to estimate maximum number of folds that can be used when performing + forecast with intervals or pipeline backtest. Parameters ---------- pipeline: - pipeline for which to estimate number of folds. + Pipeline for which to estimate number of folds. method_name: - method name for which to estimate number of folds. + Method name for which to estimate number of folds. context_size: - minimum number of points for pipeline to be estimated. + Minimum number of points for pipeline to be estimated. ts: - dataset which will be used for estimation. + Dataset which will be used for estimation. method_kwargs: - additional arguments for methods that impact number of folds. + Additional arguments for methods that impact number of folds. 
Returns ------- From a67acc9011d3989f1a5785569f2d0f59593f8da6 Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Fri, 2 Jun 2023 16:16:44 +0300 Subject: [PATCH 04/10] removed check --- etna/commands/utils.py | 3 --- tests/test_commands/test_utils.py | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/etna/commands/utils.py b/etna/commands/utils.py index 6e7f07a56..0efbe19e3 100644 --- a/etna/commands/utils.py +++ b/etna/commands/utils.py @@ -100,9 +100,6 @@ def estimate_max_n_folds( if ts is None and method_name != MethodsWithFolds.forecast: raise ValueError("Parameter `ts` is required when estimating for backtest method") - if ts is not None and len(ts.index) == 0: - raise ValueError("Empty ts is passed!") - method = MethodsWithFolds(method_name) if method == MethodsWithFolds.forecast: diff --git a/tests/test_commands/test_utils.py b/tests/test_commands/test_utils.py index f0177cf5f..fd84d03d2 100644 --- a/tests/test_commands/test_utils.py +++ b/tests/test_commands/test_utils.py @@ -120,7 +120,7 @@ def test_estimate_max_n_folds_invalid_method_name(pipeline_without_context, exam def test_estimate_max_n_folds_empty_ts(pipeline_without_context, empty_ts): - with pytest.raises(ValueError, match="Empty ts is passed!"): + with pytest.raises(ValueError, match="Not enough data points!"): _ = estimate_max_n_folds(pipeline=pipeline_without_context, ts=empty_ts, method_name="forecast", context_size=1) From 174eb9b3cdca3f5c33e8eeb313d7a1d911aad7e6 Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Fri, 2 Jun 2023 16:17:18 +0300 Subject: [PATCH 05/10] renamed checks --- tests/test_commands/test_utils.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/tests/test_commands/test_utils.py b/tests/test_commands/test_utils.py index fd84d03d2..b4765c82f 100644 --- a/tests/test_commands/test_utils.py +++ b/tests/test_commands/test_utils.py @@ -15,7 +15,7 @@ from etna.transforms import MeanTransform -def run_forecast_test(pipeline, 
context_size, ts, expected): +def run_estimate_max_n_folds_forecast_test(pipeline, context_size, ts, expected): pipeline.fit(ts=ts) n_folds = estimate_max_n_folds(pipeline=pipeline, method_name="forecast", context_size=context_size) @@ -24,7 +24,7 @@ def run_forecast_test(pipeline, context_size, ts, expected): pipeline.forecast(prediction_interval=True, n_folds=n_folds) -def run_backtest_test(pipeline, context_size, ts, stride, expected): +def run_estimate_max_n_folds_backtest_test(pipeline, context_size, ts, stride, expected): n_folds = estimate_max_n_folds( pipeline=pipeline, ts=ts, method_name="backtest", stride=stride, context_size=context_size ) @@ -144,7 +144,9 @@ def test_estimate_max_n_folds_negative_context(pipeline_without_context, example ) def test_estimate_max_n_folds_forecast_no_context(pipeline_without_context, context_size, ts_name, expected, request): ts = request.getfixturevalue(ts_name) - run_forecast_test(pipeline=pipeline_without_context, ts=ts, expected=expected, context_size=context_size) + run_estimate_max_n_folds_forecast_test( + pipeline=pipeline_without_context, ts=ts, expected=expected, context_size=context_size + ) @pytest.mark.parametrize( @@ -159,7 +161,9 @@ def test_estimate_max_n_folds_forecast_no_context(pipeline_without_context, cont ) def test_estimate_max_n_folds_forecast_with_context(pipeline_with_context, context_size, ts_name, expected, request): ts = request.getfixturevalue(ts_name) - run_forecast_test(pipeline=pipeline_with_context, context_size=context_size, ts=ts, expected=expected) + run_estimate_max_n_folds_forecast_test( + pipeline=pipeline_with_context, context_size=context_size, ts=ts, expected=expected + ) @pytest.mark.parametrize( @@ -173,7 +177,9 @@ def test_estimate_max_n_folds_forecast_with_transforms( pipeline_with_transforms, context_size, ts_name, expected, request ): ts = request.getfixturevalue(ts_name) - run_forecast_test(pipeline=pipeline_with_transforms, ts=ts, expected=expected, 
context_size=context_size) + run_estimate_max_n_folds_forecast_test( + pipeline=pipeline_with_transforms, ts=ts, expected=expected, context_size=context_size + ) @pytest.mark.parametrize( @@ -191,7 +197,7 @@ def test_estimate_max_n_folds_backtest_no_context( pipeline_without_context, context_size, stride, ts_name, expected, request ): ts = request.getfixturevalue(ts_name) - run_backtest_test( + run_estimate_max_n_folds_backtest_test( pipeline=pipeline_without_context, context_size=context_size, ts=ts, stride=stride, expected=expected ) @@ -210,7 +216,7 @@ def test_estimate_max_n_folds_backtest_with_context( pipeline_with_context, context_size, stride, ts_name, expected, request ): ts = request.getfixturevalue(ts_name) - run_backtest_test( + run_estimate_max_n_folds_backtest_test( pipeline=pipeline_with_context, context_size=context_size, ts=ts, stride=stride, expected=expected ) @@ -227,6 +233,6 @@ def test_estimate_max_n_folds_backtest_with_transforms( pipeline_with_transforms, context_size, stride, ts_name, expected, request ): ts = request.getfixturevalue(ts_name) - run_backtest_test( + run_estimate_max_n_folds_backtest_test( pipeline=pipeline_with_transforms, context_size=context_size, ts=ts, stride=stride, expected=expected ) From 37a9de4028104f37a55ba396781f59b4ee192b4a Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Fri, 2 Jun 2023 16:17:35 +0300 Subject: [PATCH 06/10] added test --- tests/test_commands/test_utils.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/test_commands/test_utils.py b/tests/test_commands/test_utils.py index b4765c82f..973d61762 100644 --- a/tests/test_commands/test_utils.py +++ b/tests/test_commands/test_utils.py @@ -1,3 +1,5 @@ +from copy import deepcopy + import pytest from etna.commands.utils import _estimate_n_folds @@ -131,6 +133,22 @@ def test_estimate_max_n_folds_negative_context(pipeline_without_context, example ) +def test_estimate_max_n_folds_forecast_with_ts(pipeline_without_context, 
example_tsds, context_size=3, expected=7): + pipeline = pipeline_without_context + + pipeline.fit(ts=example_tsds) + + ts_to_forecast = deepcopy(example_tsds) + ts_to_forecast.df = ts_to_forecast.df.iloc[-(context_size + expected) :] + + n_folds = estimate_max_n_folds( + pipeline=pipeline, method_name="forecast", ts=ts_to_forecast, context_size=context_size + ) + + assert n_folds == expected + pipeline.forecast(ts=ts_to_forecast, prediction_interval=True, n_folds=n_folds) + + @pytest.mark.parametrize( "pipeline_without_context,context_size,ts_name,expected", ( From 746d3bed3a7df027f9b2de744981cd10885a0e56 Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Fri, 2 Jun 2023 16:20:06 +0300 Subject: [PATCH 07/10] updated changelog --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b16e37525..61015e359 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Notebook `forecast_interpretation.ipynb` with forecast decomposition ([#1220](https://github.com/tinkoff-ai/etna/pull/1220)) - Exogenous variables shift transform `ExogShiftTransform`([#1254](https://github.com/tinkoff-ai/etna/pull/1254)) - Parameter `start_timestamp` to forecast CLI command ([#1265](https://github.com/tinkoff-ai/etna/pull/1265)) -- +- Function `estimate_max_n_folds` for folds number estimation ([#1279](https://github.com/tinkoff-ai/etna/pull/1279)) +- ### Changed - Set the default value of `final_model` to `LinearRegression(positive=True)` in the constructor of `StackingEnsemble` ([#1238](https://github.com/tinkoff-ai/etna/pull/1238)) - Add microseconds to `FileLogger`'s directory name ([#1264](https://github.com/tinkoff-ai/etna/pull/1264)) From b05c2fbc41da1c4f2cfff7e4ca70aa01aed57c6a Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Tue, 6 Jun 2023 11:41:04 +0300 Subject: [PATCH 08/10] updated docs --- etna/commands/utils.py | 13 ++++++++++++- 1 file 
changed, 12 insertions(+), 1 deletion(-) diff --git a/etna/commands/utils.py b/etna/commands/utils.py index 0efbe19e3..e68e5bdc3 100644 --- a/etna/commands/utils.py +++ b/etna/commands/utils.py @@ -73,8 +73,18 @@ def estimate_max_n_folds( **method_kwargs, ) -> int: """Estimate number of folds using provided data and pipeline configuration. + This function helps to estimate maximum number of folds that can be used when performing - forecast with intervals or pipeline backtest. + forecast with intervals or pipeline backtest. Number of folds is estimated using the following formula: + + .. math:: + max\\_n\\_folds = \\left\\lfloor\\frac{num\\_points - horizon + stride - context\\_size}{stride}\\right\\rfloor, + + where :math:`num\\_points` is number of points in the dataset, + :math:`horizon` is length of forecasting horizon, + :math:`stride` is number of points between folds, + :math:`context\\_size` is model context size. + Parameters ---------- @@ -106,6 +116,7 @@ def estimate_max_n_folds( n_folds = _max_n_folds_forecast(pipeline=pipeline, context_size=context_size, ts=ts) else: + # ts always not None for backtest case n_folds = _max_n_folds_backtest(pipeline=pipeline, context_size=context_size, ts=ts, **method_kwargs) # type: ignore return n_folds From e18ea544b6b3be39d4ba838487b45cc0d430a3ef Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Tue, 6 Jun 2023 11:41:20 +0300 Subject: [PATCH 09/10] renamed test --- tests/test_commands/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_commands/test_utils.py b/tests/test_commands/test_utils.py index 973d61762..559af4f43 100644 --- a/tests/test_commands/test_utils.py +++ b/tests/test_commands/test_utils.py @@ -82,7 +82,7 @@ def pipeline_with_transforms(): (10, 5, 5, 5, 1), ), ) -def test_estimate_n_folds(num_points, horizon, stride, context_size, expected): +def test_private_estimate_n_folds(num_points, horizon, stride, context_size, expected): res = 
_estimate_n_folds(num_points=num_points, horizon=horizon, stride=stride, context_size=context_size) assert res == expected From 7b0802e445399b52d9cc311d9baff35741560874 Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Tue, 6 Jun 2023 12:48:54 +0300 Subject: [PATCH 10/10] fixed doc --- etna/commands/utils.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/etna/commands/utils.py b/etna/commands/utils.py index e68e5bdc3..a97daa457 100644 --- a/etna/commands/utils.py +++ b/etna/commands/utils.py @@ -83,7 +83,7 @@ def estimate_max_n_folds( where :math:`num\\_points` is number of points in the dataset, :math:`horizon` is length of forecasting horizon, :math:`stride` is number of points between folds, - :math:`context\\_size` is model context size. + :math:`context\\_size` is pipeline context size. Parameters @@ -91,13 +91,13 @@ def estimate_max_n_folds( pipeline: Pipeline for which to estimate number of folds. method_name: - Method name for which to estimate number of folds. + Method name for which to estimate number of folds. context_size: - Minimum number of points for pipeline to be estimated. + Minimum number of points for pipeline to be estimated. ts: - Dataset which will be used for estimation. + Dataset which will be used for estimation. method_kwargs: - Additional arguments for methods that impact number of folds. + Additional arguments for methods that impact number of folds. Returns -------