From c25d06ecb36292e160d751b22735f60efdcce961 Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Wed, 29 Mar 2023 14:45:26 +0300 Subject: [PATCH 1/9] tolerance comparison --- etna/datasets/tsdataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etna/datasets/tsdataset.py b/etna/datasets/tsdataset.py index e85d06957..b85f08917 100644 --- a/etna/datasets/tsdataset.py +++ b/etna/datasets/tsdataset.py @@ -1166,7 +1166,7 @@ def add_target_components(self, target_components_df: pd.DataFrame): ) components_sum = target_components_df.sum(axis=1, level="segment") - if not np.array_equal(components_sum.values, self[..., "target"].values): + if not np.allclose(components_sum.values, self[..., "target"].values): raise ValueError("Components don't sum up to target!") self._target_components_names = components_names From 28bebbedc81040c95d4a7b4776d8c51c0253bc19 Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Wed, 29 Mar 2023 14:45:51 +0300 Subject: [PATCH 2/9] prediction decomposition --- etna/models/deadline_ma.py | 100 +++++++++++++++++++++++++++++++------ 1 file changed, 85 insertions(+), 15 deletions(-) diff --git a/etna/models/deadline_ma.py b/etna/models/deadline_ma.py index 58c2cc468..50c09e098 100644 --- a/etna/models/deadline_ma.py +++ b/etna/models/deadline_ma.py @@ -1,6 +1,7 @@ import warnings from enum import Enum from typing import Optional +from typing import Tuple import numpy as np import pandas as pd @@ -25,7 +26,12 @@ def _missing_(cls, value): class DeadlineMovingAverageModel( NonPredictionIntervalContextRequiredAbstractModel, ): - """Moving average model that uses exact previous dates to predict.""" + """Moving average model that uses exact previous dates to predict. + + Notes + _____ + This model supports in-sample and out-of-sample prediction decomposition. + """ def __init__(self, window: int = 3, seasonality: str = "month"): """Initialize deadline moving average model. 
@@ -156,6 +162,47 @@ def _get_context_beginning( return first_index + def _get_previous_date(self, date, offset): + """Get previous date using seasonality offset.""" + if self.seasonality == SeasonalityMode.month: + prev_date = date - pd.DateOffset(months=offset) + elif self.seasonality == SeasonalityMode.year: + prev_date = date - pd.DateOffset(years=offset) + + return prev_date + + def _make_prediction_components( + self, result_template: pd.DataFrame, context: pd.DataFrame, prediction_size: int + ) -> pd.DataFrame: + """Estimate prediction components using ``result_template`` as a base and ``context`` as a context.""" + index = result_template.index + end_idx = len(result_template) + start_idx = end_idx - prediction_size + + components_data = [] + for i in range(start_idx, end_idx): + + obs_components = [] + for w in range(1, self.window + 1): + prev_date = self._get_previous_date(date=result_template.index[i], offset=w) + obs_components.append(context.loc[prev_date].values) + + components_data.append(obs_components) + + raw_components = np.asarray(components_data, dtype=float) + raw_components = np.swapaxes(raw_components, -1, -2) + raw_components = raw_components.reshape(raw_components.shape[0], -1) + raw_components /= self.window + + components = pd.DataFrame(data=raw_components) + components.index = index[start_idx:end_idx] + components_names = [f"target_component_{self.seasonality.name}_lag_{w}" for w in range(1, self.window + 1)] + components.columns = pd.MultiIndex.from_product( + [context.columns.get_level_values("segment"), components_names], names=("segment", "feature") + ) + + return components + def _make_predictions( self, result_template: pd.DataFrame, context: pd.DataFrame, prediction_size: int ) -> np.ndarray: @@ -165,10 +212,7 @@ def _make_predictions( end_idx = len(result_template) for i in range(start_idx, end_idx): for w in range(1, self.window + 1): - if self.seasonality == SeasonalityMode.month: - prev_date = result_template.index[i] - 
pd.DateOffset(months=w) - elif self.seasonality == SeasonalityMode.year: - prev_date = result_template.index[i] - pd.DateOffset(years=w) + prev_date = self._get_previous_date(date=result_template.index[i], offset=w) result_template.loc[index[i]] += context.loc[prev_date] @@ -177,7 +221,9 @@ def _make_predictions( result_values = result_template.values[-prediction_size:] return result_values - def _forecast(self, df: pd.DataFrame, prediction_size: int) -> pd.DataFrame: + def _forecast( + self, df: pd.DataFrame, prediction_size: int, return_components: bool = False + ) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]: """Make autoregressive forecasts on a wide dataframe.""" context_beginning = self._get_context_beginning( df=df, prediction_size=prediction_size, seasonality=self.seasonality, window=self.window @@ -200,7 +246,14 @@ def _forecast(self, df: pd.DataFrame, prediction_size: int) -> pd.DataFrame: df = df.iloc[-prediction_size:] y_pred = result_values[-prediction_size:] df.loc[:, pd.IndexSlice[:, "target"]] = y_pred - return df + + components = None + if return_components: + components = self._make_prediction_components( + result_template=result_template, context=result_template, prediction_size=prediction_size + ) + + return df, components def forecast(self, ts: TSDataset, prediction_size: int, return_components: bool = False) -> TSDataset: """Make autoregressive forecasts. 
@@ -231,16 +284,22 @@ def forecast(self, ts: TSDataset, prediction_size: int, return_components: bool ValueError: if forecast context contains NaNs """ - if return_components: - raise NotImplementedError("This mode isn't currently implemented!") self._validate_fitted() df = ts.to_pandas() - new_df = self._forecast(df=df, prediction_size=prediction_size) + new_df, target_components = self._forecast( + df=df, prediction_size=prediction_size, return_components=return_components + ) ts.df = new_df + + if return_components: + ts.add_target_components(target_components_df=target_components) + return ts - def _predict(self, df: pd.DataFrame, prediction_size: int) -> pd.DataFrame: + def _predict( + self, df: pd.DataFrame, prediction_size: int, return_components: bool = False + ) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]: """Make predictions on a wide dataframe using true values as autoregression context.""" context_beginning = self._get_context_beginning( df=df, prediction_size=prediction_size, seasonality=self.seasonality, window=self.window @@ -261,7 +320,14 @@ def _predict(self, df: pd.DataFrame, prediction_size: int) -> pd.DataFrame: df = df.iloc[-prediction_size:] y_pred = result_values[-prediction_size:] df.loc[:, pd.IndexSlice[:, "target"]] = y_pred - return df + + components = None + if return_components: + components = self._make_prediction_components( + result_template=result_template, context=context, prediction_size=prediction_size + ) + + return df, components def predict(self, ts: TSDataset, prediction_size: int, return_components: bool = False) -> TSDataset: """Make predictions using true values as autoregression context (teacher forcing). 
@@ -292,13 +358,17 @@ def predict(self, ts: TSDataset, prediction_size: int, return_components: bool = ValueError: if forecast context contains NaNs """ - if return_components: - raise NotImplementedError("This mode isn't currently implemented!") self._validate_fitted() df = ts.to_pandas() - new_df = self._predict(df=df, prediction_size=prediction_size) + new_df, target_components = self._predict( + df=df, prediction_size=prediction_size, return_components=return_components + ) ts.df = new_df + + if return_components: + ts.add_target_components(target_components_df=target_components) + return ts From 6feb708d6b12260a0d13a936f878430ef623f08e Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Wed, 29 Mar 2023 14:46:06 +0300 Subject: [PATCH 3/9] added tests --- tests/test_models/test_simple_models.py | 65 +++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/tests/test_models/test_simple_models.py b/tests/test_models/test_simple_models.py index 3d30231de..11df035bb 100644 --- a/tests/test_models/test_simple_models.py +++ b/tests/test_models/test_simple_models.py @@ -55,6 +55,26 @@ def df(): return tsds +@pytest.fixture() +def long_periodic_ts(): + history = 400 + + df1 = pd.DataFrame() + df1["target"] = np.sin(np.arange(history)) + df1["segment"] = "A" + df1["timestamp"] = pd.date_range(start="2020-01-01", periods=history) + + df2 = df1.copy() + df2["segment"] = "B" + df2["target"] *= 4 + + df = pd.concat([df1, df2]).reset_index(drop=True) + df = TSDataset.to_dataset(df) + ts = TSDataset(df, freq="D") + + return ts + + @pytest.mark.parametrize("model", [SeasonalMovingAverageModel, NaiveModel, MovingAverageModel]) def test_sma_model_forecast(simple_df, model): _check_forecast(ts=simple_df, model=model(), horizon=7) @@ -729,3 +749,48 @@ def test_deadline_model_forecast_correct_with_big_horizons(two_month_ts): ) def test_save_load(model, example_tsds): assert_model_equals_loaded_original(model=model, ts=example_tsds, transforms=[], horizon=3) + + 
+@pytest.mark.parametrize("method", ("predict", "forecast")) +@pytest.mark.parametrize( + "window,seasonality,expected_components_names", + ( + (1, "month", ["target_component_month_lag_1"]), + (3, "month", ["target_component_month_lag_1", "target_component_month_lag_2", "target_component_month_lag_3"]), + (1, "year", ["target_component_year_lag_1"]), + ), +) +def test_deadline_ma_predict_components_correct_names( + long_periodic_ts, method, window, seasonality, expected_components_names, horizon=10 +): + model = DeadlineMovingAverageModel(window=window, seasonality=seasonality) + model.fit(ts=long_periodic_ts) + + method_to_call = getattr(model, method) + forecast = method_to_call(ts=long_periodic_ts, prediction_size=horizon, return_components=True) + + assert sorted(forecast.target_components_names) == sorted(expected_components_names) + + +@pytest.mark.parametrize("method", ("predict", "forecast")) +@pytest.mark.parametrize( + "window,seasonality,expected_components_names", + ( + (1, "month", ["target_component_month_lag_1"]), + (3, "month", ["target_component_month_lag_1", "target_component_month_lag_2", "target_component_month_lag_3"]), + (1, "year", ["target_component_year_lag_1"]), + ), +) +def test_deadline_ma_predict_components_sum_up_to_target( + long_periodic_ts, method, window, seasonality, expected_components_names, horizon=10 +): + model = DeadlineMovingAverageModel(window=window, seasonality=seasonality) + model.fit(ts=long_periodic_ts) + + method_to_call = getattr(model, method) + forecast = method_to_call(ts=long_periodic_ts, prediction_size=horizon, return_components=True) + + target = forecast.to_pandas(features=["target"]) + components = forecast.get_target_components() + + np.testing.assert_allclose(target.values, components.sum(axis=1, level="segment").values) From ba18c9a34c801d4eb43c054afd5e027c353c0bcf Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Wed, 29 Mar 2023 14:48:58 +0300 Subject: [PATCH 4/9] updated note --- 
etna/models/deadline_ma.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/etna/models/deadline_ma.py b/etna/models/deadline_ma.py index 50c09e098..d51b33e6a 100644 --- a/etna/models/deadline_ma.py +++ b/etna/models/deadline_ma.py @@ -31,6 +31,8 @@ class DeadlineMovingAverageModel( Notes _____ This model supports in-sample and out-of-sample prediction decomposition. + Prediction components are the corresponding target seasonal lags (monthly or annual) + with weights of :math:`1/window`. """ def __init__(self, window: int = 3, seasonality: str = "month"): From 83db9b70cc5e7e9e7ad9e2b6cf2d1811a404900a Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Wed, 29 Mar 2023 15:00:52 +0300 Subject: [PATCH 5/9] updated changelog --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8d6085ccc..61eba017c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,7 +32,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add tests on `inverse_transform` method of transforms on subset of segments, on new segments, on future with gap ([#1127](https://github.com/tinkoff-ai/etna/pull/1127)) - In-sample prediction for `BATSModel` and `TBATSModel` ([#1181](https://github.com/tinkoff-ai/etna/pull/1181)) - Method `predict_components` for forecast decomposition in `_TBATSAdapter` ([#1181](https://github.com/tinkoff-ai/etna/pull/1181)) -- +- Forecast decomposition for `DeadlineMovingAverageModel` ([#1186](https://github.com/tinkoff-ai/etna/pull/1186)) +- ### Changed - Add optional `features` parameter in the signature of `TSDataset.to_pandas`, `TSDataset.to_flatten` ([#809](https://github.com/tinkoff-ai/etna/pull/809)) - Signature of the constructor of `TFTModel`, `DeepARModel` ([#1110](https://github.com/tinkoff-ai/etna/pull/1110)) From b97a2f543bc388ad68907debd140f82e91a4228f Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Thu, 30 Mar 2023 11:42:11 +0300 Subject: [PATCH 6/9] added test ---
tests/test_models/test_simple_models.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/test_models/test_simple_models.py b/tests/test_models/test_simple_models.py index bf12cc638..b6fe4ebd9 100644 --- a/tests/test_models/test_simple_models.py +++ b/tests/test_models/test_simple_models.py @@ -839,3 +839,19 @@ def test_deadline_ma_predict_components_sum_up_to_target( components = forecast.get_target_components() np.testing.assert_allclose(target.values, components.sum(axis=1, level="segment").values) + + +@pytest.mark.parametrize( + "method_name, expected_values", + (("forecast", [[15, 2], [16, 4], [17, 6]]), ("predict", [[15, 2], [16, 4], [17, 6]])), +) +def test_deadline_ma_predict_components_correct( + simple_df, method_name, expected_values, window=1, seasonality="month", horizon=3 +): + model = DeadlineMovingAverageModel(window=window, seasonality=seasonality) + model.fit(simple_df) + to_call = getattr(model, method_name) + forecast = to_call(ts=simple_df, prediction_size=horizon, return_components=True) + + target_components_df = forecast.get_target_components() + np.testing.assert_allclose(target_components_df.values, expected_values) From 761ca704a0d8b2c01f9f1d0ee24f99d6eda69e3c Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Thu, 30 Mar 2023 11:42:29 +0300 Subject: [PATCH 7/9] review fixes --- etna/models/deadline_ma.py | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/etna/models/deadline_ma.py b/etna/models/deadline_ma.py index d51b33e6a..a02c91bdd 100644 --- a/etna/models/deadline_ma.py +++ b/etna/models/deadline_ma.py @@ -191,19 +191,25 @@ def _make_prediction_components( components_data.append(obs_components) + # shape: (prediction_size, window, num_segments) raw_components = np.asarray(components_data, dtype=float) + + # shape: (prediction_size, num_segments, window) + # this is needed to place elements in the right order raw_components = np.swapaxes(raw_components, -1, 
-2) + + # shape: (prediction_size, num_segments * window) raw_components = raw_components.reshape(raw_components.shape[0], -1) raw_components /= self.window - components = pd.DataFrame(data=raw_components) - components.index = index[start_idx:end_idx] components_names = [f"target_component_{self.seasonality.name}_lag_{w}" for w in range(1, self.window + 1)] - components.columns = pd.MultiIndex.from_product( - [context.columns.get_level_values("segment"), components_names], names=("segment", "feature") - ) - return components + segment_names = context.columns.get_level_values("segment") + column_names = pd.MultiIndex.from_product([segment_names, components_names], names=("segment", "feature")) + + target_components_df = pd.DataFrame(data=raw_components, columns=column_names, index=index[start_idx:end_idx]) + + return target_components_df def _make_predictions( self, result_template: pd.DataFrame, context: pd.DataFrame, prediction_size: int @@ -249,13 +255,13 @@ def _forecast( y_pred = result_values[-prediction_size:] df.loc[:, pd.IndexSlice[:, "target"]] = y_pred - components = None + target_components_df = None if return_components: - components = self._make_prediction_components( + target_components_df = self._make_prediction_components( result_template=result_template, context=result_template, prediction_size=prediction_size ) - return df, components + return df, target_components_df def forecast(self, ts: TSDataset, prediction_size: int, return_components: bool = False) -> TSDataset: """Make autoregressive forecasts. 
@@ -289,13 +295,13 @@ def forecast(self, ts: TSDataset, prediction_size: int, return_components: bool self._validate_fitted() df = ts.to_pandas() - new_df, target_components = self._forecast( + new_df, target_components_df = self._forecast( df=df, prediction_size=prediction_size, return_components=return_components ) ts.df = new_df if return_components: - ts.add_target_components(target_components_df=target_components) + ts.add_target_components(target_components_df=target_components_df) return ts @@ -323,13 +329,13 @@ def _predict( y_pred = result_values[-prediction_size:] df.loc[:, pd.IndexSlice[:, "target"]] = y_pred - components = None + target_components_df = None if return_components: - components = self._make_prediction_components( + target_components_df = self._make_prediction_components( result_template=result_template, context=context, prediction_size=prediction_size ) - return df, components + return df, target_components_df def predict(self, ts: TSDataset, prediction_size: int, return_components: bool = False) -> TSDataset: """Make predictions using true values as autoregression context (teacher forcing). 
@@ -363,13 +369,13 @@ def predict(self, ts: TSDataset, prediction_size: int, return_components: bool = self._validate_fitted() df = ts.to_pandas() - new_df, target_components = self._predict( + new_df, target_components_df = self._predict( df=df, prediction_size=prediction_size, return_components=return_components ) ts.df = new_df if return_components: - ts.add_target_components(target_components_df=target_components) + ts.add_target_components(target_components_df=target_components_df) return ts From 6da0f36142a8b83cd16bedcf53e0fa28e73df358 Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Fri, 31 Mar 2023 11:28:37 +0300 Subject: [PATCH 8/9] updated tests --- tests/test_models/test_simple_models.py | 32 +++++++++++++++---------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/tests/test_models/test_simple_models.py b/tests/test_models/test_simple_models.py index b6fe4ebd9..cfd158e78 100644 --- a/tests/test_models/test_simple_models.py +++ b/tests/test_models/test_simple_models.py @@ -819,16 +819,14 @@ def test_deadline_ma_predict_components_correct_names( @pytest.mark.parametrize("method", ("predict", "forecast")) @pytest.mark.parametrize( - "window,seasonality,expected_components_names", + "window,seasonality", ( - (1, "month", ["target_component_month_lag_1"]), - (3, "month", ["target_component_month_lag_1", "target_component_month_lag_2", "target_component_month_lag_3"]), - (1, "year", ["target_component_year_lag_1"]), + (1, "month"), + (3, "month"), + (1, "year"), ), ) -def test_deadline_ma_predict_components_sum_up_to_target( - long_periodic_ts, method, window, seasonality, expected_components_names, horizon=10 -): +def test_deadline_ma_predict_components_sum_up_to_target(long_periodic_ts, method, window, seasonality, horizon=10): model = DeadlineMovingAverageModel(window=window, seasonality=seasonality) model.fit(ts=long_periodic_ts) @@ -842,16 +840,24 @@ def test_deadline_ma_predict_components_sum_up_to_target( @pytest.mark.parametrize( - 
"method_name, expected_values", - (("forecast", [[15, 2], [16, 4], [17, 6]]), ("predict", [[15, 2], [16, 4], [17, 6]])), + "method_name, out_of_sample_pred", + (("forecast", [-0.75100715, -3.00402861]), ("predict", [-0.42019439, -1.68077756])), ) def test_deadline_ma_predict_components_correct( - simple_df, method_name, expected_values, window=1, seasonality="month", horizon=3 + long_periodic_ts, method_name, out_of_sample_pred, window=1, seasonality="month", horizon=32 ): + predict_lags = long_periodic_ts.df.values[-63:-32] + model = DeadlineMovingAverageModel(window=window, seasonality=seasonality) - model.fit(simple_df) + model.fit(long_periodic_ts) + to_call = getattr(model, method_name) - forecast = to_call(ts=simple_df, prediction_size=horizon, return_components=True) + forecast = to_call(ts=long_periodic_ts, prediction_size=horizon, return_components=True) target_components_df = forecast.get_target_components() - np.testing.assert_allclose(target_components_df.values, expected_values) + + # testing in-sample prediction + np.testing.assert_allclose(target_components_df.values[:-1], predict_lags) + + # testing out-of-sample prediction + np.testing.assert_allclose(target_components_df.values[-1], out_of_sample_pred) From 18495ce7c5c2f6aa76c7c5f86d526f713d05eea5 Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Fri, 31 Mar 2023 11:58:54 +0300 Subject: [PATCH 9/9] added docstrings --- tests/test_models/test_simple_models.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_models/test_simple_models.py b/tests/test_models/test_simple_models.py index cfd158e78..549f5acac 100644 --- a/tests/test_models/test_simple_models.py +++ b/tests/test_models/test_simple_models.py @@ -787,6 +787,7 @@ def test_sma_model_predict_components_sum_up_to_target(example_tsds, method_name def test_sma_model_predict_components_correct( simple_df, method_name, expected_values, window=1, seasonality=2, horizon=3 ): + """Testing that correct lag used as a component.""" model = 
SeasonalMovingAverageModel(window=window, seasonality=seasonality) model.fit(simple_df) to_call = getattr(model, method_name) @@ -846,6 +847,7 @@ def test_deadline_ma_predict_components_sum_up_to_target(long_periodic_ts, metho def test_deadline_ma_predict_components_correct( long_periodic_ts, method_name, out_of_sample_pred, window=1, seasonality="month", horizon=32 ): + """Testing that the correct lag is used as a component.""" predict_lags = long_periodic_ts.df.values[-63:-32] model = DeadlineMovingAverageModel(window=window, seasonality=seasonality)