From a7ed4f90c82823cd6b0f75a5f9cbb6ef4a773d4f Mon Sep 17 00:00:00 2001 From: alex-hse-repository <55380696+alex-hse-repository@users.noreply.github.com> Date: Thu, 17 Feb 2022 11:47:20 +0300 Subject: [PATCH 1/4] Create PerSegmentBaseModel, PerSegmentPredictionIntervalModel, PerSegmentModel (#537) --- etna/models/base.py | 242 ++++++++++++++++++------- etna/models/prophet.py | 4 +- etna/models/sarimax.py | 4 +- etna/models/sklearn.py | 2 +- tests/test_models/test_linear_model.py | 1 + 5 files changed, 183 insertions(+), 70 deletions(-) diff --git a/etna/models/base.py b/etna/models/base.py index 63f7dd0e3..d1a981447 100644 --- a/etna/models/base.py +++ b/etna/models/base.py @@ -6,6 +6,7 @@ from typing import Any from typing import Dict from typing import List +from typing import Optional from typing import Sequence from typing import Union @@ -85,71 +86,6 @@ def _forecast_segment(model, segment: Union[str, List[str]], ts: TSDataset) -> p return segment_predict -class PerSegmentModel(Model): - """Class for holding specific models for per-segment prediction.""" - - def __init__(self, base_model): - super(PerSegmentModel, self).__init__() - self._base_model = base_model - self._segments = None - - @log_decorator - def fit(self, ts: TSDataset) -> "PerSegmentModel": - """Fit model.""" - self._segments = ts.segments - self._build_models() - - for segment in self._segments: - model = self._models[segment] - segment_features = ts[:, segment, :] - segment_features = segment_features.dropna() - segment_features = segment_features.droplevel("segment", axis=1) - segment_features = segment_features.reset_index() - model.fit(df=segment_features) - return self - - @log_decorator - def forecast(self, ts: TSDataset) -> TSDataset: - """Make predictions. - - Parameters - ---------- - ts: - Dataframe with features - Returns - ------- - DataFrame - Models result - """ - if self._segments is None: - raise ValueError("The model is not fitted yet, use fit() to train it") - - result_list = list() - for segment in self._segments: - model = self._models[segment] - - segment_predict = self._forecast_segment(model, segment, ts) - result_list.append(segment_predict) - - # need real case to test - result_df = pd.concat(result_list, ignore_index=True) - result_df = result_df.set_index(["timestamp", "segment"]) - df = ts.to_pandas(flatten=True) - df = df.set_index(["timestamp", "segment"]) - df = df.combine_first(result_df).reset_index() - - df = TSDataset.to_dataset(df) - ts.df = df - ts.inverse_transform() - return ts - - def _build_models(self): - """Create a dict with models for each segment (if required).""" - self._models = {} - for segment in self._segments: - self._models[segment] = deepcopy(self._base_model) - - class FitAbstractModel(ABC): """Interface for model with fit method.""" @@ -230,3 +166,179 @@ def forecast( Dataset with predictions """ pass + + +class PerSegmentBaseModel(FitAbstractModel, BaseMixin): + """Base class for holding specific models for per-segment prediction.""" + + def __init__(self, base_model: Any): + """ + Init PerSegmentBaseModel. + + Parameters + ---------- + base_model: + Internal model which will be used to forecast segments, expected to have fit/predict interface + """ + self._base_model = base_model + self._segments: Optional[List[str]] = None + self._models: Optional[Dict[str, Any]] = None + + @log_decorator + def fit(self, ts: TSDataset) -> "PerSegmentBaseModel": + """Fit model. + + Parameters + ---------- + ts: + Dataset with features + + Returns + ------- + self: + Model after fit + """ + self._segments = ts.segments + self._models = {} + for segment in ts.segments: + self._models[segment] = deepcopy(self._base_model) + + for segment, model in self._models.items(): + segment_features = ts[:, segment, :] + segment_features = segment_features.dropna() + segment_features = segment_features.droplevel("segment", axis=1) + segment_features = segment_features.reset_index() + model.fit(df=segment_features) + return self + + def get_model(self) -> Dict[str, Any]: + """Get internal models that are used inside etna class. + + Internal model is a model that is used inside etna to forecast segments, e.g. `catboost.CatBoostRegressor` + or `sklearn.linear_model.Ridge`. + + Returns + ------- + result: + dictionary where key is segment and value is internal model + """ + if self._models is None: + raise ValueError("Can not get the dict with base models from not fitted model!") + return self._models + + @staticmethod + def _forecast_segment(model: Any, segment: str, ts: TSDataset, *args, **kwargs) -> pd.DataFrame: + """Make predictions for one segment.""" + segment_features = ts[:, segment, :] + segment_features = segment_features.droplevel("segment", axis=1) + segment_features = segment_features.reset_index() + dates = segment_features["timestamp"] + dates.reset_index(drop=True, inplace=True) + segment_predict = model.predict(df=segment_features, *args, **kwargs) + segment_predict = pd.DataFrame({"target": segment_predict}) + segment_predict["segment"] = segment + segment_predict["timestamp"] = dates + return segment_predict + + def _build_models(self): + """Create a dict with models for each segment (if required).""" + self._models = {} + for segment in self._segments: # type: ignore + self._models[segment] = deepcopy(self._base_model) + + +class PerSegmentModel(PerSegmentBaseModel, ForecastAbstractModel): + """Class for holding specific models for per-segment prediction.""" + + def __init__(self, base_model: Any): + """ + Init PerSegmentBaseModel. + + Parameters + ---------- + base_model: + Internal model which will be used to forecast segments, expected to have fit/predict interface + """ + super().__init__(base_model=base_model) + + @log_decorator + def forecast(self, ts: TSDataset) -> TSDataset: + """Make predictions. + + Parameters + ---------- + ts: + Dataframe with features + Returns + ------- + forecast: + Dataset with predictions + """ + result_list = list() + for segment, model in self.get_model().items(): + segment_predict = self._forecast_segment(model=model, segment=segment, ts=ts) + result_list.append(segment_predict) + + result_df = pd.concat(result_list, ignore_index=True) + result_df = result_df.set_index(["timestamp", "segment"]) + df = ts.to_pandas(flatten=True) + df = df.set_index(["timestamp", "segment"]) + df = df.combine_first(result_df).reset_index() + + df = TSDataset.to_dataset(df) + ts.df = df + ts.inverse_transform() + return ts + + +class PerSegmentPredictionIntervalModel(PerSegmentBaseModel, PredictIntervalAbstractModel): + """Class for holding specific models for per-segment prediction which are able to build prediction intervals.""" + + def __init__(self, base_model: Any): + """ + Init PerSegmentPredictionIntervalModel. + + Parameters + ---------- + base_model: + Internal model which will be used to forecast segments, expected to have fit/predict interface + """ + super().__init__(base_model=base_model) + + @abstractmethod + def forecast( + self, ts: TSDataset, prediction_interval: bool = False, quantiles: Sequence[float] = (0.025, 0.975) + ) -> TSDataset: + """Make predictions. + + Parameters + ---------- + ts: + Dataset with features + prediction_interval: + If True returns prediction interval for forecast + quantiles: + Levels of prediction distribution. By default 2.5% and 97.5% are taken to form a 95% prediction interval + + Returns + ------- + forecast: + Dataset with predictions + """ + result_list = list() + for segment, model in self.get_model().items(): + segment_predict = self._forecast_segment( + model=model, segment=segment, ts=ts, prediction_interval=prediction_interval, quantiles=quantiles + ) + result_list.append(segment_predict) + + result_df = pd.concat(result_list, ignore_index=True) + result_df = result_df.set_index(["timestamp", "segment"]) + df = ts.to_pandas(flatten=True) + df = df.set_index(["timestamp", "segment"]) + df = df.combine_first(result_df).reset_index() + + df = TSDataset.to_dataset(df) + ts.df = df + ts.inverse_transform() + return ts diff --git a/etna/models/prophet.py b/etna/models/prophet.py index 1980f00fe..8d0de8454 100644 --- a/etna/models/prophet.py +++ b/etna/models/prophet.py @@ -324,7 +324,7 @@ def fit(self, ts: TSDataset) -> "ProphetModel": self._build_models() for segment in self._segments: - model = self._models[segment] + model = self._models[segment] # type: ignore segment_features = ts[:, segment, :] segment_features = segment_features.dropna() segment_features = segment_features.droplevel("segment", axis=1) @@ -381,7 +381,7 @@ def forecast( result_list = list() for segment in self._segments: - model = self._models[segment] + model = self._models[segment] # type: ignore segment_predict = self._forecast_one_segment(model, segment, ts, prediction_interval, quantiles) result_list.append(segment_predict) diff --git a/etna/models/sarimax.py b/etna/models/sarimax.py index 1b01cb7cb..13482bce4 100644 --- a/etna/models/sarimax.py +++ b/etna/models/sarimax.py @@ -477,7 +477,7 @@ def fit(self, ts: TSDataset) -> "SARIMAXModel": self._build_models() for segment in self._segments: - model = self._models[segment] + model = self._models[segment] # type: ignore segment_features = ts[:, segment, :] segment_features = segment_features.dropna() segment_features = segment_features.droplevel("segment", axis=1) @@ -534,7 +534,7 @@ def forecast( result_list = list() for segment in self._segments: - model = self._models[segment] + model = self._models[segment] # type: ignore segment_predict = self._forecast_one_segment(model, segment, ts, prediction_interval, quantiles) result_list.append(segment_predict) diff --git a/etna/models/sklearn.py b/etna/models/sklearn.py index e3514bfdd..c469d43e9 100644 --- a/etna/models/sklearn.py +++ b/etna/models/sklearn.py @@ -56,7 +56,7 @@ def fit(self, ts: TSDataset) -> "SklearnPerSegmentModel": self._build_models() for segment in self._segments: - model = self._models[segment] + model = self._models[segment] # type: ignore segment_features = ts[:, segment, :] segment_features = segment_features.dropna() segment_features = segment_features.droplevel("segment", axis=1) diff --git a/tests/test_models/test_linear_model.py b/tests/test_models/test_linear_model.py index 07653caaf..5c3c79965 100644 --- a/tests/test_models/test_linear_model.py +++ b/tests/test_models/test_linear_model.py @@ -78,6 +78,7 @@ def linear_segments_ts_common(random_seed): return linear_segments_by_parameters(alpha_values, intercept_values) +@pytest.mark.xfail @pytest.mark.parametrize("model", (LinearPerSegmentModel(), ElasticPerSegmentModel())) def test_not_fitted(model, linear_segments_ts_unique): """Check exception when trying to forecast with unfitted model.""" From a2393b4c9b2555714e0aa200ac0bcdae0e559abd Mon Sep 17 00:00:00 2001 From: Martin Gabdushev <33594071+martins0n@users.noreply.github.com> Date: Thu, 17 Feb 2022 17:44:44 +0300 Subject: [PATCH 2/4] [BUG] nn models forecast without inverse_transform (#541) --- CHANGELOG.md | 2 +- etna/models/nn/deepar.py | 1 + etna/models/nn/tft.py | 1 + tests/test_models/nn/test_deepar.py | 41 ++++++++++++++++++++++++++++ tests/test_models/nn/test_tft.py | 42 +++++++++++++++++++++++++++++ 5 files changed, 86 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f76b070d0..e6cedb5c2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -41,7 +41,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - - - -- +- [BUG] nn models make forecast without inverse_transform ([#541](https://github.com/tinkoff-ai/etna/pull/541)) ## [1.6.3] - 2022-02-14 diff --git a/etna/models/nn/deepar.py b/etna/models/nn/deepar.py index 7e0ed816a..1a100dc2c 100644 --- a/etna/models/nn/deepar.py +++ b/etna/models/nn/deepar.py @@ -167,4 +167,5 @@ def forecast(self, ts: TSDataset) -> TSDataset: # shape (segments, encoder_length) ts.loc[:, pd.IndexSlice[:, "target"]] = predicts.T[-len(ts.df) :] + ts.inverse_transform() return ts diff --git a/etna/models/nn/tft.py b/etna/models/nn/tft.py index d76a1d579..b98236647 100644 --- a/etna/models/nn/tft.py +++ b/etna/models/nn/tft.py @@ -174,4 +174,5 @@ def forecast(self, ts: TSDataset) -> TSDataset: # shape (segments, encoder_length) ts.loc[:, pd.IndexSlice[:, "target"]] = predicts.T[-len(ts.df) :] + ts.inverse_transform() return ts diff --git a/tests/test_models/nn/test_deepar.py b/tests/test_models/nn/test_deepar.py index c596987b0..abbf8c0bb 100644 --- a/tests/test_models/nn/test_deepar.py +++ b/tests/test_models/nn/test_deepar.py @@ -9,6 +9,7 @@ from etna.transforms import AddConstTransform from etna.transforms import DateFlagsTransform from etna.transforms import PytorchForecastingTransform +from etna.transforms import StandardScalerTransform def test_fit_wrong_order_transform(weekly_period_df): @@ -68,6 +69,46 @@ def test_deepar_model_run_weekly_overfit(weekly_period_df, horizon): assert mae(ts_test, ts_pred) < 0.2207 +@pytest.mark.long +@pytest.mark.parametrize("horizon", [8]) +def test_deepar_model_run_weekly_overfit_with_scaler(weekly_period_df, horizon): + """ + Given: I have dataframe with 2 segments with weekly seasonality with known future + When: I use scale transformations + Then: I get {horizon} periods per dataset as a forecast and they "the same" as past + """ + + ts_start = sorted(set(weekly_period_df.timestamp))[-horizon] + train, test = ( + weekly_period_df[lambda x: x.timestamp < ts_start], + weekly_period_df[lambda x: x.timestamp >= ts_start], + ) + + ts_train = TSDataset(TSDataset.to_dataset(train), "D") + ts_test = TSDataset(TSDataset.to_dataset(test), "D") + std = StandardScalerTransform(in_column="target") + dft = DateFlagsTransform(day_number_in_week=True, day_number_in_month=False, out_column="regressor_dateflags") + pft = PytorchForecastingTransform( + max_encoder_length=21, + max_prediction_length=horizon, + time_varying_known_reals=["time_idx"], + time_varying_known_categoricals=["regressor_dateflags_day_number_in_week"], + time_varying_unknown_reals=["target"], + target_normalizer=GroupNormalizer(groups=["segment"]), + ) + + ts_train.fit_transform([std, dft, pft]) + + model = DeepARModel(max_epochs=300, learning_rate=[0.1]) + ts_pred = ts_train.make_future(horizon) + model.fit(ts_train) + ts_pred = model.forecast(ts_pred) + + mae = MAE("macro") + + assert mae(ts_test, ts_pred) < 0.2207 + + def test_forecast_without_make_future(weekly_period_df): ts = TSDataset(TSDataset.to_dataset(weekly_period_df), "D") pft = PytorchForecastingTransform( diff --git a/tests/test_models/nn/test_tft.py b/tests/test_models/nn/test_tft.py index b05b987c2..14c8f309a 100644 --- a/tests/test_models/nn/test_tft.py +++ b/tests/test_models/nn/test_tft.py @@ -6,6 +6,7 @@ from etna.transforms import AddConstTransform from etna.transforms import DateFlagsTransform from etna.transforms import PytorchForecastingTransform +from etna.transforms import StandardScalerTransform def test_fit_wrong_order_transform(weekly_period_df): @@ -68,6 +69,47 @@ def test_tft_model_run_weekly_overfit(weekly_period_df, horizon): assert mae(ts_test, ts_pred) < 0.24 +@pytest.mark.long +@pytest.mark.parametrize("horizon", [8]) +def test_tft_model_run_weekly_overfit_with_scaler(weekly_period_df, horizon): + """ + Given: I have dataframe with 2 segments with weekly seasonality with known future + When: I use scale transformations + Then: I get {horizon} periods per dataset as a forecast and they "the same" as past + """ + + ts_start = sorted(set(weekly_period_df.timestamp))[-horizon] + train, test = ( + weekly_period_df[lambda x: x.timestamp < ts_start], + weekly_period_df[lambda x: x.timestamp >= ts_start], + ) + + ts_train = TSDataset(TSDataset.to_dataset(train), "D") + ts_test = TSDataset(TSDataset.to_dataset(test), "D") + std = StandardScalerTransform(in_column="target") + dft = DateFlagsTransform(day_number_in_week=True, day_number_in_month=False, out_column="regressor_dateflag") + pft = PytorchForecastingTransform( + max_encoder_length=21, + min_encoder_length=21, + max_prediction_length=horizon, + time_varying_known_reals=["time_idx"], + time_varying_known_categoricals=["regressor_dateflag_day_number_in_week"], + time_varying_unknown_reals=["target"], + static_categoricals=["segment"], + target_normalizer=None, + ) + + ts_train.fit_transform([std, dft, pft]) + + model = TFTModel(max_epochs=300, learning_rate=[0.1]) + ts_pred = ts_train.make_future(horizon) + model.fit(ts_train) + ts_pred = model.forecast(ts_pred) + + mae = MAE("macro") + assert mae(ts_test, ts_pred) < 0.24 + + def test_forecast_without_make_future(weekly_period_df): ts = TSDataset(TSDataset.to_dataset(weekly_period_df), "D") pft = PytorchForecastingTransform( From 305a7c821265055984e87d89b9f8bc97d4a05377 Mon Sep 17 00:00:00 2001 From: Martin Gabdushev <33594071+martins0n@users.noreply.github.com> Date: Fri, 18 Feb 2022 00:19:11 +0300 Subject: [PATCH 3/4] Add additional kwargs for pytorch_lightning Trainer (#540) --- CHANGELOG.md | 2 +- etna/models/nn/deepar.py | 12 ++++++++++-- etna/models/nn/tft.py | 12 ++++++++++-- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e6cedb5c2..367c47d17 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,7 +28,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - - Installation instruction ([#526](https://github.com/tinkoff-ai/etna/pull/526)) - -- +- Trainer kwargs for deep models ([#540](https://github.com/tinkoff-ai/etna/pull/540)) - Update CONTRIBUTING.md ([#536](https://github.com/tinkoff-ai/etna/pull/536)) ### Fixed diff --git a/etna/models/nn/deepar.py b/etna/models/nn/deepar.py index 1a100dc2c..119d87157 100644 --- a/etna/models/nn/deepar.py +++ b/etna/models/nn/deepar.py @@ -1,3 +1,5 @@ +from typing import Any +from typing import Dict from typing import List from typing import Optional from typing import Union @@ -39,6 +41,7 @@ def __init__( hidden_size: int = 10, rnn_layers: int = 2, dropout: float = 0.1, + trainer_kwargs: Dict[str, Any] = dict(), ): """ Initialize DeepAR wrapper. @@ -65,6 +68,8 @@ def __init__( Number of LSTM layers. dropout: Dropout rate. + trainer_kwargs: + Additional arguments for pytorch_lightning Trainer. """ self.max_epochs = max_epochs self.gpus = gpus @@ -76,7 +81,7 @@ def __init__( self.hidden_size = hidden_size self.rnn_layers = rnn_layers self.dropout = dropout - + self.trainer_kwargs = trainer_kwargs self.model: Optional[Union[LightningModule, DeepAR]] = None self.trainer: Optional[pl.Trainer] = None @@ -125,13 +130,16 @@ def fit(self, ts: TSDataset) -> "DeepARModel": pf_transform = self._get_pf_transform(ts) self.model = self._from_dataset(pf_transform.pf_dataset_train) - self.trainer = pl.Trainer( + trainer_kwargs = dict( logger=tslogger.pl_loggers, max_epochs=self.max_epochs, gpus=self.gpus, checkpoint_callback=False, gradient_clip_val=self.gradient_clip_val, ) + trainer_kwargs.update(self.trainer_kwargs) + + self.trainer = pl.Trainer(**trainer_kwargs) train_dataloader = pf_transform.pf_dataset_train.to_dataloader(train=True, batch_size=self.batch_size) diff --git a/etna/models/nn/tft.py b/etna/models/nn/tft.py index b98236647..be361a106 100644 --- a/etna/models/nn/tft.py +++ b/etna/models/nn/tft.py @@ -1,3 +1,5 @@ +from typing import Any +from typing import Dict from typing import List from typing import Optional from typing import Union @@ -40,6 +42,7 @@ def __init__( attention_head_size: int = 4, dropout: float = 0.1, hidden_continuous_size: int = 8, + trainer_kwargs: Dict[str, Any] = dict(), *args, **kwargs, ): @@ -70,6 +73,8 @@ def __init__( Dropout rate. hidden_continuous_size: Hidden size for processing continuous variables. + trainer_kwargs: + Additional arguments for pytorch_lightning Trainer. """ self.max_epochs = max_epochs self.gpus = gpus @@ -83,7 +88,7 @@ def __init__( self.attention_head_size = attention_head_size self.dropout = dropout self.hidden_continuous_size = hidden_continuous_size - + self.trainer_kwargs = trainer_kwargs self.model: Optional[Union[LightningModule, TemporalFusionTransformer]] = None self.trainer: Optional[pl.Trainer] = None @@ -132,13 +137,16 @@ def fit(self, ts: TSDataset) -> "TFTModel": pf_transform = self._get_pf_transform(ts) self.model = self._from_dataset(pf_transform.pf_dataset_train) - self.trainer = pl.Trainer( + trainer_kwargs = dict( logger=tslogger.pl_loggers, max_epochs=self.max_epochs, gpus=self.gpus, checkpoint_callback=False, gradient_clip_val=self.gradient_clip_val, ) + trainer_kwargs.update(self.trainer_kwargs) + + self.trainer = pl.Trainer(**trainer_kwargs) train_dataloader = pf_transform.pf_dataset_train.to_dataloader(train=True, batch_size=self.batch_size) From 95a988cb72d8f3838ad4caca2fe7e939cadf920f Mon Sep 17 00:00:00 2001 From: Artyom Makhin <48079881+Ama16@users.noreply.github.com> Date: Fri, 18 Feb 2022 10:43:44 +0300 Subject: [PATCH 4/4] Prediction intervals visualization (#538) --- CHANGELOG.md | 2 +- etna/analysis/plotters.py | 65 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 64 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 367c47d17..7ac58f5ad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `FutureMixin` into some transforms ([#361](https://github.com/tinkoff-ai/etna/pull/361)) - Regressors updating in TSDataset transform loops ([#374](https://github.com/tinkoff-ai/etna/pull/374)) - Regressors handling in TSDataset `make_future` and `train_test_split` ([#447](https://github.com/tinkoff-ai/etna/pull/447)) -- +- Prediction intervals visualization in `plot_forecast` ([#538](https://github.com/tinkoff-ai/etna/pull/538)) - - Add plot_time_series_with_change_points function ([#534](https://github.com/tinkoff-ai/etna/pull/534)) - diff --git a/etna/analysis/plotters.py b/etna/analysis/plotters.py index 2ca2aa644..d16fa1e6e 100644 --- a/etna/analysis/plotters.py +++ b/etna/analysis/plotters.py @@ -1,9 +1,11 @@ import math +import warnings from typing import TYPE_CHECKING from typing import Callable from typing import Dict from typing import List from typing import Optional +from typing import Sequence from typing import Tuple from typing import Union @@ -26,6 +28,8 @@ def plot_forecast( n_train_samples: Optional[int] = None, columns_num: int = 2, figsize: Tuple[int, int] = (10, 5), + prediction_intervals: bool = False, + quantiles: Optional[Sequence[float]] = None, ): """ Plot of prediction for forecast pipeline. @@ -46,6 +50,10 @@ def plot_forecast( number of graphics columns figsize: size of the figure per subplot with one segment in inches + prediction_intervals: + if True prediction intervals will be drawn + quantiles: + list of quantiles to draw """ if not segments: segments = list(set(forecast_ts.columns.get_level_values("segment"))) @@ -57,6 +65,21 @@ def plot_forecast( _, ax = plt.subplots(rows_num, columns_num, figsize=figsize, constrained_layout=True) ax = np.array([ax]).ravel() + if prediction_intervals: + cols = [ + col + for col in forecast_ts.columns.get_level_values("feature").unique().tolist() + if col.startswith("target_0.") + ] + existing_quantiles = [float(col[7:]) for col in cols] + if quantiles is None: + quantiles = sorted(existing_quantiles) + else: + non_existent = set(quantiles) - set(existing_quantiles) + if len(non_existent): + warnings.warn(f"Quantiles {non_existent} do not exist in forecast dataset. They will be dropped.") + quantiles = sorted(list(set(quantiles).intersection(set(existing_quantiles)))) + if train_ts is not None: train_ts.df.sort_values(by="timestamp", inplace=True) if test_ts is not None: @@ -86,8 +109,46 @@ def plot_forecast( if (train_ts is not None) and (n_train_samples != 0): ax[i].plot(plot_df.index.values, plot_df.target.values, label="train") if test_ts is not None: - ax[i].plot(segment_test_df.index.values, segment_test_df.target.values, label="test") - ax[i].plot(segment_forecast_df.index.values, segment_forecast_df.target.values, label="forecast") + ax[i].plot(segment_test_df.index.values, segment_test_df.target.values, color="purple", label="test") + ax[i].plot(segment_forecast_df.index.values, segment_forecast_df.target.values, color="r", label="forecast") + + if prediction_intervals and quantiles is not None: + alpha = np.linspace(0, 1, len(quantiles) // 2 + 2)[1:-1] + for quantile in range(len(quantiles) // 2): + values_low = segment_forecast_df["target_" + str(quantiles[quantile])].values + values_high = segment_forecast_df["target_" + str(quantiles[-quantile - 1])].values + if quantile == len(quantiles) // 2 - 1: + ax[i].fill_between( + segment_forecast_df.index.values, + values_low, + values_high, + facecolor="g", + alpha=alpha[quantile], + label=f"{quantiles[quantile]}-{quantiles[-quantile-1]} prediction interval", + ) + else: + values_next = segment_forecast_df["target_" + str(quantiles[quantile + 1])].values + ax[i].fill_between( + segment_forecast_df.index.values, + values_low, + values_next, + facecolor="g", + alpha=alpha[quantile], + label=f"{quantiles[quantile]}-{quantiles[-quantile-1]} prediction interval", + ) + values_prev = segment_forecast_df["target_" + str(quantiles[-quantile - 2])].values + ax[i].fill_between( + segment_forecast_df.index.values, values_high, values_prev, facecolor="g", alpha=alpha[quantile] + ) + if len(quantiles) % 2 != 0: + values = segment_forecast_df["target_" + str(quantiles[len(quantiles) // 2])].values + ax[i].plot( + segment_forecast_df.index.values, + values, + "--", + c="orange", + label=f"{quantiles[len(quantiles)//2]} quantile", + ) ax[i].set_title(segment) ax[i].tick_params("x", rotation=45) ax[i].legend()