Skip to content

Commit

Permalink
Merge branch 'master' into issue-514
Browse files Browse the repository at this point in the history
  • Loading branch information
martins0n committed Feb 18, 2022
2 parents 1402863 + 95a988c commit adadd9d
Show file tree
Hide file tree
Showing 11 changed files with 353 additions and 79 deletions.
6 changes: 3 additions & 3 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- `FutureMixin` into some transforms ([#361](https://github.com/tinkoff-ai/etna/pull/361))
- Regressors updating in TSDataset transform loops ([#374](https://github.com/tinkoff-ai/etna/pull/374))
- Regressors handling in TSDataset `make_future` and `train_test_split` ([#447](https://github.com/tinkoff-ai/etna/pull/447))
-
- Prediction intervals visualization in `plot_forecast` ([#538](https://github.com/tinkoff-ai/etna/pull/538))
-
- Add plot_time_series_with_change_points function ([#534](https://github.com/tinkoff-ai/etna/pull/534))
-
Expand All @@ -28,7 +28,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
-
- Installation instruction ([#526](https://github.com/tinkoff-ai/etna/pull/526))
-
-
- Trainer kwargs for deep models ([#540](https://github.com/tinkoff-ai/etna/pull/540))
- Update CONTRIBUTING.md ([#536](https://github.com/tinkoff-ai/etna/pull/536))

### Fixed
Expand All @@ -41,7 +41,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
-
-
-
-
- [BUG] nn models make forecast without inverse_transform ([#541](https://github.com/tinkoff-ai/etna/pull/541))

## [1.6.3] - 2022-02-14

Expand Down
64 changes: 62 additions & 2 deletions etna/analysis/plotters.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import math
import warnings
from copy import deepcopy
from typing import TYPE_CHECKING
from typing import Callable
Expand Down Expand Up @@ -44,6 +45,8 @@ def plot_forecast(
n_train_samples: Optional[int] = None,
columns_num: int = 2,
figsize: Tuple[int, int] = (10, 5),
prediction_intervals: bool = False,
quantiles: Optional[Sequence[float]] = None,
):
"""
Plot of prediction for forecast pipeline.
Expand All @@ -64,12 +67,31 @@ def plot_forecast(
number of graphics columns
figsize:
size of the figure per subplot with one segment in inches
prediction_intervals:
if True prediction intervals will be drawn
quantiles:
list of quantiles to draw
"""
if not segments:
segments = list(set(forecast_ts.columns.get_level_values("segment")))

ax = prepare_axes(segments=segments, columns_num=columns_num, figsize=figsize)

if prediction_intervals:
cols = [
col
for col in forecast_ts.columns.get_level_values("feature").unique().tolist()
if col.startswith("target_0.")
]
existing_quantiles = [float(col[7:]) for col in cols]
if quantiles is None:
quantiles = sorted(existing_quantiles)
else:
non_existent = set(quantiles) - set(existing_quantiles)
if len(non_existent):
warnings.warn(f"Quantiles {non_existent} do not exist in forecast dataset. They will be dropped.")
quantiles = sorted(list(set(quantiles).intersection(set(existing_quantiles))))

if train_ts is not None:
train_ts.df.sort_values(by="timestamp", inplace=True)
if test_ts is not None:
Expand Down Expand Up @@ -99,8 +121,46 @@ def plot_forecast(
if (train_ts is not None) and (n_train_samples != 0):
ax[i].plot(plot_df.index.values, plot_df.target.values, label="train")
if test_ts is not None:
ax[i].plot(segment_test_df.index.values, segment_test_df.target.values, label="test")
ax[i].plot(segment_forecast_df.index.values, segment_forecast_df.target.values, label="forecast")
ax[i].plot(segment_test_df.index.values, segment_test_df.target.values, color="purple", label="test")
ax[i].plot(segment_forecast_df.index.values, segment_forecast_df.target.values, color="r", label="forecast")

if prediction_intervals and quantiles is not None:
alpha = np.linspace(0, 1, len(quantiles) // 2 + 2)[1:-1]
for quantile in range(len(quantiles) // 2):
values_low = segment_forecast_df["target_" + str(quantiles[quantile])].values
values_high = segment_forecast_df["target_" + str(quantiles[-quantile - 1])].values
if quantile == len(quantiles) // 2 - 1:
ax[i].fill_between(
segment_forecast_df.index.values,
values_low,
values_high,
facecolor="g",
alpha=alpha[quantile],
label=f"{quantiles[quantile]}-{quantiles[-quantile-1]} prediction interval",
)
else:
values_next = segment_forecast_df["target_" + str(quantiles[quantile + 1])].values
ax[i].fill_between(
segment_forecast_df.index.values,
values_low,
values_next,
facecolor="g",
alpha=alpha[quantile],
label=f"{quantiles[quantile]}-{quantiles[-quantile-1]} prediction interval",
)
values_prev = segment_forecast_df["target_" + str(quantiles[-quantile - 2])].values
ax[i].fill_between(
segment_forecast_df.index.values, values_high, values_prev, facecolor="g", alpha=alpha[quantile]
)
if len(quantiles) % 2 != 0:
values = segment_forecast_df["target_" + str(quantiles[len(quantiles) // 2])].values
ax[i].plot(
segment_forecast_df.index.values,
values,
"--",
c="orange",
label=f"{quantiles[len(quantiles)//2]} quantile",
)
ax[i].set_title(segment)
ax[i].tick_params("x", rotation=45)
ax[i].legend()
Expand Down
242 changes: 177 additions & 65 deletions etna/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from typing import Any
from typing import Dict
from typing import List
from typing import Optional
from typing import Sequence
from typing import Union

Expand Down Expand Up @@ -85,71 +86,6 @@ def _forecast_segment(model, segment: Union[str, List[str]], ts: TSDataset) -> p
return segment_predict


class PerSegmentModel(Model):
    """Class for holding specific models for per-segment prediction."""

    def __init__(self, base_model):
        """Init PerSegmentModel.

        Parameters
        ----------
        base_model:
            Internal model which will be used to forecast segments, expected to have fit/predict interface
        """
        super(PerSegmentModel, self).__init__()
        self._base_model = base_model
        self._segments = None

    @log_decorator
    def fit(self, ts: TSDataset) -> "PerSegmentModel":
        """Fit a separate copy of the base model on every segment of the dataset."""
        self._segments = ts.segments
        self._build_models()

        for segment in self._segments:
            segment_model = self._models[segment]
            # Flatten the single-segment slice into the plain frame the inner model expects.
            segment_features = ts[:, segment, :].dropna()
            segment_features = segment_features.droplevel("segment", axis=1).reset_index()
            segment_model.fit(df=segment_features)
        return self

    @log_decorator
    def forecast(self, ts: TSDataset) -> TSDataset:
        """Make predictions.

        Parameters
        ----------
        ts:
            Dataset with features

        Returns
        -------
        TSDataset
            Dataset with predictions merged in
        """
        if self._segments is None:
            raise ValueError("The model is not fitted yet, use fit() to train it")

        segment_predictions = [
            self._forecast_segment(self._models[segment], segment, ts) for segment in self._segments
        ]

        # Merge per-segment predictions back into the wide dataset format;
        # combine_first keeps existing feature columns and fills in the new targets.
        result_df = pd.concat(segment_predictions, ignore_index=True).set_index(["timestamp", "segment"])
        df = ts.to_pandas(flatten=True).set_index(["timestamp", "segment"])
        df = df.combine_first(result_df).reset_index()

        ts.df = TSDataset.to_dataset(df)
        ts.inverse_transform()
        return ts

    def _build_models(self):
        """Create a dict with a fresh copy of the base model for each segment."""
        self._models = {segment: deepcopy(self._base_model) for segment in self._segments}

class FitAbstractModel(ABC):
"""Interface for model with fit method."""

Expand Down Expand Up @@ -230,3 +166,179 @@ def forecast(
Dataset with predictions
"""
pass


class PerSegmentBaseModel(FitAbstractModel, BaseMixin):
    """Base class for holding specific models for per-segment prediction."""

    def __init__(self, base_model: Any):
        """
        Init PerSegmentBaseModel.

        Parameters
        ----------
        base_model:
            Internal model which will be used to forecast segments, expected to have fit/predict interface
        """
        self._base_model = base_model
        self._segments: Optional[List[str]] = None
        self._models: Optional[Dict[str, Any]] = None

    @log_decorator
    def fit(self, ts: TSDataset) -> "PerSegmentBaseModel":
        """Fit model.

        Parameters
        ----------
        ts:
            Dataset with features

        Returns
        -------
        self:
            Model after fit
        """
        self._segments = ts.segments
        # Reuse the helper instead of duplicating the model-copying loop inline.
        self._build_models()

        for segment, model in self._models.items():
            # Flatten the single-segment slice into the plain frame the inner model expects.
            segment_features = ts[:, segment, :]
            segment_features = segment_features.dropna()
            segment_features = segment_features.droplevel("segment", axis=1)
            segment_features = segment_features.reset_index()
            model.fit(df=segment_features)
        return self

    def get_model(self) -> Dict[str, Any]:
        """Get internal models that are used inside etna class.

        Internal model is a model that is used inside etna to forecast segments, e.g. `catboost.CatBoostRegressor`
        or `sklearn.linear_model.Ridge`.

        Returns
        -------
        result:
            dictionary where key is segment and value is internal model

        Raises
        ------
        ValueError:
            if the model has not been fitted yet
        """
        if self._models is None:
            raise ValueError("Can not get the dict with base models from not fitted model!")
        return self._models

    @staticmethod
    def _forecast_segment(model: Any, segment: str, ts: TSDataset, *args, **kwargs) -> pd.DataFrame:
        """Make predictions for one segment with the given internal model."""
        segment_features = ts[:, segment, :]
        segment_features = segment_features.droplevel("segment", axis=1)
        segment_features = segment_features.reset_index()
        # Keep the timestamps aside so they can be re-attached to the flat prediction frame.
        dates = segment_features["timestamp"]
        dates.reset_index(drop=True, inplace=True)
        segment_predict = model.predict(df=segment_features, *args, **kwargs)
        segment_predict = pd.DataFrame({"target": segment_predict})
        segment_predict["segment"] = segment
        segment_predict["timestamp"] = dates
        return segment_predict

    def _build_models(self):
        """Create a dict with a fresh copy of the base model for each segment (if required)."""
        self._models = {}
        for segment in self._segments:  # type: ignore
            self._models[segment] = deepcopy(self._base_model)


class PerSegmentModel(PerSegmentBaseModel, ForecastAbstractModel):
    """Class for holding specific models for per-segment prediction."""

    def __init__(self, base_model: Any):
        """
        Init PerSegmentModel.

        Parameters
        ----------
        base_model:
            Internal model which will be used to forecast segments, expected to have fit/predict interface
        """
        super().__init__(base_model=base_model)

    @log_decorator
    def forecast(self, ts: TSDataset) -> TSDataset:
        """Make predictions.

        Parameters
        ----------
        ts:
            Dataframe with features

        Returns
        -------
        forecast:
            Dataset with predictions
        """
        result_list = list()
        for segment, model in self.get_model().items():
            segment_predict = self._forecast_segment(model=model, segment=segment, ts=ts)
            result_list.append(segment_predict)

        # Merge per-segment predictions back into the wide dataset format;
        # combine_first keeps existing feature columns and fills in the new targets.
        result_df = pd.concat(result_list, ignore_index=True)
        result_df = result_df.set_index(["timestamp", "segment"])
        df = ts.to_pandas(flatten=True)
        df = df.set_index(["timestamp", "segment"])
        df = df.combine_first(result_df).reset_index()

        df = TSDataset.to_dataset(df)
        ts.df = df
        ts.inverse_transform()
        return ts


class PerSegmentPredictionIntervalModel(PerSegmentBaseModel, PredictIntervalAbstractModel):
    """Class for holding specific models for per-segment prediction which are able to build prediction intervals."""

    def __init__(self, base_model: Any):
        """
        Init PerSegmentPredictionIntervalModel.

        Parameters
        ----------
        base_model:
            Internal model which will be used to forecast segments, expected to have fit/predict interface
        """
        super().__init__(base_model=base_model)

    # NOTE(review): this method carried @abstractmethod despite having a complete
    # implementation, which would make the class uninstantiable; replaced with
    # @log_decorator for consistency with PerSegmentModel.forecast.
    @log_decorator
    def forecast(
        self, ts: TSDataset, prediction_interval: bool = False, quantiles: Sequence[float] = (0.025, 0.975)
    ) -> TSDataset:
        """Make predictions.

        Parameters
        ----------
        ts:
            Dataset with features
        prediction_interval:
            If True returns prediction interval for forecast
        quantiles:
            Levels of prediction distribution. By default 2.5% and 97.5% are taken to form a 95% prediction interval

        Returns
        -------
        forecast:
            Dataset with predictions
        """
        result_list = list()
        for segment, model in self.get_model().items():
            segment_predict = self._forecast_segment(
                model=model, segment=segment, ts=ts, prediction_interval=prediction_interval, quantiles=quantiles
            )
            result_list.append(segment_predict)

        # Merge per-segment predictions back into the wide dataset format;
        # combine_first keeps existing feature columns and fills in the new targets.
        result_df = pd.concat(result_list, ignore_index=True)
        result_df = result_df.set_index(["timestamp", "segment"])
        df = ts.to_pandas(flatten=True)
        df = df.set_index(["timestamp", "segment"])
        df = df.combine_first(result_df).reset_index()

        df = TSDataset.to_dataset(df)
        ts.df = df
        ts.inverse_transform()
        return ts

0 comments on commit adadd9d

Please sign in to comment.