Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] Interface statsmodels MSTL - transformer #5125

Merged
merged 23 commits into from Aug 29, 2023
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
13 changes: 11 additions & 2 deletions docs/source/api_reference/transformations.rst
Expand Up @@ -236,6 +236,7 @@ Series-to-series transformers transform individual time series into another time

When applied to panels or hierarchical data, individual series are transformed.


luca-miniati marked this conversation as resolved.
Show resolved Hide resolved
Lagging
~~~~~~~

Expand Down Expand Up @@ -288,8 +289,8 @@ Depending on the transformer, the transformation parameters can be fitted.
ExponentTransformer
SqrtTransformer

Detrending
~~~~~~~~~~
Detrending and Decomposition
~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. currentmodule:: sktime.transformations.series.detrend

Expand All @@ -302,6 +303,14 @@ Detrending
ConditionalDeseasonalizer
STLTransformer

.. currentmodule:: sktime.transformations.series.detrend.mstl

.. autosummary::
:toctree: auto_generated/
:template: class.rst

MSTL

.. currentmodule:: sktime.transformations.series.clear_sky

.. autosummary::
Expand Down
218 changes: 218 additions & 0 deletions sktime/transformations/series/detrend/mstl.py
@@ -0,0 +1,218 @@
# copyright: sktime developers, BSD-3-Clause License (see LICENSE file)
"""Implements MSTL."""

# Names exported by this module, e.g., on ``from ... import *``.
__all__ = ["MSTL"]
# Author identifiers credited for this module.
__authors__ = ["luca-miniati"]

from typing import Dict, Optional, Sequence, Union

import pandas as pd

from sktime.transformations.base import BaseTransformer


class MSTL(BaseTransformer):
    """Season-Trend decomposition using LOESS for multiple seasonalities.

    Direct interface for `statsmodels.tsa.seasonal.MSTL`.

    ``fit`` decomposes the series passed to it and stores the fitted
    decomposition. ``transform`` returns the deseasonalized series, i.e., the
    input minus the sum of all seasonal components, and optionally the
    individual components as additional columns.

    Parameters
    ----------
    periods : {int, array_like, None}, optional
        Periodicity of the seasonal components. If None, statsmodels attempts to
        determine it from the index of the series passed to ``fit``.
    windows : {int, array_like, None}, optional
        Length of the seasonal smoothers for each corresponding period. Must be an
        odd integer, and should normally be >= 7 (default). If None then default
        values determined using 7 + 4 * np.arange(1, n + 1, 1) where n is number of
        seasonal components.
    lmbda : {float, str, None}, optional
        The lambda parameter for the Box-Cox transform to be applied to the series
        prior to decomposition. If None, no transform is applied. If `auto`, a
        value will be estimated that maximizes the log-likelihood function.
    iterate : int, optional, default=2
        Number of iterations to use to refine the seasonal component.
    stl_kwargs : dict, optional
        Arguments to pass to STL.
    return_components : bool, default=False
        if False, will return only the MSTL transformed series
        if True, will return the transformed series, as well as the components
        as variables in the returned multivariate series (DataFrame cols)
            "transformed" - the transformed (deseasonalized) series
            "trend" - the trend component
            "resid" - the residuals after de-trending, de-seasonalizing
            one column per seasonal component, named as returned by statsmodels
            (``"seasonal"`` for one period, ``"seasonal_<period>"`` for several)

    Attributes
    ----------
    mstl_ : statsmodels decomposition result
        Result of fitting ``statsmodels.tsa.seasonal.MSTL`` on the data seen in
        ``fit``.
    seasonal_ : list of pd.Series
        Seasonal components of the series seen in ``fit``, one per period.
    trend_ : pd.Series
        Trend component of the series seen in ``fit``.
    resid_ : pd.Series
        Residual component of the series seen in ``fit``.

    References
    ----------
    [1] https://www.statsmodels.org/dev/generated/statsmodels.tsa.seasonal.MSTL.html

    Examples
    --------
    >>> import matplotlib.pyplot as plt  # doctest: +SKIP
    >>> from sktime.datasets import load_airline
    >>> from sktime.transformations.series.detrend.mstl import MSTL
    >>> y = load_airline()
    >>> y.index = y.index.to_timestamp()
    >>> mstl = MSTL(return_components=True)  # doctest: +SKIP
    >>> fitted = mstl.fit(y)  # doctest: +SKIP
    >>> res = fitted.transform(y)  # doctest: +SKIP
    >>> res.plot()  # doctest: +SKIP
    >>> plt.tight_layout()  # doctest: +SKIP
    >>> plt.show()  # doctest: +SKIP
    """

    _tags = {
        "scitype:transform-input": "Series",
        "scitype:transform-output": "Series",
        "scitype:instancewise": True,
        "X_inner_mtype": "pd.Series",
        "y_inner_mtype": "pd.Series",
        "transform-returns-same-time-index": True,
        "univariate-only": True,
        "fit_is_empty": False,
        "python_dependencies": "statsmodels",
    }

    def __init__(
        self,
        *,
        periods: Optional[Union[int, Sequence[int]]] = None,
        windows: Optional[Union[int, Sequence[int]]] = None,
        lmbda: Optional[Union[float, str]] = None,
        iterate: Optional[int] = 2,
        stl_kwargs: Optional[Dict[str, Union[int, bool, None]]] = None,
        return_components: bool = False,
    ):
        self.periods = periods
        self.windows = windows
        self.lmbda = lmbda
        self.iterate = iterate
        self.stl_kwargs = stl_kwargs
        self.return_components = return_components
        # series seen in fit; needed in transform to detect unseen indices
        self._X = None

        super().__init__()

    def _decompose(self, X):
        """Fit statsmodels MSTL on X using the parameters of this instance.

        Parameters
        ----------
        X : pd.Series
            Series to decompose.

        Returns
        -------
        Fitted statsmodels decomposition result for X.
        """
        # imported lazily: statsmodels is a soft dependency (see _tags)
        from statsmodels.tsa.seasonal import MSTL as _MSTL

        return _MSTL(
            X,
            periods=self.periods,
            windows=self.windows,
            lmbda=self.lmbda,
            iterate=self.iterate,
            stl_kwargs=self.stl_kwargs,
        ).fit()

    @staticmethod
    def _seasonal_to_frame(seasonal, index):
        """Coerce the seasonal output of statsmodels to a DataFrame.

        The seasonal result may be a Series (one period), a DataFrame (several
        periods) or a plain array; downstream code is simpler with one shape.

        Parameters
        ----------
        seasonal : pd.DataFrame, pd.Series or array-like
            Seasonal component(s) as returned by the decomposition.
        index : pd.Index
            Index to attach if ``seasonal`` carries none.

        Returns
        -------
        pd.DataFrame with one column per seasonal component.
        """
        if isinstance(seasonal, pd.DataFrame):
            return seasonal
        if isinstance(seasonal, pd.Series):
            name = "seasonal" if seasonal.name is None else seasonal.name
            return seasonal.to_frame(name=name)

        # plain array: no labels available, fall back to positional names
        frame = pd.DataFrame(seasonal, index=index)
        if frame.shape[1] == 1:
            frame.columns = ["seasonal"]
        else:
            frame.columns = [f"seasonal_{i}" for i in range(frame.shape[1])]
        return frame

    def _fit(self, X, y=None):
        """Fit transformer to X and y.

        private _fit containing the core logic, called from fit

        Parameters
        ----------
        X : pd.Series
            Data to fit the decomposition to.
        y : ignored, exists for interface compatibility

        Returns
        -------
        self : reference to self
        """
        self._X = X
        self.mstl_ = self._decompose(X)

        # one Series per seasonal component; iterating the raw result directly
        # would yield column labels (DataFrame) or scalars (Series) instead
        seasonal = self._seasonal_to_frame(self.mstl_.seasonal, X.index)
        self.seasonal_ = [
            pd.Series(seasonal[column], index=X.index) for column in seasonal.columns
        ]
        self.resid_ = pd.Series(self.mstl_.resid, index=X.index)
        self.trend_ = pd.Series(self.mstl_.trend, index=X.index)

        return self

    def _transform(self, X, y=None):
        """Transform X and return a transformed version.

        private _transform containing core logic, called from transform

        Parameters
        ----------
        X : Series, Panel, or Hierarchical data, of mtype X_inner_mtype
            if X_inner_mtype is list, _transform must support all types in it
            Data to be transformed
        y : Series, Panel, or Hierarchical data, of mtype y_inner_mtype, default=None
            Additional data, e.g., labels for transformation

        Returns
        -------
        transformed version of X, with the same index as X
        """
        if X.index.equals(self._X.index):
            return self._make_return_object(X, self.mstl_)

        # fit again if indices not seen, but don't store anything.
        # The Series (not its raw values) is passed so that the index stays
        # available for period inference and labelled components.
        X_full = X.combine_first(self._X)
        refitted = self._decompose(X_full)
        full_result = self._make_return_object(X_full, refitted)

        # restrict to the requested index, as promised by the
        # "transform-returns-same-time-index" tag
        return full_result.loc[X.index]

    def _make_return_object(self, X, mstl):
        """Build the transform output from a fitted decomposition.

        Parameters
        ----------
        X : pd.Series
            Series that ``mstl`` was fitted on.
        mstl : statsmodels decomposition result
            Fitted decomposition of ``X``.

        Returns
        -------
        pd.Series
            Deseasonalized series, if ``return_components`` is False.
        pd.DataFrame
            Columns "transformed", "trend", "resid" and one column per seasonal
            component, if ``return_components`` is True.
        """
        seasonal = self._seasonal_to_frame(mstl.seasonal, X.index)
        if seasonal.shape[1] == 1:
            seasonal_total = seasonal.iloc[:, 0]
        else:
            seasonal_total = seasonal.sum(axis=1)

        # deseasonalize only
        # NOTE(review): when ``lmbda`` is set, the components presumably live on
        # the Box-Cox transformed scale while X does not - confirm intended.
        transformed = pd.Series(X.values - seasonal_total.values, index=X.index)

        if not self.return_components:
            return transformed

        components = {
            "transformed": transformed,
            "trend": pd.Series(mstl.trend, index=X.index),
            "resid": pd.Series(mstl.resid, index=X.index),
        }
        # ``items`` replaces ``iteritems``, which was removed in pandas 2.0
        for column_name, column_data in seasonal.items():
            components[column_name] = pd.Series(column_data.values, index=X.index)

        return pd.DataFrame(components)

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator.

        Parameters
        ----------
        parameter_set : str , default = "default"
            Name of the set of test parameters to return, for use in tests. If no
            special parameters are defined for a value, will return `"default"` set.
            There are currently no reserved values for transformers.

        Returns
        -------
        params : dict or list of dict , default = {}
            Parameters to create testing instances of the class
            Each dict are parameters to construct an "interesting" test instance,
            i.e., `MyClass(**params)` or `MyClass(**params[i])` creates a valid test
            instance.
            `create_test_instance` uses the first (or only) dictionary in `params`
        """
        params1 = {
            "periods": 3,
            "windows": 3,
        }
        params2 = {
            "periods": [3, 24],
            "windows": [3, 9],
            "lmbda": "auto",
            "iterate": 10,
            "stl_kwargs": {"trend_deg": 0},
        }

        return [params1, params2]