Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ColumnwiseTransformer (multivariate compositor for series-to-series transformer) #1044

Merged
merged 37 commits into from
Aug 14, 2021
Merged
Show file tree
Hide file tree
Changes from 35 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
df15fe6
first draft of multivariate compositor
SveaMeyer13 Jun 24, 2021
730197f
change syntax
SveaMeyer13 Jun 24, 2021
249e44d
Merge branch 'main' into multivariate-compositor
SveaMeyer13 Jun 29, 2021
12d532b
added transformer to test_config; introduced multivariate-only-tag; a…
SveaMeyer13 Jun 29, 2021
27afa75
remove apply function for now; next step: add functionality for trans…
SveaMeyer13 Jun 29, 2021
f3615f1
Update sktime/transformations/series/multivariate_compositor.py
SveaMeyer13 Jul 5, 2021
7ca2383
Update sktime/transformations/series/multivariate_compositor.py
SveaMeyer13 Jul 5, 2021
70f6905
Update sktime/transformations/series/multivariate_compositor.py
SveaMeyer13 Jul 5, 2021
bc1e914
revert tests, as we also accept univariate series
SveaMeyer13 Jul 6, 2021
7d09c9d
make transformer accept pd.Series; implement suggestions from aiwalte…
SveaMeyer13 Jul 6, 2021
c736b2e
Merge branch 'main' into multivariate-compositor
SveaMeyer13 Jul 6, 2021
743e3e8
reformat test_config.py
SveaMeyer13 Jul 6, 2021
2a8893d
Update sktime/transformations/series/compose.py
SveaMeyer13 Jul 6, 2021
eff49f4
improve docstrings
SveaMeyer13 Jul 6, 2021
fe493df
Update compose.py
SveaMeyer13 Jul 6, 2021
3d08399
add example and decorator
SveaMeyer13 Jul 7, 2021
e229caa
Merge branch 'main' into multivariate-compositor
SveaMeyer13 Jul 7, 2021
735a39d
reformat config.py
SveaMeyer13 Jul 7, 2021
389b3eb
Update sktime/transformations/series/compose.py
SveaMeyer13 Jul 7, 2021
9c9adb4
Update sktime/transformations/series/compose.py
SveaMeyer13 Jul 7, 2021
cd9f266
make additional _check_columns and _revert_to_series function; get ri…
SveaMeyer13 Jul 12, 2021
45b1e3b
Merge branch 'main' into multivariate-compositor
SveaMeyer13 Jul 12, 2021
0b7c230
add transformer to api_reference
SveaMeyer13 Jul 13, 2021
126f5e9
Merge branch 'main' into multivariate-compositor
SveaMeyer13 Jul 13, 2021
b663680
remove space
SveaMeyer13 Jul 21, 2021
40384bb
directly import load_longley
SveaMeyer13 Jul 21, 2021
a341149
remove empty line
SveaMeyer13 Jul 21, 2021
9ff82bb
minor change to OptionalPassthrough
SveaMeyer13 Jul 21, 2021
d7952f1
directly import load_longley
SveaMeyer13 Jul 21, 2021
d467c4e
Merge branch 'main' into multivariate-compositor
SveaMeyer13 Jul 21, 2021
31c94c7
make docstrings pydocstyle compliant, move _attributes to fit
SveaMeyer13 Jul 22, 2021
aed22dd
z_name cannot be an attribute of the transformer as it changes in tra…
SveaMeyer13 Jul 22, 2021
e2ec24c
make functions standalone, change z[0] to z.squeeze(columns), add hel…
SveaMeyer13 Jul 26, 2021
e9ac82f
Merge branch 'main' into multivariate-compositor
aiwalter Aug 11, 2021
0b65c57
Merge branch 'main' into multivariate-compositor
SveaMeyer13 Aug 12, 2021
9da6a2b
Merge branch 'main' into multivariate-compositor
SveaMeyer13 Aug 12, 2021
e1eb092
Merge branch 'main' into multivariate-compositor
mloning Aug 13, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 2 additions & 1 deletion .all-contributorsrc
Expand Up @@ -1041,7 +1041,8 @@
"avatar_url": "https://avatars.githubusercontent.com/u/46671894?v=4",
"profile": "https://github.com/SveaMeyer13",
"contributions": [
"doc"
"doc",
"code"
]
},
{
Expand Down
1 change: 1 addition & 0 deletions docs/source/api_reference/transformations.rst
Expand Up @@ -281,6 +281,7 @@ Composition
:template: class.rst

OptionalPassthrough
ColumnwiseTransformer

Theta
~~~~~
Expand Down
3 changes: 3 additions & 0 deletions sktime/tests/_config.py
Expand Up @@ -92,10 +92,12 @@
from sktime.transformations.series.adapt import TabularToSeriesAdaptor
from sktime.transformations.series.boxcox import BoxCoxTransformer
from sktime.transformations.series.compose import OptionalPassthrough
from sktime.transformations.series.compose import ColumnwiseTransformer
from sktime.transformations.series.detrend import Detrender
from sktime.transformations.series.impute import Imputer
from sktime.transformations.series.outlier_detection import HampelFilter


# The following estimators currently do not pass all unit tests
# What do they fail? ShapeDTW fails on 3d_numpy_input test, not set up for that
EXCLUDE_ESTIMATORS = [
Expand Down Expand Up @@ -326,6 +328,7 @@
Imputer: {"method": "mean"},
HampelFilter: {"window_length": 3},
OptionalPassthrough: {"transformer": BoxCoxTransformer(), "passthrough": True},
ColumnwiseTransformer: {"transformer": Detrender()},
AggrDist: {"transformer": ScipyDist()},
PyODAnnotator: {"estimator": ANOMALY_DETECTOR},
}
Expand Down
266 changes: 261 additions & 5 deletions sktime/transformations/series/compose.py
Expand Up @@ -2,9 +2,12 @@
# -*- coding: utf-8 -*-
# copyright: sktime developers, BSD-3-Clause License (see LICENSE file)

"""Series-to-Series Transformers: OptionalPassthrough and ColumnwiseTransformer."""

# Module metadata: contributors and the public names exported by this module.
__author__ = ["Martin Walter", "Svea Meyer"]
__all__ = ["OptionalPassthrough", "ColumnwiseTransformer"]

import pandas as pd
from sktime.transformations.base import _SeriesToSeriesTransformer
from sktime.utils.validation.series import check_series

Expand All @@ -13,7 +16,10 @@


class OptionalPassthrough(_SeriesToSeriesTransformer):
"""A transformer to tune the implicit hyperparameter whether or not to use a
"""
Tune implicit hyperparameter.

A transformer to tune the implicit hyperparameter whether or not to use a
particular transformer inside a pipeline (e.g. TransformedTargetForecaster)
or not. This is achieved by having the additional hyperparameter
"passthrough" which can be added to a grid then (see example).
Expand All @@ -27,7 +33,7 @@ class OptionalPassthrough(_SeriesToSeriesTransformer):
passthrough the data (identity transformation)

Example
----------
-------
>>> from sktime.datasets import load_airline
>>> from sktime.forecasting.naive import NaiveForecaster
>>> from sktime.transformations.series.compose import OptionalPassthrough
Expand Down Expand Up @@ -76,13 +82,40 @@ def __init__(self, transformer, passthrough=False):
super(OptionalPassthrough, self).__init__()

def fit(self, Z, X=None):
    """Fit the wrapped transformer, unless passthrough is switched on.

    Parameters
    ----------
    Z : pd.Series
        Series to fit.
    X : pd.DataFrame, optional (default=None)
        Exogenous data used in transformation.

    Returns
    -------
    self
    """
    # In passthrough mode nothing is learned: no clone is created and
    # ``transformer_`` is left unset, only the fitted flag is raised.
    if self.passthrough:
        self._is_fitted = True
        return self

    # Fit a fresh clone so the transformer passed in by the user is untouched.
    self.transformer_ = clone(self.transformer)
    self.transformer_.fit(Z, X)
    self._is_fitted = True
    return self

def transform(self, Z, X=None):
"""Transform data.

Parameters
----------
Z : pd.Series
Series to transform.
X : pd.DataFrame, optional (default=None)
Exogenous data used in transformation.

Returns
-------
z : pd.Series
Transformed series.
"""
self.check_is_fitted()
z = check_series(Z, enforce_univariate=False)
if not self.passthrough:
Expand All @@ -91,8 +124,231 @@ def transform(self, Z, X=None):

@if_delegate_has_method(delegate="transformer")
def inverse_transform(self, Z, X=None):
    """Inverse transform data.

    Only available when the wrapped ``transformer`` itself provides
    ``inverse_transform`` (enforced by the delegate decorator).

    Parameters
    ----------
    Z : pd.Series
        Series to transform.
    X : pd.DataFrame, optional (default=None)
        Exogenous data used in transformation.

    Returns
    -------
    z : pd.Series
        Inverse transformed data. In passthrough mode the validated input
        is returned unchanged.
    """
    self.check_is_fitted()
    z = check_series(Z, enforce_univariate=False)
    if not self.passthrough:
        # Apply the inverse exactly once and forward the caller's X;
        # a second call with X=None would invert the data twice and
        # silently drop the exogenous data.
        z = self.transformer_.inverse_transform(z, X=X)
    return z


class ColumnwiseTransformer(_SeriesToSeriesTransformer):
    """Apply a transformer columnwise to multivariate series.

    A separate clone of ``transformer`` is fitted for every selected column
    and later applied to that column only. Columns that are not selected are
    returned unchanged by ``transform`` and ``inverse_transform``.

    Parameters
    ----------
    transformer : Estimator
        scikit-learn-like or sktime-like transformer to fit and apply to series
    columns : list of str or None
        Names of columns that are supposed to be transformed.
        If it is None, all columns are transformed.

    Attributes
    ----------
    transformers_ : dict of {str : transformer}
        Maps columns to transformers
    columns_ : list of str or pd.Index
        Names of columns that are supposed to be transformed.
        Equal to ``columns`` if given, otherwise the columns seen in ``fit``.

    Example
    -------
    >>> from sktime.datasets import load_longley
    >>> from sktime.transformations.series.detrend import Detrender
    >>> from sktime.transformations.series.compose import ColumnwiseTransformer

    >>> y, X = load_longley()
    >>> transformer = ColumnwiseTransformer(Detrender())
    >>> yt = transformer.fit_transform(X)
    """

    _required_parameters = ["transformer"]

    def __init__(self, transformer, columns=None):
        self.transformer = transformer
        self.columns = columns
        super(ColumnwiseTransformer, self).__init__()

    def fit(self, Z, X=None):
        """Fit data.

        Iterates over columns (series) and applies
        the fit function of the transformer.

        Parameters
        ----------
        Z : pd.Series, pd.DataFrame
            Series to fit. A pd.Series is treated as a one-column frame.
        X : pd.DataFrame, optional (default=None)
            Exogenous data, passed unchanged to every column transformer.

        Returns
        -------
        self : an instance of self

        Raises
        ------
        ValueError
            If ``columns`` is neither None nor a list of strings, or if a
            selected column is missing in Z.
        """
        self._is_fitted = False

        z = check_series(Z, allow_numpy=False)

        # cast to pd.DataFrame in univariate case
        z, _ = _check_is_pdseries(z)

        # check that columns are None or list of strings; the whole
        # conjunction is negated so that a list holding non-strings is
        # rejected and non-iterables never reach the all(...) call
        if self.columns is not None:
            if not (
                isinstance(self.columns, list)
                and all(isinstance(s, str) for s in self.columns)
            ):
                raise ValueError("Columns need to be a list of strings or None.")

        # set self.columns_ to columns that are going to be transformed
        # (all if self.columns is None)
        self.columns_ = z.columns if self.columns is None else self.columns

        # make sure z contains all columns that the user wants to transform
        _check_columns(z, selected_columns=self.columns_)

        # fit by iterating over columns, one independent clone per column
        self.transformers_ = {}
        for colname in self.columns_:
            transformer = clone(self.transformer)
            self.transformers_[colname] = transformer
            transformer.fit(z[colname], X)
        self._is_fitted = True
        return self

    def transform(self, Z, X=None):
        """Transform data.

        Returns a transformed version of Z by iterating over specified
        columns and applying the univariate series transformer to them.

        Parameters
        ----------
        Z : pd.Series, pd.DataFrame
            Series to transform.
        X : pd.DataFrame, optional (default=None)
            Exogenous data, passed unchanged to every column transformer.

        Returns
        -------
        Z : pd.Series, pd.DataFrame
            Transformed time series(es). A pd.Series is returned if a
            pd.Series was passed in.
        """
        self.check_is_fitted()
        # numpy input has no column labels, so reject it up front as fit does
        z = check_series(Z, allow_numpy=False)

        # handle univariate case
        z, is_series = _check_is_pdseries(z)

        # work on a copy so the caller's data is not modified
        z = z.copy()

        # make sure z contains all columns that the user wants to transform
        _check_columns(z, selected_columns=self.columns_)
        for colname in self.columns_:
            z[colname] = self.transformers_[colname].transform(z[colname], X)

        # make z a series again in univariate case
        if is_series:
            z = z.squeeze("columns")
        return z

    @if_delegate_has_method(delegate="transformer")
    def inverse_transform(self, Z, X=None):
        """Inverse-transform data.

        Returns an inverse-transformed version of Z by iterating over specified
        columns and applying the univariate series transformer to them.
        Only works if self.transformer has an inverse-transform method.

        Parameters
        ----------
        Z : pd.Series, pd.DataFrame
            Series to inverse-transform.
        X : pd.DataFrame, optional (default=None)
            Exogenous data, passed unchanged to every column transformer.

        Returns
        -------
        Z : pd.Series, pd.DataFrame
            Inverse-transformed time series(es). A pd.Series is returned if
            a pd.Series was passed in.
        """
        self.check_is_fitted()
        # numpy input has no column labels, so reject it up front as fit does
        z = check_series(Z, allow_numpy=False)

        # handle univariate case
        z, is_series = _check_is_pdseries(z)

        # work on a copy so the caller's data is not modified
        z = z.copy()

        # make sure z contains all columns that the user wants to transform
        _check_columns(z, selected_columns=self.columns_)

        # iterate over columns that are supposed to be inverse_transformed
        for colname in self.columns_:
            z[colname] = self.transformers_[colname].inverse_transform(z[colname], X)

        # make z a series again in univariate case
        if is_series:
            z = z.squeeze("columns")
        return z

    @if_delegate_has_method(delegate="transformer")
    def update(self, Z, X=None, update_params=True):
        """Update parameters.

        Update the parameters of the estimator with new data
        by iterating over specified columns.
        Only works if self.transformer has an update method.

        Parameters
        ----------
        Z : pd.Series, pd.DataFrame
            New time series
        X : pd.DataFrame, optional (default=None)
            Exogenous data, passed unchanged to every column transformer.
        update_params : bool, optional (default=True)
            Forwarded to the ``update`` method of every column transformer.

        Returns
        -------
        self : an instance of self
        """
        # transformers_ and columns_ only exist after fit
        self.check_is_fitted()
        z = check_series(Z, allow_numpy=False)

        # make z a pd.DataFrame in univariate case
        z, _ = _check_is_pdseries(z)

        # make sure z contains all columns that the user wants to transform
        _check_columns(z, selected_columns=self.columns_)
        for colname in self.columns_:
            # NOTE(review): assumes the wrapped transformer's update accepts
            # ``update_params`` as a keyword - confirm for custom transformers
            self.transformers_[colname].update(
                z[colname], X, update_params=update_params
            )
        return self


def _check_columns(z, selected_columns):
# make sure z contains all columns that the user wants to transform
z_wanted_keys = set(selected_columns)
z_new_keys = set(z.columns)
difference = z_wanted_keys.difference(z_new_keys)
if len(difference) != 0:
raise ValueError("Missing columns" + str(difference) + "in Z.")


def _check_is_pdseries(z):
# make z a pd.Dataframe in univariate case
is_series = False
if isinstance(z, pd.Series):
z = z.to_frame()
is_series = True
return z, is_series