Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ColumnwiseTransformer (multivariate compositor for series-to-series transformer) #1044

Merged
merged 37 commits into from
Aug 14, 2021
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
df15fe6
first draft of multivariate compositor
SveaMeyer13 Jun 24, 2021
730197f
change syntax
SveaMeyer13 Jun 24, 2021
249e44d
Merge branch 'main' into multivariate-compositor
SveaMeyer13 Jun 29, 2021
12d532b
added transformer to test_config; introduced multivariate-only-tag; a…
SveaMeyer13 Jun 29, 2021
27afa75
remove apply function for now; next step: add functionality for trans…
SveaMeyer13 Jun 29, 2021
f3615f1
Update sktime/transformations/series/multivariate_compositor.py
SveaMeyer13 Jul 5, 2021
7ca2383
Update sktime/transformations/series/multivariate_compositor.py
SveaMeyer13 Jul 5, 2021
70f6905
Update sktime/transformations/series/multivariate_compositor.py
SveaMeyer13 Jul 5, 2021
bc1e914
revert tests, as we also accept univariate series
SveaMeyer13 Jul 6, 2021
7d09c9d
make transformer accept pd.Series; implement suggestions from aiwalte…
SveaMeyer13 Jul 6, 2021
c736b2e
Merge branch 'main' into multivariate-compositor
SveaMeyer13 Jul 6, 2021
743e3e8
reformat test_config.py
SveaMeyer13 Jul 6, 2021
2a8893d
Update sktime/transformations/series/compose.py
SveaMeyer13 Jul 6, 2021
eff49f4
improve docstrings
SveaMeyer13 Jul 6, 2021
fe493df
Update compose.py
SveaMeyer13 Jul 6, 2021
3d08399
add example and decorator
SveaMeyer13 Jul 7, 2021
e229caa
Merge branch 'main' into multivariate-compositor
SveaMeyer13 Jul 7, 2021
735a39d
reformat config.py
SveaMeyer13 Jul 7, 2021
389b3eb
Update sktime/transformations/series/compose.py
SveaMeyer13 Jul 7, 2021
9c9adb4
Update sktime/transformations/series/compose.py
SveaMeyer13 Jul 7, 2021
cd9f266
make additional _check_columns and _revert_to_series function; get ri…
SveaMeyer13 Jul 12, 2021
45b1e3b
Merge branch 'main' into multivariate-compositor
SveaMeyer13 Jul 12, 2021
0b7c230
add transformer to api_reference
SveaMeyer13 Jul 13, 2021
126f5e9
Merge branch 'main' into multivariate-compositor
SveaMeyer13 Jul 13, 2021
b663680
remove space
SveaMeyer13 Jul 21, 2021
40384bb
directly import load_longley
SveaMeyer13 Jul 21, 2021
a341149
remove empty line
SveaMeyer13 Jul 21, 2021
9ff82bb
minor change to OptionalPassthrough
SveaMeyer13 Jul 21, 2021
d7952f1
directly import load_longley
SveaMeyer13 Jul 21, 2021
d467c4e
Merge branch 'main' into multivariate-compositor
SveaMeyer13 Jul 21, 2021
31c94c7
make docstrings pydocstyle compliant, move _attributes to fit
SveaMeyer13 Jul 22, 2021
aed22dd
z_name cannot be an attribute of the transformer as it changes in tra…
SveaMeyer13 Jul 22, 2021
e2ec24c
make functions standalone, change z[0] to z.squeeze(columns), add hel…
SveaMeyer13 Jul 26, 2021
e9ac82f
Merge branch 'main' into multivariate-compositor
aiwalter Aug 11, 2021
0b65c57
Merge branch 'main' into multivariate-compositor
SveaMeyer13 Aug 12, 2021
9da6a2b
Merge branch 'main' into multivariate-compositor
SveaMeyer13 Aug 12, 2021
e1eb092
Merge branch 'main' into multivariate-compositor
mloning Aug 13, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions sktime/tests/_config.py
Expand Up @@ -85,6 +85,7 @@
from sktime.transformations.series.compose import OptionalPassthrough
from sktime.transformations.series.outlier_detection import HampelFilter
from sktime.transformations.series.boxcox import BoxCoxTransformer
from sktime.transformations.series.multivariate_compositor import MultivariateCompositor
SveaMeyer13 marked this conversation as resolved.
Show resolved Hide resolved
from sktime.clustering.base.base import BaseCluster


Expand Down Expand Up @@ -276,6 +277,7 @@
Imputer: {"method": "mean"},
HampelFilter: {"window_length": 3},
OptionalPassthrough: {"transformer": BoxCoxTransformer(), "passthrough": True},
MultivariateCompositor: {"transformer": BoxCoxTransformer()},
}

# We use estimator tags in addition to class hierarchies to further distinguish
Expand Down
118 changes: 118 additions & 0 deletions sktime/transformations/series/multivariate_compositor.py
@@ -0,0 +1,118 @@
#!/usr/bin/env python3 -u
# -*- coding: utf-8 -*-
# copyright: sktime developers, BSD-3-Clause License (see LICENSE file)

__all__ = ["MultivariateCompositor"]
__author__ = ["Svea Meyer"]

import pandas as pd
from sklearn.base import clone
from sktime.transformations.base import _SeriesToSeriesTransformer
from sktime.utils.validation.series import check_series


class MultivariateCompositor(_SeriesToSeriesTransformer):
    """Apply a series-to-series transformer column by column.

    A separate clone of ``transformer`` is fitted to every selected column of
    a multivariate series. In ``transform`` / ``inverse_transform`` only the
    selected columns are overwritten in a copy of the input; all other
    columns are returned unchanged.

    Parameters
    ----------
    transformer : series-to-series transformer
        Template estimator. It is cloned once per selected column in ``fit``
        and is never fitted itself.
    columns : "all", str or iterable of column names, default="all"
        Columns that are supposed to be transformed. ``"all"`` selects every
        column of the data passed to ``fit``. Any other single string is
        treated as the name of one column.

    Attributes
    ----------
    columns_ : list
        Column names resolved in ``fit``. The ``columns`` parameter itself is
        left untouched so that the estimator can be cloned and refitted.
    transformers_ : dict
        Maps each name in ``columns_`` to the clone fitted on that column.
    """

    _required_parameters = ["transformer"]
    _tags = {
        "multivariate-only": True,
    }

    def __init__(self, transformer, columns="all"):
        # Only store the constructor arguments here; everything learned from
        # data (``columns_``, ``transformers_``) is created in ``fit``.
        self.transformer = transformer
        self.columns = columns
        super().__init__()

    @staticmethod
    def _check_is_dataframe(Z):
        """Raise ValueError unless Z is a pd.DataFrame."""
        if not isinstance(Z, pd.DataFrame):
            raise ValueError("Z needs to be a multivariate series (pd.DataFrame)")

    def _resolve_columns(self, z):
        """Return the list of column names selected by ``self.columns``."""
        if isinstance(self.columns, str):
            # Guard the comparison with isinstance: comparing a list or
            # pd.Index to "all" would be evaluated element-wise.
            if self.columns == "all":
                return list(z.columns)
            return [self.columns]
        return list(self.columns)

    @staticmethod
    def _check_columns(z, columns):
        """Raise ValueError if any name in ``columns`` is not a column of z."""
        missing = set(columns).difference(z.columns)
        if missing:
            raise ValueError(f"Missing columns {missing} in Z.")

    def fit(self, Z, X=None):
        """Fit one clone of the transformer to each selected column of Z.

        Parameters
        ----------
        Z : pd.DataFrame
            Multivariate series to fit on.
        X : optional, default=None
            Passed on unchanged to the ``fit`` method of every clone.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If Z is not a pd.DataFrame or a selected column is missing in Z.
        """
        self._check_is_dataframe(Z)
        self._is_fitted = False
        z = check_series(Z)

        columns = self._resolve_columns(z)
        # make sure z contains all columns that the user wants to transform
        self._check_columns(z, columns)

        self.columns_ = columns
        self.transformers_ = {}
        for colname in self.columns_:
            transformer = clone(self.transformer)
            self.transformers_[colname] = transformer
            transformer.fit(z[colname], X)
        self._is_fitted = True
        return self

    def transform(self, Z, X=None):
        """Transform each selected column with its own fitted transformer.

        Parameters
        ----------
        Z : pd.DataFrame
            Multivariate series to transform.
        X : optional, default=None
            Passed on unchanged to the ``transform`` method of every clone.

        Returns
        -------
        pd.DataFrame
            Copy of Z in which the selected columns are replaced by their
            transformed values.

        Raises
        ------
        ValueError
            If Z is not a pd.DataFrame or a selected column is missing in Z.
        """
        self._check_is_dataframe(Z)
        self.check_is_fitted()
        # work on a copy so the caller's frame is not modified
        z = check_series(Z).copy()

        # make sure z contains all columns that the user wants to transform
        self._check_columns(z, self.columns_)
        for colname in self.columns_:
            transformer = self.transformers_[colname]
            transformer.check_is_fitted()
            z[colname] = transformer.transform(z[colname], X)
        return z

    def inverse_transform(self, Z, X=None):
        """Inverse-transform each selected column with its own transformer.

        Parameters
        ----------
        Z : pd.DataFrame
            Multivariate series to inverse-transform.
        X : optional, default=None
            Passed on unchanged to the ``inverse_transform`` method of every
            clone.

        Returns
        -------
        pd.DataFrame
            Copy of Z in which the selected columns are replaced by their
            inverse-transformed values.

        Raises
        ------
        ValueError
            If Z is not a pd.DataFrame or a selected column is missing in Z.
        NotImplementedError
            If the wrapped transformer has no ``inverse_transform`` method.
        """
        self._check_is_dataframe(Z)
        if not hasattr(self.transformer, "inverse_transform"):
            raise NotImplementedError(
                "this transform does not have an inverse_transform method"
            )
        self.check_is_fitted()
        # work on a copy so the caller's frame is not modified
        z = check_series(Z).copy()

        # make sure z contains all columns that the user wants to transform
        self._check_columns(z, self.columns_)
        for colname in self.columns_:
            transformer = self.transformers_[colname]
            transformer.check_is_fitted()
            z[colname] = transformer.inverse_transform(z[colname], X)
        return z

    # todo: add functionality for update
22 changes: 14 additions & 8 deletions sktime/transformations/tests/test_all_transformers.py
Expand Up @@ -57,7 +57,7 @@ def check_series_to_primitive_transform_univariate(Estimator, **kwargs):


def _check_raises_error(Estimator, **kwargs):
with pytest.raises(ValueError, match=r"univariate"):
with pytest.raises(ValueError, match=r"variate"):
if _has_tag(Estimator, "fit-in-transform"):
# As some estimators have an empty fit method, we here check if
# they raise the appropriate error in transform rather than fit.
Expand All @@ -79,12 +79,15 @@ def check_series_to_primitive_transform_multivariate(Estimator):

def check_series_to_series_transform_univariate(Estimator):
    """Check fit/transform of a series-to-series transformer on univariate data.

    Estimators tagged "multivariate-only" are expected to reject the input,
    which is verified through ``_check_raises_error``. For all other
    estimators the transformed output must be a pd.Series, np.ndarray or
    pd.DataFrame.
    """
    n_timepoints = 15
    if _has_tag(Estimator, "multivariate-only"):
        _check_raises_error(Estimator, n_timepoints=n_timepoints)
    else:
        out = _construct_fit_transform(
            Estimator,
            n_timepoints=n_timepoints,
            add_nan=_has_tag(Estimator, "handles-missing-data"),
        )
        assert isinstance(out, (pd.Series, np.ndarray, pd.DataFrame))


def check_series_to_series_transform_multivariate(Estimator):
Expand Down Expand Up @@ -156,7 +159,10 @@ def check_transform_returns_same_time_index(Estimator):

def check_transform_inverse_transform_equivalent(Estimator):
    """Check that inverse_transform undoes fit_transform on the fit data.

    Estimators tagged "multivariate-only" get fit arguments built with
    ``n_columns=3``; all other estimators use the default fit arguments.
    """
    estimator = _construct_instance(Estimator)
    if _has_tag(Estimator, "multivariate-only"):
        X = _make_args(estimator, "fit", n_columns=3)[0]
    else:
        X = _make_args(estimator, "fit")[0]
    Xt = estimator.fit_transform(X)
    Xit = estimator.inverse_transform(Xt)
    _assert_array_almost_equal(X, Xit)
Expand Down