From 65a530408b2b26978eff1f7961ff8de3f93d1acb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Tue, 27 Dec 2022 00:46:02 +0000 Subject: [PATCH] [ENH] dunders for time series distances and kernels (#3949) This PR adds dunders for time series distances and kernels (descendants of `BasePairwiseTransformerPanel`), behaving as per likely user expectation, description as below. It also adds tests for the different combinations of dunders and estimators. ### algebraic operations between time series distances and kernels * `d = dist1 * dist2` satisfies `d(X1, X2) == dist1(X1, X2) * dist2(X1, X2)`, for all pairwise transformers `dist1`, `dist2`, equality for all elements of the resulting matrix * `d = dist1 + dist2` gives `d(X1, X2) == dist1(X1, X2) + dist2(X1, X2)`, for all pairwise transformers `dist1`, `dist2` * for a pairwise distance `dist` and an int or float `const`, `d = dist * const` or `d = const * dist` gives `d(X1, X2) == dist(X1, X2) * const` * for a pairwise distance `dist` and an int or float `const`, `d = dist + const` or `d = const + dist` gives `d(X1, X2) == dist(X1, X2) + const` ### pipeline concatenation between ordinary transformers and time series distances or kernels * for a transformer `trafo` and a pairwise `dist`, the estimator `pipe = trafo * dist` is also a pairwise distance, with `pipe(X1, X2) == dist(trafo.fit_transform(X1), trafo.fit_transform(X2))` * above, the transformer `trafo` can be an `sktime` transformer, or an `sklearn` transformer (which is coerced/wrapped) This may be especially interesting for users with a research interest in time series classification or clustering, as it allows common time series distances to be obtained easily as compositions of others. E.g., ddtw (for common definitions of ddtw) is the same as `Differencer() * DtwDist()` (first difference, then dtw distance). Higher order differences or other combinations are also easy to obtain this way.
--- sktime/base/_meta.py | 26 ++- sktime/dists_kernels/_base.py | 121 +++++++++++++ sktime/dists_kernels/algebra.py | 71 ++++++++ .../tests/test_dist_kernels_dunders.py | 167 ++++++++++++++++++ 4 files changed, 382 insertions(+), 3 deletions(-) create mode 100644 sktime/dists_kernels/tests/test_dist_kernels_dunders.py diff --git a/sktime/base/_meta.py b/sktime/base/_meta.py index 687085cd9d5..75e166485ac 100644 --- a/sktime/base/_meta.py +++ b/sktime/base/_meta.py @@ -402,7 +402,13 @@ def _make_strings_unique(self, strlist): return self._make_strings_unique(uniquestr) def _dunder_concat( - self, other, base_class, composite_class, attr_name="steps", concat_order="left" + self, + other, + base_class, + composite_class, + attr_name="steps", + concat_order="left", + composite_params=None, ): """Concatenate pipelines for dunder parsing, helper function. @@ -426,6 +432,9 @@ def _dunder_concat( concat_order : str, one of "left" and "right", optional, default="left" if "left", result attr_name will be like self.attr_name + other.attr_name if "right", result attr_name will be like other.attr_name + self.attr_name + composite_params : dict, optional, default=None; else, pairs strname-value + if not None, parameters of the composite are always set accordingly + i.e., contains key-value pairs, and composite_class has key set to value Returns ------- @@ -488,11 +497,22 @@ def concat(x, y): else: return NotImplemented + # create the "steps" param for the composite # if all the names are equal to class names, we eat them away if all(type(x[1]).__name__ == x[0] for x in zip(new_names, new_ests)): - return composite_class(**{attr_name: list(new_ests)}) + step_param = {attr_name: list(new_ests)} else: - return composite_class(**{attr_name: list(zip(new_names, new_ests))}) + step_param = {attr_name: list(zip(new_names, new_ests))} + + # retrieve other parameters, from composite_params attribute + if composite_params is None: + composite_params = {} + else: + composite_params = 
composite_params.copy() + + # construct the composite with both step and additional params + composite_params.update(step_param) + return composite_class(**composite_params) def _anytagis(self, tag_name, value, estimators): """Return whether any estimator in list has tag `tag_name` of value `value`. diff --git a/sktime/dists_kernels/_base.py b/sktime/dists_kernels/_base.py index e2778296d83..ab2f6276a82 100644 --- a/sktime/dists_kernels/_base.py +++ b/sktime/dists_kernels/_base.py @@ -228,6 +228,127 @@ def __call__(self, X, X2=None): # this just defines __call__ as an alias for transform return self.transform(X=X, X2=X2) + def __mul__(self, other): + """Magic * method, return (right) multiplied CombinedDistance. + + Implemented for `other` being: + * a pairwise panel transformer, then `CombinedDistance([other, self], "*")` + + Parameters + ---------- + other: one of: + * `sktime` transformer, must inherit from BaseTransformer, + otherwise, `NotImplemented` is returned (leads to further dispatch by rmul) + + Returns + ------- + CombinedDistance object, + algebraic multiplication of `self` (first) with `other` (last). 
+ not nested, contains only non-CombinedDistance `sktime` transformers + """ + from sktime.dists_kernels.algebra import CombinedDistance + from sktime.dists_kernels.dummy import ConstantPwTrafoPanel + + # when other is an integer or float, treat it as constant distance/kernel + if isinstance(other, (int, float)): + other = ConstantPwTrafoPanel(constant=other) + + # we wrap self in a CombinedDistance, and concatenate with the other + # the CombinedDistance does the rest, e.g., dispatch on other + if isinstance(other, BasePairwiseTransformerPanel): + if not isinstance(self, CombinedDistance): + self_as_pipeline = CombinedDistance(pw_trafos=[self], operation="*") + else: + self_as_pipeline = self + return self_as_pipeline * other + # otherwise, we let the right operation handle the remaining dispatch + else: + return NotImplemented + + def __rmul__(self, other): + """Magic * method, return (right) PwTrafoPanelPipeline or CombinedDistance. + + Implemented for `other` being: + * a transformer, then `PwTrafoPanelPipeline([other, self])` is returned + * sklearn transformers are coerced via TabularToSeriesAdaptor + + Parameters + ---------- + other: `sktime` transformer, must inherit from BaseTransformer + otherwise, `NotImplemented` is returned + + Returns + ------- + PwTrafoPanelPipeline object, + concatenation of `other` (first) with `self` (last). 
+ not nested, contains only non-TransformerPipeline `sktime` steps + """ + from sktime.dists_kernels.compose import PwTrafoPanelPipeline + from sktime.dists_kernels.dummy import ConstantPwTrafoPanel + from sktime.transformations.base import BaseTransformer + from sktime.transformations.compose import TransformerPipeline + from sktime.transformations.series.adapt import TabularToSeriesAdaptor + from sktime.utils.sklearn import is_sklearn_transformer + + # when other is an integer or float, treat it as constant distance/kernel + if isinstance(other, (int, float)): + other = ConstantPwTrafoPanel(constant=other) + + # behaviour is implemented only if other inherits from BaseTransformer + # in that case, distinctions arise from whether self or other is a pipeline + # todo: this can probably be simplified further with "zero length" pipelines + if isinstance(other, BaseTransformer): + # PwTrafoPanelPipeline already has the dunder method defined + if isinstance(self, PwTrafoPanelPipeline): + return other * self + # if other is a TransformerPipeline but self is not, first unwrap it + elif isinstance(other, TransformerPipeline): + return PwTrafoPanelPipeline(pw_trafo=self, transformers=other.steps) + # if neither self nor other are a pipeline, construct a PwTrafoPanelPipeline + else: + return PwTrafoPanelPipeline(pw_trafo=self, transformers=[other]) + elif is_sklearn_transformer(other): + return TabularToSeriesAdaptor(other) * self + else: + return NotImplemented + + def __add__(self, other): + """Magic + method, return (right) added CombinedDistance. + + Implemented for `other` being: + * a pairwise panel transformer, then `CombinedDistance([other, self], "+")` + + Parameters + ---------- + other: one of: + * `sktime` transformer, must inherit from BaseTransformer, + otherwise, `NotImplemented` is returned (leads to further dispatch by rmul) + + Returns + ------- + CombinedDistance object, + algebraic addition of `self` (first) with `other` (last). 
+ not nested, contains only non-CombinedDistance `sktime` transformers + """ + from sktime.dists_kernels.algebra import CombinedDistance + from sktime.dists_kernels.dummy import ConstantPwTrafoPanel + + # when other is an integer or float, treat it as constant distance/kernel + if isinstance(other, (int, float)): + other = ConstantPwTrafoPanel(constant=other) + + # we wrap self in a CombinedDistance, and concatenate with the other + # the CombinedDistance does the rest, e.g., dispatch on other + if isinstance(other, BasePairwiseTransformerPanel): + if not isinstance(self, CombinedDistance): + self_as_pipeline = CombinedDistance(pw_trafos=[self], operation="+") + else: + self_as_pipeline = self + return self_as_pipeline + other + # otherwise, we let the right operation handle the remaining dispatch + else: + return NotImplemented + def transform(self, X, X2=None): """Compute distance/kernel matrix. diff --git a/sktime/dists_kernels/algebra.py b/sktime/dists_kernels/algebra.py index 5a3c2235c30..04352be32de 100644 --- a/sktime/dists_kernels/algebra.py +++ b/sktime/dists_kernels/algebra.py @@ -108,6 +108,77 @@ def _pw_trafos(self): def _pw_trafos(self, value): self.pw_trafos = value + def _algebra_dunder_concat(self, other, operation): + """Return (right) concat CombinedDistance, common boilerplate for dunders. + + Implemented for `other` being a transformer, otherwise returns `NotImplemented`. + + Parameters + ---------- + other: `sktime` pairwise transformer, must inherit BasePairwiseTransformerPanel + otherwise, `NotImplemented` is returned + operation: operation string used in CombinedDistance for the dunder. + Must be equal to the operation of the dunder, not of self. + + Returns + ------- + CombinedDistance object, concat of `self` (first) with `other` (last). 
+ does not contain CombinedDistance `sktime` transformers with same operation + (but may nest CombinedDistance with different operations) + """ + if self.operation == operation: + # if other is CombinedDistance but with different operation, + # we need to wrap it, or _dunder_concat would overwrite the operation + if isinstance(other, CombinedDistance) and not other.operation == operation: + other = CombinedDistance([other], operation=operation) + return self._dunder_concat( + other=other, + base_class=BasePairwiseTransformerPanel, + composite_class=CombinedDistance, + attr_name="pw_trafos", + concat_order="left", + composite_params={"operation": operation}, + ) + elif isinstance(other, BasePairwiseTransformerPanel): + return CombinedDistance([self, other], operation=operation) + else: + return NotImplemented + + def __mul__(self, other): + """Magic * method, return (right) multiplied CombinedDistance. + + Implemented for `other` being a transformer, otherwise returns `NotImplemented`. + + Parameters + ---------- + other: `sktime` pairwise transformer, must inherit BasePairwiseTransformerPanel + otherwise, `NotImplemented` is returned + + Returns + ------- + CombinedDistance object, algebraic * of `self` (first) with `other` (last). + does not contain CombinedDistance `sktime` transformers with same operation + (but may nest CombinedDistance with different operations) + """ + return self._algebra_dunder_concat(other=other, operation="*") + + def __add__(self, other): + """Magic + method, return (right) multiplied CombinedDistance. + + Implemented for `other` being a transformer, otherwise returns `NotImplemented`. + + Parameters + ---------- + other: `sktime` pairwise transformer, must inherit BasePairwiseTransformerPanel + otherwise, `NotImplemented` is returned + + Returns + ------- + CombinedDistance object, algebraic + of `self` (first) with `other` (last). 
+ not nested, contains only non-CombinedDistance `sktime` transformers + """ + return self._algebra_dunder_concat(other=other, operation="+") + def _transform(self, X, X2=None): """Compute distance/kernel matrix. diff --git a/sktime/dists_kernels/tests/test_dist_kernels_dunders.py b/sktime/dists_kernels/tests/test_dist_kernels_dunders.py new file mode 100644 index 00000000000..a3c5f314dd0 --- /dev/null +++ b/sktime/dists_kernels/tests/test_dist_kernels_dunders.py @@ -0,0 +1,167 @@ +# -*- coding: utf-8 -*- +"""Tests for pairwise panel transformer dunders.""" + +__author__ = ["fkiraly"] + +import numpy as np +import pytest + +from sktime.dists_kernels.algebra import CombinedDistance +from sktime.dists_kernels.compose import PwTrafoPanelPipeline +from sktime.dists_kernels.edit_dist import EditDist +from sktime.utils._testing.panel import _make_panel_X + +X1 = _make_panel_X( + n_instances=5, n_columns=5, n_timepoints=5, random_state=1, all_positive=True +) +X2 = _make_panel_X( + n_instances=6, n_columns=5, n_timepoints=5, random_state=2, all_positive=True +) + + +def test_mul_algebra_dunder(): + """Test multiplication dunder, algebraic case (two panel distances).""" + t1 = EditDist() + t2 = EditDist(distance="edr") + t3 = EditDist(distance="erp") + + m1 = t1.transform(X1, X2) + m2 = t2.transform(X1, X2) + m3 = t3.transform(X1, X2) + + t12 = t1 * t2 + assert isinstance(t12, CombinedDistance) + assert len(t12.pw_trafos) == 2 + assert t12.get_params()["operation"] == "*" + + m12 = t12.transform(X1, X2) + assert np.allclose(m12, m1 * m2) + + t123 = t1 * t2 * t3 + assert isinstance(t123, CombinedDistance) + assert len(t123.pw_trafos) == 3 + assert t123.get_params()["operation"] == "*" + + m123 = t123.transform(X1, X2) + assert np.allclose(m123, m1 * m2 * m3) + + t123r = t1 * (t2 * t3) + assert isinstance(t123r, CombinedDistance) + assert len(t123r.pw_trafos) == 3 + assert t123r.get_params()["operation"] == "*" + + m123r = t123r.transform(X1, X2) + assert np.allclose(m123r, 
m1 * m2 * m3) + + +def test_add_algebra_dunder(): + """Test addition dunder, algebraic case (two panel distances).""" + t1 = EditDist() + t2 = EditDist(distance="edr") + t3 = EditDist(distance="erp") + + m1 = t1.transform(X1, X2) + m2 = t2.transform(X1, X2) + m3 = t3.transform(X1, X2) + + t12 = t1 + t2 + assert isinstance(t12, CombinedDistance) + assert len(t12.pw_trafos) == 2 + assert t12.get_params()["operation"] == "+" + + m12 = t12.transform(X1, X2) + assert np.allclose(m12, m1 + m2) + + t123 = t1 + t2 + t3 + assert isinstance(t123, CombinedDistance) + assert len(t123.pw_trafos) == 3 + assert t123.get_params()["operation"] == "+" + + m123 = t123.transform(X1, X2) + assert np.allclose(m123, m1 + m2 + m3) + + t123r = t1 + (t2 + t3) + assert isinstance(t123r, CombinedDistance) + assert len(t123r.pw_trafos) == 3 + assert t123r.get_params()["operation"] == "+" + + m123r = t123r.transform(X1, X2) + assert np.allclose(m123r, m1 + m2 + m3) + + +def test_mixed_algebra_dunders(): + """Test mix of algebraic dunders.""" + t1 = EditDist() + t2 = EditDist(distance="edr") + t3 = EditDist(distance="erp") + + m1 = t1.transform(X1, X2) + m2 = t2.transform(X1, X2) + m3 = t3.transform(X1, X2) + + t123 = t1 * t2 + t3 + assert isinstance(t123, CombinedDistance) + assert len(t123.pw_trafos) == 2 + assert t123.get_params()["operation"] == "+" + t12 = t123.pw_trafos[0] + assert isinstance(t12, CombinedDistance) + assert len(t12.pw_trafos) == 2 + assert t12.get_params()["operation"] == "*" + + m123 = t123.transform(X1, X2) + assert np.allclose(m123, m1 * m2 + m3) + + +def test_pw_trafo_pipeline_mul_dunder(): + """Tests creation of pairwise panel trafo pipeliens using mul dunder.""" + from sktime.transformations.series.exponent import ExponentTransformer + + t3 = EditDist() + t1 = ExponentTransformer(2) + t2 = ExponentTransformer(0.5) + + m3 = t3.transform(X1, X2) + + t23 = t2 * t3 + assert isinstance(t23, PwTrafoPanelPipeline) + assert len(t23.transformers) == 1 + + m23 = 
t23.transform(X1, X2) + X1t = t2.clone().fit_transform(X1) + X2t = t2.clone().fit_transform(X2) + m23_manual = t3.transform(X1t, X2t) + assert np.allclose(m23, m23_manual) + + t123 = t1 * t2 * t3 + assert isinstance(t123, PwTrafoPanelPipeline) + assert len(t123.transformers) == 2 + + m123 = t123.transform(X1, X2) + + assert np.allclose(m123, m3) + + +@pytest.mark.parametrize("constant", [0, 1, -0.25]) +def test_dunders_with_constants(constant): + """Tests creation of pairwise panel trafo pipeliens using mul dunder.""" + t = EditDist() + + m = t.transform(X1, X2) + + tplusc = t + constant + + assert isinstance(tplusc, CombinedDistance) + assert len(tplusc.pw_trafos) == 2 + assert tplusc.get_params()["operation"] == "+" + + mplusc = tplusc.transform(X1, X2) + assert np.allclose(m + constant, mplusc) + + ttimesc = t * constant + + assert isinstance(ttimesc, CombinedDistance) + assert len(ttimesc.pw_trafos) == 2 + assert ttimesc.get_params()["operation"] == "*" + + mtimesc = ttimesc.transform(X1, X2) + assert np.allclose(m * constant, mtimesc)