Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] generalize AggrDist and FlatDist to allow arbitrary callables, including sklearn kernel functions #3956

Merged
merged 3 commits into from Dec 25, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
72 changes: 56 additions & 16 deletions sktime/dists_kernels/compose_tab_to_panel.py
Expand Up @@ -12,7 +12,10 @@

import numpy as np

from sktime.dists_kernels._base import BasePairwiseTransformerPanel
from sktime.dists_kernels._base import (
BasePairwiseTransformer,
BasePairwiseTransformerPanel,
)
from sktime.utils._testing.deep_equals import deep_equals


Expand Down Expand Up @@ -49,6 +52,16 @@ class AggrDist(BasePairwiseTransformerPanel):
aggfunc(matrix) = aggfunc(np.transpose(matrix))
used for fast computation of the resultant matrix (if symmetric)
if unknown, False is the "safe" option that ensures correctness

Examples
--------
Mean pairwise euclidean distance between between time series
>>> from sktime.dists_kernels import AggrDist, ScipyDist
>>> mean_euc_tsdist = AggrDist(ScipyDist())

Mean pairwise Gaussian kernel between time series
>>> from sklearn.gaussian_process.kernels import RBF
>>> mean_gaussian_tskernel = AggrDist(RBF())
"""

def __init__(
Expand Down Expand Up @@ -96,11 +109,15 @@ def _transform(self, X, X2=None):

aggfunc = self.aggfunc
aggfunc_is_symm = self.aggfunc_is_symm
transformer = self.transformer
if aggfunc is None:
aggfunc = np.mean
aggfunc_is_symm = True

transformer_symm = self.transformer.get_tag("symmetric", False)
if isinstance(transformer, BasePairwiseTransformer):
transformer_symm = transformer.get_tag("symmetric", False)
else:
transformer_symm = False

# whether we know that resulting matrix must be symmetric
# a sufficient condition for this:
Expand All @@ -115,31 +132,36 @@ def _transform(self, X, X2=None):
if all_symm and j < i:
distmat[i, j] = distmat[j, i]
else:
distmat[i, j] = aggfunc(self.transformer.transform(X[i], X2[j]))
distmat[i, j] = aggfunc(self.transformer(X[i], X2[j]))

return distmat

@classmethod
def get_test_params(cls, parameter_set="default"):
"""Test parameters for AggrDist."""
# importing inside to avoid circular dependencies
from sklearn.gaussian_process.kernels import RBF

from sktime.dists_kernels import ScipyDist

return [
{"transformer": ScipyDist(), "aggfunc_is_symm": True},
{"transformer": ScipyDist(), "aggfunc_is_symm": False},
]
params1 = {"transformer": ScipyDist(), "aggfunc_is_symm": True}
params2 = {"transformer": ScipyDist(), "aggfunc_is_symm": False}

# using callable from sklearn
params3 = {"transformer": RBF()}

return [params1, params2, params3]


class FlatDist(BasePairwiseTransformerPanel):
r"""Panel distance from applying tabular distance to flattened time series.
r"""Panel distance or kernel from applying tabular trafo to flattened time series.

Applies the wrapped tabular distance to flattened series.
Applies the wrapped tabular distance or kernel to flattened series.
Flattening is done to a 2D numpy array of shape (n_instances, (n_vars, n_timepts))

Formal details (for real valued objects, mixed typed rows in analogy):
Let :math:`d:\mathbb{R}^k \times \mathbb{R}^{k}\rightarrow \mathbb{R}`
be the pairwise function in `transformer`, when applied to `k`-vectors.
be the pairwise function in `transformer`, when applied to `k`-vectors
(here, :math:`d` could be a distance function or a kernel function).
Let :math:`x_1, \dots, x_N\in \mathbb{R}^{n \times \ell}`,
:math:`y_1, \dots y_M \in \mathbb{R}^{n \times \ell}` be collections of matrices,
representing time series panel valued inputs `X` and `X2`, as follows:
Expand All @@ -154,7 +176,18 @@ class FlatDist(BasePairwiseTransformerPanel):

Parameters
----------
transformer: pairwise transformer of BasePairwiseTransformer scitype
transformer: pairwise transformer of BasePairwiseTransformer scitype, or
callable np.ndarray (n_samples, d) x (n_samples, d) -> (n_samples x n_samples)

Examples
--------
Euclidean distance between time series of equal length, considered as vectors
>>> from sktime.dists_kernels import FlatDist, ScipyDist
>>> euc_tsdist = FlatDist(ScipyDist())

Gaussian kernel between time series of equal length, considered as vectors
>>> from sklearn.gaussian_process.kernels import RBF
>>> flat_gaussian_tskernel = FlatDist(RBF())
"""

_tags = {
Expand Down Expand Up @@ -196,14 +229,21 @@ def _transform(self, X, X2=None):
X2 = X2.reshape(n_inst2, n_vars2 * n_tts2)

if deep_equals(X, X2):
return self.transformer.transform(X)
return self.transformer(X)
else:
return self.transformer.transform(X, X2)
return self.transformer(X, X2)

@classmethod
def get_test_params(cls, parameter_set="default"):
"""Test parameters for FlatDist."""
# importing inside to avoid circular dependencies
from sklearn.gaussian_process.kernels import RBF

from sktime.dists_kernels import ScipyDist

return {"transformer": ScipyDist()}
# using sktime pairwise transformer
params1 = {"transformer": ScipyDist()}

# using callable from sklearn
params2 = {"transformer": RBF()}

return [params1, params2]