Skip to content

Commit

Permalink
[ENH] remove unnecessary conversion in TSFreshFeatureExtractor (#4571)
Browse files Browse the repository at this point in the history
This PR:

* removes an unnecessary conversion in `TSFreshFeatureExtractor`: it
would convert first to `nested_univ` and then to `pd-long`. This is
unnecessary; it should convert directly to `pd-long`.
* adds a direct converter from `pd-multiindex` to `pd-long`, removing
the need to rely on the dodgy `nested_univ` data format.

Depends on #4573
  • Loading branch information
fkiraly committed May 17, 2023
1 parent 1784531 commit 89abb93
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 22 deletions.
28 changes: 28 additions & 0 deletions sktime/datatypes/_panel/_convert.py
Expand Up @@ -569,6 +569,30 @@ def from_long_to_nested_adp(obj, store=None):
convert_dict[("pd-long", "nested_univ", "Panel")] = from_nested_to_long_adp


def from_multiindex_to_long(obj, store=None):
    """Convert pd-multiindex Panel to pd-long format.

    Parameters
    ----------
    obj : pd.DataFrame
        panel data in pd-multiindex mtype, i.e., a DataFrame with a
        two-level (instance, time) row MultiIndex and one column per variable
    store : dict, optional (default=None)
        conversion store, unused; present for converter signature uniformity

    Returns
    -------
    pd.DataFrame in pd-long format, with columns, in order:
        the two index levels of ``obj``, then ``"variable"`` and ``"value"``
    """
    # melt all variable columns into (variable, value) pairs;
    # ignore_index=False keeps the (instance, time) MultiIndex intact
    X_long = pd.melt(obj, value_vars=obj.columns, ignore_index=False)
    # move the MultiIndex levels out into leading plain columns
    X_long = X_long.reset_index()

    return X_long


# register direct pd-multiindex -> pd-long converter, avoiding a
# round trip through the nested_univ mtype
convert_dict[("pd-multiindex", "pd-long", "Panel")] = from_multiindex_to_long


def from_long_to_multiindex(obj, store=None):
    """Convert pd-long Panel to pd-multiindex format.

    Parameters
    ----------
    obj : pd.DataFrame
        panel data in pd-long mtype; columns are interpreted by position:
        first = instance identifier, second = time index,
        third = variable name, fourth = observed value
    store : dict, optional (default=None)
        conversion store, unused; present for converter signature uniformity

    Returns
    -------
    pd.DataFrame in pd-multiindex format: row MultiIndex built from the
        first two columns of ``obj``, one column per unique variable
    """
    # first two columns become the (instance, time) row MultiIndex
    ixcols = obj.columns[[0, 1]]
    Xmi = pd.pivot(obj, columns=obj.columns[2], values=obj.columns[3], index=ixcols)

    # pivot names the column axis after the variable column; drop that name
    # so the result looks like an ordinary pd-multiindex DataFrame
    Xmi.columns.name = None

    return Xmi


# register direct pd-long -> pd-multiindex converter, avoiding a
# round trip through the nested_univ mtype
convert_dict[("pd-long", "pd-multiindex", "Panel")] = from_long_to_multiindex


def from_multi_index_to_3d_numpy(X):
"""Convert pandas multi-index Panel to numpy 3D Panel.
Expand Down Expand Up @@ -1097,6 +1121,10 @@ def from_numpyflat_to_numpy3d(obj, store=None):
"numpyflat", "numpy3D", convert_dict, mtype_universe=MTYPE_LIST_PANEL
)

# presumably fills in the remaining pd-long <-> Panel-mtype conversions by
# composing through pd-multiindex, mirroring the numpyflat/numpy3D call
# above — see _extend_conversions for the exact semantics (TODO confirm)
_extend_conversions(
    "pd-long", "pd-multiindex", convert_dict, mtype_universe=MTYPE_LIST_PANEL
)


if _check_soft_dependencies("dask", severity="none"):
from sktime.datatypes._adapter.dask_to_pd import (
Expand Down
32 changes: 10 additions & 22 deletions sktime/transformations/panel/tsfresh.py
Expand Up @@ -5,9 +5,6 @@
__author__ = ["AyushmaanSeth", "mloning", "alwinw", "MatthewMiddlehurst"]
__all__ = ["TSFreshFeatureExtractor", "TSFreshRelevantFeatureExtractor"]

from warnings import warn

from sktime.datatypes._panel._convert import from_nested_to_long
from sktime.transformations.base import BaseTransformer
from sktime.utils.validation import check_n_jobs

Expand All @@ -25,7 +22,6 @@ class _TSFreshFeatureExtractor(BaseTransformer):
"y_inner_mtype": "None", # which mtypes do _fit/_predict support for X?
"fit_is_empty": True, # is fit empty and can be skipped? Yes = True
"python_dependencies": "tsfresh",
"python_version": "<3.10",
}

def __init__(
Expand Down Expand Up @@ -229,6 +225,8 @@ class TSFreshFeatureExtractor(_TSFreshFeatureExtractor):
>>> X_transform2 = ts_custom.fit_transform(X_train) # doctest: +SKIP
"""

# request pd-long as the inner mtype, so _transform receives long-format
# data directly instead of converting via nested_univ first
_tags = {"X_inner_mtype": "pd-long"}

def __init__(
self,
default_fc_parameters="efficient",
Expand Down Expand Up @@ -275,34 +273,24 @@ def _transform(self, X, y=None):
each cell of Xt contains pandas.Series
transformed version of X
"""
# tsfresh requires unique index, returns only values for
# unique index values
if X.index.nunique() < X.shape[0]:
warn(
"tsfresh requires a unique index, but found "
"non-unique. To avoid this warning, please make sure the index of X "
"contains only unique values."
)
X = X.reset_index(drop=True)

Xt = from_nested_to_long(X)

# lazy imports to avoid hard dependency
from tsfresh import extract_features

Xt = extract_features(
Xt,
column_id="index",
column_value="value",
column_kind="column",
column_sort="time_index",
X,
column_id=X.columns[0],
column_value=X.columns[3],
column_kind=X.columns[2],
column_sort=X.columns[1],
**self.default_fc_parameters_,
)

# When using the long input format, tsfresh seems to sort the index,
# here we make sure we return the dataframe in the sort order as the
# input data
return Xt.reindex(X.index)
instances = X.iloc[:, 0].unique()
Xt = Xt.reindex(instances)
return Xt

@classmethod
def get_test_params(cls, parameter_set="default"):
Expand Down

0 comments on commit 89abb93

Please sign in to comment.