/
reduce.py
185 lines (155 loc) · 6.56 KB
/
reduce.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
"""Tabularizer transform, for pipelining."""
# copyright: sktime developers, BSD-3-Clause License (see LICENSE file)
__author__ = ["mloning", "fkiraly", "kcc-lion"]
# Export every public transformer defined in this module.
__all__ = ["Tabularizer", "TimeBinner"]
import warnings
import numpy as np
import pandas as pd
from sktime.datatypes import convert, convert_to
from sktime.transformations.base import BaseTransformer
from sktime.utils.pandas import df_map
class Tabularizer(BaseTransformer):
    """Flatten time series/panel data into a tabular representation.

    Panel data (nested pandas dataframe with numpy arrays or pandas Series in
    its cells, or a 3D numpy array) is converted to the ``numpyflat`` mtype,
    i.e., a flat table that contains only primitives. This makes the data
    usable by estimators that expect plain tabular input (as in sklearn).
    """

    _tags = {
        "authors": ["mloning", "fkiraly", "kcc-lion"],
        "fit_is_empty": True,
        "univariate-only": False,
        # scitype of X passed to transform: Series, or Panel
        "scitype:transform-input": "Series",
        # scitype of the transform output
        "scitype:transform-output": "Primitives",
        # the transform is applied instance by instance
        "scitype:instancewise": True,
        # mtypes of X supported by the inner methods
        "X_inner_mtype": ["nested_univ", "numpy3D"],
        # y is not required by this transformer
        "y_inner_mtype": "None",
    }

    def _transform(self, X, y=None):
        """Convert panel data to its flat, tabular form.

        Parameters
        ----------
        X : pandas DataFrame (nested) or 3D np.ndarray
            panel of time series to transform
        y : ignored argument for interface compatibility

        Returns
        -------
        Xt : X converted to the ``numpyflat`` mtype
            Tabular data with only primitives in cells.
        """
        return convert_to(X, to_type="numpyflat", as_scitype="Panel")

    def inverse_transform(self, X, y=None):
        """Convert flat, tabular data back to a panel.

        Parameters
        ----------
        X : data in the ``numpyflat`` mtype
            Tabular data with primitives in cells.
        y : array-like, optional (default=None)
            Not used in the conversion.

        Returns
        -------
        Xt : X converted to the ``numpy3D`` mtype
            Panel representation of the tabular input.
        """
        return convert(
            X,
            from_type="numpyflat",
            to_type="numpy3D",
            as_scitype="Panel",
        )
class TimeBinner(BaseTransformer):
    """Turns time series/panel data into tabular data based on intervals.

    This estimator converts a nested pandas dataframe containing
    time-series/panel data with pandas Series in dataframe cells into tabular
    data with only primitives in cells. The primitives are calculated based
    on the intervals defined by ``idx``: the values of each series that fall
    into an interval are aggregated by ``aggfunc``.

    The interval membership is computed once, from the index of the series in
    the first row and first column of the data, and reused for all series.

    This is useful for transforming time-series/panel data into a format that
    is accepted by standard tabular learning algorithms (as in sklearn).

    Parameters
    ----------
    idx : pd.IntervalIndex
        IntervalIndex defining the intervals considered by ``aggfunc``
    aggfunc : callable, optional (default=None)
        Function used to aggregate the values in intervals.
        Should have signature 1D -> float. Defaults to ``np.mean`` if None,
        in which case a warning is issued.

    Raises
    ------
    AssertionError
        If ``idx`` is not a ``pd.IntervalIndex``, or if ``aggfunc`` is neither
        None nor callable.
    """

    _tags = {
        "authors": ["kcc-lion", "fkiraly"],
        "maintainers": ["kcc-lion"],
        "fit_is_empty": True,
        "univariate-only": False,
        "scitype:transform-input": "Series",
        # what is the scitype of X: Series, or Panel
        "scitype:transform-output": "Primitives",
        # what is the scitype of y: None (not needed), Primitives, Series, Panel
        "scitype:instancewise": True,  # is this an instance-wise transform?
        "X_inner_mtype": ["nested_univ"],
        # which mtypes do _fit/_predict support for X?
        "y_inner_mtype": "None",  # and for y?
    }

    def __init__(self, idx, aggfunc=None):
        # The checks raise explicitly instead of using ``assert`` so that they
        # are not stripped under ``python -O``; AssertionError is kept as the
        # exception type for backward compatibility.
        if not isinstance(idx, pd.IntervalIndex):
            raise AssertionError("idx should be of type pd.IntervalIndex")

        self.aggfunc = aggfunc
        if aggfunc is None:
            self._aggfunc = np.mean
            warnings.warn(
                "No aggfunc was passed, defaulting to mean",
                stacklevel=2,
            )
        else:
            if not callable(aggfunc):
                raise AssertionError(
                    "aggfunc should be callable with signature 1D -> float"
                )
            # Not every callable has a ``__name__`` (e.g. functools.partial
            # objects), hence the getattr with a default.
            if getattr(aggfunc, "__name__", None) == "<lambda>":
                warnings.warn(
                    "Save and load will not work with lambda functions",
                    stacklevel=2,
                )
            self._aggfunc = aggfunc
        self.idx = idx
        super().__init__()

    def _transform(self, X, y=None):
        """Transform X and return a transformed version.

        private _transform containing core logic, called from transform

        Parameters
        ----------
        X : Series or Panel of mtype X_inner_mtype
            if X_inner_mtype is list, _transform must support all types in it
            Data to be transformed
        y : Series or Panel of mtype y_inner_mtype, default=None
            Additional data, e.g., labels for transformation

        Returns
        -------
        transformed version of X, converted to the ``numpyflat`` mtype
        """
        # Assign every time point to one of the intervals in ``self.idx``. The
        # index of the first series is used for all cells of X.
        bins = pd.cut(X.iloc[0, 0].index, bins=self.idx, include_lowest=True)

        def _aggregate(series):
            # observed=False is passed explicitly: it keeps intervals without
            # any data point, so every series yields one value per interval,
            # independent of the default of the installed pandas version.
            return series.groupby(bins, observed=False).apply(self._aggfunc)

        # apply the per-interval aggregation to the series in every cell of X
        Xt = df_map(X)(_aggregate)
        Xt = convert_to(Xt, to_type="numpyflat", as_scitype="Panel")
        return Xt

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the set of test parameters to return, for use in tests. If no
            special parameters are defined for a value, will return ``"default"`` set.
            There are currently no reserved values for transformers.

        Returns
        -------
        params : dict or list of dict, default = {}
            Parameters to create testing instances of the class
            Each dict are parameters to construct an "interesting" test instance, i.e.,
            ``MyClass(**params)`` or ``MyClass(**params[i])`` creates a valid test
            instance.
            ``create_test_instance`` uses the first (or only) dictionary in ``params``
        """
        # ten left-closed intervals of width 10 covering [0, 100)
        idx = pd.interval_range(start=0, end=100, freq=10, closed="left")
        params = {"idx": idx}
        return params