-
-
Notifications
You must be signed in to change notification settings - Fork 1.3k
/
_ytox.py
178 lines (152 loc) · 6.16 KB
/
_ytox.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
"""Use endogeneous as exogeneous features transformer."""
# copyright: sktime developers, BSD-3-Clause License (see LICENSE file)
__author__ = ["fkiraly"]
__all__ = ["YtoX"]
from sktime.transformations.base import BaseTransformer
class YtoX(BaseTransformer):
    """Create exogenous features which are a copy of the endogenous data.

    Replaces exogenous features (``X``) by endogenous data (``y``).

    To *add* instead of *replace*, use ``FeatureUnion``.

    Common use cases include:

    * creating exogenous variables from transformed endogenous variables
    * creating exogenous data from index, if no exogenous data is available
    * manual construction of reduction strategies, in combination with ``YfromX``

    Parameters
    ----------
    subset_index : boolean, optional, default=False
        if True, subsets the output of ``transform`` to the part of ``y``
        whose index labels also occur in ``X.index``,
        i.e., outputs ``y.loc[X.index.intersection(y.index)]``;
        if False, outputs ``y`` unchanged

    Examples
    --------
    Use case: creating exogenous data from index, if no exogenous data is available.

    >>> from sktime.datasets import load_airline
    >>> from sktime.transformations.compose import YtoX
    >>> from sktime.transformations.series.fourier import FourierFeatures
    >>> from sktime.forecasting.arima import ARIMA
    >>> from sktime.forecasting.compose import ForecastingPipeline
    >>>
    >>> # data with no exogenous features
    >>> y = load_airline()
    >>>
    >>> # create a pipeline with Fourier features and ARIMA
    >>> pipe = ForecastingPipeline(
    ...     [
    ...         YtoX(),
    ...         FourierFeatures(sp_list=[24, 24 * 7], fourier_terms_list=[10, 5]),
    ...         ARIMA(order=(1, 1, 1)),  # doctest: +SKIP
    ...     ]
    ... )  # doctest: +SKIP
    >>>
    >>> # fit and forecast, using Fourier features as exogenous data
    >>> pred = pipe.fit_predict(y, fh=[1, 2, 3, 4, 5])  # doctest: +SKIP

    Use case: using lagged endogenous variables as exogenous data.

    >>> from sktime.datasets import load_airline
    >>> from sktime.transformations.compose import YtoX
    >>> from sktime.transformations.series.lag import Lag
    >>> from sktime.transformations.series.impute import Imputer
    >>> from sktime.forecasting.sarimax import SARIMAX
    >>>
    >>> # data with no exogenous features
    >>> y = load_airline()
    >>>
    >>> # create the pipeline
    >>> lagged_y_trafo = YtoX() * Lag(1, index_out="original") * Imputer()
    >>>
    >>> # we need to specify index_out="original" as otherwise ARIMA gets 1 and 2 ahead
    >>> # use lagged_y_trafo to generate X
    >>> forecaster = lagged_y_trafo ** SARIMAX()  # doctest: +SKIP
    >>>
    >>> # fit and forecast next value, with lagged y as exogenous data
    >>> forecaster.fit(y, fh=[1])  # doctest: +SKIP
    >>> y_pred = forecaster.predict()  # doctest: +SKIP

    Use case: using summarized endogenous variables as exogenous data.

    >>> from sktime.datasets import load_airline
    >>> from sktime.transformations.series.summarize import WindowSummarizer
    >>> from sktime.transformations.compose import YtoX
    >>> from sktime.forecasting.compose import make_reduction
    >>> from sktime.forecasting.compose import ForecastingPipeline
    >>> from sklearn.ensemble import GradientBoostingRegressor  # doctest: +SKIP
    >>>
    >>> # data with no exogenous features
    >>> y = load_airline()
    >>>
    >>> # keyword arguments for WindowSummarizer
    >>> kwargs = {
    ...     "lag_feature": {
    ...         "lag": [1],
    ...         "mean": [[1, 3], [3, 6]],
    ...         "std": [[1, 4]],
    ...     },
    ...     "truncate": 'bfill',
    ... }
    >>>
    >>> # create forecaster from sklearn regressor using make_reduction
    >>> forecaster = make_reduction(
    ...     GradientBoostingRegressor(),
    ...     strategy="recursive",
    ...     pooling="global",
    ...     window_length=12,
    ... )  # doctest: +SKIP
    >>>
    >>> # create the pipeline
    >>> pipe = ForecastingPipeline(
    ...     steps=[
    ...         ("ytox", YtoX()),
    ...         ("summarizer", WindowSummarizer(**kwargs)),
    ...         ("forecaster", forecaster),
    ...     ]
    ... )  # doctest: +SKIP
    >>>
    >>> # fit and forecast, with summarized y as exogenous data
    >>> preds = pipe.fit_predict(y=y, fh=range(1, 20))  # doctest: +SKIP
    """

    _tags = {
        "authors": ["fkiraly"],
        "transform-returns-same-time-index": True,
        "skip-inverse-transform": False,
        "univariate-only": False,
        "X_inner_mtype": ["pd.DataFrame", "pd-multiindex", "pd_multiindex_hier"],
        "y_inner_mtype": ["pd.DataFrame", "pd-multiindex", "pd_multiindex_hier"],
        "scitype:y": "both",
        "fit_is_empty": True,
        "requires_X": False,
        "requires_y": True,
    }

    def __init__(self, subset_index=False):
        self.subset_index = subset_index
        super().__init__()

    def _y_as_features(self, X, y):
        """Return ``y``, optionally restricted to the index labels shared with ``X``.

        Shared core logic of ``_transform`` and ``_inverse_transform``,
        which both perform exactly this operation.

        Parameters
        ----------
        X : time series or panel in one of the pd.DataFrame formats
            Only ``X.index`` is read, and only if ``subset_index`` is True
        y : time series or panel in one of the pd.DataFrame formats
            Endogenous data to return

        Returns
        -------
        ``y.loc[X.index.intersection(y.index)]`` if ``subset_index`` is True,
        otherwise ``y`` itself
        """
        if not self.subset_index:
            return y
        # look up only the labels present in both X and y
        shared_index = X.index.intersection(y.index)
        return y.loc[shared_index]

    def _transform(self, X, y=None):
        """Return the endogenous data ``y`` as the new exogenous data.

        private _transform containing core logic, called from transform

        Parameters
        ----------
        X : time series or panel in one of the pd.DataFrame formats
            Data to be transformed; only its index is used,
            and only if ``subset_index`` is True
        y : time series or panel in one of the pd.DataFrame formats
            Endogenous data, returned in place of X

        Returns
        -------
        y, subset to ``X.index.intersection(y.index)`` if ``subset_index`` is True,
        otherwise y unchanged
        """
        return self._y_as_features(X, y)

    def _inverse_transform(self, X, y=None):
        """Inverse transform, implemented as the same operation as ``_transform``.

        Returns ``y`` (optionally subset to the index shared with ``X``);
        the values of ``X`` are not used.

        Parameters
        ----------
        X : Series or Panel of mtype X_inner_mtype
            if X_inner_mtype is list, _inverse_transform must support all types in it
            Data to be inverse transformed; only its index is used,
            and only if ``subset_index`` is True
        y : Series or Panel of mtype y_inner_mtype, optional (default=None)
            Endogenous data, returned in place of X

        Returns
        -------
        y, subset to ``X.index.intersection(y.index)`` if ``subset_index`` is True,
        otherwise y unchanged
        """
        return self._y_as_features(X, y)