From 8610203849b40486bbb01fc6025934b38b1e91a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sat, 9 Apr 2022 11:28:24 +0100 Subject: [PATCH 1/5] [ENH] `NaiveVariance`: verbose arg and extended docstring (#2395) This small PR adds two things to `NaiveVariance`: * a `verbose` arg which is by default `False`, suppressing the many warnings that are raised on toy data * an explanation in the docstring which methods are added and how the wrapped estimator relates to the composite --- sktime/forecasting/naive.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/sktime/forecasting/naive.py b/sktime/forecasting/naive.py index b81b1101959..a5bae3a1b9e 100644 --- a/sktime/forecasting/naive.py +++ b/sktime/forecasting/naive.py @@ -417,10 +417,17 @@ class NaiveVariance(BaseForecaster): - And for the covariance matrix prediction, the formula becomes :math:`Cov(y_k, y_l)=\frac{\sum_{i=1}^N \hat{r}_{k,k+i}*\hat{r}_{l,l+i}}{N}`. + The resulting forecaster will implement + `predict_interval`, `predict_quantiles`, `predict_var`, and `predict_proba`, + even if the wrapped forecaster `forecaster` did not have this capability; + for point forecasts (`predict`), behaves like the wrapped forecaster. + Parameters ---------- forecaster : estimator - Estimators to apply to the input series. 
+ Estimator to which probabilistic forecasts are being added + verbose : bool, optional, default=False + whether to print warnings if windows with too few data points occur Examples -------- @@ -446,9 +453,10 @@ class NaiveVariance(BaseForecaster): # deprecated and likely to be removed in 0.12.0 } - def __init__(self, forecaster): + def __init__(self, forecaster, verbose=False): self.forecaster = forecaster + self.verbose = verbose super(NaiveVariance, self).__init__() tags_to_clone = [ @@ -502,7 +510,7 @@ def _predict_quantiles(self, fh, X=None, alpha=0.5): pred_var = self.predict_var(fh, X) z_scores = norm.ppf(alpha) - errors = [pred_var ** 0.5 * z for z in z_scores] + errors = [pred_var**0.5 * z for z in z_scores] index = pd.MultiIndex.from_product([["Quantiles"], alpha]) pred_quantiles = pd.DataFrame(columns=index) @@ -545,9 +553,11 @@ def _predict_var(self, fh, X=None, cov=False): try: forecaster.fit(subset) except ValueError: - warn( - f"Couldn't fit the model on time series of length {len(subset)}.\n" - ) + if self.verbose: + warn( + f"Couldn't fit the model on " + f"time series window length {len(subset)}.\n" + ) continue y_true = self._y[id:] # subset on which we predict From c8672f90eb6b76f150930e3221bb604739291b81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sat, 9 Apr 2022 11:29:17 +0100 Subject: [PATCH 2/5] [BUG] fixed get_time_index for most mtypes (#2380) This PR fixes an unreported bug with `get_time_index`, as it would not work with most mtypes that it had claimed for in the docstring. From 1e844137729f6786b6a6ab440dc2e1a83c869ae9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sat, 9 Apr 2022 12:49:01 +0100 Subject: [PATCH 3/5] [ENH] extend `_HeterogeneousMetaEstimator` estimator to allow mixed tuple/estimator list (#2406) This PR extends `_HeterogeneousMetaEstimator` estimator to optionally allow mixed tuple/estimator lists. 
Detail changes: * `_check_estimators` gets a new `allow_mix` argument, whether lists of mixed estimator/tuple type should be allowed. Default is now `True`. * `get_estimator_sth` functions are refactored and extended to yield the correct result even in the mixed case * `_is_name_and_trafo` utility is generalized to `_is_name_and_est` and moved to the `_HeterogeneousMetaEstimator` As a result, concatenating two pipelines with different internal types (one tuple, one only estimator) should not cause problems anymore. Since this relaxes input assumptions while retaining output guarantees for previously admissible inputs, this is fully downwards compatible and does not require deprecation. --- sktime/base/_meta.py | 134 +++++++++++++++++++++--------- sktime/transformations/compose.py | 22 +---- 2 files changed, 99 insertions(+), 57 deletions(-) diff --git a/sktime/base/_meta.py b/sktime/base/_meta.py index 51dce7547d4..6a0b0ed5575 100644 --- a/sktime/base/_meta.py +++ b/sktime/base/_meta.py @@ -7,6 +7,7 @@ __all__ = ["_HeterogenousMetaEstimator"] from abc import ABCMeta +from inspect import isclass from sklearn import clone @@ -100,7 +101,35 @@ def _subset_dict_keys(self, dict_to_subset, keys): subsetted_dict = dict((k, dict_to_subset[k]) for k in keys_in_both) return subsetted_dict - def _check_estimators(self, estimators, attr_name="steps", cls_type=None): + @staticmethod + def _is_name_and_est(obj, cls_type=None): + """Check whether obj is a tuple of type (str, cls_type). + + Parameters + ---------- + cls_type : class or tuple of class, optional. Default = BaseEstimator. 
+ class(es) that all estimators are checked to be an instance of + + Returns + ------- + bool : True if obj is (str, cls_type) tuple, False otherise + """ + if cls_type is None: + cls_type = BaseEstimator + if not isinstance(obj, tuple) or len(obj) != 2: + return False + if not isinstance(obj[0], str) or not isinstance(obj[1], cls_type): + return False + return True + + def _check_estimators( + self, + estimators, + attr_name="steps", + cls_type=None, + allow_mix=True, + clone_ests=True, + ): """Check that estimators is a list of estimators or list of str/est tuples. Parameters @@ -110,8 +139,12 @@ def _check_estimators(self, estimators, attr_name="steps", cls_type=None): estimators should inherit from cls_type class attr_name : str, optional. Default = "steps" Name of checked attribute in error messages - cls_type : class, optional. Default = BaseEstimator. - class that all estimators are checked to be an instance of + cls_type : class or tuple of class, optional. Default = BaseEstimator. + class(es) that all estimators are checked to be an instance of + allow_mix : boolean, optional. Default = True. + whether mix of estimator and (str, estimator) is allowed in `estimators` + clone_ests : boolean, optional. Default = True. + whether estimators in return are cloned (True) or references (False). Returns ------- @@ -129,9 +162,15 @@ class that all estimators are checked to be an instance of " of estimators, or a list of (string, estimator) tuples. " ) if cls_type is None: + msg += f"All estimators in '{attr_name}' must be of type BaseEstimator." cls_type = BaseEstimator + elif isclass(cls_type) or isinstance(cls_type, tuple): + msg += ( + f"All estimators in '{attr_name}' must be of type " + f"{cls_type.__name__}." + ) else: - msg += f"All estimators must be of type {cls_type}." 
+ raise TypeError("cls_type must be a class or tuple of classes") if ( estimators is None @@ -140,27 +179,60 @@ class that all estimators are checked to be an instance of ): raise TypeError(msg) - if not isinstance(estimators[0], (cls_type, tuple)): + def is_est_is_tuple(obj): + """Check whether obj is estimator of right type, or (str, est) tuple.""" + is_est = isinstance(obj, cls_type) + is_tuple = self._is_name_and_est(obj, cls_type) + + return is_est, is_tuple + + if not all(any(is_est_is_tuple(x)) for x in estimators): raise TypeError(msg) - if isinstance(estimators[0], cls_type): - if not all(isinstance(est, cls_type) for est in estimators): - raise TypeError(msg) - if isinstance(estimators[0], tuple): - if not all(isinstance(est, tuple) for est in estimators): - raise TypeError(msg) - if not all(isinstance(est[0], str) for est in estimators): - raise TypeError(msg) - if not all(isinstance(est[1], cls_type) for est in estimators): - raise TypeError(msg) + msg_no_mix = ( + f"elements of {attr_name} must either all be estimators, " + f"or all (str, estimator) tuples, mix of the two is not allowed" + ) + + if not allow_mix and not all(is_est_is_tuple(x)[0] for x in estimators): + if not all(is_est_is_tuple(x)[1] for x in estimators): + raise TypeError(msg_no_mix) + + return self._get_estimator_tuples(estimators, clone_ests=clone_ests) - return self._get_estimator_tuples(estimators, clone_ests=True) + def _coerce_estimator_tuple(self, obj, clone_est=False): + """Coerce estimator or (str, estimator) tuple to (str, estimator) tuple. + + Parameters + ---------- + obj : estimator or (str, estimator) tuple + assumes that this has been checked, no checks are performed + clone_est : boolean, optional. Default = False. + Whether to return clone of estimator in obj (True) or a reference (False). 
+ + Returns + ------- + est_tuple : (str, stimator tuple) + obj if obj was (str, estimator) tuple + (obj class name, obj) if obj was estimator + """ + if isinstance(obj, tuple): + est = obj[1] + name = obj[0] + else: + est = obj + name = type(obj).__name__ + + if clone_est: + return (name, clone(est)) + else: + return (name, est) def _get_estimator_list(self, estimators): """Return list of estimators, from a list or tuple. - Arguments - --------- + Parameters + ---------- estimators : list of estimators, or list of (str, estimator tuples) Returns @@ -168,16 +240,13 @@ def _get_estimator_list(self, estimators): list of estimators - identical with estimators if list of estimators if list of (str, estimator) tuples, the str get removed """ - if isinstance(estimators[0], tuple): - return [x[1] for x in estimators] - else: - return estimators + return [self._coerce_estimator_tuple(x)[1] for x in estimators] def _get_estimator_names(self, estimators, make_unique=False): """Return names for the estimators, optionally made unique. 
- Arguments - --------- + Parameters + ---------- estimators : list of estimators, or list of (str, estimator tuples) make_unique : bool, optional, default=False whether names should be made unique in the return @@ -188,18 +257,7 @@ def _get_estimator_names(self, estimators, make_unique=False): names for estimators in estimators if make_unique=True, made unique using _make_strings_unique """ - if estimators is None or len(estimators) == 0: - names = [] - elif isinstance(estimators[0], tuple): - names = [x[0] for x in estimators] - elif isinstance(estimators[0], BaseEstimator): - names = [type(e).__name__ for e in estimators] - else: - raise RuntimeError( - "unreachable condition in _get_estimator_names, " - " likely input assumptions are violated," - " run _check_estimators before running _get_estimator_names" - ) + names = [self._coerce_estimator_tuple(x)[0] for x in estimators] if make_unique: names = self._make_strings_unique(names) return names @@ -207,8 +265,8 @@ def _get_estimator_names(self, estimators, make_unique=False): def _get_estimator_tuples(self, estimators, clone_ests=False): """Return list of estimator tuples, from a list or tuple. 
- Arguments - --------- + Parameters + ---------- estimators : list of estimators, or list of (str, estimator tuples) clone_ests : bool, whether estimators get cloned in the process diff --git a/sktime/transformations/compose.py b/sktime/transformations/compose.py index 73ea226ec8c..c5d4b46378d 100644 --- a/sktime/transformations/compose.py +++ b/sktime/transformations/compose.py @@ -171,7 +171,7 @@ def __mul__(self, other): elif isinstance(other, BaseTransformer): new_names = names + (type(other).__name__,) new_trafos = trafos + (other,) - elif self._is_name_and_trafo(other): + elif self._is_name_and_est(other, BaseTransformer): other_name = other[0] other_trafo = other[1] new_names = names + (other_name,) @@ -210,7 +210,7 @@ def __rmul__(self, other): elif isinstance(other, BaseTransformer): new_names = (type(other).__name__,) + names new_trafos = (other,) + trafos - elif self._is_name_and_trafo(other): + elif self._is_name_and_est(other, BaseTransformer): other_name = other[0] other_trafo = other[1] new_names = (other_name,) + names @@ -224,14 +224,6 @@ def __rmul__(self, other): else: return TransformerPipeline(steps=list(zip(new_names, new_trafos))) - @staticmethod - def _is_name_and_trafo(obj): - if not isinstance(obj, tuple) or len(obj) != 2: - return False - if not isinstance(obj[0], str) or not isinstance(obj[1], BaseTransformer): - return False - return True - def _fit(self, X, y=None): """Fit transformer to X and y. 
@@ -536,7 +528,7 @@ def __add__(self, other): elif isinstance(other, BaseTransformer): new_names = names + (type(other).__name__,) new_trafos = trafos + (other,) - elif self._is_name_and_trafo(other): + elif self._is_name_and_est(other, BaseTransformer): other_name = other[0] other_trafo = other[1] new_names = names + (other_name,) @@ -550,14 +542,6 @@ def __add__(self, other): else: return FeatureUnion(transformer_list=list(zip(new_names, new_trafos))) - @staticmethod - def _is_name_and_trafo(obj): - if not isinstance(obj, tuple) or len(obj) != 2: - return False - if not isinstance(obj[0], str) or not isinstance(obj[1], BaseTransformer): - return False - return True - def _fit(self, X, y=None): """Fit transformer to X and y. From 0f7a3c9c38ecbaae126d1b4dafa902e62b041095 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sat, 9 Apr 2022 12:49:22 +0100 Subject: [PATCH 4/5] [ENH] post-processing in `TransformedTargetForecaster`, dunder method for (transformed `y`) forecasting pipelines (#2404) This PR: * adds the capability to `TransformedTargetForecaster` to postprocess the forecasts with transformers. * adds dunder `*` feature to construct forecasting pipelines with automatic name generation, similar to classifier and transformer pipeline This allows, for instance, reconciliation of results, and quick constructions of reconcilers, e.g., `aggregate * my_forecaster * my_reconciler`. In detail makes the following changes to `TransformedTargetForecaster` and `BaseForecaster`: * in line with `ClassifierPipeline` and `TransformerPipeline`, allows list of estimators as input, and name/estimator tuples with non-unique names. In either case, unique names are automatically generated. * adds dunder method `*` to forecasters via `BaseForecaster`. Any forecaster, when multiplied with transformers, creates a non-nested `TransformedTargetForecaster`. Earlier transformers preprocess and inverse transform; later transformers postprocess. 
--- sktime/forecasting/base/_base.py | 62 +++++ sktime/forecasting/compose/_pipeline.py | 320 ++++++++++++++++++++---- 2 files changed, 334 insertions(+), 48 deletions(-) diff --git a/sktime/forecasting/base/_base.py b/sktime/forecasting/base/_base.py index b1dcb0649d7..1620c0b4bd1 100644 --- a/sktime/forecasting/base/_base.py +++ b/sktime/forecasting/base/_base.py @@ -108,6 +108,68 @@ def __init__(self): super(BaseForecaster, self).__init__() + def __mul__(self, other): + """Magic * method, return (right) concatenated TransformedTargetForecaster. + + Implemented for `other` being a transformer, otherwise returns `NotImplemented`. + + Parameters + ---------- + other: `sktime` transformer, must inherit from BaseTransformer + otherwise, `NotImplemented` is returned + + Returns + ------- + TransformedTargetForecaster object, + concatenation of `self` (first) with `other` (last). + not nested, contains only non-TransformerPipeline `sktime` transformers + """ + from sktime.forecasting.compose import TransformedTargetForecaster + from sktime.transformations.base import BaseTransformer + from sktime.transformations.series.adapt import TabularToSeriesAdaptor + from sktime.utils.sklearn import is_sklearn_transformer + + # we wrap self in a pipeline, and concatenate with the other + # the TransformedTargetForecaster does the rest, e.g., dispatch on other + if isinstance(other, BaseTransformer): + self_as_pipeline = TransformedTargetForecaster(steps=[self]) + return self_as_pipeline * other + elif is_sklearn_transformer(other): + return self * TabularToSeriesAdaptor(other) + else: + return NotImplemented + + def __rmul__(self, other): + """Magic * method, return (left) concatenated TransformerPipeline. + + Implemented for `other` being a transformer, otherwise returns `NotImplemented`. 
+ + Parameters + ---------- + other: `sktime` transformer, must inherit from BaseTransformer + otherwise, `NotImplemented` is returned + + Returns + ------- + TransformedTargetForecaster object, + concatenation of `other` (first) with `self` (last). + not nested, contains only non-TransformerPipeline `sktime` steps + """ + from sktime.forecasting.compose import TransformedTargetForecaster + from sktime.transformations.base import BaseTransformer + from sktime.transformations.series.adapt import TabularToSeriesAdaptor + from sktime.utils.sklearn import is_sklearn_transformer + + # we wrap self in a pipeline, and concatenate with the other + # the TransformedTargetForecaster does the rest, e.g., dispatch on other + if isinstance(other, BaseTransformer): + self_as_pipeline = TransformedTargetForecaster(steps=[self]) + return other * self_as_pipeline + elif is_sklearn_transformer(other): + return TabularToSeriesAdaptor(other) * self + else: + return NotImplemented + def fit(self, y, X=None, fh=None): """Fit forecaster to training data. 
diff --git a/sktime/forecasting/compose/_pipeline.py b/sktime/forecasting/compose/_pipeline.py index 1540c493881..4222548dde9 100644 --- a/sktime/forecasting/compose/_pipeline.py +++ b/sktime/forecasting/compose/_pipeline.py @@ -10,7 +10,8 @@ from sktime.base import _HeterogenousMetaEstimator from sktime.forecasting.base._base import BaseForecaster -from sktime.transformations.base import BaseTransformer, _SeriesToSeriesTransformer +from sktime.registry import scitype +from sktime.transformations.base import _SeriesToSeriesTransformer from sktime.utils.validation.series import check_series @@ -18,45 +19,66 @@ class _Pipeline( BaseForecaster, _HeterogenousMetaEstimator, ): - def _check_steps(self): + """Abstract class for forecasting pipelines.""" + + def _get_pipeline_scitypes(self, estimators): + """Get list of scityes (str) from names/estimator list.""" + return [scitype(x[1]) for x in estimators] + + def _get_forecaster_index(self, estimators): + """Get the index of the first forecaster in the list.""" + return self._get_pipeline_scitypes(estimators).index("forecaster") + + def _check_steps(self, estimators, allow_postproc=False): """Check Steps. Parameters ---------- - self : an instance of self + estimators : list of estimators, or list of (name, estimator) pairs + allow_postproc : bool, optional, default=False + whether transformers after the forecaster are allowed Returns ------- - step : Returns step. 
+ step : list of (name, estimator) pairs, estimators are cloned (not references) + if estimators was a list of (str, estimator) tuples, then just cloned + if was a list of estimators, then str are generated via _get_estimator_names + + Raises + ------ + TypeError if names in `estimators` are not unique + TypeError if estimators in `estimators` are not all forecaster or transformer + TypeError if there is not exactly one forecaster in `estimators` + TypeError if not allow_postproc and forecaster is not last estimator """ - names, estimators = zip(*self.steps) + estimator_tuples = self._get_estimator_tuples(estimators, clone_ests=True) + names, estimators = zip(*estimator_tuples) # validate names self._check_names(names) - # validate estimators - transformers = estimators[:-1] - forecaster = estimators[-1] - - valid_transformer_type = BaseTransformer - for transformer in transformers: - if not isinstance(transformer, valid_transformer_type): - raise TypeError( - f"All intermediate steps should be " - f"instances of {valid_transformer_type}, " - f"but transformer: {transformer} is not." - ) - - valid_forecaster_type = BaseForecaster - if not isinstance(forecaster, valid_forecaster_type): + scitypes = self._get_pipeline_scitypes(estimator_tuples) + if not set(scitypes).issubset(["forecaster", "transformer"]): raise TypeError( - f"Last step of {self.__class__.__name__} must be of type: " - f"{valid_forecaster_type}, " - f"but forecaster: {forecaster} is not." 
+ f"estimators passed to {type(self).__name__} " + f"must be either transformer or forecaster" + ) + if scitypes.count("forecaster") != 1: + raise TypeError( + f"exactly one forecaster must be contained in the chain, " + f"but found {scitypes.count('forecaster')}" + ) + + forecaster_ind = self._get_forecaster_index(estimator_tuples) + + if not allow_postproc and forecaster_ind != len(estimators) - 1: + TypeError( + f"in {type(self).__name__}, last estimator must be a forecaster, " + f"but found a transformer" ) # Shallow copy - return list(self.steps) + return estimator_tuples def _iter_transformers(self, reverse=False): @@ -225,9 +247,8 @@ class ForecastingPipeline(_Pipeline): def __init__(self, steps): self.steps = steps - self.steps_ = self._check_steps() + self.steps_ = self._check_steps(steps, allow_postproc=False) super(ForecastingPipeline, self).__init__() - _, forecaster = self.steps[-1] tags_to_clone = [ "scitype:y", # which y are fine? univariate/multivariate/both "ignores-exogeneous-X", # does estimator ignore the exogeneous X? @@ -237,8 +258,8 @@ def __init__(self, steps): "X-y-must-have-same-index", # can estimator handle different X/y index? "enforce_index_type", # index type that needs to be enforced in X/y ] - self.clone_tags(forecaster, tags_to_clone) - self._anytagis_then_set("fit_is_empty", False, True, steps) + self.clone_tags(self.forecaster_, tags_to_clone) + self._anytagis_then_set("fit_is_empty", False, True, self.steps_) @property def forecaster_(self): @@ -490,10 +511,64 @@ class TransformedTargetForecaster(_Pipeline, _SeriesToSeriesTransformer): X data is not transformed. If you want to transform X, please use the ForecastingPipeline. 
+ For a list `t1`, `t2`, ..., `tN`, `f`, `tp1`, `tp2`, ..., `tpM` + where `t[i]` and `tp[i]` are transformers (`t` to pre-, `tp` to post-process), + and `f` is an sktime forecaster, + the pipeline behaves as follows: + `fit(y, X, fh)` - changes state by running `t1.fit_transform` with `X=y`, `y=X` + then `t2.fit_transform` on `X=` the output of `t1.fit_transform`, `y=X`, etc + sequentially, with `t[i]` receiving the output of `t[i-1]` as `X`, + then running `f.fit` with `y` being the output of `t[N]`, and `X=X`, + then running `tp1.fit_transform` with `X=` the output of `t[N]`, `y=X`, + then `tp2.fit_transform` on `X=` the output of `tp1.fit_transform`, etc + sequentially, with `tp[i]` receiving the output of `tp[i-1]`, + `predict(X, fh)` - result is of executing `f.predict`, with `X=X`, `fh=fh`, + then running `tp1.inverse_transform` with `X=` the output of `f`, `y=X`, + then `t2.inverse_transform` on `X=` the output of `t1.inverse_transform`, etc + sequentially, with `t[i]` receiving the output of `t[i-1]` as `X`, + then running `tp1.fit_transform` with `X=` the output of `t[N]s`, `y=X`, + then `tp2.fit_transform` on `X=` the output of `tp1.fit_transform`, etc + sequentially, with `tp[i]` receiving the output of `tp[i-1]`, + `predict_interval(X, fh)`, `predict_quantiles(X, fh)` - as `predict(X, fh)`, + with `predict_interval` or `predict_quantiles` substituted for `predict` + `predict_var`, `predict_proba` - uses base class default to obtain + crude estimates from `predict_quantiles`. + Recommended to replace with better custom implementations if needed. 
+ + `get_params`, `set_params` uses `sklearn` compatible nesting interface + if list is unnamed, names are generated as names of classes + if names are non-unique, `f"_{str(i)}"` is appended to each name string + where `i` is the total count of occurrence of a non-unique string + inside the list of names leading up to it (inclusive) + + `TransformedTargetForecaster` can also be created by using the magic multiplication + on any forecaster, i.e., if `my_forecaster` inherits from `BaseForecaster`, + and `my_t1`, `my_t2`, `my_tp` inherit from `BaseTransformer`, + then, for instance, `my_t1 * my_t2 * my_forecaster * my_tp` + will result in the same object as obtained from the constructor + `TransformedTargetForecaster([my_t1, my_t2, my_forecaster, my_tp])` + magic multiplication can also be used with (str, transformer) pairs, + as long as one element in the chain is a transformer + Parameters ---------- - steps : list - List of tuples like ("name", forecaster/transformer) + steps : list of sktime transformers and forecasters, or + list of tuples (str, estimator) of sktime transformers or forecasters + must contain exactly one forecaster + these are "blueprint" transformers, states do not change when `fit` is called + + Attributes + ---------- + steps_ : list of tuples (str, estimator) of sktime transformers or forecasters + clones of estimators in `steps` which are fitted in the pipeline + is always in (str, estimator) format, even if `steps` is just a list + strings not passed in `steps` are replaced by unique generated strings + i-th transformer in `steps_` is clone of i-th in `steps` + forecaster_ : estimator, reference to the unique forecaster in steps_ + transformers_pre_ : list of tuples (str, transformer) of sktime transformers + reference to pairs in steps_ that precede forecaster_ + transformers_post_ : list of tuples (str, transformer) of sktime transformers + reference to pairs in steps_ that succeed forecaster_ Examples -------- @@ -502,14 +577,31 @@ class 
TransformedTargetForecaster(_Pipeline, _SeriesToSeriesTransformer): >>> from sktime.forecasting.compose import TransformedTargetForecaster >>> from sktime.transformations.series.impute import Imputer >>> from sktime.transformations.series.detrend import Deseasonalizer + >>> from sktime.transformations.series.exponent import ExponentTransformer >>> y = load_airline() + + Example 1: string/estimator pairs >>> pipe = TransformedTargetForecaster(steps=[ ... ("imputer", Imputer(method="mean")), ... ("detrender", Deseasonalizer()), - ... ("forecaster", NaiveForecaster(strategy="drift"))]) + ... ("forecaster", NaiveForecaster(strategy="drift")), + ... ]) >>> pipe.fit(y) TransformedTargetForecaster(...) >>> y_pred = pipe.predict(fh=[1,2,3]) + + Example 2: without strings + >>> pipe = TransformedTargetForecaster([ + ... Imputer(method="mean"), + ... Deseasonalizer(), + ... NaiveForecaster(strategy="drift"), + ... ExponentTransformer(), + ... ]) + + Example 3: using the dunder method + >>> forecaster = NaiveForecaster(strategy="drift") + >>> imputer = Imputer(method="mean") + >>> pipe = imputer * Deseasonalizer() * forecaster * ExponentTransformer() """ _required_parameters = ["steps"] @@ -525,9 +617,10 @@ class TransformedTargetForecaster(_Pipeline, _SeriesToSeriesTransformer): def __init__(self, steps): self.steps = steps - self.steps_ = self._check_steps() + self.steps_ = self._check_steps(steps, allow_postproc=True) super(TransformedTargetForecaster, self).__init__() - _, forecaster = self.steps[-1] + + # set the tags based on forecaster tags_to_clone = [ "scitype:y", # which y are fine? univariate/multivariate/both "ignores-exogeneous-X", # does estimator ignore the exogeneous X? @@ -537,13 +630,129 @@ def __init__(self, steps): "X-y-must-have-same-index", # can estimator handle different X/y index? 
"enforce_index_type", # index type that needs to be enforced in X/y ] - self.clone_tags(forecaster, tags_to_clone) - self._anytagis_then_set("fit_is_empty", False, True, steps) + self.clone_tags(self.forecaster_, tags_to_clone) + self._anytagis_then_set("fit_is_empty", False, True, self.steps_) @property def forecaster_(self): - """Return reference to the forecaster in the pipeline. Valid after _fit.""" - return self.steps_[-1][1] + """Return reference to the forecaster in the pipeline. + + Returns + ------- + sktime forecaster + reference to unique forecaster in steps_ (without the name) + """ + return self.steps_[self._get_forecaster_index(self.steps_)][1] + + @property + def transformers_pre_(self): + """Return reference to the list of pre-forecast transformers. + + Returns + ------- + list of tuples (str, estimator) of sktime transformers + reference to tuples that come before the unique (str, forecaster) in steps_ + """ + return self.steps_[: self._get_forecaster_index(self.steps_)] + + @property + def transformers_post_(self): + """Return reference to the list of post-forecast transformers. + + Returns + ------- + list of tuples (str, estimator) of sktime transformers + reference to tuples that come after the unique (str, forecaster) in steps_ + """ + return self.steps_[(1 + self._get_forecaster_index(self.steps_)) :] + + def __mul__(self, other): + """Magic * method, return (right) concatenated TransformedTargetForecaster. + + Implemented for `other` being a transformer, otherwise returns `NotImplemented`. + + Parameters + ---------- + other: `sktime` transformer, must inherit from BaseTransformer + otherwise, `NotImplemented` is returned + + Returns + ------- + TransformedTargetForecaster object, + concatenation of `self` (first) with `other` (last). 
+ not nested, contains only non-TransformerPipeline `sktime` transformers + """ + from sktime.transformations.base import BaseTransformer + from sktime.transformations.compose import TransformerPipeline + + # we don't use names but _get_estimator_names to get the *original* names + # to avoid multiple "make unique" calls which may grow strings too much + _, ests = zip(*self.steps_) + names = tuple(self._get_estimator_names(self.steps)) + if isinstance(other, TransformerPipeline): + _, trafos_o = zip(*other.steps_) + names_o = tuple(other._get_estimator_names(other.steps)) + new_names = names + names_o + new_ests = ests + trafos_o + elif isinstance(other, BaseTransformer): + new_names = names + (type(other).__name__,) + new_ests = ests + (other,) + elif self._is_name_and_est(other, BaseTransformer): + other_name = other[0] + other_trafo = other[1] + new_names = names + (other_name,) + new_ests = ests + (other_trafo,) + else: + return NotImplemented + + # if all the names are equal to class names, we eat them away + if all(type(x[1]).__name__ == x[0] for x in zip(new_names, new_ests)): + return TransformedTargetForecaster(steps=list(new_ests)) + else: + return TransformedTargetForecaster(steps=list(zip(new_names, new_ests))) + + def __rmul__(self, other): + """Magic * method, return (left) concatenated TransformerPipeline. + + Implemented for `other` being a transformer, otherwise returns `NotImplemented`. + + Parameters + ---------- + other: `sktime` transformer, must inherit from BaseTransformer + otherwise, `NotImplemented` is returned + + Returns + ------- + TransformedTargetForecaster object, + concatenation of `other` (first) with `self` (last). 
+ not nested, contains only non-TransformerPipeline `sktime` steps + """ + from sktime.transformations.base import BaseTransformer + from sktime.transformations.compose import TransformerPipeline + + _, ests = zip(*self.steps_) + names = tuple(self._get_estimator_names(self.steps)) + if isinstance(other, TransformerPipeline): + _, trafos_o = zip(*other.steps_) + names_o = tuple(other._get_estimator_names(other.steps)) + new_names = names_o + names + new_ests = trafos_o + ests + elif isinstance(other, BaseTransformer): + new_names = (type(other).__name__,) + names + new_ests = (other,) + ests + elif self._is_name_and_est(other, BaseTransformer): + other_name = other[0] + other_trafo = other[1] + new_names = (other_name,) + names + new_ests = (other_trafo,) + ests + else: + return NotImplemented + + # if all the names are equal to class names, we eat them away + if all(type(x[1]).__name__ == x[0] for x in zip(new_names, new_ests)): + return TransformedTargetForecaster(steps=list(new_ests)) + else: + return TransformedTargetForecaster(steps=list(zip(new_names, new_ests))) def _fit(self, y, X=None, fh=None): """Fit to training data. @@ -561,17 +770,20 @@ def _fit(self, y, X=None, fh=None): ------- self : returns an instance of self. 
""" - # transform - for step_idx, name, transformer in self._iter_transformers(): - t = clone(transformer) + self.steps_ = self._get_estimator_tuples(self.steps, clone_ests=True) + + # transform pre + for _, t in self.transformers_pre_: y = t.fit_transform(X=y, y=X) - self.steps_[step_idx] = (name, t) # fit forecaster - name, forecaster = self.steps[-1] - f = clone(forecaster) + f = self.forecaster_ f.fit(y=y, X=X, fh=fh) - self.steps_[-1] = (name, f) + + # transform post + for _, t in self.transformers_post_: + y = t.fit_transform(X=y, y=X) + return self def _predict(self, fh=None, X=None): @@ -592,6 +804,11 @@ def _predict(self, fh=None, X=None): y_pred = self.forecaster_.predict(fh=fh, X=X) # inverse transform y_pred y_pred = self._get_inverse_transform(y_pred, X) + + # transform post + for _, t in self.transformers_post_: + y_pred = t.transform(X=y_pred, y=X) + return y_pred def _update(self, y, X=None, update_params=True): @@ -607,12 +824,19 @@ def _update(self, y, X=None, update_params=True): ------- self : an instance of self """ - for _, _, transformer in self._iter_transformers(): - if hasattr(transformer, "update"): - transformer.update(X=y, y=X, update_params=update_params) - y = transformer.transform(X=y, y=X) + # transform pre + for _, t in self.transformers_pre_: + if hasattr(t, "update"): + t.update(X=y, y=X, update_params=update_params) + y = t.transform(X=y, y=X) self.forecaster_.update(y=y, X=X, update_params=update_params) + + # transform post + for _, t in self.transformers_post_: + t.update(X=y, y=X, update_params=update_params) + y = t.transform(X=y, y=X) + return self def transform(self, Z, X=None): From 84f79557fc4761a5be092c776f3f94133e5bd409 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sat, 9 Apr 2022 12:50:52 +0100 Subject: [PATCH 5/5] [ENH] tests for `check_estimator` tests passing (#2408) This PR adds tests to test that tests conducted by the `check_estimator` utlity are passing, on three example estimators that 
are known to pass all tests in the full test suite. This ensures that future tests catch unintentional side effects to changes in the test module that cause a `check_estimator` test to fail, such as the problem addressed recently in https://github.com/alan-turing-institute/sktime/pull/2405 Also turns some instances of `pytest.skip` inside tests into `return None`, as that currently breaks `check_estimator`. An issue to replace this workaround has been opened here: https://github.com/alan-turing-institute/sktime/issues/2419 --- .../model_selection/tests/test_split.py | 10 +++--- .../forecasting/tests/test_all_forecasters.py | 30 ++++++++++++---- sktime/utils/tests/test_check_estimator.py | 35 +++++++++++++++++++ 3 files changed, 64 insertions(+), 11 deletions(-) create mode 100644 sktime/utils/tests/test_check_estimator.py diff --git a/sktime/forecasting/model_selection/tests/test_split.py b/sktime/forecasting/model_selection/tests/test_split.py index 73fd25834c5..d6cd26c35cd 100644 --- a/sktime/forecasting/model_selection/tests/test_split.py +++ b/sktime/forecasting/model_selection/tests/test_split.py @@ -451,10 +451,12 @@ def test_window_splitter_in_sample_fh_greater_than_window_length(CV): def test_split_by_fh(index_type, fh_type, is_relative, values): """Test temporal_train_test_split.""" if fh_type == "timedelta": - pytest.skip( - "ForecastingHorizon with timedelta values " - "is currently experimental and not supported everywhere" - ) + return None + # todo: ensure check_estimator works with pytest.skip like below + # pytest.skip( + # "ForecastingHorizon with timedelta values " + # "is currently experimental and not supported everywhere" + # ) y = _make_series(20, index_type=index_type) cutoff = y.index[10] fh = _make_fh(cutoff, values, fh_type, is_relative) diff --git a/sktime/forecasting/tests/test_all_forecasters.py b/sktime/forecasting/tests/test_all_forecasters.py index c6998a85917..d210798b4b2 100644 --- a/sktime/forecasting/tests/test_all_forecasters.py 
+++ b/sktime/forecasting/tests/test_all_forecasters.py @@ -209,8 +209,12 @@ def test_predict_time_index( """Check that predicted time index matches forecasting horizon.""" index_type, fh_type, is_relative = index_fh_comb if fh_type == "timedelta": - pytest.skip(pytest_skip_msg) - + return None + # todo: ensure check_estimator works with pytest.skip like below + # pytest.skip( + # "ForecastingHorizon with timedelta values " + # "is currently experimental and not supported everywhere" + # ) y_train = _make_series( n_columns=n_columns, index_type=index_type, n_timepoints=50 ) @@ -235,7 +239,11 @@ def test_predict_residuals( index_type, fh_type, is_relative = index_fh_comb if fh_type == "timedelta": return None - + # todo: ensure check_estimator works with pytest.skip like below + # pytest.skip( + # "ForecastingHorizon with timedelta values " + # "is currently experimental and not supported everywhere" + # ) y_train = _make_series( n_columns=n_columns, index_type=index_type, n_timepoints=50 ) @@ -266,8 +274,12 @@ def test_predict_time_index_with_X( """Check that predicted time index matches forecasting horizon.""" index_type, fh_type, is_relative = index_fh_comb if fh_type == "timedelta": - pytest.skip(pytest_skip_msg) - + return None + # todo: ensure check_estimator works with pytest.skip like below + # pytest.skip( + # "ForecastingHorizon with timedelta values " + # "is currently experimental and not supported everywhere" + # ) z, X = make_forecasting_problem(index_type=index_type, make_X=True) # Some estimators may not support all time index types and fh types, hence we @@ -294,8 +306,12 @@ def test_predict_time_index_in_sample_full( """Check that predicted time index equals fh for full in-sample predictions.""" index_type, fh_type, is_relative = index_fh_comb if fh_type == "timedelta": - pytest.skip(pytest_skip_msg) - + return None + # todo: ensure check_estimator works with pytest.skip like below + # pytest.skip( + # "ForecastingHorizon with timedelta values " + # 
"is currently experimental and not supported everywhere" + # ) y_train = _make_series(n_columns=n_columns, index_type=index_type) cutoff = y_train.index[-1] steps = -np.arange(len(y_train)) diff --git a/sktime/utils/tests/test_check_estimator.py b/sktime/utils/tests/test_check_estimator.py new file mode 100644 index 00000000000..4527fcfc399 --- /dev/null +++ b/sktime/utils/tests/test_check_estimator.py @@ -0,0 +1,35 @@ +# -*- coding: utf-8 -*- +"""Tests for check_estimator.""" + +__author__ = ["fkiraly"] + +import pytest + +from sktime.classification.feature_based import Catch22Classifier +from sktime.forecasting.naive import NaiveForecaster +from sktime.transformations.series.exponent import ExponentTransformer +from sktime.utils.estimator_checks import check_estimator + +EXAMPLE_CLASSES = [Catch22Classifier, NaiveForecaster, ExponentTransformer] + + +@pytest.mark.parametrize("estimator_class", EXAMPLE_CLASSES) +def test_check_estimator_passed(estimator_class): + """Test that check_estimator returns only passed tests for examples we know pass.""" + estimator_instance = estimator_class.create_test_instance() + + result_class = check_estimator(estimator_class, verbose=False) + assert all(x == "PASSED" for x in result_class.values()) + + result_instance = check_estimator(estimator_instance, verbose=False) + assert all(x == "PASSED" for x in result_instance.values()) + + +@pytest.mark.parametrize("estimator_class", EXAMPLE_CLASSES) +def test_check_estimator_does_not_raise(estimator_class): + """Test that check_estimator does not raise exceptions on examples we know pass.""" + estimator_instance = estimator_class.create_test_instance() + + check_estimator(estimator_class, return_exceptions=False, verbose=False) + + check_estimator(estimator_instance, return_exceptions=False, verbose=False)