diff --git a/sktime/base/_meta.py b/sktime/base/_meta.py
index 51dce7547d4..6a0b0ed5575 100644
--- a/sktime/base/_meta.py
+++ b/sktime/base/_meta.py
@@ -7,6 +7,7 @@
 __all__ = ["_HeterogenousMetaEstimator"]
 
 from abc import ABCMeta
+from inspect import isclass
 
 from sklearn import clone
 
@@ -100,7 +101,35 @@ def _subset_dict_keys(self, dict_to_subset, keys):
         subsetted_dict = dict((k, dict_to_subset[k]) for k in keys_in_both)
         return subsetted_dict
 
-    def _check_estimators(self, estimators, attr_name="steps", cls_type=None):
+    @staticmethod
+    def _is_name_and_est(obj, cls_type=None):
+        """Check whether obj is a tuple of type (str, cls_type).
+
+        Parameters
+        ----------
+        cls_type : class or tuple of class, optional. Default = BaseEstimator.
+            class(es) that all estimators are checked to be an instance of
+
+        Returns
+        -------
+        bool : True if obj is (str, cls_type) tuple, False otherwise
+        """
+        if cls_type is None:
+            cls_type = BaseEstimator
+        if not isinstance(obj, tuple) or len(obj) != 2:
+            return False
+        if not isinstance(obj[0], str) or not isinstance(obj[1], cls_type):
+            return False
+        return True
+
+    def _check_estimators(
+        self,
+        estimators,
+        attr_name="steps",
+        cls_type=None,
+        allow_mix=True,
+        clone_ests=True,
+    ):
         """Check that estimators is a list of estimators or list of str/est tuples.
 
         Parameters
@@ -110,8 +139,12 @@ def _check_estimators(self, estimators, attr_name="steps", cls_type=None):
             estimators should inherit from cls_type class
         attr_name : str, optional. Default = "steps"
             Name of checked attribute in error messages
-        cls_type : class, optional. Default = BaseEstimator.
-            class that all estimators are checked to be an instance of
+        cls_type : class or tuple of class, optional. Default = BaseEstimator.
+            class(es) that all estimators are checked to be an instance of
+        allow_mix : boolean, optional. Default = True.
+            whether mix of estimator and (str, estimator) is allowed in `estimators`
+        clone_ests : boolean, optional. Default = True.
+            whether estimators in return are cloned (True) or references (False).
 
         Returns
         -------
@@ -129,9 +162,15 @@ class that all estimators are checked to be an instance of
             " of estimators, or a list of (string, estimator) tuples. "
         )
         if cls_type is None:
+            msg += f"All estimators in '{attr_name}' must be of type BaseEstimator."
             cls_type = BaseEstimator
+        elif isclass(cls_type) or isinstance(cls_type, tuple):
+            msg += (
+                f"All estimators in '{attr_name}' must be of type "
+                f"{cls_type.__name__}."
+            )
         else:
-            msg += f"All estimators must be of type {cls_type}."
+            raise TypeError("cls_type must be a class or tuple of classes")
 
         if (
             estimators is None
@@ -140,27 +179,60 @@ class that all estimators are checked to be an instance of
         ):
             raise TypeError(msg)
 
-        if not isinstance(estimators[0], (cls_type, tuple)):
+        def is_est_is_tuple(obj):
+            """Check whether obj is estimator of right type, or (str, est) tuple."""
+            is_est = isinstance(obj, cls_type)
+            is_tuple = self._is_name_and_est(obj, cls_type)
+
+            return is_est, is_tuple
+
+        if not all(any(is_est_is_tuple(x)) for x in estimators):
             raise TypeError(msg)
 
-        if isinstance(estimators[0], cls_type):
-            if not all(isinstance(est, cls_type) for est in estimators):
-                raise TypeError(msg)
-        if isinstance(estimators[0], tuple):
-            if not all(isinstance(est, tuple) for est in estimators):
-                raise TypeError(msg)
-            if not all(isinstance(est[0], str) for est in estimators):
-                raise TypeError(msg)
-            if not all(isinstance(est[1], cls_type) for est in estimators):
-                raise TypeError(msg)
+        msg_no_mix = (
+            f"elements of {attr_name} must either all be estimators, "
+            f"or all (str, estimator) tuples, mix of the two is not allowed"
+        )
+
+        if not allow_mix and not all(is_est_is_tuple(x)[0] for x in estimators):
+            if not all(is_est_is_tuple(x)[1] for x in estimators):
+                raise TypeError(msg_no_mix)
+
+        return self._get_estimator_tuples(estimators, clone_ests=clone_ests)
 
-        return self._get_estimator_tuples(estimators, clone_ests=True)
+    def _coerce_estimator_tuple(self, obj, clone_est=False):
+        """Coerce estimator or (str, estimator) tuple to (str, estimator) tuple.
+
+        Parameters
+        ----------
+        obj : estimator or (str, estimator) tuple
+            assumes that this has been checked, no checks are performed
+        clone_est : boolean, optional. Default = False.
+            Whether to return clone of estimator in obj (True) or a reference (False).
+
+        Returns
+        -------
+        est_tuple : (str, estimator) tuple
+            obj if obj was (str, estimator) tuple
+            (obj class name, obj) if obj was estimator
+        """
+        if isinstance(obj, tuple):
+            est = obj[1]
+            name = obj[0]
+        else:
+            est = obj
+            name = type(obj).__name__
+
+        if clone_est:
+            return (name, clone(est))
+        else:
+            return (name, est)
 
     def _get_estimator_list(self, estimators):
         """Return list of estimators, from a list or tuple.
 
-        Arguments
-        ---------
+        Parameters
+        ----------
         estimators : list of estimators, or list of (str, estimator tuples)
 
         Returns
@@ -168,16 +240,13 @@ def _get_estimator_list(self, estimators):
             list of estimators - identical with estimators if list of estimators
             if list of (str, estimator) tuples, the str get removed
         """
-        if isinstance(estimators[0], tuple):
-            return [x[1] for x in estimators]
-        else:
-            return estimators
+        return [self._coerce_estimator_tuple(x)[1] for x in estimators]
 
     def _get_estimator_names(self, estimators, make_unique=False):
        """Return names for the estimators, optionally made unique.
- Arguments - --------- + Parameters + ---------- estimators : list of estimators, or list of (str, estimator tuples) make_unique : bool, optional, default=False whether names should be made unique in the return @@ -188,18 +257,7 @@ def _get_estimator_names(self, estimators, make_unique=False): names for estimators in estimators if make_unique=True, made unique using _make_strings_unique """ - if estimators is None or len(estimators) == 0: - names = [] - elif isinstance(estimators[0], tuple): - names = [x[0] for x in estimators] - elif isinstance(estimators[0], BaseEstimator): - names = [type(e).__name__ for e in estimators] - else: - raise RuntimeError( - "unreachable condition in _get_estimator_names, " - " likely input assumptions are violated," - " run _check_estimators before running _get_estimator_names" - ) + names = [self._coerce_estimator_tuple(x)[0] for x in estimators] if make_unique: names = self._make_strings_unique(names) return names @@ -207,8 +265,8 @@ def _get_estimator_names(self, estimators, make_unique=False): def _get_estimator_tuples(self, estimators, clone_ests=False): """Return list of estimator tuples, from a list or tuple. - Arguments - --------- + Parameters + ---------- estimators : list of estimators, or list of (str, estimator tuples) clone_ests : bool, whether estimators get cloned in the process diff --git a/sktime/forecasting/base/_base.py b/sktime/forecasting/base/_base.py index b1dcb0649d7..1620c0b4bd1 100644 --- a/sktime/forecasting/base/_base.py +++ b/sktime/forecasting/base/_base.py @@ -108,6 +108,68 @@ def __init__(self): super(BaseForecaster, self).__init__() + def __mul__(self, other): + """Magic * method, return (right) concatenated TransformedTargetForecaster. + + Implemented for `other` being a transformer, otherwise returns `NotImplemented`. + + Parameters + ---------- + other: `sktime` transformer, must inherit from BaseTransformer + otherwise, `NotImplemented` is returned + + Returns + ------- + TransformedTargetForecaster object, + concatenation of `self` (first) with `other` (last). + not nested, contains only non-TransformerPipeline `sktime` transformers + """ + from sktime.forecasting.compose import TransformedTargetForecaster + from sktime.transformations.base import BaseTransformer + from sktime.transformations.series.adapt import TabularToSeriesAdaptor + from sktime.utils.sklearn import is_sklearn_transformer + + # we wrap self in a pipeline, and concatenate with the other + # the TransformedTargetForecaster does the rest, e.g., dispatch on other + if isinstance(other, BaseTransformer): + self_as_pipeline = TransformedTargetForecaster(steps=[self]) + return self_as_pipeline * other + elif is_sklearn_transformer(other): + return self * TabularToSeriesAdaptor(other) + else: + return NotImplemented + + def __rmul__(self, other): + """Magic * method, return (left) concatenated TransformerPipeline. + + Implemented for `other` being a transformer, otherwise returns `NotImplemented`. + + Parameters + ---------- + other: `sktime` transformer, must inherit from BaseTransformer + otherwise, `NotImplemented` is returned + + Returns + ------- + TransformedTargetForecaster object, + concatenation of `other` (first) with `self` (last). 
+            not nested, contains only non-TransformerPipeline `sktime` steps
+        """
+        from sktime.forecasting.compose import TransformedTargetForecaster
+        from sktime.transformations.base import BaseTransformer
+        from sktime.transformations.series.adapt import TabularToSeriesAdaptor
+        from sktime.utils.sklearn import is_sklearn_transformer
+
+        # we wrap self in a pipeline, and concatenate with the other
+        # the TransformedTargetForecaster does the rest, e.g., dispatch on other
+        if isinstance(other, BaseTransformer):
+            self_as_pipeline = TransformedTargetForecaster(steps=[self])
+            return other * self_as_pipeline
+        elif is_sklearn_transformer(other):
+            return TabularToSeriesAdaptor(other) * self
+        else:
+            return NotImplemented
+
     def fit(self, y, X=None, fh=None):
         """Fit forecaster to training data.
 
diff --git a/sktime/forecasting/compose/_pipeline.py b/sktime/forecasting/compose/_pipeline.py
index 1540c493881..4222548dde9 100644
--- a/sktime/forecasting/compose/_pipeline.py
+++ b/sktime/forecasting/compose/_pipeline.py
@@ -10,7 +10,8 @@
 from sktime.base import _HeterogenousMetaEstimator
 from sktime.forecasting.base._base import BaseForecaster
-from sktime.transformations.base import BaseTransformer, _SeriesToSeriesTransformer
+from sktime.registry import scitype
+from sktime.transformations.base import _SeriesToSeriesTransformer
 from sktime.utils.validation.series import check_series
 
 
@@ -18,45 +19,66 @@ class _Pipeline(
     BaseForecaster,
     _HeterogenousMetaEstimator,
 ):
-    def _check_steps(self):
+    """Abstract class for forecasting pipelines."""
+
+    def _get_pipeline_scitypes(self, estimators):
+        """Get list of scitypes (str) from names/estimator list."""
+        return [scitype(x[1]) for x in estimators]
+
+    def _get_forecaster_index(self, estimators):
+        """Get the index of the first forecaster in the list."""
+        return self._get_pipeline_scitypes(estimators).index("forecaster")
+
+    def _check_steps(self, estimators, allow_postproc=False):
         """Check Steps.
 
         Parameters
         ----------
-        self : an instance of self
+        estimators : list of estimators, or list of (name, estimator) pairs
+        allow_postproc : bool, optional, default=False
+            whether transformers after the forecaster are allowed
 
         Returns
         -------
-        step : Returns step.
+        step : list of (name, estimator) pairs, estimators are cloned (not references)
+            if estimators was a list of (str, estimator) tuples, then just cloned
+            if it was a list of estimators, then str are generated via _get_estimator_names
+
+        Raises
+        ------
+        TypeError if names in `estimators` are not unique
+        TypeError if estimators in `estimators` are not all forecaster or transformer
+        TypeError if there is not exactly one forecaster in `estimators`
+        TypeError if not allow_postproc and forecaster is not last estimator
        """
-        names, estimators = zip(*self.steps)
+        estimator_tuples = self._get_estimator_tuples(estimators, clone_ests=True)
+        names, estimators = zip(*estimator_tuples)
 
         # validate names
         self._check_names(names)
 
-        # validate estimators
-        transformers = estimators[:-1]
-        forecaster = estimators[-1]
-
-        valid_transformer_type = BaseTransformer
-        for transformer in transformers:
-            if not isinstance(transformer, valid_transformer_type):
-                raise TypeError(
-                    f"All intermediate steps should be "
-                    f"instances of {valid_transformer_type}, "
-                    f"but transformer: {transformer} is not."
-                )
-
-        valid_forecaster_type = BaseForecaster
-        if not isinstance(forecaster, valid_forecaster_type):
+        scitypes = self._get_pipeline_scitypes(estimator_tuples)
+        if not set(scitypes).issubset(["forecaster", "transformer"]):
             raise TypeError(
-                f"Last step of {self.__class__.__name__} must be of type: "
-                f"{valid_forecaster_type}, "
-                f"but forecaster: {forecaster} is not."
+                f"estimators passed to {type(self).__name__} "
+                f"must be either transformer or forecaster"
+            )
+        if scitypes.count("forecaster") != 1:
+            raise TypeError(
+                f"exactly one forecaster must be contained in the chain, "
+                f"but found {scitypes.count('forecaster')}"
+            )
+
+        forecaster_ind = self._get_forecaster_index(estimator_tuples)
+
+        if not allow_postproc and forecaster_ind != len(estimators) - 1:
+            raise TypeError(
+                f"in {type(self).__name__}, last estimator must be a forecaster, "
+                f"but found a transformer"
             )
 
         # Shallow copy
-        return list(self.steps)
+        return estimator_tuples
 
     def _iter_transformers(self, reverse=False):
@@ -225,9 +247,8 @@ class ForecastingPipeline(_Pipeline):
 
     def __init__(self, steps):
         self.steps = steps
-        self.steps_ = self._check_steps()
+        self.steps_ = self._check_steps(steps, allow_postproc=False)
         super(ForecastingPipeline, self).__init__()
-        _, forecaster = self.steps[-1]
         tags_to_clone = [
             "scitype:y",  # which y are fine? univariate/multivariate/both
             "ignores-exogeneous-X",  # does estimator ignore the exogeneous X?
@@ -237,8 +258,8 @@ def __init__(self, steps):
             "X-y-must-have-same-index",  # can estimator handle different X/y index?
             "enforce_index_type",  # index type that needs to be enforced in X/y
         ]
-        self.clone_tags(forecaster, tags_to_clone)
-        self._anytagis_then_set("fit_is_empty", False, True, steps)
+        self.clone_tags(self.forecaster_, tags_to_clone)
+        self._anytagis_then_set("fit_is_empty", False, True, self.steps_)
 
     @property
     def forecaster_(self):
@@ -490,10 +511,64 @@ class TransformedTargetForecaster(_Pipeline, _SeriesToSeriesTransformer):
     X data is not transformed. If you want to transform X, please use the
     ForecastingPipeline.
+    For a list `t1`, `t2`, ..., `tN`, `f`, `tp1`, `tp2`, ..., `tpM`
+        where `t[i]` and `tp[i]` are transformers (`t` to pre-, `tp` to post-process),
+        and `f` is an sktime forecaster,
+        the pipeline behaves as follows:
+    `fit(y, X, fh)` - changes state by running `t1.fit_transform` with `X=y`, `y=X`,
+        then `t2.fit_transform` on `X=` the output of `t1.fit_transform`, `y=X`, etc
+        sequentially, with `t[i]` receiving the output of `t[i-1]` as `X`,
+        then running `f.fit` with `y` being the output of `t[N]`, and `X=X`,
+        then running `tp1.fit_transform` with `X=` the output of `t[N]`, `y=X`,
+        then `tp2.fit_transform` on `X=` the output of `tp1.fit_transform`, etc
+        sequentially, with `tp[i]` receiving the output of `tp[i-1]`,
+    `predict(X, fh)` - result is of executing `f.predict`, with `X=X`, `fh=fh`,
+        then running `tN.inverse_transform` with `X=` the output of `f.predict`, `y=X`,
+        then `t[N-1].inverse_transform` on `X=` the output of `tN.inverse_transform`, etc
+        sequentially, with `t[i]` receiving the output of `t[i+1]` as `X`,
+        then running `tp1.transform` with `X=` the output of `t1.inverse_transform`, `y=X`,
+        then `tp2.transform` on `X=` the output of `tp1.transform`, etc
+        sequentially, with `tp[i]` receiving the output of `tp[i-1]`,
+    `predict_interval(X, fh)`, `predict_quantiles(X, fh)` - as `predict(X, fh)`,
+        with `predict_interval` or `predict_quantiles` substituted for `predict`
+    `predict_var`, `predict_proba` - use base class default to obtain
+        crude estimates from `predict_quantiles`.
+        Recommended to replace with better custom implementations if needed.
+
+    `get_params`, `set_params` use `sklearn` compatible nesting interface
+        if list is unnamed, names are generated as names of classes
+        if names are non-unique, `f"_{str(i)}"` is appended to each name string
+        where `i` is the total count of occurrence of a non-unique string
+        inside the list of names leading up to it (inclusive)
+
+    `TransformedTargetForecaster` can also be created by using the magic multiplication
+        on any forecaster, i.e., if `my_forecaster` inherits from `BaseForecaster`,
+        and `my_t1`, `my_t2`, `my_tp` inherit from `BaseTransformer`,
+        then, for instance, `my_t1 * my_t2 * my_forecaster * my_tp`
+        will result in the same object as obtained from the constructor
+        `TransformedTargetForecaster([my_t1, my_t2, my_forecaster, my_tp])`
+        magic multiplication can also be used with (str, transformer) pairs,
+        as long as one element in the chain is a transformer
+
     Parameters
     ----------
-    steps : list
-        List of tuples like ("name", forecaster/transformer)
+    steps : list of sktime transformers and forecasters, or
+        list of tuples (str, estimator) of sktime transformers or forecasters
+        must contain exactly one forecaster
+        these are "blueprint" estimators, their state does not change when `fit` is called
+
+    Attributes
+    ----------
+    steps_ : list of tuples (str, estimator) of sktime transformers or forecasters
+        clones of estimators in `steps` which are fitted in the pipeline
+        is always in (str, estimator) format, even if `steps` is just a list
+        strings not passed in `steps` are replaced by unique generated strings
+        i-th estimator in `steps_` is clone of i-th in `steps`
+    forecaster_ : estimator, reference to the unique forecaster in steps_
+    transformers_pre_ : list of tuples (str, transformer) of sktime transformers
+        reference to pairs in steps_ that precede forecaster_
+    transformers_post_ : list of tuples (str, transformer) of sktime transformers
+        reference to pairs in steps_ that succeed forecaster_
 
     Examples
     --------
@@ -502,14 +577,31 @@ class TransformedTargetForecaster(_Pipeline, _SeriesToSeriesTransformer): >>> from sktime.forecasting.compose import TransformedTargetForecaster >>> from sktime.transformations.series.impute import Imputer >>> from sktime.transformations.series.detrend import Deseasonalizer + >>> from sktime.transformations.series.exponent import ExponentTransformer >>> y = load_airline() + + Example 1: string/estimator pairs >>> pipe = TransformedTargetForecaster(steps=[ ... ("imputer", Imputer(method="mean")), ... ("detrender", Deseasonalizer()), - ... ("forecaster", NaiveForecaster(strategy="drift"))]) + ... ("forecaster", NaiveForecaster(strategy="drift")), + ... ]) >>> pipe.fit(y) TransformedTargetForecaster(...) >>> y_pred = pipe.predict(fh=[1,2,3]) + + Example 2: without strings + >>> pipe = TransformedTargetForecaster([ + ... Imputer(method="mean"), + ... Deseasonalizer(), + ... NaiveForecaster(strategy="drift"), + ... ExponentTransformer(), + ... ]) + + Example 3: using the dunder method + >>> forecaster = NaiveForecaster(strategy="drift") + >>> imputer = Imputer(method="mean") + >>> pipe = imputer * Deseasonalizer() * forecaster * ExponentTransformer() """ _required_parameters = ["steps"] @@ -525,9 +617,10 @@ class TransformedTargetForecaster(_Pipeline, _SeriesToSeriesTransformer): def __init__(self, steps): self.steps = steps - self.steps_ = self._check_steps() + self.steps_ = self._check_steps(steps, allow_postproc=True) super(TransformedTargetForecaster, self).__init__() - _, forecaster = self.steps[-1] + + # set the tags based on forecaster tags_to_clone = [ "scitype:y", # which y are fine? univariate/multivariate/both "ignores-exogeneous-X", # does estimator ignore the exogeneous X? @@ -537,13 +630,129 @@ def __init__(self, steps): "X-y-must-have-same-index", # can estimator handle different X/y index? "enforce_index_type", # index type that needs to be enforced in X/y ] - self.clone_tags(forecaster, tags_to_clone) - self._anytagis_then_set("fit_is_empty", False, True, steps) + self.clone_tags(self.forecaster_, tags_to_clone) + self._anytagis_then_set("fit_is_empty", False, True, self.steps_) @property def forecaster_(self): - """Return reference to the forecaster in the pipeline. Valid after _fit.""" - return self.steps_[-1][1] + """Return reference to the forecaster in the pipeline. + + Returns + ------- + sktime forecaster + reference to unique forecaster in steps_ (without the name) + """ + return self.steps_[self._get_forecaster_index(self.steps_)][1] + + @property + def transformers_pre_(self): + """Return reference to the list of pre-forecast transformers. + + Returns + ------- + list of tuples (str, estimator) of sktime transformers + reference to tuples that come before the unique (str, forecaster) in steps_ + """ + return self.steps_[: self._get_forecaster_index(self.steps_)] + + @property + def transformers_post_(self): + """Return reference to the list of post-forecast transformers. + + Returns + ------- + list of tuples (str, estimator) of sktime transformers + reference to tuples that come after the unique (str, forecaster) in steps_ + """ + return self.steps_[(1 + self._get_forecaster_index(self.steps_)) :] + + def __mul__(self, other): + """Magic * method, return (right) concatenated TransformedTargetForecaster. + + Implemented for `other` being a transformer, otherwise returns `NotImplemented`. 
+ + Parameters + ---------- + other: `sktime` transformer, must inherit from BaseTransformer + otherwise, `NotImplemented` is returned + + Returns + ------- + TransformedTargetForecaster object, + concatenation of `self` (first) with `other` (last). + not nested, contains only non-TransformerPipeline `sktime` transformers + """ + from sktime.transformations.base import BaseTransformer + from sktime.transformations.compose import TransformerPipeline + + # we don't use names but _get_estimator_names to get the *original* names + # to avoid multiple "make unique" calls which may grow strings too much + _, ests = zip(*self.steps_) + names = tuple(self._get_estimator_names(self.steps)) + if isinstance(other, TransformerPipeline): + _, trafos_o = zip(*other.steps_) + names_o = tuple(other._get_estimator_names(other.steps)) + new_names = names + names_o + new_ests = ests + trafos_o + elif isinstance(other, BaseTransformer): + new_names = names + (type(other).__name__,) + new_ests = ests + (other,) + elif self._is_name_and_est(other, BaseTransformer): + other_name = other[0] + other_trafo = other[1] + new_names = names + (other_name,) + new_ests = ests + (other_trafo,) + else: + return NotImplemented + + # if all the names are equal to class names, we eat them away + if all(type(x[1]).__name__ == x[0] for x in zip(new_names, new_ests)): + return TransformedTargetForecaster(steps=list(new_ests)) + else: + return TransformedTargetForecaster(steps=list(zip(new_names, new_ests))) + + def __rmul__(self, other): + """Magic * method, return (left) concatenated TransformerPipeline. + + Implemented for `other` being a transformer, otherwise returns `NotImplemented`. + + Parameters + ---------- + other: `sktime` transformer, must inherit from BaseTransformer + otherwise, `NotImplemented` is returned + + Returns + ------- + TransformedTargetForecaster object, + concatenation of `other` (first) with `self` (last). + not nested, contains only non-TransformerPipeline `sktime` steps + """ + from sktime.transformations.base import BaseTransformer + from sktime.transformations.compose import TransformerPipeline + + _, ests = zip(*self.steps_) + names = tuple(self._get_estimator_names(self.steps)) + if isinstance(other, TransformerPipeline): + _, trafos_o = zip(*other.steps_) + names_o = tuple(other._get_estimator_names(other.steps)) + new_names = names_o + names + new_ests = trafos_o + ests + elif isinstance(other, BaseTransformer): + new_names = (type(other).__name__,) + names + new_ests = (other,) + ests + elif self._is_name_and_est(other, BaseTransformer): + other_name = other[0] + other_trafo = other[1] + new_names = (other_name,) + names + new_ests = (other_trafo,) + ests + else: + return NotImplemented + + # if all the names are equal to class names, we eat them away + if all(type(x[1]).__name__ == x[0] for x in zip(new_names, new_ests)): + return TransformedTargetForecaster(steps=list(new_ests)) + else: + return TransformedTargetForecaster(steps=list(zip(new_names, new_ests))) def _fit(self, y, X=None, fh=None): """Fit to training data. @@ -561,17 +770,20 @@ def _fit(self, y, X=None, fh=None): ------- self : returns an instance of self. 
""" - # transform - for step_idx, name, transformer in self._iter_transformers(): - t = clone(transformer) + self.steps_ = self._get_estimator_tuples(self.steps, clone_ests=True) + + # transform pre + for _, t in self.transformers_pre_: y = t.fit_transform(X=y, y=X) - self.steps_[step_idx] = (name, t) # fit forecaster - name, forecaster = self.steps[-1] - f = clone(forecaster) + f = self.forecaster_ f.fit(y=y, X=X, fh=fh) - self.steps_[-1] = (name, f) + + # transform post + for _, t in self.transformers_post_: + y = t.fit_transform(X=y, y=X) + return self def _predict(self, fh=None, X=None): @@ -592,6 +804,11 @@ def _predict(self, fh=None, X=None): y_pred = self.forecaster_.predict(fh=fh, X=X) # inverse transform y_pred y_pred = self._get_inverse_transform(y_pred, X) + + # transform post + for _, t in self.transformers_post_: + y_pred = t.transform(X=y_pred, y=X) + return y_pred def _update(self, y, X=None, update_params=True): @@ -607,12 +824,19 @@ def _update(self, y, X=None, update_params=True): ------- self : an instance of self """ - for _, _, transformer in self._iter_transformers(): - if hasattr(transformer, "update"): - transformer.update(X=y, y=X, update_params=update_params) - y = transformer.transform(X=y, y=X) + # transform pre + for _, t in self.transformers_pre_: + if hasattr(t, "update"): + t.update(X=y, y=X, update_params=update_params) + y = t.transform(X=y, y=X) self.forecaster_.update(y=y, X=X, update_params=update_params) + + # transform post + for _, t in self.transformers_post_: + t.update(X=y, y=X, update_params=update_params) + y = t.transform(X=y, y=X) + return self def transform(self, Z, X=None): diff --git a/sktime/forecasting/model_selection/tests/test_split.py b/sktime/forecasting/model_selection/tests/test_split.py index 73fd25834c5..d6cd26c35cd 100644 --- a/sktime/forecasting/model_selection/tests/test_split.py +++ b/sktime/forecasting/model_selection/tests/test_split.py @@ -451,10 +451,12 @@ def test_window_splitter_in_sample_fh_greater_than_window_length(CV): def test_split_by_fh(index_type, fh_type, is_relative, values): """Test temporal_train_test_split.""" if fh_type == "timedelta": - pytest.skip( - "ForecastingHorizon with timedelta values " - "is currently experimental and not supported everywhere" - ) + return None + # todo: ensure check_estimator works with pytest.skip like below + # pytest.skip( + # "ForecastingHorizon with timedelta values " + # "is currently experimental and not supported everywhere" + # ) y = _make_series(20, index_type=index_type) cutoff = y.index[10] fh = _make_fh(cutoff, values, fh_type, is_relative) diff --git a/sktime/forecasting/naive.py b/sktime/forecasting/naive.py index b81b1101959..a5bae3a1b9e 100644 --- a/sktime/forecasting/naive.py +++ b/sktime/forecasting/naive.py @@ -417,10 +417,17 @@ class NaiveVariance(BaseForecaster): - And for the covariance matrix prediction, the formula becomes :math:`Cov(y_k, y_l)=\frac{\sum_{i=1}^N \hat{r}_{k,k+i}*\hat{r}_{l,l+i}}{N}`. + The resulting forecaster will implement + `predict_interval`, `predict_quantiles`, `predict_var`, and `predict_proba`, + even if the wrapped forecaster `forecaster` did not have this capability; + for point forecasts (`predict`), behaves like the wrapped forecaster. + Parameters ---------- forecaster : estimator - Estimators to apply to the input series. 
+ Estimator to which probabilistic forecasts are being added + verbose : bool, optional, default=False + whether to print warnings if windows with too few data points occur Examples -------- @@ -446,9 +453,10 @@ class NaiveVariance(BaseForecaster): # deprecated and likely to be removed in 0.12.0 } - def __init__(self, forecaster): + def __init__(self, forecaster, verbose=False): self.forecaster = forecaster + self.verbose = verbose super(NaiveVariance, self).__init__() tags_to_clone = [ @@ -502,7 +510,7 @@ def _predict_quantiles(self, fh, X=None, alpha=0.5): pred_var = self.predict_var(fh, X) z_scores = norm.ppf(alpha) - errors = [pred_var ** 0.5 * z for z in z_scores] + errors = [pred_var**0.5 * z for z in z_scores] index = pd.MultiIndex.from_product([["Quantiles"], alpha]) pred_quantiles = pd.DataFrame(columns=index) @@ -545,9 +553,11 @@ def _predict_var(self, fh, X=None, cov=False): try: forecaster.fit(subset) except ValueError: - warn( - f"Couldn't fit the model on time series of length {len(subset)}.\n" - ) + if self.verbose: + warn( + f"Couldn't fit the model on " + f"time series window length {len(subset)}.\n" + ) continue y_true = self._y[id:] # subset on which we predict diff --git a/sktime/forecasting/tests/test_all_forecasters.py b/sktime/forecasting/tests/test_all_forecasters.py index c6998a85917..d210798b4b2 100644 --- a/sktime/forecasting/tests/test_all_forecasters.py +++ b/sktime/forecasting/tests/test_all_forecasters.py @@ -209,8 +209,12 @@ def test_predict_time_index( """Check that predicted time index matches forecasting horizon.""" index_type, fh_type, is_relative = index_fh_comb if fh_type == "timedelta": - pytest.skip(pytest_skip_msg) - + return None + # todo: ensure check_estimator works with pytest.skip like below + # pytest.skip( + # "ForecastingHorizon with timedelta values " + # "is currently experimental and not supported everywhere" + # ) y_train = _make_series( n_columns=n_columns, index_type=index_type, n_timepoints=50 ) @@ -235,7 +239,11 @@ def test_predict_residuals( index_type, fh_type, is_relative = index_fh_comb if fh_type == "timedelta": return None - + # todo: ensure check_estimator works with pytest.skip like below + # pytest.skip( + # "ForecastingHorizon with timedelta values " + # "is currently experimental and not supported everywhere" + # ) y_train = _make_series( n_columns=n_columns, index_type=index_type, n_timepoints=50 ) @@ -266,8 +274,12 @@ def test_predict_time_index_with_X( """Check that predicted time index matches forecasting horizon.""" index_type, fh_type, is_relative = index_fh_comb if fh_type == "timedelta": - pytest.skip(pytest_skip_msg) - + return None + # todo: ensure check_estimator works with pytest.skip like below + # pytest.skip( + # "ForecastingHorizon with timedelta values " + # "is currently experimental and not supported everywhere" + # ) z, X = make_forecasting_problem(index_type=index_type, make_X=True) # Some estimators may not support all time index types and fh types, hence we @@ -294,8 +306,12 @@ def test_predict_time_index_in_sample_full( """Check that predicted time index equals fh for full in-sample predictions.""" index_type, fh_type, is_relative = index_fh_comb if fh_type == "timedelta": - pytest.skip(pytest_skip_msg) - + return None + # todo: ensure check_estimator works with pytest.skip like below + # pytest.skip( + # "ForecastingHorizon with timedelta values " + # "is currently experimental and not supported everywhere" + # ) y_train = _make_series(n_columns=n_columns, index_type=index_type) cutoff = 
y_train.index[-1] steps = -np.arange(len(y_train)) diff --git a/sktime/transformations/compose.py b/sktime/transformations/compose.py index 73ea226ec8c..c5d4b46378d 100644 --- a/sktime/transformations/compose.py +++ b/sktime/transformations/compose.py @@ -171,7 +171,7 @@ def __mul__(self, other): elif isinstance(other, BaseTransformer): new_names = names + (type(other).__name__,) new_trafos = trafos + (other,) - elif self._is_name_and_trafo(other): + elif self._is_name_and_est(other, BaseTransformer): other_name = other[0] other_trafo = other[1] new_names = names + (other_name,) @@ -210,7 +210,7 @@ def __rmul__(self, other): elif isinstance(other, BaseTransformer): new_names = (type(other).__name__,) + names new_trafos = (other,) + trafos - elif self._is_name_and_trafo(other): + elif self._is_name_and_est(other, BaseTransformer): other_name = other[0] other_trafo = other[1] new_names = (other_name,) + names @@ -224,14 +224,6 @@ def __rmul__(self, other): else: return TransformerPipeline(steps=list(zip(new_names, new_trafos))) - @staticmethod - def _is_name_and_trafo(obj): - if not isinstance(obj, tuple) or len(obj) != 2: - return False - if not isinstance(obj[0], str) or not isinstance(obj[1], BaseTransformer): - return False - return True - def _fit(self, X, y=None): """Fit transformer to X and y. @@ -536,7 +528,7 @@ def __add__(self, other): elif isinstance(other, BaseTransformer): new_names = names + (type(other).__name__,) new_trafos = trafos + (other,) - elif self._is_name_and_trafo(other): + elif self._is_name_and_est(other, BaseTransformer): other_name = other[0] other_trafo = other[1] new_names = names + (other_name,) @@ -550,14 +542,6 @@ def __add__(self, other): else: return FeatureUnion(transformer_list=list(zip(new_names, new_trafos))) - @staticmethod - def _is_name_and_trafo(obj): - if not isinstance(obj, tuple) or len(obj) != 2: - return False - if not isinstance(obj[0], str) or not isinstance(obj[1], BaseTransformer): - return False - return True - def _fit(self, X, y=None): """Fit transformer to X and y. 
diff --git a/sktime/utils/tests/test_check_estimator.py b/sktime/utils/tests/test_check_estimator.py new file mode 100644 index 00000000000..4527fcfc399 --- /dev/null +++ b/sktime/utils/tests/test_check_estimator.py @@ -0,0 +1,35 @@ +# -*- coding: utf-8 -*- +"""Tests for check_estimator.""" + +__author__ = ["fkiraly"] + +import pytest + +from sktime.classification.feature_based import Catch22Classifier +from sktime.forecasting.naive import NaiveForecaster +from sktime.transformations.series.exponent import ExponentTransformer +from sktime.utils.estimator_checks import check_estimator + +EXAMPLE_CLASSES = [Catch22Classifier, NaiveForecaster, ExponentTransformer] + + +@pytest.mark.parametrize("estimator_class", EXAMPLE_CLASSES) +def test_check_estimator_passed(estimator_class): + """Test that check_estimator returns only passed tests for examples we know pass.""" + estimator_instance = estimator_class.create_test_instance() + + result_class = check_estimator(estimator_class, verbose=False) + assert all(x == "PASSED" for x in result_class.values()) + + result_instance = check_estimator(estimator_instance, verbose=False) + assert all(x == "PASSED" for x in result_instance.values()) + + +@pytest.mark.parametrize("estimator_class", EXAMPLE_CLASSES) +def test_check_estimator_does_not_raise(estimator_class): + """Test that check_estimator does not raise exceptions on examples we know pass.""" + estimator_instance = estimator_class.create_test_instance() + + check_estimator(estimator_class, return_exceptions=False, verbose=False) + + check_estimator(estimator_instance, return_exceptions=False, verbose=False)
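Below is a minimal usage sketch, not part of the patch, of the pipeline behaviour this diff introduces: the `*` dunder on forecasters and transformers, and the new `forecaster_`, `transformers_pre_` and `transformers_post_` properties. It assumes an sktime version that already contains these changes; the expected names in the comments follow the class-name-based naming rule documented in the diff.

# usage sketch of the * dunder pipeline construction, not part of the patch
from sktime.datasets import load_airline
from sktime.forecasting.compose import TransformedTargetForecaster
from sktime.forecasting.naive import NaiveForecaster
from sktime.transformations.series.detrend import Deseasonalizer
from sktime.transformations.series.exponent import ExponentTransformer
from sktime.transformations.series.impute import Imputer

y = load_airline()

# chaining with * builds a TransformedTargetForecaster:
# Imputer and Deseasonalizer pre-process y, ExponentTransformer post-processes the forecasts
pipe = (
    Imputer(method="mean")
    * Deseasonalizer()
    * NaiveForecaster(strategy="drift")
    * ExponentTransformer()
)
# per the docstring added above, this is the same object as the explicit constructor call
assert isinstance(pipe, TransformedTargetForecaster)

pipe.fit(y, fh=[1, 2, 3])
y_pred = pipe.predict()

# steps before/after the forecaster are exposed via the new properties
print(pipe.forecaster_)
print([name for name, _ in pipe.transformers_pre_])   # ["Imputer", "Deseasonalizer"]
print([name for name, _ in pipe.transformers_post_])  # ["ExponentTransformer"]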
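A second sketch, also not part of the patch and purely illustrative, of the widened `_check_estimators` contract on `_HeterogenousMetaEstimator`. Since it is a mixin method, an existing composite instance is borrowed here to call it; the expected behaviour shown in the comments is inferred from the diff above.

# illustrative sketch of _check_estimators with allow_mix, not part of the patch
from sktime.forecasting.compose import TransformedTargetForecaster
from sktime.forecasting.naive import NaiveForecaster
from sktime.transformations.series.exponent import ExponentTransformer

pipe = TransformedTargetForecaster([ExponentTransformer(), NaiveForecaster()])

# mixing bare estimators and (str, estimator) pairs is allowed by default
mixed = [ExponentTransformer(), ("naive", NaiveForecaster())]
checked = pipe._check_estimators(mixed, attr_name="steps")
# result is a list of (str, estimator) tuples, cloned by default;
# unnamed steps are named after their class
print([name for name, _ in checked])  # ["ExponentTransformer", "naive"]

# with allow_mix=False, the same mixed input is rejected
try:
    pipe._check_estimators(mixed, attr_name="steps", allow_mix=False)
except TypeError as err:
    print(err)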