[ENH] fixes for forecasters to retain name attribute in predict (#…

…4161) This PR collects fixes for a number of forecasters to preserve the `name` attribute in `predict`. See #4144. The issues were found by #4157. Does *not* contain changes to the conversion logic, or tests (e.g., #4157). Testing is via merging this PR into the options dealing with the `name` attribute. Contains fixes for: * `ARDL` * `ARIMA` and `AutoARIMA` * `AutoEnsembleForecaster` * `AutoETS` * `Croston` * `NaiveForecaster` * `OnlineEnsembleForecaster` * reducers * `SquaringResiduals` * `StackingForecaster` * `StatsForecastAutoARIMA` * `STLForecaster` * `ThetaModularForecaster` * `TrendForecaster` and `PolynomialTrendForecaster` As tempting as it might be to add the fix to `BaseForecaster` somewhere in `predict`: whilst it might be DRY-er, I think it would muddle concerns. If at all, this should go in `convert`-like boilerplate rather than in the base class itself, but it is unclear what that would look like.
sktime · Feb 1, 2023 · 21a67ef · 21a67ef
1 parent 9a666d5
commit 21a67ef
Show file tree

Hide file tree

Showing 14 changed files with 51 additions and 20 deletions.
diff --git a/sktime/forecasting/ardl.py b/sktime/forecasting/ardl.py
@@ -415,6 +415,7 @@ def _predict(self, fh, X=None):
         y_pred = self._fitted_forecaster.predict(
             start=start, end=end, exog=self._X, exog_oos=X, fixed_oos=self.fixed_oos
         )
+        y_pred.name = self._y.name
         return y_pred.loc[valid_indices]
 
     def _update(self, y, X=None, update_params=True):

diff --git a/sktime/forecasting/base/_sktime.py b/sktime/forecasting/base/_sktime.py
@@ -28,21 +28,27 @@ def _predict(self, fh, X=None):
 
         # all values are out-of-sample
         if fh.is_all_out_of_sample(self.cutoff):
-            return self._predict_fixed_cutoff(
+            y_pred = self._predict_fixed_cutoff(
                 fh.to_out_of_sample(self.cutoff), **kwargs
             )
 
         # all values are in-sample
         elif fh.is_all_in_sample(self.cutoff):
-            return self._predict_in_sample(fh.to_in_sample(self.cutoff), **kwargs)
+            y_pred = self._predict_in_sample(fh.to_in_sample(self.cutoff), **kwargs)
 
         # both in-sample and out-of-sample values
         else:
             y_ins = self._predict_in_sample(fh.to_in_sample(self.cutoff), **kwargs)
             y_oos = self._predict_fixed_cutoff(
                 fh.to_out_of_sample(self.cutoff), **kwargs
             )
-            return pd.concat([y_ins, y_oos])
+            y_pred = pd.concat([y_ins, y_oos])
+
+        # ensure pd.Series name attribute is preserved
+        if isinstance(y_pred, pd.Series) and isinstance(self._y, pd.Series):
+            y_pred.name = self._y.name
+
+        return y_pred
 
     def _predict_fixed_cutoff(
         self, fh, X=None, return_pred_int=False, alpha=DEFAULT_ALPHA

diff --git a/sktime/forecasting/base/adapters/_pmdarima.py b/sktime/forecasting/base/adapters/_pmdarima.py
@@ -93,17 +93,22 @@ def _predict(self, fh, X=None):
 
         # all values are out-of-sample
         if fh.is_all_out_of_sample(self.cutoff):
-            return self._predict_fixed_cutoff(fh_oos, X=X)
+            y_pred = self._predict_fixed_cutoff(fh_oos, X=X)
 
         # all values are in-sample
         elif fh.is_all_in_sample(self.cutoff):
-            return self._predict_in_sample(fh_ins, X=X)
+            y_pred = self._predict_in_sample(fh_ins, X=X)
 
         # both in-sample and out-of-sample values
         else:
             y_ins = self._predict_in_sample(fh_ins, X=X)
             y_oos = self._predict_fixed_cutoff(fh_oos, X=X)
-            return pd.concat([y_ins, y_oos])
+            y_pred = pd.concat([y_ins, y_oos])
+
+        # ensure that name is not added nor removed
+        # otherwise this may upset conversion to pd.DataFrame
+        y_pred.name = self._y.name
+        return y_pred
 
     def _predict_in_sample(
         self, fh, X=None, return_pred_int=False, alpha=DEFAULT_ALPHA

diff --git a/sktime/forecasting/base/adapters/_statsforecast.py b/sktime/forecasting/base/adapters/_statsforecast.py
@@ -100,17 +100,22 @@ def _predict(self, fh, X=None):
 
         # all values are out-of-sample
         if fh.is_all_out_of_sample(self.cutoff):
-            return self._predict_fixed_cutoff(fh_oos, X=X)
+            y_pred = self._predict_fixed_cutoff(fh_oos, X=X)
 
         # all values are in-sample
         elif fh.is_all_in_sample(self.cutoff):
-            return self._predict_in_sample(fh_ins, X=X)
+            y_pred = self._predict_in_sample(fh_ins, X=X)
 
         # both in-sample and out-of-sample values
         else:
             y_ins = self._predict_in_sample(fh_ins, X=X)
             y_oos = self._predict_fixed_cutoff(fh_oos, X=X)
-            return pd.concat([y_ins, y_oos])
+            y_pred = pd.concat([y_ins, y_oos])
+
+        # ensure that name is not added nor removed
+        # otherwise this may upset conversion to pd.DataFrame
+        y_pred.name = self._y.name
+        return y_pred
 
     def _predict_in_sample(
         self, fh, X=None, return_pred_int=False, alpha=DEFAULT_ALPHA

diff --git a/sktime/forecasting/compose/_bagging.py b/sktime/forecasting/compose/_bagging.py
@@ -226,7 +226,7 @@ def _predict(self, fh, X=None):
         """
         y_bootstraps_pred = self.forecaster_.predict(fh=fh, X=None)
         y_pred = y_bootstraps_pred.groupby(level=-1).mean().iloc[:, 0]
-        y_pred.name = None
+        y_pred.name = self._y.name
         return y_pred
 
     def _predict_quantiles(self, fh, X=None, alpha=None):

diff --git a/sktime/forecasting/compose/_ensemble.py b/sktime/forecasting/compose/_ensemble.py
@@ -166,6 +166,7 @@ def _fit(self, y, X=None, fh=None):
                 regressor=self.regressor, random_state=self.random_state
             )
             X_meta = pd.concat(self._predict_forecasters(fh_test, X_test), axis=1)
+            X_meta.columns = pd.RangeIndex(len(X_meta.columns))
 
             # fit meta-model (regressor) on predictions of ensemble models
             # with y_test as endog/target
@@ -215,6 +216,7 @@ def _predict(self, fh, X=None):
         y_pred_df = pd.concat(self._predict_forecasters(fh, X), axis=1)
         # apply weights
         y_pred = y_pred_df.apply(lambda x: np.average(x, weights=self.weights_), axis=1)
+        y_pred.name = self._y.name
         return y_pred
 
     @classmethod

diff --git a/sktime/forecasting/compose/_stack.py b/sktime/forecasting/compose/_stack.py
@@ -166,7 +166,7 @@ def _predict(self, fh=None, X=None):
         y_pred = self.regressor_.predict(y_preds)
         # index = y_preds.index
         index = self.fh.to_absolute(self.cutoff)
-        return pd.Series(y_pred, index=index)
+        return pd.Series(y_pred, index=index, name=self._y.name)
 
     @classmethod
     def get_test_params(cls, parameter_set="default"):

diff --git a/sktime/forecasting/croston.py b/sktime/forecasting/croston.py
@@ -152,7 +152,7 @@ def _predict(
         y_pred = np.full(len_fh, f[-1])
 
         index = self.fh.to_absolute(self.cutoff)
-        return pd.Series(y_pred, index=index)
+        return pd.Series(y_pred, index=index, name=self._y.name)
 
     @classmethod
     def get_test_params(cls, parameter_set="default"):

diff --git a/sktime/forecasting/ets.py b/sktime/forecasting/ets.py
@@ -412,6 +412,7 @@ def _predict(self, fh, X=None, **simulate_kwargs):
         valid_indices = fh.to_absolute(self.cutoff).to_pandas()
 
         y_pred = self._fitted_forecaster.predict(start=start, end=end)
+        y_pred.name = self._y.name
         return y_pred.loc[valid_indices]
 
     def _predict_interval(self, fh, X=None, coverage=None):

diff --git a/sktime/forecasting/naive.py b/sktime/forecasting/naive.py
@@ -348,6 +348,8 @@ def _predict(self, fh=None, X=None):
                 # fill NaN with observed values
                 y_pred.loc[self._y.index[0]] = self._y[self._y.index[1]]
 
+        y_pred.name = self._y.name
+
         return y_pred
 
     def _predict_quantiles(self, fh, X=None, alpha=0.5):

diff --git a/sktime/forecasting/online_learning/_online_ensemble.py b/sktime/forecasting/online_learning/_online_ensemble.py
@@ -104,9 +104,10 @@ def _update(self, y, X=None, update_params=False):
     def _predict(self, fh=None, X=None):
         if self.ensemble_algorithm is not None:
             self.weights = self.ensemble_algorithm.weights
-        return (pd.concat(self._predict_forecasters(fh, X), axis=1) * self.weights).sum(
-            axis=1
-        )
+        y_pred = pd.concat(self._predict_forecasters(fh, X), axis=1) * self.weights
+        y_pred = y_pred.sum(axis=1)
+        y_pred.name = self._y.name
+        return y_pred
 
     @classmethod
     def get_test_params(cls, parameter_set="default"):

diff --git a/sktime/forecasting/squaring_residuals.py b/sktime/forecasting/squaring_residuals.py
@@ -225,6 +225,7 @@ def _predict(self, fh, X=None):
         """
         fh_abs = fh.to_absolute(self.cutoff)
         y_pred = self._forecaster_.predict(X=X, fh=fh_abs)
+        y_pred.name = self._y.name
         return y_pred
 
     def _update(self, y, X=None, update_params=True):

diff --git a/sktime/forecasting/theta.py b/sktime/forecasting/theta.py
@@ -432,7 +432,9 @@ def _predict(self, fh, X=None, return_pred_int=False):
         # Call predict on the forecaster directly, not on the pipeline
         # because of output conversion
         Y_pred = self.pipe_.steps_[-1][-1].predict(fh, X)
-        return _aggregate(Y_pred, aggfunc=self.aggfunc, weights=self.weights)
+        y_pred = _aggregate(Y_pred, aggfunc=self.aggfunc, weights=self.weights)
+        y_pred.name = self._y.name
+        return y_pred
 
     def _update(self, y, X=None, update_params=True):
         self.pipe_._update(y, X=None, update_params=update_params)

diff --git a/sktime/forecasting/trend.py b/sktime/forecasting/trend.py
@@ -120,8 +120,10 @@ def _predict(self, fh=None, X=None):
         # use relative fh as time index to predict
         fh = self.fh.to_absolute(self.cutoff)
         X_sklearn = _get_X_numpy_int_from_pandas(fh.to_pandas())
-        y_pred = self.regressor_.predict(X_sklearn)
-        return pd.Series(y_pred, index=self.fh.to_absolute(self.cutoff))
+        y_pred_sklearn = self.regressor_.predict(X_sklearn)
+        y_pred = pd.Series(y_pred_sklearn, index=fh)
+        y_pred.name = self._y.name
+        return y_pred
 
     @classmethod
     def get_test_params(cls, parameter_set="default"):
@@ -266,8 +268,10 @@ def _predict(self, fh=None, X=None):
         # use relative fh as time index to predict
         fh = self.fh.to_absolute(self.cutoff)
         X_sklearn = _get_X_numpy_int_from_pandas(fh.to_pandas())
-        y_pred = self.regressor_.predict(X_sklearn)
-        return pd.Series(y_pred, index=self.fh.to_absolute(self.cutoff))
+        y_pred_sklearn = self.regressor_.predict(X_sklearn)
+        y_pred = pd.Series(y_pred_sklearn, index=fh)
+        y_pred.name = self._y.name
+        return y_pred
 
     @classmethod
     def get_test_params(cls, parameter_set="default"):
@@ -548,6 +552,7 @@ def _predict(self, fh, X=None):
         y_pred_trend = self.forecaster_trend_.predict(fh=fh, X=X)
         y_pred_resid = self.forecaster_resid_.predict(fh=fh, X=X)
         y_pred = y_pred_seasonal + y_pred_trend + y_pred_resid
+        y_pred.name = self._y.name
         return y_pred
 
     def _update(self, y, X=None, update_params=True):