Skip to content

Commit

Permalink
[ENH] fixes for forecasters to retain name attribute in predict (#4161)
Browse files Browse the repository at this point in the history

This PR collects fixes for a number of forecasters to preserve the `name` attribute in `predict`.
See #4144. The issues were found by #4157.

Does *not* contain changes to the conversion logic, or tests (e.g., #4157).
Testing is via merging this PR into the options dealing with the `name` attribute.

Contains fixes for:

* `ARDL`
* `ARIMA` and `AutoARIMA`
* `AutoEnsembleForecaster`
* `AutoETS`
* `Croston`
* `NaiveForecaster`
* `OnlineEnsembleForecaster`
* reducers
* `SquaringResiduals`
* `StackingForecaster`
* `StatsForecastAutoARIMA`
* `STLForecaster`
* `ThetaModularForecaster`
* `TrendForecaster` and `PolynomialTrendForecaster`

As tempting as it might be to add the fix to `BaseForecaster` somewhere in `predict`: while it might be DRY-er, I think it would muddle concerns. If at all, this should go in `convert`-like boilerplate rather than in the base class itself, but it is unclear what that would look like.
  • Loading branch information
fkiraly committed Feb 1, 2023
1 parent 9a666d5 commit 21a67ef
Show file tree
Hide file tree
Showing 14 changed files with 51 additions and 20 deletions.
1 change: 1 addition & 0 deletions sktime/forecasting/ardl.py
Expand Up @@ -415,6 +415,7 @@ def _predict(self, fh, X=None):
y_pred = self._fitted_forecaster.predict(
start=start, end=end, exog=self._X, exog_oos=X, fixed_oos=self.fixed_oos
)
y_pred.name = self._y.name
return y_pred.loc[valid_indices]

def _update(self, y, X=None, update_params=True):
Expand Down
12 changes: 9 additions & 3 deletions sktime/forecasting/base/_sktime.py
Expand Up @@ -28,21 +28,27 @@ def _predict(self, fh, X=None):

# all values are out-of-sample
if fh.is_all_out_of_sample(self.cutoff):
return self._predict_fixed_cutoff(
y_pred = self._predict_fixed_cutoff(
fh.to_out_of_sample(self.cutoff), **kwargs
)

# all values are in-sample
elif fh.is_all_in_sample(self.cutoff):
return self._predict_in_sample(fh.to_in_sample(self.cutoff), **kwargs)
y_pred = self._predict_in_sample(fh.to_in_sample(self.cutoff), **kwargs)

# both in-sample and out-of-sample values
else:
y_ins = self._predict_in_sample(fh.to_in_sample(self.cutoff), **kwargs)
y_oos = self._predict_fixed_cutoff(
fh.to_out_of_sample(self.cutoff), **kwargs
)
return pd.concat([y_ins, y_oos])
y_pred = pd.concat([y_ins, y_oos])

# ensure pd.Series name attribute is preserved
if isinstance(y_pred, pd.Series) and isinstance(self._y, pd.Series):
y_pred.name = self._y.name

return y_pred

def _predict_fixed_cutoff(
self, fh, X=None, return_pred_int=False, alpha=DEFAULT_ALPHA
Expand Down
11 changes: 8 additions & 3 deletions sktime/forecasting/base/adapters/_pmdarima.py
Expand Up @@ -93,17 +93,22 @@ def _predict(self, fh, X=None):

# all values are out-of-sample
if fh.is_all_out_of_sample(self.cutoff):
return self._predict_fixed_cutoff(fh_oos, X=X)
y_pred = self._predict_fixed_cutoff(fh_oos, X=X)

# all values are in-sample
elif fh.is_all_in_sample(self.cutoff):
return self._predict_in_sample(fh_ins, X=X)
y_pred = self._predict_in_sample(fh_ins, X=X)

# both in-sample and out-of-sample values
else:
y_ins = self._predict_in_sample(fh_ins, X=X)
y_oos = self._predict_fixed_cutoff(fh_oos, X=X)
return pd.concat([y_ins, y_oos])
y_pred = pd.concat([y_ins, y_oos])

# ensure that name is not added nor removed
# otherwise this may upset conversion to pd.DataFrame
y_pred.name = self._y.name
return y_pred

def _predict_in_sample(
self, fh, X=None, return_pred_int=False, alpha=DEFAULT_ALPHA
Expand Down
11 changes: 8 additions & 3 deletions sktime/forecasting/base/adapters/_statsforecast.py
Expand Up @@ -100,17 +100,22 @@ def _predict(self, fh, X=None):

# all values are out-of-sample
if fh.is_all_out_of_sample(self.cutoff):
return self._predict_fixed_cutoff(fh_oos, X=X)
y_pred = self._predict_fixed_cutoff(fh_oos, X=X)

# all values are in-sample
elif fh.is_all_in_sample(self.cutoff):
return self._predict_in_sample(fh_ins, X=X)
y_pred = self._predict_in_sample(fh_ins, X=X)

# both in-sample and out-of-sample values
else:
y_ins = self._predict_in_sample(fh_ins, X=X)
y_oos = self._predict_fixed_cutoff(fh_oos, X=X)
return pd.concat([y_ins, y_oos])
y_pred = pd.concat([y_ins, y_oos])

# ensure that name is not added nor removed
# otherwise this may upset conversion to pd.DataFrame
y_pred.name = self._y.name
return y_pred

def _predict_in_sample(
self, fh, X=None, return_pred_int=False, alpha=DEFAULT_ALPHA
Expand Down
2 changes: 1 addition & 1 deletion sktime/forecasting/compose/_bagging.py
Expand Up @@ -226,7 +226,7 @@ def _predict(self, fh, X=None):
"""
y_bootstraps_pred = self.forecaster_.predict(fh=fh, X=None)
y_pred = y_bootstraps_pred.groupby(level=-1).mean().iloc[:, 0]
y_pred.name = None
y_pred.name = self._y.name
return y_pred

def _predict_quantiles(self, fh, X=None, alpha=None):
Expand Down
2 changes: 2 additions & 0 deletions sktime/forecasting/compose/_ensemble.py
Expand Up @@ -166,6 +166,7 @@ def _fit(self, y, X=None, fh=None):
regressor=self.regressor, random_state=self.random_state
)
X_meta = pd.concat(self._predict_forecasters(fh_test, X_test), axis=1)
X_meta.columns = pd.RangeIndex(len(X_meta.columns))

# fit meta-model (regressor) on predictions of ensemble models
# with y_test as endog/target
Expand Down Expand Up @@ -215,6 +216,7 @@ def _predict(self, fh, X=None):
y_pred_df = pd.concat(self._predict_forecasters(fh, X), axis=1)
# apply weights
y_pred = y_pred_df.apply(lambda x: np.average(x, weights=self.weights_), axis=1)
y_pred.name = self._y.name
return y_pred

@classmethod
Expand Down
2 changes: 1 addition & 1 deletion sktime/forecasting/compose/_stack.py
Expand Up @@ -166,7 +166,7 @@ def _predict(self, fh=None, X=None):
y_pred = self.regressor_.predict(y_preds)
# index = y_preds.index
index = self.fh.to_absolute(self.cutoff)
return pd.Series(y_pred, index=index)
return pd.Series(y_pred, index=index, name=self._y.name)

@classmethod
def get_test_params(cls, parameter_set="default"):
Expand Down
2 changes: 1 addition & 1 deletion sktime/forecasting/croston.py
Expand Up @@ -152,7 +152,7 @@ def _predict(
y_pred = np.full(len_fh, f[-1])

index = self.fh.to_absolute(self.cutoff)
return pd.Series(y_pred, index=index)
return pd.Series(y_pred, index=index, name=self._y.name)

@classmethod
def get_test_params(cls, parameter_set="default"):
Expand Down
1 change: 1 addition & 0 deletions sktime/forecasting/ets.py
Expand Up @@ -412,6 +412,7 @@ def _predict(self, fh, X=None, **simulate_kwargs):
valid_indices = fh.to_absolute(self.cutoff).to_pandas()

y_pred = self._fitted_forecaster.predict(start=start, end=end)
y_pred.name = self._y.name
return y_pred.loc[valid_indices]

def _predict_interval(self, fh, X=None, coverage=None):
Expand Down
2 changes: 2 additions & 0 deletions sktime/forecasting/naive.py
Expand Up @@ -348,6 +348,8 @@ def _predict(self, fh=None, X=None):
# fill NaN with observed values
y_pred.loc[self._y.index[0]] = self._y[self._y.index[1]]

y_pred.name = self._y.name

return y_pred

def _predict_quantiles(self, fh, X=None, alpha=0.5):
Expand Down
7 changes: 4 additions & 3 deletions sktime/forecasting/online_learning/_online_ensemble.py
Expand Up @@ -104,9 +104,10 @@ def _update(self, y, X=None, update_params=False):
def _predict(self, fh=None, X=None):
if self.ensemble_algorithm is not None:
self.weights = self.ensemble_algorithm.weights
return (pd.concat(self._predict_forecasters(fh, X), axis=1) * self.weights).sum(
axis=1
)
y_pred = pd.concat(self._predict_forecasters(fh, X), axis=1) * self.weights
y_pred = y_pred.sum(axis=1)
y_pred.name = self._y.name
return y_pred

@classmethod
def get_test_params(cls, parameter_set="default"):
Expand Down
1 change: 1 addition & 0 deletions sktime/forecasting/squaring_residuals.py
Expand Up @@ -225,6 +225,7 @@ def _predict(self, fh, X=None):
"""
fh_abs = fh.to_absolute(self.cutoff)
y_pred = self._forecaster_.predict(X=X, fh=fh_abs)
y_pred.name = self._y.name
return y_pred

def _update(self, y, X=None, update_params=True):
Expand Down
4 changes: 3 additions & 1 deletion sktime/forecasting/theta.py
Expand Up @@ -432,7 +432,9 @@ def _predict(self, fh, X=None, return_pred_int=False):
# Call predict on the forecaster directly, not on the pipeline
# because of output conversion
Y_pred = self.pipe_.steps_[-1][-1].predict(fh, X)
return _aggregate(Y_pred, aggfunc=self.aggfunc, weights=self.weights)
y_pred = _aggregate(Y_pred, aggfunc=self.aggfunc, weights=self.weights)
y_pred.name = self._y.name
return y_pred

def _update(self, y, X=None, update_params=True):
self.pipe_._update(y, X=None, update_params=update_params)
Expand Down
13 changes: 9 additions & 4 deletions sktime/forecasting/trend.py
Expand Up @@ -120,8 +120,10 @@ def _predict(self, fh=None, X=None):
# use relative fh as time index to predict
fh = self.fh.to_absolute(self.cutoff)
X_sklearn = _get_X_numpy_int_from_pandas(fh.to_pandas())
y_pred = self.regressor_.predict(X_sklearn)
return pd.Series(y_pred, index=self.fh.to_absolute(self.cutoff))
y_pred_sklearn = self.regressor_.predict(X_sklearn)
y_pred = pd.Series(y_pred_sklearn, index=fh)
y_pred.name = self._y.name
return y_pred

@classmethod
def get_test_params(cls, parameter_set="default"):
Expand Down Expand Up @@ -266,8 +268,10 @@ def _predict(self, fh=None, X=None):
# use relative fh as time index to predict
fh = self.fh.to_absolute(self.cutoff)
X_sklearn = _get_X_numpy_int_from_pandas(fh.to_pandas())
y_pred = self.regressor_.predict(X_sklearn)
return pd.Series(y_pred, index=self.fh.to_absolute(self.cutoff))
y_pred_sklearn = self.regressor_.predict(X_sklearn)
y_pred = pd.Series(y_pred_sklearn, index=fh)
y_pred.name = self._y.name
return y_pred

@classmethod
def get_test_params(cls, parameter_set="default"):
Expand Down Expand Up @@ -548,6 +552,7 @@ def _predict(self, fh, X=None):
y_pred_trend = self.forecaster_trend_.predict(fh=fh, X=X)
y_pred_resid = self.forecaster_resid_.predict(fh=fh, X=X)
y_pred = y_pred_seasonal + y_pred_trend + y_pred_resid
y_pred.name = self._y.name
return y_pred

def _update(self, y, X=None, update_params=True):
Expand Down

0 comments on commit 21a67ef

Please sign in to comment.