[BUG] fix BATS and TBATS _predict_interval interface #4492 (Merged)
New test file added in this PR:

```python
# -*- coding: utf-8 -*-
"""Tests for TBATS."""
# copyright: sktime developers, BSD-3-Clause License (see LICENSE file)

__author__ = ["fkiraly", "ngupta23"]

import numpy as np
import pandas as pd
import pytest

from sktime.forecasting.tbats import TBATS
from sktime.utils.validation._dependencies import _check_estimator_deps


@pytest.mark.skipif(
    not _check_estimator_deps(TBATS, severity="none"),
    reason="skip test if required soft dependency not available",
)
def test_tbats_long_fh():
    """Test TBATS with long fh, checks for failure condition in bug #4491."""
    np.random.seed(42)
    LEN_HISTORY = 50
    train = pd.Series(data=np.random.randint(1, 100, LEN_HISTORY))

    # train model
    estimator = TBATS(
        use_box_cox=False,
        use_trend=True,
        use_damped_trend=False,
        sp=10,
        use_arma_errors=False,
    )
    estimator.fit(train)

    # failure condition is fh being longer than training data
    long_fh = np.array(range(1, LEN_HISTORY + 2))

    fcst = estimator.predict_interval(coverage=0.8, fh=long_fh)
    assert len(fcst) == len(long_fh)
```

Do unit test inputs need to be reproducible? If not, removing this line will ensure a different sample in different runs.

hm, I suppose not, unless you try to reproduce hard-coded expected outcomes.
I just copy-pasted this from @ngupta23's example in #4491, and I wouldn't see a problem with having this line, or not having it.
Do you have a preference?

My 2 cents on this: I feel it is better for tests to be reproducible, so you can quickly distinguish actual failures from ones that are due to the data changing from run to run. If we need to test with multiple datasets (with different conditions), it is better to parameterize the data used for testing (but still be explicit with the dataset definition), as in the sketch below.
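
For illustration, explicit parametrization could look like this (the datasets and test name are made up for this sketch, and the soft-dependency skip from the actual test is omitted for brevity):

```python
import numpy as np
import pandas as pd
import pytest

from sktime.forecasting.tbats import TBATS

# hypothetical explicit datasets, written out so every run sees the same data
DATASETS = [
    pd.Series(np.arange(1.0, 51.0)),  # trending series
    pd.Series(np.tile([10.0, 20.0, 30.0, 40.0, 50.0], 10)),  # repeating pattern
]


@pytest.mark.parametrize("train", DATASETS)
def test_tbats_long_fh_parametrized(train):
    """Run the long-fh check from this PR against each explicit dataset."""
    estimator = TBATS(use_box_cox=False, use_trend=True, sp=10, use_arma_errors=False)
    estimator.fit(train)

    # same failure condition as in #4491: fh longer than the training data
    long_fh = np.arange(1, len(train) + 2)
    fcst = estimator.predict_interval(coverage=0.8, fh=long_fh)
    assert len(fcst) == len(long_fh)
```
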
yes, I suppose that's the main advantage.
The disadvantage being, if you have a sporadic failure - like the one with quantiles crossing on occasion - you'll never spot it.
(not arguing for either option, still on the fence)

I think unit tests should test with as general an example as possible. I like `hypothesis` a lot specifically for this: if my function takes a numeric input, I should be able to test it for any number (not necessarily all, to save resources), not just the 5 or 6 values I happen to choose at the time of writing, as those almost never get modified afterwards to test other values. Apart from a general dataset, I'd also want to test special cases (for example 0, if division is involved), and for those I'd prefer hardcoded inputs, but only because they are special.

As for this example, we need to test whether `tbats` supports predicting intervals or not, not whether it supports the specific data points that `seed=42` would generate. Our goal is to ensure that `tbats` works, not to pass tests and get a green tick in GHA. So, ideally, I would prefer to test `tbats` on any data, with a good amount of variation. But obviously that would take a good amount of time, so I'd agree to restrict to a very small number of examples, even 1. Still, I think we should at least test the estimator's generalisation to other datasets over multiple runs in different PRs.

Had you used some sort of "gold standard" dataset, it would make sense to me to fix the inputs. Since that is not the case, I'd prefer not to fix it.

(I sincerely apologise if this sounds too harsh or opinionated; I am just trying to share the point of view from which I suggested it. I understand there are other tests in `sktime` which also use `seed`, so it should be okay if this line is kept as well.)
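
For illustration, a minimal property-based test in the style described above could look like this (the function under test and its bounds are made up for demonstration, unrelated to `sktime`):

```python
from hypothesis import given
from hypothesis import strategies as st


def scale_to_percent(x):
    """Hypothetical function under test: convert a fraction to a percentage."""
    return x * 100


@given(st.floats(min_value=0, max_value=1))
def test_scale_to_percent_stays_in_range(x):
    # hypothesis draws many values of x in [0, 1] on each run,
    # instead of the handful a developer would hardcode
    assert 0 <= scale_to_percent(x) <= 100
```
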
Those are good arguments.
Quick question: if a test were to fail with `hypothesis` on GitHub CI, would you be able to reproduce it locally for finding the root cause and fixing it? If so, how would this be accomplished?

Excellent question! @yarnabrina, I'd also be interested in the answer!
You can of course always brute-force it and just sample 100 or 1000 times and break on exception, but probably there's a more elegant way than this.

found the relevant paragraph in the `hypothesis` docs: https://hypothesis.readthedocs.io/en/latest/reproducing.html
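
In short, that page covers a local example database that replays previously failing examples automatically, plus a `@reproduce_failure` decorator for pinning a failure found elsewhere, e.g. on CI. A sketch of the pattern, with a placeholder test (the version string and blob in the comment are made up):

```python
from hypothesis import given
from hypothesis import strategies as st

# When a test fails (and print_blob is enabled, as it is on CI),
# hypothesis prints the falsifying example plus a message like:
#
#   You can reproduce this example by temporarily adding
#   @reproduce_failure('6.75.3', b'AXicY2BgYAQAAAsAAQ==')
#   as a decorator on your test case
#
# Copying that decorator from the CI log onto the local test replays
# the exact same input for debugging.
@given(st.lists(st.integers()))
def test_sorting_is_idempotent(xs):
    assert sorted(sorted(xs)) == sorted(xs)
```
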

@fkiraly While this is an option, you would still have to run the test(s) with that seed for a lot of iterations for it to fail once on a single case. If there were an actual systematic failure or something, that would help, but I interpreted @ngupta23's question as being about finding the particular choice of example that led to the failure. There's a simpler option for that, as all failing tests will show the corresponding inputs; the Quick start guide in the `hypothesis` docs shows an example of this.

Also, instead of just returning all failing examples or the first one, by default `hypothesis` finds the "minimal" failing example (subject to the defined shrinking strategy).

A few `hypothesis` examples (nothing to do with `sktime`, just for demonstration):
If you run this with `pytest test_hypothesis.py` in an environment with `hypothesis[pytest]` installed, the first 3 should pass and the last one should fail, and it should show that `sample_function_1` failed with `[]`; from that you can start debugging.

nice! `hypothesis` would be a very interesting thing to show in the community collab sessions (if you have time?), we could think about rebasing parts of the test framework on it!

They are also plugin-extensible, so we could think about how to make the fixture generation logic (which, framework-wise, sits in `skbase`) compatible with `hypothesis`.

Going slightly off-topic here. Might be worth a design issue!