[ENH] NeuralForecastRNN should auto-detect freq (#6039)

Enhances `NeuralForecastRNN` to interpret `freq` from `ForecastingHorizon` when passed as `"auto"`.

#### Reference Issues/PRs

Fixes #6003.

#### What does this implement/fix? Explain your changes.

The `NeuralForecastRNN` constructor previously required a `freq` argument. It now defaults to `"auto"`, in which case `freq` is interpreted from the `ForecastingHorizon`, leveraging `fh.freq` in the `fit` method.

#### What should a reviewer concentrate their feedback on?

I have run the tests with the updated estimator:

```py
results = check_estimator(NeuralForecastRNN)
# All tests PASSED!
```

`freq` can now be passed like this:

```py
y, X = load_longley()
y_train, y_test, X_train, X_test = temporal_train_test_split(y, X, test_size=4)
model = NeuralForecastRNN(
    "auto",  # interprets to be "A-DEC"
    futr_exog_list=["ARMED", "POP"],
    max_steps=5
)
model.fit(y_train, X=X_train, fh=[1, 2, 3, 4])
model.predict(X=X_test)
# Seed set to 1
# 1959    66241.984375
# 1960    66700.132812
# 1961    66550.195312
# 1962    67310.007812
# Freq: A-DEC, Name: TOTEMP, dtype: float64
```
sktime · Mar 20, 2024 · d19fda1 · d19fda1
1 parent 3f26c11
commit d19fda1
Show file tree

Hide file tree

Showing 4 changed files with 138 additions and 15 deletions.
diff --git a/.all-contributorsrc b/.all-contributorsrc
@@ -2624,6 +2624,7 @@
       "avatar_url": "https://avatars.githubusercontent.com/u/90601662?s=96&v=4",
       "profile": "https://github.com/geetu040",
       "contributions": [
+        "code",
         "doc",
         "maintenance"
       ]

diff --git a/sktime/forecasting/base/adapters/_neuralforecast.py b/sktime/forecasting/base/adapters/_neuralforecast.py
@@ -17,8 +17,10 @@ class _NeuralForecastAdapter(BaseForecaster):
 
     Parameters
     ----------
-    freq : str
+    freq : str (default="auto")
         frequency of the data, see available frequencies [1]_ from ``pandas``
+
+        default ("auto") interprets freq from ForecastingHorizon in ``fit``
     local_scaler_type : str (default=None)
         scaler to apply per-series to all features before fitting, which is inverted
         after predicting
@@ -66,7 +68,7 @@ class _NeuralForecastAdapter(BaseForecaster):
 
     def __init__(
         self: "_NeuralForecastAdapter",
-        freq: str,
+        freq: str = "auto",
         local_scaler_type: typing.Optional[
             typing.Literal["standard", "robust", "robust-iqr", "minmax", "boxcox"]
         ] = None,
@@ -84,6 +86,9 @@ def __init__(
 
         super().__init__()
 
+        # initiate internal variables to avoid AttributeError in future
+        self._freq = None
+
         self.id_col = "unique_id"
         self.time_col = "ds"
         self.target_col = "y"
@@ -143,7 +148,7 @@ def _instantiate_model(self: "_NeuralForecastAdapter", fh: ForecastingHorizon):
         from neuralforecast import NeuralForecast
 
         model = NeuralForecast(
-            [algorithm_instance], self.freq, local_scaler_type=self.local_scaler_type
+            [algorithm_instance], self._freq, local_scaler_type=self.local_scaler_type
         )
 
         return model
@@ -175,13 +180,28 @@ def _fit(
         -------
         self : _NeuralForecastAdapter
             reference to self
+
+        Raises
+        ------
+        ValueError
+            When ``freq="auto"`` and cannot be interpreted from ``ForecastingHorizon``
         """
         if not fh.is_all_out_of_sample(cutoff=self.cutoff):
             raise NotImplementedError("in-sample prediction is currently not supported")
 
+        if self.freq == "auto" and fh.freq is None:
+            # when freq cannot be interpreted from ForecastingHorizon
+            raise ValueError(
+                f"Error in {self.__class__.__name__}, "
+                f"could not interpret freq, "
+                f"try passing freq in model initialization"
+            )
+
+        self._freq = fh.freq if self.freq == "auto" else self.freq
+
         train_indices = y.index
         if isinstance(train_indices, pandas.PeriodIndex):
-            train_indices = train_indices.to_timestamp(freq=self.freq)
+            train_indices = train_indices.to_timestamp(freq=self._freq)
 
         train_data = {
             self.id_col: 1,
@@ -252,7 +272,7 @@ def _predict(
         if self.futr_exog_list:
             predict_indices = X.index
             if isinstance(predict_indices, pandas.PeriodIndex):
-                predict_indices = predict_indices.to_timestamp(freq=self.freq)
+                predict_indices = predict_indices.to_timestamp(freq=self._freq)
 
             predict_data = {self.id_col: 1, self.time_col: predict_indices.to_numpy()}
 

diff --git a/sktime/forecasting/neuralforecast.py b/sktime/forecasting/neuralforecast.py
@@ -22,8 +22,10 @@ class NeuralForecastRNN(_NeuralForecastAdapter):
 
     Parameters
     ----------
-    freq : str
+    freq : str (default="auto")
         frequency of the data, see available frequencies [4]_ from ``pandas``
+
+        default ("auto") interprets freq from ForecastingHorizon in ``fit``
     local_scaler_type : str (default=None)
         scaler to apply per-series to all features before fitting, which is inverted
         after predicting
@@ -160,7 +162,7 @@ class NeuralForecastRNN(_NeuralForecastAdapter):
 
     def __init__(
         self: "NeuralForecastRNN",
-        freq: str,
+        freq: str = "auto",
         local_scaler_type: typing.Optional[
             typing.Literal["standard", "robust", "robust-iqr", "minmax", "boxcox"]
         ] = None,
@@ -381,8 +383,10 @@ class NeuralForecastLSTM(_NeuralForecastAdapter):
 
     Parameters
     ----------
-    freq : str
+    freq : str (default="auto")
         frequency of the data, see available frequencies [4]_ from ``pandas``
+
+        default ("auto") interprets freq from ForecastingHorizon in ``fit``
     local_scaler_type : str (default=None)
         scaler to apply per-series to all features before fitting, which is inverted
         after predicting
@@ -513,7 +517,7 @@ class NeuralForecastLSTM(_NeuralForecastAdapter):
 
     def __init__(
         self: "NeuralForecastLSTM",
-        freq: str,
+        freq: str = "auto",
         local_scaler_type: typing.Optional[
             typing.Literal["standard", "robust", "robust-iqr", "minmax", "boxcox"]
         ] = None,

diff --git a/sktime/forecasting/tests/test_neuralforecast.py b/sktime/forecasting/tests/test_neuralforecast.py
@@ -8,7 +8,7 @@
 from sktime.split import temporal_train_test_split
 from sktime.tests.test_switch import run_test_for_class
 
-__author__ = ["yarnabrina", "pranavvp16"]
+__author__ = ["yarnabrina", "pranavvp16", "geetu040"]
 
 y, X = load_longley()
 y_train, y_test, X_train, X_test = temporal_train_test_split(y, X, test_size=4)
@@ -22,7 +22,7 @@
 def test_neural_forecast_univariate_y_without_X(model_class) -> None:
     """Test with single endogenous without exogenous."""
     # define model
-    model = model_class("A-DEC", max_steps=5, trainer_kwargs={"logger": False})
+    model = model_class(freq="A-DEC", max_steps=5, trainer_kwargs={"logger": False})
 
     # attempt fit with negative fh
     with pytest.raises(
@@ -52,7 +52,10 @@ def test_neural_forecast_univariate_y_with_X(model_class) -> None:
 
     # define model
     model = model_class(
-        "A-DEC", futr_exog_list=exog_list, max_steps=5, trainer_kwargs={"logger": False}
+        freq="A-DEC",
+        futr_exog_list=exog_list,
+        max_steps=5,
+        trainer_kwargs={"logger": False},
     )
 
     # attempt fit without X
@@ -86,7 +89,7 @@ def test_neural_forecast_univariate_y_with_X(model_class) -> None:
 def test_neural_forecast_multivariate_y_without_X(model_class) -> None:
     """Test with multiple endogenous without exogenous."""
     # define model
-    model = model_class("A-DEC", max_steps=5, trainer_kwargs={"logger": False})
+    model = model_class(freq="A-DEC", max_steps=5, trainer_kwargs={"logger": False})
 
     # train model
     model.fit(X_train, fh=[1, 2, 3, 4])
@@ -110,7 +113,7 @@ def test_neural_forecast_with_non_default_loss(model_class) -> None:
 
     # define model
     model = model_class(
-        "A-DEC",
+        freq="A-DEC",
         loss=HuberQLoss(0.5),
         valid_loss=MASE(1),
         max_steps=5,
@@ -139,7 +142,7 @@ def test_neural_forecast_fail_with_multiple_predictions(model_class) -> None:
 
     # define model
     model = model_class(
-        "A-DEC",
+        freq="A-DEC",
         loss=MQLoss(quantiles=[0.25, 0.5, 0.75]),
         max_steps=5,
         trainer_kwargs={"logger": False},
@@ -153,3 +156,98 @@ def test_neural_forecast_fail_with_multiple_predictions(model_class) -> None:
         NotImplementedError, match="Multiple prediction columns are not supported."
     ):
         model.predict()
+
+
+@pytest.mark.parametrize("model_class", [NeuralForecastLSTM, NeuralForecastRNN])
+@pytest.mark.skipif(
+    not run_test_for_class([NeuralForecastLSTM, NeuralForecastRNN]),
+    reason="run test only if softdeps are present and incrementally (if requested)",
+)
+def test_neural_forecast_with_auto_freq(model_class) -> None:
+    """Test with freq set to 'auto'."""
+    # define model
+    model = model_class(freq="auto", max_steps=5, trainer_kwargs={"logger": False})
+
+    # train model
+    model.fit(y_train, fh=[1, 2, 3, 4])
+
+    # predict with trained model
+    y_pred = model.predict()
+
+    # check interpreted freq
+    assert y_pred.index.freq == "A-DEC"
+
+
+@pytest.mark.parametrize("model_class", [NeuralForecastLSTM, NeuralForecastRNN])
+@pytest.mark.parametrize(
+    "freq",
+    [
+        "B",
+        "D",
+        "W",
+        "M",
+        "Q",
+        "A",
+        "Y",
+        "H",
+        "T",
+        "min",
+        "S",
+        "L",
+        "ms",
+        "U",
+        "us",
+        "N",
+    ],
+)
+@pytest.mark.skipif(
+    not run_test_for_class([NeuralForecastLSTM, NeuralForecastRNN]),
+    reason="run test only if softdeps are present and incrementally (if requested)",
+)
+def test_neural_forecast_with_auto_against_given_freq(model_class, freq) -> None:
+    """Test NeuralForecastRNN with freq set to 'auto' on all freqs."""
+    # prepare data
+    y = pandas.Series(
+        data=range(10),
+        index=pandas.date_range(start="2024-01-01", periods=10, freq=freq),
+    )
+
+    # define model
+    model = model_class(freq="auto", max_steps=1, trainer_kwargs={"logger": False})
+
+    # attempt train
+    model.fit(y, fh=[1, 2, 3, 4])
+
+    # convert freq str to DateOffset object for comparison
+    offset_freq = pandas.tseries.frequencies.to_offset(freq)
+    offset_auto_freq = pandas.tseries.frequencies.to_offset(model._freq)
+
+    assert offset_freq == offset_auto_freq
+
+
+@pytest.mark.parametrize("model_class", [NeuralForecastLSTM, NeuralForecastRNN])
+@pytest.mark.skipif(
+    not run_test_for_class([NeuralForecastLSTM, NeuralForecastRNN]),
+    reason="run test only if softdeps are present and incrementally (if requested)",
+)
+def test_neural_forecast_fail_with_auto_freq_on_range_index(model_class) -> None:
+    """Test fail with freq set to 'auto' on pd.RangeIndex."""
+    # prepare data
+    y = pandas.Series(data=range(10), index=pandas.RangeIndex(start=0, stop=10))
+
+    # should fail to interpret auto freq
+    with pytest.raises(
+        ValueError,
+        match="could not interpret freq, try passing freq in model initialization",
+    ):
+        # define model
+        model = model_class(freq="auto", max_steps=5, trainer_kwargs={"logger": False})
+
+        # attempt train
+        model.fit(y, fh=[1, 2, 3, 4])
+
+    # should work with freq passed as param
+    model = model_class(freq="W", max_steps=5, trainer_kwargs={"logger": False})
+
+    # attempt train
+    model.fit(y, fh=[1, 2, 3, 4])