sktime · mloning · Jan 29, 2021 · Jan 19, 2021 · Jan 19, 2021 · Jan 19, 2021
diff --git a/.all-contributorsrc b/.all-contributorsrc
@@ -774,6 +774,16 @@
         "bug"
       ]
     },
+    {
+      "login": "koralturkk",
+      "name": "Kutay Koralturk",
+      "avatar_url": "https://avatars2.githubusercontent.com/u/18037789?s=460&v=4",
+      "profile": "https://github.com/koralturkk",
+      "contributions": [
+        "code",
+        "bug"
+      ]
+    },
     {
       "login": "vnmabus",
       "name": "Carlos Ramos Carreño",

diff --git a/CODEOWNERS b/CODEOWNERS
@@ -21,6 +21,7 @@ sktime/forecasting/fbprophet @aiwalter
 sktime/forecasting/bats @aiwalter
 sktime/forecasting/tbats @aiwalter
 sktime/forecasting/arima @HYang1996
+sktime/forecasting/model_selection/_split @koralturkk
 
 sktime/forecasting/online_learning/ @magittan
 

diff --git a/examples/window_splitters.ipynb b/examples/window_splitters.ipynb
diff --git a/sktime/forecasting/model_selection/__init__.py b/sktime/forecasting/model_selection/__init__.py
@@ -1,16 +1,18 @@
 #!/usr/bin/env python3 -u
-# coding: utf-8
+# -*- coding: utf-8 -*-
 # copyright: sktime developers, BSD-3-Clause License (see LICENSE file)
 
-__author__ = ["Markus Löning"]
+__author__ = ["Markus Löning", "Kutay Koralturk"]
 __all__ = [
     "CutoffSplitter",
     "SingleWindowSplitter",
     "SlidingWindowSplitter",
     "temporal_train_test_split",
-    "ForecastingGridSearchCV"
+    "ExpandingWindowSplitter",
+    "ForecastingGridSearchCV",
 ]
 
+from sktime.forecasting.model_selection._split import ExpandingWindowSplitter
 from sktime.forecasting.model_selection._split import CutoffSplitter
 from sktime.forecasting.model_selection._split import SingleWindowSplitter
 from sktime.forecasting.model_selection._split import SlidingWindowSplitter

diff --git a/sktime/forecasting/model_selection/_split.py b/sktime/forecasting/model_selection/_split.py
@@ -3,15 +3,17 @@
 # copyright: sktime developers, BSD-3-Clause License (see LICENSE file)
 
 __all__ = [
+    "ExpandingWindowSplitter",
     "SlidingWindowSplitter",
     "CutoffSplitter",
     "SingleWindowSplitter",
     "temporal_train_test_split",
 ]
-__author__ = ["Markus Löning"]
+__author__ = ["Markus Löning", "Kutay Koralturk"]
 
-import numpy as np
 import pandas as pd
+import numpy as np
+
 from sklearn.model_selection import train_test_split as _train_test_split
 
 from sktime.utils.validation import check_window_length
@@ -290,6 +292,137 @@ def _get_start(self):
             return 0
 
 
+class ExpandingWindowSplitter(BaseWindowSplitter):
+    """Expanding window splitter.
+
+    Yields folds whose training window keeps a fixed lower bound while its
+    upper bound, the split point, advances by `step_length` from one fold to
+    the next, so later training windows extend earlier ones.
+
+    Parameters
+    ----------
+    fh : int, list or np.array
+        Forecasting horizon
+    window_length : int
+        Length of the first training window if `start_with_window=True`
+    step_length : int
+        Distance between the split points of two consecutive folds
+    initial_window : int, optional (default=None)
+        Length of the training window returned by `split_initial`
+    start_with_window : bool, optional (default=False)
+        - If True, the first split point is `window_length`, so the first
+          training window already holds `window_length` observations.
+        - If False, the first split point is 0.
+
+    Examples
+    --------
+    For example for `window_length = 5`, `step_length = 1`,
+    `fh = [1, 2, 3]` and `start_with_window = True`, here is a
+    representation of the folds::
+
+    |-----------------------|
+    | * * * * * x x x - - - |
+    | * * * * * * x x x - - |
+    | * * * * * * * x x x - |
+    | * * * * * * * * x x x |
+
+
+    ``*`` = training fold.
+
+    ``x`` = test fold.
+    """
+
+    def __init__(
+        self,
+        fh=DEFAULT_FH,
+        window_length=DEFAULT_WINDOW_LENGTH,
+        step_length=DEFAULT_STEP_LENGTH,
+        initial_window=None,
+        start_with_window=False,
+    ):
+        # parameters are stored unvalidated; validation happens lazily in the
+        # methods that use them
+        self.step_length = step_length
+        self.start_with_window = start_with_window
+        self.initial_window = initial_window
+        super().__init__(fh=fh, window_length=window_length)
+
+    def _split_windows(self, y):
+        """Yield the training and test window positions of every fold.
+
+        Parameters
+        ----------
+        y : pd.Series or pd.Index
+
+        Yields
+        ------
+        training_window : np.array
+        test_window : np.array
+        """
+        step_length = check_step_length(self.step_length)
+        window_length = check_window_length(self.window_length)
+        fh = self._check_fh()
+
+        end = self._get_end(y)
+        start = self._get_start()
+
+        # The lower bound never moves, which is what makes the window expand:
+        # it is 0 if `start_with_window=True` and `-window_length` otherwise.
+        # NOTE(review): negative positions are presumably discarded by the
+        # caller in the base class - confirm.
+        window_start = start - window_length
+        for split_point in range(start, end, step_length):
+            training_window = np.arange(window_start, split_point)
+            # the horizon is relative to the last training position,
+            # i.e. to `split_point - 1`
+            test_window = split_point + fh - 1
+            yield training_window, test_window
+
+    def split_initial(self, y):
+        """Split initial window
+
+        This is useful during forecasting model selection where we want to
+        fit the forecaster on some part of the
+        data first before doing temporal cross-validation
+
+        Parameters
+        ----------
+        y : pd.Series
+
+        Returns
+        -------
+        initial_training_window : np.array
+            Positions `0, ..., initial_window - 1`
+        initial_test_window : np.array
+            Positions `initial_window, ..., len(y) - 1`
+
+        Raises
+        ------
+        ValueError
+            If `initial_window` is None
+        """
+        if self.initial_window is None:
+            raise ValueError(
+                "Please specify initial window, found: `initial_window`=None"
+            )
+
+        initial = check_window_length(self.initial_window)
+        initial_training_window = np.arange(initial)
+        initial_test_window = np.arange(initial, len(y))
+        return initial_training_window, initial_test_window
+
+    def get_n_splits(self, y=None):
+        """Return number of splits
+
+        Parameters
+        ----------
+        y : pd.Series or pd.Index, optional (default=None)
+
+        Returns
+        -------
+        n_splits : int
+            Number of cutoffs returned by `get_cutoffs`
+
+        Raises
+        ------
+        ValueError
+            If `y` is None
+        """
+        if y is None:
+            raise ValueError(
+                f"{self.__class__.__name__} requires `y` to compute the "
+                f"number of splits."
+            )
+        return len(self.get_cutoffs(y))
+
+    def get_cutoffs(self, y=None):
+        """Get the cutoff time points.
+
+        Parameters
+        ----------
+        y : pd.Series or pd.Index, optional (default=None)
+
+        Returns
+        -------
+        cutoffs : np.array
+            Split points minus one, i.e. the last training position of
+            every fold
+
+        Raises
+        ------
+        ValueError
+            If `y` is None
+        """
+        if y is None:
+            raise ValueError(
+                f"{self.__class__.__name__} requires `y` to compute the cutoffs."
+            )
+        y = self._check_y(y)
+        end = self._get_end(y)
+        start = self._get_start()
+        step_length = check_step_length(self.step_length)
+        return np.arange(start, end, step_length) - 1
+
+    def _get_start(self):
+        """Return the first split point.
+
+        Returns
+        -------
+        start : int
+            `window_length` if `start_with_window` is truthy, otherwise 0
+        """
+        # validate first so an invalid `window_length` is reported either way
+        window_length = check_window_length(self.window_length)
+        return window_length if self.start_with_window else 0
+
+
 class SingleWindowSplitter(BaseWindowSplitter):
     """Single window splitter
 

diff --git a/sktime/forecasting/model_selection/tests/test_split.py b/sktime/forecasting/model_selection/tests/test_split.py
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 # copyright: sktime developers, BSD-3-Clause License (see LICENSE file)
 
-__author__ = ["Markus Löning"]
+__author__ = ["Markus Löning", "Kutay Koralturk"]
 
 import numpy as np
 import pandas as pd
@@ -11,6 +11,7 @@
 from sktime.forecasting.model_selection import CutoffSplitter
 from sktime.forecasting.model_selection import SingleWindowSplitter
 from sktime.forecasting.model_selection import SlidingWindowSplitter
+from sktime.forecasting.model_selection import ExpandingWindowSplitter
 from sktime.forecasting.model_selection import temporal_train_test_split
 from sktime.forecasting.tests._config import TEST_FHS
 from sktime.forecasting.tests._config import TEST_OOS_FHS
@@ -246,6 +247,82 @@ def test_sliding_window_split_start_with_fh(y, fh, window_length, step_length):
     check_test_windows(test_windows, fh, cutoffs)
 
 
+@pytest.mark.parametrize("y", TEST_YS)
+@pytest.mark.parametrize("fh", TEST_FHS)
+@pytest.mark.parametrize("window_length", TEST_WINDOW_LENGTHS)
+@pytest.mark.parametrize("step_length", TEST_STEP_LENGTHS)
+def test_expanding_window_split_start_with_fh(y, fh, window_length, step_length):
+    """Check folds of ExpandingWindowSplitter when `start_with_window=False`."""
+    cv = ExpandingWindowSplitter(
+        fh=fh,
+        window_length=window_length,
+        step_length=step_length,
+        start_with_window=False,
+    )
+
+    # generate and keep splits
+    training_windows, test_windows, n_splits, cutoffs = generate_and_check_windows(
+        y, cv
+    )
+
+    # check first windows: the first fold has no training data yet
+    assert len(training_windows[0]) == 0
+    assert len(training_windows[1]) <= max(step_length, window_length)
+
+    # check training windows; the built-in `int` replaces the `np.int` alias,
+    # which NumPy deprecated in 1.20 and removed in 1.24
+    n_incomplete_windows = int(np.ceil(window_length / step_length))
+    assert n_incomplete_windows == get_n_incomplete_windows(
+        training_windows, window_length
+    )
+
+    # check incomplete windows
+    if n_incomplete_windows > 1:
+        incomplete_windows = training_windows[:n_incomplete_windows]
+        check_incomplete_windows_dimensions(
+            incomplete_windows, n_incomplete_windows, window_length
+        )
+    # check test windows
+    check_test_windows(test_windows, fh, cutoffs)
+
+
+@pytest.mark.parametrize("y", TEST_YS)
+@pytest.mark.parametrize("fh", TEST_FHS)
+@pytest.mark.parametrize("window_length", TEST_WINDOW_LENGTHS)
+@pytest.mark.parametrize("step_length", TEST_STEP_LENGTHS)
+def test_expanding_window_split_start_with_window(y, fh, window_length, step_length):
+    """Check folds of ExpandingWindowSplitter when `start_with_window=True`."""
+    splitter = ExpandingWindowSplitter(
+        fh=fh,
+        window_length=window_length,
+        step_length=step_length,
+        start_with_window=True,
+    )
+
+    # collect all folds together with the reported split count and cutoffs
+    train_folds, test_folds, n_folds, cutoffs = generate_and_check_windows(
+        y, splitter
+    )
+
+    # the last position of every training window must equal its cutoff
+    final_positions = np.array([fold[-1] for fold in train_folds])
+    np.testing.assert_array_equal(cutoffs, final_positions)
+
+    # the first window holds `window_length` points and every later window
+    # is `step_length` points longer than its predecessor
+    for k in range(n_folds):
+        assert len(train_folds[k]) == window_length + step_length * k
+
+    # the first window covers the first `window_length` positions
+    np.testing.assert_array_equal(train_folds[0], np.arange(window_length))
+
+    # consecutive cutoffs are a whole number of steps apart, so all of them
+    # share the same remainder modulo the step length
+    step_remainders = final_positions % step_length
+    assert min(step_remainders) == max(step_remainders)
+
+    # the test windows must match the forecasting horizon at every cutoff
+    check_test_windows(test_folds, fh, cutoffs)
+
+
 @pytest.mark.parametrize(
     "index_type, fh_type, is_relative", VALID_INDEX_FH_COMBINATIONS
 )