Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add expanding window splitter #627

Merged
merged 8 commits into from Jan 29, 2021
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
10 changes: 10 additions & 0 deletions .all-contributorsrc
Expand Up @@ -774,6 +774,16 @@
"bug"
]
},
{
"login": "koralturkk",
"name": "Kutay Koralturk",
"avatar_url": "https://avatars2.githubusercontent.com/u/18037789?s=460&v=4",
"profile": "https://github.com/koralturkk",
"contributions": [
"code",
"bug"
]
},
{
"login": "vnmabus",
"name": "Carlos Ramos Carreño",
Expand Down
1 change: 1 addition & 0 deletions CODEOWNERS
Validating CODEOWNERS rules …
Expand Up @@ -21,6 +21,7 @@ sktime/forecasting/fbprophet @aiwalter
sktime/forecasting/bats @aiwalter
sktime/forecasting/tbats @aiwalter
sktime/forecasting/arima @HYang1996
sktime/forecasting/model_selection/_split @koralturkk

sktime/forecasting/online_learning/ @magittan

Expand Down
324 changes: 242 additions & 82 deletions examples/window_splitters.ipynb

Large diffs are not rendered by default.

8 changes: 5 additions & 3 deletions sktime/forecasting/model_selection/__init__.py
@@ -1,16 +1,18 @@
#!/usr/bin/env python3 -u
# coding: utf-8
# -*- coding: utf-8 -*-
# copyright: sktime developers, BSD-3-Clause License (see LICENSE file)

__author__ = ["Markus Löning"]
__author__ = ["Markus Löning", "Kutay Koralturk"]
__all__ = [
"CutoffSplitter",
"SingleWindowSplitter",
"SlidingWindowSplitter",
"temporal_train_test_split",
"ForecastingGridSearchCV"
"ExpandingWindowSplitter",
"ForecastingGridSearchCV",
]

from sktime.forecasting.model_selection._split import ExpandingWindowSplitter
from sktime.forecasting.model_selection._split import CutoffSplitter
from sktime.forecasting.model_selection._split import SingleWindowSplitter
from sktime.forecasting.model_selection._split import SlidingWindowSplitter
Expand Down
137 changes: 135 additions & 2 deletions sktime/forecasting/model_selection/_split.py
Expand Up @@ -3,15 +3,17 @@
# copyright: sktime developers, BSD-3-Clause License (see LICENSE file)

__all__ = [
"ExpandingWindowSplitter",
"SlidingWindowSplitter",
"CutoffSplitter",
"SingleWindowSplitter",
"temporal_train_test_split",
]
__author__ = ["Markus Löning"]
# One list entry per author, matching the other modules touched in this change.
__author__ = ["Markus Löning", "Kutay Koralturk"]

import numpy as np
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split as _train_test_split

from sktime.utils.validation import check_window_length
Expand Down Expand Up @@ -290,6 +292,137 @@ def _get_start(self):
return 0


class ExpandingWindowSplitter(BaseWindowSplitter):
    """Expanding window splitter.

    Generates training windows that all begin at the same position and end
    at successive split points, so each training window is `step_length`
    points longer than the previous one. Each training window is paired with
    a test window located at `split_point + fh - 1`.

    Parameters
    ----------
    fh : int, list or np.array
        Forecasting horizon
    window_length : int
        Offset of the first split point when `start_with_window=True`, i.e.
        the length of the first training window in that case.
    step_length : int
        Distance between two consecutive split points.
    initial_window : int, optional (default=None)
        Length of the initial training window returned by `split_initial`.
        Only used by `split_initial`.
    start_with_window : bool, optional (default=False)
        If True, the first split point is `window_length`; otherwise the
        first split point is 0.

    Examples
    --------
    For example for `window_length = 5`, `step_length = 1`,
    `fh = [1, 2, 3]` and `start_with_window = True` here is a
    representation of the folds::

    |-----------------------|
    | * * * * * x x x - - - |
    | * * * * * * x x x - - |
    | * * * * * * * x x x - |
    | * * * * * * * * x x x |


    ``*`` = training fold.

    ``x`` = test fold.
    """

    def __init__(
        self,
        fh=DEFAULT_FH,
        window_length=DEFAULT_WINDOW_LENGTH,
        step_length=DEFAULT_STEP_LENGTH,
        initial_window=None,
        start_with_window=False,
    ):

        self.step_length = step_length
        self.start_with_window = start_with_window
        self.initial_window = initial_window
        super().__init__(fh=fh, window_length=window_length)

    def _split_windows(self, y):
        """Yield (training_window, test_window) pairs of integer positions.

        Parameters
        ----------
        y : pd.Series or pd.Index

        Yields
        ------
        training_window : np.array
        test_window : np.array
        """
        step_length = check_step_length(self.step_length)
        window_length = check_window_length(self.window_length)
        fh = self._check_fh()

        end = self._get_end(y)
        start = self._get_start()

        # All training windows share this left edge; only the right edge
        # (the split point) moves. It is negative when
        # `start_with_window=False`.
        # NOTE(review): negative positions are presumably dropped by the
        # caller in the base class - confirm.
        window_start = start - window_length
        for split_point in range(start, end, step_length):
            training_window = np.arange(window_start, split_point)
            test_window = split_point + fh - 1
            yield training_window, test_window

    def split_initial(self, y):
        """Split initial window

        This is useful during forecasting model selection where we want to
        fit the forecaster on some part of the
        data first before doing temporal cross-validation

        Parameters
        ----------
        y : pd.Series

        Returns
        -------
        initial_training_window : np.array
            Positions `0, ..., initial_window - 1`.
        initial_test_window : np.array
            Positions `initial_window, ..., len(y) - 1`.

        Raises
        ------
        ValueError
            If `initial_window` is None.
        """
        if self.initial_window is None:
            raise ValueError(
                "Please specify initial window, found: `initial_window`=None"
            )

        initial = check_window_length(self.initial_window)
        initial_training_window = np.arange(initial)
        initial_test_window = np.arange(initial, len(y))
        return initial_training_window, initial_test_window

    def get_n_splits(self, y=None):
        """Return number of splits

        Parameters
        ----------
        y : pd.Series or pd.Index, optional (default=None)

        Returns
        -------
        n_splits : int

        Raises
        ------
        ValueError
            If `y` is None.
        """
        if y is None:
            raise ValueError(
                f"{self.__class__.__name__} requires `y` to compute the "
                f"number of splits."
            )
        # one split per cutoff
        return len(self.get_cutoffs(y))

    def get_cutoffs(self, y=None):
        """Get the cutoff time points.

        Parameters
        ----------
        y : pd.Series or pd.Index, optional (default=None)

        Returns
        -------
        cutoffs : np.array
            The position just before each split point, i.e.
            `split_point - 1`.

        Raises
        ------
        ValueError
            If `y` is None.
        """
        if y is None:
            raise ValueError(
                f"{self.__class__.__name__} requires `y` to compute the cutoffs."
            )
        y = self._check_y(y)
        end = self._get_end(y)
        start = self._get_start()
        step_length = check_step_length(self.step_length)
        return np.arange(start, end, step_length) - 1

    def _get_start(self):
        """Return the first split point: `window_length` or 0."""
        window_length = check_window_length(self.window_length)
        if self.start_with_window:
            return window_length
        return 0


class SingleWindowSplitter(BaseWindowSplitter):
"""Single window splitter

Expand Down
79 changes: 78 additions & 1 deletion sktime/forecasting/model_selection/tests/test_split.py
Expand Up @@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
# copyright: sktime developers, BSD-3-Clause License (see LICENSE file)

__author__ = ["Markus Löning"]
__author__ = ["Markus Löning", "Kutay Koralturk"]

import numpy as np
import pandas as pd
Expand All @@ -11,6 +11,7 @@
from sktime.forecasting.model_selection import CutoffSplitter
from sktime.forecasting.model_selection import SingleWindowSplitter
from sktime.forecasting.model_selection import SlidingWindowSplitter
from sktime.forecasting.model_selection import ExpandingWindowSplitter
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.forecasting.tests._config import TEST_FHS
from sktime.forecasting.tests._config import TEST_OOS_FHS
Expand Down Expand Up @@ -246,6 +247,82 @@ def test_sliding_window_split_start_with_fh(y, fh, window_length, step_length):
check_test_windows(test_windows, fh, cutoffs)


@pytest.mark.parametrize("y", TEST_YS)
@pytest.mark.parametrize("fh", TEST_FHS)
@pytest.mark.parametrize("window_length", TEST_WINDOW_LENGTHS)
@pytest.mark.parametrize("step_length", TEST_STEP_LENGTHS)
def test_expanding_window_split_start_with_fh(y, fh, window_length, step_length):
    """Check ExpandingWindowSplitter splits when `start_with_window=False`."""
    cv = ExpandingWindowSplitter(
        fh=fh,
        window_length=window_length,
        step_length=step_length,
        start_with_window=False,
    )

    # generate and keep splits
    training_windows, test_windows, n_splits, cutoffs = generate_and_check_windows(
        y, cv
    )

    # check first windows
    assert len(training_windows[0]) == 0
    assert len(training_windows[1]) <= max(step_length, window_length)

    # check training windows
    # builtin `int` instead of the `np.int` alias, which was deprecated in
    # NumPy 1.20 and removed in NumPy 1.24
    n_incomplete_windows = int(np.ceil(window_length / step_length))
    assert n_incomplete_windows == get_n_incomplete_windows(
        training_windows, window_length
    )

    # check incomplete windows
    if n_incomplete_windows > 1:
        incomplete_windows = training_windows[:n_incomplete_windows]
        check_incomplete_windows_dimensions(
            incomplete_windows, n_incomplete_windows, window_length
        )

    # check test windows
    check_test_windows(test_windows, fh, cutoffs)


@pytest.mark.parametrize("y", TEST_YS)
@pytest.mark.parametrize("fh", TEST_FHS)
@pytest.mark.parametrize("window_length", TEST_WINDOW_LENGTHS)
@pytest.mark.parametrize("step_length", TEST_STEP_LENGTHS)
def test_expanding_window_split_start_with_window(y, fh, window_length, step_length):
    """Check ExpandingWindowSplitter splits when `start_with_window=True`."""
    # set up the expanding window cv iterator
    splitter = ExpandingWindowSplitter(
        fh=fh,
        window_length=window_length,
        step_length=step_length,
        start_with_window=True,
    )

    # run the splitter and hold on to everything it produced
    results = generate_and_check_windows(y, splitter)
    training_windows, test_windows, n_splits, cutoffs = results

    # the last training position of each split must equal its cutoff
    window_ends = np.array([window[-1] for window in training_windows])
    np.testing.assert_array_equal(cutoffs, window_ends)

    # window lengths: each split adds one step to the training window
    for k in range(n_splits):
        expected_length = window_length + step_length * k
        assert len(training_windows[k]) == expected_length

    # the first window covers exactly the first `window_length` positions
    np.testing.assert_array_equal(training_windows[0], np.arange(window_length))

    # consecutive window ends are aligned on the step length
    offsets = window_ends % step_length
    assert min(offsets) == max(offsets)

    # finally, the test windows
    check_test_windows(test_windows, fh, cutoffs)


@pytest.mark.parametrize(
"index_type, fh_type, is_relative", VALID_INDEX_FH_COMBINATIONS
)
Expand Down