Skip to content

Commit

Permalink
Evaluate (example and fix) (#690)
Browse files Browse the repository at this point in the history
* Added Imputer class

* Merge conflict solved

* Evaluate forecaster function added

* Evaluate forecaster function added

* Docstring

* Added tqdm dependency

* Added hard dependency

* Added new submodule. Changed initial_window fh in splitters

* Bug fix for split_initial. Added markers argument in plot_series

* Bug fix split_initial in evaluate

* Added scoring check. Added unittest

* Plot fix

* Added example in notebook. Fix tqdm progress

* Removed tqdm dependency. Refactored cv classes

Co-authored-by: Walter <walmar2@emea.corpdir.net>
  • Loading branch information
Martin Walter and Walter committed Feb 25, 2021
1 parent 704859b commit 9a9b732
Show file tree
Hide file tree
Showing 8 changed files with 169 additions and 140 deletions.
2 changes: 1 addition & 1 deletion .appveyor.yml
Expand Up @@ -65,7 +65,7 @@ install:

# Install requirements from inside conda environment
- cmd: activate testenv
- cmd: conda install -c conda-forge "matplotlib>=3.3.2" "seaborn>=0.11.0" "tsfresh>=0.17.0" "hcrystalball>=0.1.9" "stumpy>=1.5.1" "fbprophet>=0.7.1" "numba>=0.50,<0.53" "numpy>=1.19" "pandas>=1.1.0,<1.2" "statsmodels>=0.12.1" "scikit-learn>=0.23.0" "scikit-posthocs>=0.6.5" pystan tqdm
- cmd: conda install -c conda-forge "matplotlib>=3.3.2" "seaborn>=0.11.0" "tsfresh>=0.17.0" "hcrystalball>=0.1.9" "stumpy>=1.5.1" "fbprophet>=0.7.1" "numba>=0.50,<0.53" "numpy>=1.19" "pandas>=1.1.0,<1.2" "statsmodels>=0.12.1" "scikit-learn>=0.23.0" "scikit-posthocs>=0.6.5" pystan
- cmd: pip install -r %REQUIREMENTS%

# List installed environment
Expand Down
1 change: 0 additions & 1 deletion .binder/requirements.txt
Expand Up @@ -14,5 +14,4 @@ seaborn
statsmodels>=0.12.1
stumpy>=1.5.1
tbats>=1.1.0
tqdm
tsfresh>=0.17.0
1 change: 0 additions & 1 deletion build_tools/hard_dependencies.txt
Expand Up @@ -5,4 +5,3 @@ pandas==1.1.5
scikit-learn==0.23.*
statsmodels>=0.12.1
wheel
tqdm
1 change: 0 additions & 1 deletion build_tools/requirements.txt
Expand Up @@ -13,6 +13,5 @@ scikit-posthocs
statsmodels>=0.12.1
stumpy>=1.5.1
tbats>=1.1.0
tqdm
tsfresh>=0.17.0
wheel
161 changes: 121 additions & 40 deletions examples/01_forecasting.ipynb

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion setup.py
Expand Up @@ -26,7 +26,6 @@
"scikit-learn": "0.23.0",
"statsmodels": "0.12.1",
"numba": "0.50",
"tqdm": "4.10.0",
}
EXTRAS_REQUIRE = {
"all_extras": [
Expand Down
4 changes: 1 addition & 3 deletions sktime/forecasting/model_evaluation/_functions.py
Expand Up @@ -2,7 +2,6 @@
import numpy as np
import pandas as pd
import time
from tqdm.auto import tqdm
from sktime.utils.validation.forecasting import check_y
from sktime.utils.validation.forecasting import check_cv
from sktime.forecasting.base import ForecastingHorizon
Expand Down Expand Up @@ -66,11 +65,10 @@ def evaluate(
_check_strategies(strategy)
scoring = check_scoring(scoring)

n_splits = cv.get_n_splits(y)
results = pd.DataFrame()
cv.start_with_window = True

for i, (train, test) in enumerate(tqdm(cv.split(y), total=n_splits)):
for i, (train, test) in enumerate(cv.split(y)):
# get initial window, if required
if i == 0 and cv.initial_window and strategy == "update":
train, test = cv.split_initial(y)
Expand Down
138 changes: 46 additions & 92 deletions sktime/forecasting/model_selection/_split.py
Expand Up @@ -184,6 +184,52 @@ def split_initial(self, y):
initial_test_window = np.arange(initial, len(y))
return initial_training_window, initial_test_window

def _get_start(self):
    """Return the start position handed to ``np.arange`` in ``get_cutoffs``.

    ``self.window_length`` is always passed through ``check_window_length``
    first. The checked value is returned when ``self.start_with_window`` is
    truthy; otherwise the start position is 0.
    """
    checked_length = check_window_length(self.window_length)
    return checked_length if self.start_with_window else 0

def get_n_splits(self, y=None):
    """Return the number of splits.

    Parameters
    ----------
    y : pd.Series or pd.Index, optional (default=None)
        Forwarded to ``get_cutoffs``. The signature defaults to None, but
        None is rejected.

    Returns
    -------
    n_splits : int
        Length of the result of ``self.get_cutoffs(y)``.

    Raises
    ------
    ValueError
        If `y` is None.
    """
    if y is not None:
        # One split per cutoff.
        return len(self.get_cutoffs(y))

    splitter_name = self.__class__.__name__
    raise ValueError(
        f"{splitter_name} requires `y` to compute the number of splits."
    )

def get_cutoffs(self, y=None):
    """Get the cutoff time points.

    Parameters
    ----------
    y : pd.Series or pd.Index, optional (default=None)
        Passed through ``self._check_y`` before use. The signature defaults
        to None, but None is rejected.

    Returns
    -------
    cutoffs : np.array
        ``np.arange(start, end, step_length) - 1``, where ``start`` comes
        from ``self._get_start()``, ``end`` from ``self._get_end(y)`` and
        ``step_length`` from ``check_step_length(self.step_length)``.

    Raises
    ------
    ValueError
        If `y` is None.
    """
    if y is None:
        owner = self.__class__.__name__
        raise ValueError(f"{owner} requires `y` to compute the cutoffs.")

    checked_y = self._check_y(y)
    stop = self._get_end(checked_y)
    first = self._get_start()
    stride = check_step_length(self.step_length)

    # Every generated position is shifted down by one.
    # NOTE(review): presumably so a cutoff is the last index of a training
    # window -- confirm against _split_windows.
    positions = np.arange(first, stop, stride)
    return positions - 1


class SlidingWindowSplitter(BaseWindowSplitter):
"""Sliding window splitter
Expand Down Expand Up @@ -240,52 +286,6 @@ def _split_windows(self, y):
test_window = split_point + fh - 1
yield training_window, test_window

def get_n_splits(self, y=None):
    """Return the number of splits.

    Parameters
    ----------
    y : pd.Series or pd.Index, optional (default=None)
        Data handed to ``get_cutoffs``. Must not be None, despite the
        default.

    Returns
    -------
    n_splits : int
        How many cutoffs ``self.get_cutoffs(y)`` yields.

    Raises
    ------
    ValueError
        If `y` is None.
    """
    missing_y = y is None
    if missing_y:
        cls_name = self.__class__.__name__
        raise ValueError(
            f"{cls_name} requires `y` to compute the number of splits."
        )

    cutoffs = self.get_cutoffs(y)
    return len(cutoffs)

def get_cutoffs(self, y=None):
    """Get the cutoff time points.

    Parameters
    ----------
    y : pd.Series or pd.Index, optional (default=None)
        Run through ``self._check_y`` before the cutoffs are computed.
        Must not be None, despite the default.

    Returns
    -------
    cutoffs : np.array
        Evenly spaced positions from ``self._get_start()`` up to (excluding)
        ``self._get_end(y)`` in steps of the checked ``step_length``, each
        reduced by one.

    Raises
    ------
    ValueError
        If `y` is None.
    """
    if y is None:
        message = f"{self.__class__.__name__} requires `y` to compute the cutoffs."
        raise ValueError(message)

    y = self._check_y(y)
    upper = self._get_end(y)
    lower = self._get_start()
    step = check_step_length(self.step_length)
    grid = np.arange(lower, upper, step)
    # Positions are shifted down by one before being returned.
    return grid - 1

def _get_start(self):
    """Return the position at which cutoff generation begins.

    ``check_window_length`` is applied to ``self.window_length``
    unconditionally. Its result is used only when ``self.start_with_window``
    is truthy; otherwise generation begins at 0.
    """
    window_length = check_window_length(self.window_length)
    if not self.start_with_window:
        return 0
    return window_length


class ExpandingWindowSplitter(BaseWindowSplitter):
"""Expanding window splitter
Expand Down Expand Up @@ -345,52 +345,6 @@ def _split_windows(self, y):
test_window = split_point + fh - 1
yield training_window, test_window

def get_n_splits(self, y=None):
    """Return the number of splits.

    Parameters
    ----------
    y : pd.Series or pd.Index, optional (default=None)
        Input from which the cutoffs are derived. None is not accepted,
        even though it is the default.

    Returns
    -------
    n_splits : int
        ``len(self.get_cutoffs(y))``.

    Raises
    ------
    ValueError
        If `y` is None.
    """
    if y is None:
        prefix = f"{self.__class__.__name__} requires `y` to compute the "
        raise ValueError(prefix + "number of splits.")
    n_splits = len(self.get_cutoffs(y))
    return n_splits

def get_cutoffs(self, y=None):
    """Get the cutoff time points.

    Parameters
    ----------
    y : pd.Series or pd.Index, optional (default=None)
        Checked with ``self._check_y``, then used to determine the end of
        the cutoff range. None is not accepted, even though it is the
        default.

    Returns
    -------
    cutoffs : np.array
        Result of ``np.arange(start, end, step_length) - 1``.

    Raises
    ------
    ValueError
        If `y` is None.
    """
    if y is None:
        name = self.__class__.__name__
        raise ValueError(name + " requires `y` to compute the cutoffs.")

    y = self._check_y(y)
    # Bounds and step are resolved in this order: end, start, step length.
    end = self._get_end(y)
    start = self._get_start()
    step_length = check_step_length(self.step_length)
    return np.arange(start=start, stop=end, step=step_length) - 1

def _get_start(self):
    """Return the first position of the cutoff range.

    The window length is always run through ``check_window_length``. The
    result is kept as the start when ``self.start_with_window`` is truthy
    and replaced by 0 otherwise.
    """
    start = check_window_length(self.window_length)
    if not self.start_with_window:
        start = 0
    return start


class SingleWindowSplitter(BaseWindowSplitter):
"""Single window splitter
Expand Down

0 comments on commit 9a9b732

Please sign in to comment.