Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

&SmirnGregHM [BUG] ensure forecasting tuners do not vectorize over columns (variables) #5145

Merged
merged 12 commits into from Sep 2, 2023
9 changes: 8 additions & 1 deletion sktime/forecasting/model_selection/_tune.py
Expand Up @@ -69,7 +69,6 @@ def __init__(
"capability:pred_int",
"capability:pred_int:insample",
"capability:insample",
"scitype:y",
"ignores-exogeneous-X",
"handles-missing-data",
"y_inner_mtype",
Expand All @@ -91,6 +90,8 @@ def _extend_to_all_scitypes(self, tagname):

Mutates self tag with name `tagname`.
If no mtypes are present of a time series scitype, adds a pandas based one.
If only univariate pandas scitype is present for Series ("pd.Series"),
also adds the multivariate one ("pd.DataFrame").

Parameters
----------
Expand All @@ -104,10 +105,16 @@ def _extend_to_all_scitypes(self, tagname):
if not isinstance(tagval, list):
tagval = [tagval]
scitypes = mtype_to_scitype(tagval, return_unique=True)
# if no Series mtypes are present, add pd.DataFrame
if "Series" not in scitypes:
tagval = tagval + ["pd.DataFrame"]
# ensure we have a Series mtype capable of multivariate
elif "pd.Series" in tagval and "pd.DataFrame" not in tagval:
tagval = ["pd.DataFrame"] + tagval
# if no Panel mtypes are present, add pd.DataFrame based one
if "Panel" not in scitypes:
tagval = tagval + ["pd-multiindex"]
# if no Hierarchical mtypes are present, add pd.DataFrame based one
if "Hierarchical" not in scitypes:
tagval = tagval + ["pd_multiindex_hier"]
self.set_tags(**{tagname: tagval})
Expand Down
23 changes: 18 additions & 5 deletions sktime/forecasting/model_selection/tests/test_tune.py
Expand Up @@ -81,12 +81,13 @@ def _check_cv(forecaster, tuner, cv, param_grid, y, X, scoring):
assert param_grid[best_idx].items() <= fitted_params.items()


def _create_hierarchical_data():
def _create_hierarchical_data(n_columns=1):
y = _make_hierarchical(
random_state=TEST_RANDOM_SEEDS[0],
hierarchy_levels=(2, 2),
min_timepoints=15,
max_timepoints=15,
n_columns=n_columns,
)
X = _make_hierarchical(
random_state=TEST_RANDOM_SEEDS[1],
Expand All @@ -97,7 +98,10 @@ def _create_hierarchical_data():
return y, X


NAIVE = NaiveForecaster(strategy="mean")
# estimator fixtures used for tuning
# set_tags in NaiveForecaster ensures that it is univariate and broadcasts
# this is currently the case, but a future improved NaiveForecaster may reduce coverage
NAIVE = NaiveForecaster(strategy="mean").set_tags(**{"scitype:y": "univariate"})
NAIVE_GRID = {"window_length": TEST_WINDOW_LENGTHS_INT}
PIPE = TransformedTargetForecaster(
[
Expand Down Expand Up @@ -126,9 +130,15 @@ def _create_hierarchical_data():
@pytest.mark.parametrize("scoring", TEST_METRICS)
@pytest.mark.parametrize("cv", CVs)
@pytest.mark.parametrize("error_score", ERROR_SCORES)
def test_gscv(forecaster, param_grid, cv, scoring, error_score):
@pytest.mark.parametrize("multivariate", [True, False])
def test_gscv(forecaster, param_grid, cv, scoring, error_score, multivariate):
"""Test ForecastingGridSearchCV."""
y, X = load_longley()
fkiraly marked this conversation as resolved.
Show resolved Hide resolved
if multivariate:
X, y = load_longley()
else:
y, X = load_longley()

gscv = ForecastingGridSearchCV(
forecaster,
param_grid=param_grid,
Expand Down Expand Up @@ -177,6 +187,7 @@ def test_rscv(forecaster, param_grid, cv, scoring, error_score, n_iter, random_s
ParameterSampler(param_grid, n_iter, random_state=random_state)
)
_check_cv(forecaster, rscv, cv, param_distributions, y, X, scoring)
_check_fitted_params_keys(rscv.get_fitted_params())


@pytest.mark.skipif(
Expand All @@ -189,9 +200,10 @@ def test_rscv(forecaster, param_grid, cv, scoring, error_score, n_iter, random_s
@pytest.mark.parametrize("scoring", TEST_METRICS)
@pytest.mark.parametrize("cv", CVs)
@pytest.mark.parametrize("error_score", ERROR_SCORES)
def test_gscv_hierarchical(forecaster, param_grid, cv, scoring, error_score):
@pytest.mark.parametrize("n_cols", [1, 2])
def test_gscv_hierarchical(forecaster, param_grid, cv, scoring, error_score, n_cols):
"""Test ForecastingGridSearchCV."""
y, X = _create_hierarchical_data()
y, X = _create_hierarchical_data(n_columns=n_cols)
gscv = ForecastingGridSearchCV(
forecaster,
param_grid=param_grid,
Expand All @@ -203,6 +215,7 @@ def test_gscv_hierarchical(forecaster, param_grid, cv, scoring, error_score):

param_grid = ParameterGrid(param_grid)
_check_cv(forecaster, gscv, cv, param_grid, y, X, scoring)
_check_fitted_params_keys(gscv.get_fitted_params())


@pytest.mark.skipif(
Expand Down