[MNT] differential testing for performance_metrics module (#6616)
This PR adds differential testing for the `performance_metrics` module.

Tests in the module are run iff any file within the module is changed.

Relies on #6617 to avoid merge conflicts.
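For context, the gating pattern applied throughout the diff below looks roughly like the minimal sketch that follows. Only the `skipif` marker and the `run_test_module_changed` import are taken from the diff; the test function, its body, and the `MeanAbsoluteError` usage are illustrative assumptions, not part of this PR.

```python
import pandas as pd
import pytest

from sktime.tests.test_switch import run_test_module_changed


@pytest.mark.skipif(
    not run_test_module_changed(["sktime.performance_metrics"]),
    reason="Run if performance_metrics module has changed.",
)
def test_example_metric():
    """Hypothetical test, skipped unless performance_metrics has changed."""
    from sktime.performance_metrics.forecasting import MeanAbsoluteError

    y_true = pd.Series([1.0, 2.0, 3.0])
    y_pred = pd.Series([1.0, 2.0, 4.0])

    # sktime metric objects are callable, so this evaluates the loss directly
    assert MeanAbsoluteError()(y_true, y_pred) >= 0
```

pytest evaluates the marker condition at collection time, so on a PR that does not touch `sktime.performance_metrics` every test carrying the marker is skipped, which is the differential-testing behaviour described above.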
fkiraly committed Jun 20, 2024 · 1 parent 6438c61 · commit 283c2d0
Showing 8 changed files with 153 additions and 4 deletions.
@@ -13,14 +13,14 @@

__all__ = [
"_BaseProbaForecastingErrorMetric",
"PinballLoss",
"EmpiricalCoverage",
"ConstraintViolation",
"CRPS",
"AUCalibration",
"ConstraintViolation",
"EmpiricalCoverage",
"IntervalWidth",
"LogLoss",
"PinballLoss",
"SquaredDistrLoss",
"AUCalibration",
]

from sktime.performance_metrics.forecasting.probabilistic._classes import (
@@ -11,6 +11,7 @@
)
from sktime.proba.normal import Normal
from sktime.proba.tfp import TFNormal
from sktime.tests.test_switch import run_test_module_changed
from sktime.utils.dependencies import _check_soft_dependencies

warnings.filterwarnings("ignore", category=FutureWarning)
@@ -24,6 +25,10 @@
normal_dists = [Normal]


@pytest.mark.skipif(
not run_test_module_changed(["sktime.performance_metrics"]),
reason="Run if performance_metrics module has changed.",
)
@pytest.mark.parametrize("normal", normal_dists)
@pytest.mark.parametrize("metric", DISTR_METRICS)
@pytest.mark.parametrize("multivariate", [True, False])
@@ -12,6 +12,7 @@
PinballLoss,
)
from sktime.split import temporal_train_test_split
from sktime.tests.test_switch import run_test_module_changed
from sktime.utils._testing.series import _make_series

warnings.filterwarnings("ignore", category=FutureWarning)
@@ -76,6 +77,10 @@ def sample_data(request):


# Test the parametrized fixture
@pytest.mark.skipif(
not run_test_module_changed(["sktime.performance_metrics"]),
reason="Run if performance_metrics module has changed.",
)
@pytest.mark.parametrize(
"sample_data",
[
@@ -159,6 +164,10 @@ def helper_check_output(metric, score_average, multioutput, sample_data):
assert len(eval_loss) == no_vars


@pytest.mark.skipif(
not run_test_module_changed(["sktime.performance_metrics"]),
reason="Run if performance_metrics module has changed.",
)
@pytest.mark.parametrize(
"sample_data",
[
@@ -176,6 +185,10 @@ def test_output_quantiles(metric, score_average, multioutput, sample_data):
helper_check_output(metric, score_average, multioutput, sample_data)


@pytest.mark.skipif(
not run_test_module_changed(["sktime.performance_metrics"]),
reason="Run if performance_metrics module has changed.",
)
@pytest.mark.parametrize(
"sample_data",
[
@@ -193,6 +206,10 @@ def test_output_intervals(metric, score_average, multioutput, sample_data):
helper_check_output(metric, score_average, multioutput, sample_data)


@pytest.mark.skipif(
not run_test_module_changed(["sktime.performance_metrics"]),
reason="Run if performance_metrics module has changed.",
)
@pytest.mark.parametrize("metric", quantile_metrics)
@pytest.mark.parametrize(
"sample_data",
@@ -223,6 +240,10 @@ def test_evaluate_alpha_positive(metric, sample_data):


# This test tests quantile data
@pytest.mark.skipif(
not run_test_module_changed(["sktime.performance_metrics"]),
reason="Run if performance_metrics module has changed.",
)
@pytest.mark.parametrize(
"sample_data",
[
@@ -243,6 +264,10 @@ def test_evaluate_alpha_negative(metric, sample_data):
res = Loss(y_true=y_true, y_pred=y_pred) # noqa


@pytest.mark.skipif(
not run_test_module_changed(["sktime.performance_metrics"]),
reason="Run if performance_metrics module has changed.",
)
@pytest.mark.parametrize("metric", all_metrics)
@pytest.mark.parametrize("score_average", [True, False])
def test_multioutput_weighted(metric, score_average):
31 changes: 31 additions & 0 deletions sktime/performance_metrics/forecasting/tests/test_metrics.py
@@ -5,8 +5,15 @@

import numpy as np
import pandas as pd
import pytest

from sktime.tests.test_switch import run_test_module_changed


@pytest.mark.skipif(
not run_test_module_changed(["sktime.performance_metrics"]),
reason="Run if performance_metrics module has changed.",
)
def test_gmse_class():
"""Doctest from GeometricMeanSquaredError."""
from sktime.performance_metrics.forecasting import GeometricMeanSquaredError
@@ -35,6 +42,10 @@ def test_gmse_class():
assert np.allclose(rgmse(y_true, y_pred), 0.7000014418652152)


@pytest.mark.skipif(
not run_test_module_changed(["sktime.performance_metrics"]),
reason="Run if performance_metrics module has changed.",
)
def test_gmse_function():
"""Doctest from geometric_mean_squared_error."""
from sktime.performance_metrics.forecasting import geometric_mean_squared_error
@@ -70,6 +81,10 @@
)


@pytest.mark.skipif(
not run_test_module_changed(["sktime.performance_metrics"]),
reason="Run if performance_metrics module has changed.",
)
def test_linex_class():
"""Doctest from MeanLinexError."""
from sktime.performance_metrics.forecasting import MeanLinexError
@@ -94,6 +109,10 @@ def test_linex_class():
assert np.allclose(linex_error(y_true, y_pred), 0.30917568000716666)


@pytest.mark.skipif(
not run_test_module_changed(["sktime.performance_metrics"]),
reason="Run if performance_metrics module has changed.",
)
def test_linex_function():
"""Doctest from mean_linex_error."""
from sktime.performance_metrics.forecasting import mean_linex_error
@@ -116,6 +135,10 @@
)


@pytest.mark.skipif(
not run_test_module_changed(["sktime.performance_metrics"]),
reason="Run if performance_metrics module has changed.",
)
def test_make_scorer():
"""Test make_forecasting_scorer and the failure case in #4827."""
import functools
@@ -131,6 +154,10 @@ def test_make_scorer():
scorer.evaluate(pd.Series([1, 2, 3]), pd.Series([1, 2, 4]))


@pytest.mark.skipif(
not run_test_module_changed(["sktime.performance_metrics"]),
reason="Run if performance_metrics module has changed.",
)
def test_make_scorer_sklearn():
"""Test make_forecasting_scorer and the failure case in #5715.
@@ -146,6 +173,10 @@ def test_make_scorer_sklearn():
scorer.evaluate(pd.Series([1, 2, 3]), pd.Series([1, 2, 4]))


@pytest.mark.skipif(
not run_test_module_changed(["sktime.performance_metrics"]),
reason="Run if performance_metrics module has changed.",
)
def test_metric_coercion_bug():
"""Tests for sensible output when using hierarchical arg with non-hierarchical data.
29 changes: 29 additions & 0 deletions sktime/performance_metrics/tests/test_metrics_classes.py
@@ -11,6 +11,7 @@
_classes,
make_forecasting_scorer,
)
from sktime.tests.test_switch import run_test_module_changed
from sktime.utils._testing.hierarchical import _make_hierarchical
from sktime.utils._testing.panel import _make_panel
from sktime.utils._testing.series import _make_series
@@ -29,6 +30,10 @@
BACKENDS = _get_parallel_test_fixtures("config")


@pytest.mark.skipif(
not run_test_module_changed(["sktime.performance_metrics"]),
reason="Run if performance_metrics module has changed.",
)
@pytest.mark.parametrize("n_columns", [1, 2])
@pytest.mark.parametrize("multioutput", MULTIOUTPUT)
@pytest.mark.parametrize("metric", metrics, ids=names)
@@ -80,6 +85,10 @@ def test_metric_output_direct(metric, multioutput, n_columns):
assert np.allclose(res[1], res[2])


@pytest.mark.skipif(
not run_test_module_changed(["sktime.performance_metrics"]),
reason="Run if performance_metrics module has changed.",
)
@pytest.mark.parametrize("backend", BACKENDS)
@pytest.mark.parametrize("n_columns", [1, 2])
@pytest.mark.parametrize(
@@ -126,6 +135,10 @@ def test_metric_hierarchical(multioutput, multilevel, n_columns, backend):
assert len(res) == len(y_true.columns)


@pytest.mark.skipif(
not run_test_module_changed(["sktime.performance_metrics"]),
reason="Run if performance_metrics module has changed.",
)
@pytest.mark.parametrize("greater_is_better", [True, False])
def test_custom_metric(greater_is_better):
"""Test custom metric constructor, integration _DynamicForecastingErrorMetric."""
@@ -154,6 +167,10 @@ def custom_mape(y_true, y_pred) -> float:
check_estimator(fc_scorer, raise_exceptions=True)


@pytest.mark.skipif(
not run_test_module_changed(["sktime.performance_metrics"]),
reason="Run if performance_metrics module has changed.",
)
@pytest.mark.parametrize("n_columns", [1, 2])
@pytest.mark.parametrize("multioutput", MULTIOUTPUT)
@pytest.mark.parametrize("metric", metrics, ids=names)
@@ -189,6 +206,10 @@ def test_metric_output_by_instance(metric, multioutput, n_columns):
assert (res.index == y_true.index).all()


@pytest.mark.skipif(
not run_test_module_changed(["sktime.performance_metrics"]),
reason="Run if performance_metrics module has changed.",
)
@pytest.mark.parametrize("backend", BACKENDS)
@pytest.mark.parametrize("n_columns", [1, 2])
@pytest.mark.parametrize("multilevel", ["uniform_average", "raw_values"])
@@ -230,6 +251,10 @@ def test_metric_hierarchical_by_index(multioutput, multilevel, n_columns, backen
assert set(expected_index) == set(found_index)


@pytest.mark.skipif(
not run_test_module_changed(["sktime.performance_metrics"]),
reason="Run if performance_metrics module has changed.",
)
@pytest.mark.parametrize("metric", metrics, ids=names)
def test_uniform_average_time(metric):
"""Tests that uniform_average_time indeed ignores index."""
@@ -258,6 +283,10 @@ def test_uniform_average_time(metric):
assert np.allclose(res, res_noix)


@pytest.mark.skipif(
not run_test_module_changed(["sktime.performance_metrics"]),
reason="Run if performance_metrics module has changed.",
)
@pytest.mark.parametrize("metric", metrics, ids=names)
def test_metric_weights(metric):
"""Test that weights are correctly applied to the metric."""
5 changes: 5 additions & 0 deletions sktime/performance_metrics/tests/test_numpy_metrics.py
@@ -6,6 +6,7 @@
import pytest

from sktime.performance_metrics.forecasting import _functions
from sktime.tests.test_switch import run_test_module_changed
from sktime.utils._testing.series import _make_series

numpy_metrics = getmembers(_functions, isfunction)
@@ -18,6 +19,10 @@
MULTIOUTPUT = ["uniform_average", "raw_values", "numpy"]


@pytest.mark.skipif(
not run_test_module_changed(["sktime.performance_metrics"]),
reason="Run if performance_metrics module has changed.",
)
@pytest.mark.parametrize("n_columns", [1, 2])
@pytest.mark.parametrize("multioutput", MULTIOUTPUT)
@pytest.mark.parametrize("metric", metrics, ids=names)
@@ -48,6 +48,7 @@
relative_loss,
)
from sktime.performance_metrics.tests._config import RANDOM_SEED
from sktime.tests.test_switch import run_test_module_changed
from sktime.utils._testing.series import _make_series

# For multiple comparisons of equality between functions and classes
@@ -388,6 +389,10 @@ def _call_metrics(metric_func, metric_class, y_true, y_pred, y_train, y_pred_ben
return function_metric, class_metric


@pytest.mark.skipif(
not run_test_module_changed(["sktime.performance_metrics"]),
reason="Run if performance_metrics module has changed.",
)
@pytest.mark.parametrize("metric_func_name", LOSS_RESULTS.keys())
@pytest.mark.parametrize("n_test_case", [1, 2, 3])
def test_univariate_loss_expected_zero(n_test_case, metric_func_name):
@@ -423,6 +428,10 @@ def test_univariate_loss_expected_zero(n_test_case, metric_func_name):
)


@pytest.mark.skipif(
not run_test_module_changed(["sktime.performance_metrics"]),
reason="Run if performance_metrics module has changed.",
)
@pytest.mark.parametrize("metric_func_name", LOSS_RESULTS.keys())
@pytest.mark.parametrize("n_test_case", [1, 2, 3])
def test_univariate_loss_against_expected_value(n_test_case, metric_func_name):
@@ -459,6 +468,10 @@ def test_univariate_loss_against_expected_value(n_test_case, metric_func_name):
)


@pytest.mark.skipif(
not run_test_module_changed(["sktime.performance_metrics"]),
reason="Run if performance_metrics module has changed.",
)
@pytest.mark.parametrize("metric_func_name", LOSS_RESULTS.keys())
@pytest.mark.parametrize("random_state", RANDOM_STATES)
def test_univariate_metric_function_class_equality(metric_func_name, random_state):
@@ -485,6 +498,10 @@ def test_univariate_metric_function_class_equality(metric_func_name, random_stat
)


@pytest.mark.skipif(
not run_test_module_changed(["sktime.performance_metrics"]),
reason="Run if performance_metrics module has changed.",
)
@pytest.mark.parametrize("random_state", RANDOM_STATES)
@pytest.mark.parametrize("metric_func_name", LOSS_RESULTS.keys())
def test_univariate_function_output_type(metric_func_name, random_state):
@@ -506,6 +523,10 @@ def test_univariate_function_output_type(metric_func_name, random_state):
)


@pytest.mark.skipif(
not run_test_module_changed(["sktime.performance_metrics"]),
reason="Run if performance_metrics module has changed.",
)
@pytest.mark.parametrize("metric_func_name", LOSS_RESULTS.keys())
def test_y_true_y_pred_inconsistent_n_outputs_raises_error(metric_func_name):
"""Error should be raised when y_true and y_pred have different number of output."""
@@ -525,6 +546,10 @@ def test_y_true_y_pred_inconsistent_n_outputs_raises_error(metric_func_name):
metric_func(y_true, y_pred, y_train=y_train, y_pred_benchmark=y_pred_benchmark)


@pytest.mark.skipif(
not run_test_module_changed(["sktime.performance_metrics"]),
reason="Run if performance_metrics module has changed.",
)
@pytest.mark.parametrize("metric_func_name", LOSS_RESULTS.keys())
def test_y_true_y_pred_inconsistent_n_timepoints_raises_error(metric_func_name):
"""Error should be raised if input variables have inconsistent number of samples."""
@@ -541,6 +566,10 @@ def test_y_true_y_pred_inconsistent_n_timepoints_raises_error(metric_func_name):
metric_func(y_true, y_pred, y_train=y_train, y_pred_benchmark=y_pred_benchmark)


@pytest.mark.skipif(
not run_test_module_changed(["sktime.performance_metrics"]),
reason="Run if performance_metrics module has changed.",
)
@pytest.mark.parametrize("metric_func_name", LOSS_RESULTS.keys())
def test_y_true_y_pred_inconsistent_n_variables_raises_error(metric_func_name):
"""Error should be raised when y_true and y_pred have different number of output."""