Skip to content

Commit

Permalink
Merge b7da439 into d476518
Browse files Browse the repository at this point in the history
  • Loading branch information
aaronspring committed Sep 9, 2020
2 parents d476518 + b7da439 commit 171fa4a
Show file tree
Hide file tree
Showing 8 changed files with 287 additions and 5 deletions.
2 changes: 1 addition & 1 deletion ci/run-linter.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ echo "[black]"
black --check --line-length=88 -S xskillscore

echo "[isort]"
isort --recursive --check-only xskillscore
isort --check-only xskillscore

echo "[doc8]"
doc8 *.rst
8 changes: 8 additions & 0 deletions docs/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -108,3 +108,11 @@ Multi-Category Metrics
Contingency.heidke_score
Contingency.peirce_score
Contingency.gerrity_score

Tests
-----

.. autosummary::
:toctree: api/

sign_test
6 changes: 6 additions & 0 deletions docs/source/api/xskillscore.sign_test.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
xskillscore.sign\_test
======================

.. currentmodule:: xskillscore

.. autofunction:: sign_test
88 changes: 85 additions & 3 deletions docs/source/quick-start.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions xskillscore/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
rps,
threshold_brier_score,
)
from .core.test import sign_test
from .versioning.print_versions import show_versions

try:
Expand Down
110 changes: 110 additions & 0 deletions xskillscore/core/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
import scipy.stats as st
import xarray as xr


def sign_test(
forecast1, forecast2, observation, dim=None, categorical=False, alpha=0.05
):
"""
Returns the Delsole and Tippett sign test over the given time dimension.
Parameters
----------
forecast1 : xarray.Dataset or xarray.DataArray
forecast1 to be compared to observation
forecast2 : xarray.Dataset or xarray.DataArray
forecast2 to be compared to observation
observation : xarray.Dataset or xarray.DataArray
observation to be compared to both forecasts
if str, then assume that forecast1 and forecast2 have already been compared
to observations and choose from:
* ``negatively_oriented_already_evaluated``: metric between forecast1
(forecast2) and observations. Distances are positively oriented,
therefore the smaller distance wins.
* ``positively_oriented_already_evaluated``: metric between forecast1
(forecast2) and observations. The larger positively oriented metric
wins.
* ``categorical_already_evaluated``: categorical data following
``logical(forecast1)==logical(forecast2)`` where ``logical`` is a
function return binary output
dim : str
time dimension of dimension over which to compute the random walk.
This dimension is not reduced, unlike in other xskillscore functions.
alpha : float
significance level for random walk.
categorical : bool, optional
If True, the winning forecast is only rewarded a point if it exactly equals
the observations
Returns
-------
xarray.DataArray or xarray.Dataset reduced by dim containing the sign test and
confidence as new dimension results:
* ``sign_test``: positive (negative) number shows how many times over
``dim`` ``forecast1`` is better (worse) than ``forecast2``.
* ``confidence``: Positive boundary for the random walk at significance
``alpha``.
Examples
--------
>>> f1 = xr.DataArray(np.random.normal(size=(30)),
... coords=[('time', np.arange(30))])
>>> f2 = xr.DataArray(np.random.normal(size=(30)),
... coords=[('time', np.arange(30))])
>>> o = xr.DataArray(np.random.normal(size=(30)),
... coords=[('time', np.arange(30))])
>>> st = sign_test(f1, f2, o, dim='time')
>>> st.sel(results='sign_test').plot()
>>> st.sel(results='confidence').plot(c='gray')
>>> (-1*st.sel(results='confidence')).plot(c='gray')
References
----------
* DelSole, T., & Tippett, M. K. (2016). Forecast Comparison Based on Random
Walks. Monthly Weather Review, 144(2), 615–626. doi: 10/f782pf
"""
# two shortcuts for climpred
climpred_keys = [
'negatively_oriented_already_evaluated',
'positively_oriented_already_evaluated',
'categorical_already_evaluated',
]
if isinstance(observation, str):
if observation == 'negatively_oriented_already_evaluated': # mse, mae, rmse
diff1 = forecast1
diff2 = forecast2
elif observation == 'positively_oriented_already_evaluated': # 1-mse/std, msss
diff1 = forecast1
diff2 = forecast2
elif observation == 'categorical_already_evaluated':
diff1 = ~forecast1
diff2 = ~forecast2
else:
raise ValueError(f'special key not found in {climpred_keys}')
else:
if categorical:
diff1 = -1 * (forecast1 == observation)
diff2 = -1 * (forecast2 == observation)
else:
diff1 = abs(
forecast1 - observation
) # is like xs.mae(forecast1,observation,dim=[])
diff2 = abs(forecast2 - observation)

sign_test = (1 * (diff1 < diff2) - 1 * (diff2 < diff1)).cumsum(dim)

# Estimate 95% confidence interval -----
notnan = 1 * (diff1.notnull() & diff2.notnull())
N = notnan.cumsum(dim)
# z_alpha is the value at which the standardized cumulative Gaussian distributed exceeds alpha
confidence = st.norm.ppf(1 - alpha / 2) * xr.ufuncs.sqrt(N)
confidence.coords['alpha'] = alpha

res = xr.concat([sign_test, confidence], dim='results')
res['results'] = ['sign_test', 'confidence']
return res
4 changes: 3 additions & 1 deletion xskillscore/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
import pytest
import xarray as xr

PERIODS = 10 # effective_p_value produces nans for shorter periods
PERIODS = 12 # effective_p_value produces nans for shorter periods

np.random.seed(42)


@pytest.fixture
Expand Down
73 changes: 73 additions & 0 deletions xskillscore/tests/test_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import numpy as np
import pytest

from xskillscore import sign_test

OFFSET = -1


@pytest.fixture
def a_1d_worse(a_1d):
return a_1d + OFFSET


def logical(ds):
return ds > 0.5


def test_sign_test_raw(a_1d, a_1d_worse, b_1d):
actual = sign_test(a_1d, a_1d_worse, b_1d, dim='time', alpha=0.05)
walk_larger_significance = actual.sel(results='sign_test') > actual.sel(
results='confidence'
)
crossing_after_timesteps = walk_larger_significance.argmax(dim='time')
# check timesteps after which sign_test larger confidence
assert crossing_after_timesteps == 3


def test_sign_test_categorical(a_1d, a_1d_worse, b_1d):
"""Test sign_test categorical."""
a_1d = logical(a_1d)
a_1d_worse = logical(a_1d_worse)
b_1d = logical(b_1d)
sign_test(a_1d, a_1d_worse, b_1d, dim='time', categorical=True)


@pytest.mark.parametrize('categorical', [True, False])
def test_sign_test_identical(a_1d, a_1d_worse, b_1d, categorical):
"""Test that identical forecasts show no walk step."""
identicals = [1, 7]
for i in identicals:
# set a_1d_worse = a_1d identical for time=i
a_1d_worse = a_1d_worse.where(a_1d_worse != a_1d_worse.isel(time=i)).fillna(
a_1d.isel(time=i)
)
if categorical:
a_1d = logical(a_1d)
a_1d_worse = logical(a_1d_worse)
b_1d = logical(b_1d)
actual = sign_test(a_1d, a_1d_worse, b_1d, dim='time', categorical=categorical)
# check flat
assert (
actual.sel(results='sign_test')
.diff(dim='time')
.isel(time=[i - 1 for i in identicals])
== 0
).all()


def test_sign_test_alpha(a_1d, a_1d_worse, b_1d):
"""Test that larger alpha leads to small confidence bounds in sign_test."""
actual_large_alpha = sign_test(a_1d, a_1d_worse, b_1d, dim='time', alpha=0.1)
actual_small_alpha = sign_test(a_1d, a_1d_worse, b_1d, dim='time', alpha=0.01)
# check difference in confidence
assert (
actual_large_alpha.sel(results='confidence')
< actual_small_alpha.sel(results='confidence')
).all()
# check identical sign_test
assert (
actual_large_alpha.sel(results='sign_test')
.drop('alpha')
.equals(actual_small_alpha.sel(results='sign_test').drop('alpha'))
)

0 comments on commit 171fa4a

Please sign in to comment.