From e309682d203a2efaaf1eef1cb2a03e12cfa4e6cc Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Sat, 30 Apr 2022 16:47:48 +0200 Subject: [PATCH 01/32] new polyval algo --- xarray/core/computation.py | 44 ++++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 1834622d96e..3c5ee7fccf7 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1843,36 +1843,52 @@ def where(cond, x, y, keep_attrs=None): ) -def polyval(coord, coeffs, degree_dim="degree"): +def polyval( + coord: DataArray, coeffs: DataArray, degree_dim: Hashable = "degree" +) -> DataArray: """Evaluate a polynomial at specific values Parameters ---------- coord : DataArray - The 1D coordinate along which to evaluate the polynomial. + Values at which to evaluate the polynomial. coeffs : DataArray - Coefficients of the polynomials. - degree_dim : str, default: "degree" + Coefficients of the polynomial. + degree_dim : Hashable, default: "degree" Name of the polynomial degree dimension in `coeffs`. + Returns + ------- + DataArray + Evaluated polynomial. + See Also -------- xarray.DataArray.polyfit - numpy.polyval + numpy.polynomial.polynomial.polyval """ - from .dataarray import DataArray - from .missing import get_clean_interp_index - - x = get_clean_interp_index(coord, coord.name, strict=False) deg_coord = coeffs[degree_dim] - lhs = DataArray( - np.vander(x, int(deg_coord.max()) + 1), - dims=(coord.name, degree_dim), - coords={coord.name: coord, degree_dim: np.arange(deg_coord.max() + 1)[::-1]}, + deg_idx_sorted = np.argsort(deg_coord.values) + max_deg = int(deg_coord[deg_idx_sorted[-1]]) + + # using Horner's method + # https://en.wikipedia.org/wiki/Horner%27s_method + res = ( + coeffs.isel({degree_dim: int(deg_idx_sorted[-1])}, drop=True) + .broadcast_like(coord) + .copy(deep=True) ) - return (lhs * coeffs).sum(degree_dim) + deg_idx = len(deg_coord) - 2 + for deg in range(max_deg - 1, -1, -1): + res *= coord + if deg_idx >= 0 and deg == int(deg_coord[deg_idx_sorted[deg_idx]]): + # this degrees coefficient is provided, if not assume 0 + res += coeffs.isel({degree_dim: int(deg_idx_sorted[deg_idx])}, drop=True) + deg_idx -= 1 + + return res def _calc_idxminmax( From 88e476a9f348a20b21c14b4c13b9840e4fbfaecb Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Sat, 30 Apr 2022 17:33:07 +0200 Subject: [PATCH 02/32] polyval improved typing with datasets --- xarray/core/computation.py | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 3c5ee7fccf7..008b1a5ca26 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -17,6 +17,7 @@ Iterable, Mapping, Sequence, + overload, ) import numpy as np @@ -1843,23 +1844,38 @@ def where(cond, x, y, keep_attrs=None): ) +@overload +def polyval(coord: DataArray, coeffs: DataArray, degree_dim: Hashable) -> DataArray: + ... + + +@overload +def polyval(coord: T_Xarray, coeffs: Dataset, degree_dim: Hashable) -> Dataset: + ... + + +@overload +def polyval(coord: Dataset, coeffs: T_Xarray, degree_dim: Hashable) -> Dataset: + ... + + def polyval( - coord: DataArray, coeffs: DataArray, degree_dim: Hashable = "degree" -) -> DataArray: + coord: T_Xarray, coeffs: T_Xarray, degree_dim: Hashable = "degree" +) -> T_Xarray: """Evaluate a polynomial at specific values Parameters ---------- - coord : DataArray + coord : DataArray or Dataset Values at which to evaluate the polynomial. - coeffs : DataArray + coeffs : DataArray or Dataset Coefficients of the polynomial. degree_dim : Hashable, default: "degree" Name of the polynomial degree dimension in `coeffs`. Returns ------- - DataArray + DataArray or Dataset Evaluated polynomial. See Also From 261aadcc6bf5bcf9c63eca3387d4a1c26446b381 Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Sat, 30 Apr 2022 17:33:22 +0200 Subject: [PATCH 03/32] more polyval unit tests --- xarray/tests/test_computation.py | 56 ++++++++++++++++++++++++++++---- 1 file changed, 50 insertions(+), 6 deletions(-) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 7a397428ba3..2f5b32e531b 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -475,10 +475,13 @@ def test_unified_dim_sizes() -> None: "x": 1, "y": 2, } - assert unified_dim_sizes( - [xr.Variable(("x", "z"), [[1]]), xr.Variable(("y", "z"), [[1, 2], [3, 4]])], - exclude_dims={"z"}, - ) == {"x": 1, "y": 2} + assert ( + unified_dim_sizes( + [xr.Variable(("x", "z"), [[1]]), xr.Variable(("y", "z"), [[1, 2], [3, 4]])], + exclude_dims={"z"}, + ) + == {"x": 1, "y": 2} + ) # duplicate dimensions with pytest.raises(ValueError): @@ -1935,7 +1938,7 @@ def test_where_attrs() -> None: @pytest.mark.parametrize("use_dask", [True, False]) @pytest.mark.parametrize("use_datetime", [True, False]) -def test_polyval(use_dask, use_datetime) -> None: +def test_polyval_compat(use_dask, use_datetime) -> None: if use_dask and not has_dask: pytest.skip("requires dask") @@ -1949,7 +1952,7 @@ def test_polyval(use_dask, use_datetime) -> None: xcoord = xr.DataArray(x, dims=("x",), name="x") da = xr.DataArray( - np.stack((1.0 + x + 2.0 * x**2, 1.0 + 2.0 * x + 3.0 * x**2)), + np.stack((1.0 + x + 2.0 * x ** 2, 1.0 + 2.0 * x + 3.0 * x ** 2)), dims=("d", "x"), coords={"x": xcoord, "d": [0, 1]}, ) @@ -1966,6 +1969,47 @@ def test_polyval(use_dask, use_datetime) -> None: xr.testing.assert_allclose(da, da_pv.T) +@pytest.mark.parametrize( + ["coeffs", "expected"], + [ + pytest.param( + xr.DataArray([0, 1], dims="degree"), + xr.DataArray([1, 2, 3], dims="x"), + id="simple", + ), + pytest.param( + xr.DataArray([[0, 1], [0, 1]], dims=("y", "degree")), + xr.DataArray([[1, 1], [2, 2], [3, 3]], dims=("x", "y")), + id="broadcast-x", + ), + pytest.param( + xr.DataArray([[0, 1], [1, 0], [1, 1]], dims=("x", "degree")), + xr.DataArray([1, 1, 1 + 3], dims="x"), + id="shared-dim", + ), + pytest.param( + xr.DataArray([1, 0, 0], dims="degree", coords={"degree": [2, 1, 0]}), + xr.DataArray([1, 2 ** 2, 3 ** 2], dims="x"), + id="reordered-index", + ), + pytest.param( + xr.DataArray([5], dims="degree", coords={"degree": [3]}), + xr.DataArray([5, 5 * 2 ** 3, 5 * 3 ** 3], dims="x"), + id="sparse-index", + ), + pytest.param( + xr.Dataset({"a": ("degree", [0, 1]), "b": ("degree", [1, 0])}), + xr.Dataset({"a": ("x", [1, 2, 3]), "b": ("x", [1, 1, 1])}), + id="dataset", + ), + ], +) +def test_polyval(coeffs, expected) -> None: + x = xr.DataArray([1, 2, 3], dims="x") + actual = xr.polyval(x, coeffs) + xr.testing.assert_allclose(actual, expected) + + @pytest.mark.parametrize("use_dask", [False, True]) @pytest.mark.parametrize( "a, b, ae, be, dim, axis", From 2a6a6331391713509f3ce86f60c2a7ff046fcec9 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 30 Apr 2022 15:35:14 +0000 Subject: [PATCH 04/32] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/tests/test_computation.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 2f5b32e531b..e9c7a8aff9c 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -475,13 +475,10 @@ def test_unified_dim_sizes() -> None: "x": 1, "y": 2, } - assert ( - unified_dim_sizes( - [xr.Variable(("x", "z"), [[1]]), xr.Variable(("y", "z"), [[1, 2], [3, 4]])], - exclude_dims={"z"}, - ) - == {"x": 1, "y": 2} - ) + assert unified_dim_sizes( + [xr.Variable(("x", "z"), [[1]]), xr.Variable(("y", "z"), [[1, 2], [3, 4]])], + exclude_dims={"z"}, + ) == {"x": 1, "y": 2} # duplicate dimensions with pytest.raises(ValueError): @@ -1952,7 +1949,7 @@ def test_polyval_compat(use_dask, use_datetime) -> None: xcoord = xr.DataArray(x, dims=("x",), name="x") da = xr.DataArray( - np.stack((1.0 + x + 2.0 * x ** 2, 1.0 + 2.0 * x + 3.0 * x ** 2)), + np.stack((1.0 + x + 2.0 * x**2, 1.0 + 2.0 * x + 3.0 * x**2)), dims=("d", "x"), coords={"x": xcoord, "d": [0, 1]}, ) @@ -1989,12 +1986,12 @@ def test_polyval_compat(use_dask, use_datetime) -> None: ), pytest.param( xr.DataArray([1, 0, 0], dims="degree", coords={"degree": [2, 1, 0]}), - xr.DataArray([1, 2 ** 2, 3 ** 2], dims="x"), + xr.DataArray([1, 2**2, 3**2], dims="x"), id="reordered-index", ), pytest.param( xr.DataArray([5], dims="degree", coords={"degree": [3]}), - xr.DataArray([5, 5 * 2 ** 3, 5 * 3 ** 3], dims="x"), + xr.DataArray([5, 5 * 2**3, 5 * 3**3], dims="x"), id="sparse-index", ), pytest.param( From a2701b87d2de2385eeac168d240507a4a862fafb Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Sat, 30 Apr 2022 18:17:46 +0200 Subject: [PATCH 05/32] support for Dataset coord in polyval --- xarray/core/computation.py | 4 ++++ xarray/tests/test_computation.py | 31 ++++++++++++++++++++++++------- 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 008b1a5ca26..8440e042f88 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1883,6 +1883,7 @@ def polyval( xarray.DataArray.polyfit numpy.polynomial.polynomial.polyval """ + from .dataset import Dataset deg_coord = coeffs[degree_dim] @@ -1896,6 +1897,9 @@ def polyval( .broadcast_like(coord) .copy(deep=True) ) + if isinstance(coord, Dataset) and not isinstance(res, Dataset): + res = Dataset({var: res for var in coord}) + deg_idx = len(deg_coord) - 2 for deg in range(max_deg - 1, -1, -1): res *= coord diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index e9c7a8aff9c..a05ec515a69 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -1967,42 +1967,59 @@ def test_polyval_compat(use_dask, use_datetime) -> None: @pytest.mark.parametrize( - ["coeffs", "expected"], + ["x", "coeffs", "expected"], [ pytest.param( - xr.DataArray([0, 1], dims="degree"), xr.DataArray([1, 2, 3], dims="x"), + xr.DataArray([2, 3, 4], dims="degree"), + xr.DataArray([9, 2 + 6 + 16, 2 + 9 + 36], dims="x"), id="simple", ), pytest.param( + xr.DataArray([1, 2, 3], dims="x"), xr.DataArray([[0, 1], [0, 1]], dims=("y", "degree")), xr.DataArray([[1, 1], [2, 2], [3, 3]], dims=("x", "y")), id="broadcast-x", ), pytest.param( + xr.DataArray([1, 2, 3], dims="x"), xr.DataArray([[0, 1], [1, 0], [1, 1]], dims=("x", "degree")), xr.DataArray([1, 1, 1 + 3], dims="x"), id="shared-dim", ), pytest.param( + xr.DataArray([1, 2, 3], dims="x"), xr.DataArray([1, 0, 0], dims="degree", coords={"degree": [2, 1, 0]}), - xr.DataArray([1, 2**2, 3**2], dims="x"), + xr.DataArray([1, 2 ** 2, 3 ** 2], dims="x"), id="reordered-index", ), pytest.param( + xr.DataArray([1, 2, 3], dims="x"), xr.DataArray([5], dims="degree", coords={"degree": [3]}), - xr.DataArray([5, 5 * 2**3, 5 * 3**3], dims="x"), + xr.DataArray([5, 5 * 2 ** 3, 5 * 3 ** 3], dims="x"), id="sparse-index", ), pytest.param( + xr.DataArray([1, 2, 3], dims="x"), xr.Dataset({"a": ("degree", [0, 1]), "b": ("degree", [1, 0])}), xr.Dataset({"a": ("x", [1, 2, 3]), "b": ("x", [1, 1, 1])}), - id="dataset", + id="array-dataset", + ), + pytest.param( + xr.Dataset({"a": ("x", [1, 2, 3]), "b": ("x", [2, 3, 4])}), + xr.DataArray([1, 1], dims="degree"), + xr.Dataset({"a": ("x", [2, 3, 4]), "b": ("x", [3, 4, 5])}), + id="dataset-array", + ), + pytest.param( + xr.Dataset({"a": ("x", [1, 2, 3]), "b": ("x", [2, 3, 4])}), + xr.Dataset({"a": ("degree", [0, 1]), "b": ("degree", [1, 1])}), + xr.Dataset({"a": ("x", [1, 2, 3]), "b": ("x", [3, 4, 5])}), + id="dataset-dataset", ), ], ) -def test_polyval(coeffs, expected) -> None: - x = xr.DataArray([1, 2, 3], dims="x") +def test_polyval(x, coeffs, expected) -> None: actual = xr.polyval(x, coeffs) xr.testing.assert_allclose(actual, expected) From 553de1037adb899dda860e743912dc001ab780a8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 30 Apr 2022 16:19:39 +0000 Subject: [PATCH 06/32] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/tests/test_computation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index a05ec515a69..25c8c531c84 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -1990,13 +1990,13 @@ def test_polyval_compat(use_dask, use_datetime) -> None: pytest.param( xr.DataArray([1, 2, 3], dims="x"), xr.DataArray([1, 0, 0], dims="degree", coords={"degree": [2, 1, 0]}), - xr.DataArray([1, 2 ** 2, 3 ** 2], dims="x"), + xr.DataArray([1, 2**2, 3**2], dims="x"), id="reordered-index", ), pytest.param( xr.DataArray([1, 2, 3], dims="x"), xr.DataArray([5], dims="degree", coords={"degree": [3]}), - xr.DataArray([5, 5 * 2 ** 3, 5 * 3 ** 3], dims="x"), + xr.DataArray([5, 5 * 2**3, 5 * 3**3], dims="x"), id="sparse-index", ), pytest.param( From 4fbca23a9fd8458ec8f917dd0e54656925503e90 Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Sat, 30 Apr 2022 19:58:45 +0200 Subject: [PATCH 07/32] fix bug in polyval broadcasting --- xarray/core/computation.py | 9 +-------- xarray/tests/test_computation.py | 8 ++++---- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 8440e042f88..d06a8a341ca 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1892,14 +1892,7 @@ def polyval( # using Horner's method # https://en.wikipedia.org/wiki/Horner%27s_method - res = ( - coeffs.isel({degree_dim: int(deg_idx_sorted[-1])}, drop=True) - .broadcast_like(coord) - .copy(deep=True) - ) - if isinstance(coord, Dataset) and not isinstance(res, Dataset): - res = Dataset({var: res for var in coord}) - + res = coeffs.isel({degree_dim: int(deg_idx_sorted[-1])}, drop=True) + 0 * coord deg_idx = len(deg_coord) - 2 for deg in range(max_deg - 1, -1, -1): res *= coord diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 25c8c531c84..592498a0978 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -1963,7 +1963,7 @@ def test_polyval_compat(use_dask, use_datetime) -> None: da_pv = xr.polyval(da.x, coeffs) - xr.testing.assert_allclose(da, da_pv.T) + xr.testing.assert_allclose(da, da_pv) @pytest.mark.parametrize( @@ -1978,7 +1978,7 @@ def test_polyval_compat(use_dask, use_datetime) -> None: pytest.param( xr.DataArray([1, 2, 3], dims="x"), xr.DataArray([[0, 1], [0, 1]], dims=("y", "degree")), - xr.DataArray([[1, 1], [2, 2], [3, 3]], dims=("x", "y")), + xr.DataArray([[1, 2, 3], [1, 2, 3]], dims=("y", "x")), id="broadcast-x", ), pytest.param( @@ -2012,9 +2012,9 @@ def test_polyval_compat(use_dask, use_datetime) -> None: id="dataset-array", ), pytest.param( - xr.Dataset({"a": ("x", [1, 2, 3]), "b": ("x", [2, 3, 4])}), + xr.Dataset({"a": ("x", [1, 2, 3]), "b": ("y", [2, 3, 4])}), xr.Dataset({"a": ("degree", [0, 1]), "b": ("degree", [1, 1])}), - xr.Dataset({"a": ("x", [1, 2, 3]), "b": ("x", [3, 4, 5])}), + xr.Dataset({"a": ("x", [1, 2, 3]), "b": ("y", [3, 4, 5])}), id="dataset-dataset", ), ], From 212505ff587c3b74cb67c2e4f1ae5c54eb67f6c8 Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Sun, 1 May 2022 13:38:13 +0200 Subject: [PATCH 08/32] support for datetime values in polyval --- xarray/core/computation.py | 24 ++++++++++++++- xarray/tests/test_computation.py | 51 +++++++++++--------------------- 2 files changed, 41 insertions(+), 34 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index d06a8a341ca..a1f71cafa6b 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -24,6 +24,8 @@ from . import dtypes, duck_array_ops, utils from .alignment import align, deep_align +from .common import zeros_like +from .duck_array_ops import datetime_to_numeric from .indexes import Index, filter_indexes_from_coords from .merge import merge_attrs, merge_coordinates_without_align from .options import OPTIONS, _get_keep_attrs @@ -1883,6 +1885,7 @@ def polyval( xarray.DataArray.polyfit numpy.polynomial.polynomial.polyval """ + from .dataarray import DataArray from .dataset import Dataset deg_coord = coeffs[degree_dim] @@ -1890,9 +1893,28 @@ def polyval( deg_idx_sorted = np.argsort(deg_coord.values) max_deg = int(deg_coord[deg_idx_sorted[-1]]) + def ensure_numeric(data: DataArray) -> DataArray: + if data.dtype.kind in "mM": + return DataArray( + datetime_to_numeric( + data, offset=np.datetime64("1970-01-01"), datetime_unit="ns" + ), + dims=data.dims, + coords=data.coords, + attrs=data.attrs, + ) + return data + + if isinstance(coord, Dataset): + coord = coord.map(ensure_numeric) + else: + coord = ensure_numeric(coord) + # using Horner's method # https://en.wikipedia.org/wiki/Horner%27s_method - res = coeffs.isel({degree_dim: int(deg_idx_sorted[-1])}, drop=True) + 0 * coord + res = coeffs.isel({degree_dim: int(deg_idx_sorted[-1])}, drop=True) + zeros_like( + coord + ) deg_idx = len(deg_coord) - 2 for deg in range(max_deg - 1, -1, -1): res *= coord diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 592498a0978..dba57833ff7 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -1933,39 +1933,6 @@ def test_where_attrs() -> None: assert actual.attrs == {} -@pytest.mark.parametrize("use_dask", [True, False]) -@pytest.mark.parametrize("use_datetime", [True, False]) -def test_polyval_compat(use_dask, use_datetime) -> None: - if use_dask and not has_dask: - pytest.skip("requires dask") - - if use_datetime: - xcoord = xr.DataArray( - pd.date_range("2000-01-01", freq="D", periods=10), dims=("x",), name="x" - ) - x = xr.core.missing.get_clean_interp_index(xcoord, "x") - else: - x = np.arange(10) - xcoord = xr.DataArray(x, dims=("x",), name="x") - - da = xr.DataArray( - np.stack((1.0 + x + 2.0 * x**2, 1.0 + 2.0 * x + 3.0 * x**2)), - dims=("d", "x"), - coords={"x": xcoord, "d": [0, 1]}, - ) - coeffs = xr.DataArray( - [[2, 1, 1], [3, 2, 1]], - dims=("d", "degree"), - coords={"d": [0, 1], "degree": [2, 1, 0]}, - ) - if use_dask: - coeffs = coeffs.chunk({"d": 2}) - - da_pv = xr.polyval(da.x, coeffs) - - xr.testing.assert_allclose(da, da_pv) - - @pytest.mark.parametrize( ["x", "coeffs", "expected"], [ @@ -2017,9 +1984,27 @@ def test_polyval_compat(use_dask, use_datetime) -> None: xr.Dataset({"a": ("x", [1, 2, 3]), "b": ("y", [3, 4, 5])}), id="dataset-dataset", ), + pytest.param( + xr.DataArray([1, 2, 3], dims="x"), + xr.DataArray([2, 3, 4], dims="degree").chunk({"degree": 2}), + xr.DataArray([9, 2 + 6 + 16, 2 + 9 + 36], dims="x"), + id="dask", + ), + pytest.param( + xr.DataArray(pd.date_range("1970-01-01", freq="s", periods=3), dims="x"), + xr.DataArray([0, 1], dims="degree"), + xr.DataArray( + [0, 1e9, 2e9], + dims="x", + coords={"x": pd.date_range("1970-01-01", freq="s", periods=3)}, + ), + id="datetime", + ), ], ) def test_polyval(x, coeffs, expected) -> None: + if coeffs.chunks is not None and not has_dask: + pytest.skip("requires dask") actual = xr.polyval(x, coeffs) xr.testing.assert_allclose(actual, expected) From 945cbfb47d3ae50cc2a712599b511fd95c96c0ad Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Sun, 1 May 2022 13:49:49 +0200 Subject: [PATCH 09/32] polyval update in whats-new --- doc/whats-new.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 4882402073c..b850d2d0278 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -41,6 +41,9 @@ New Features - Allow passing chunks in ``**kwargs`` form to :py:meth:`Dataset.chunk`, :py:meth:`DataArray.chunk`, and :py:meth:`Variable.chunk`. (:pull:`6471`) By `Tom Nicholas `_. +- :py:meth:`xr.polyval` now supports :py:class:`Dataset` and :py:class:`DataArray` args of any shape, + is faster and requires less memory. (:pull:`6548`) + By `Michael Niklas `_. Breaking changes ~~~~~~~~~~~~~~~~ From 5945537ed11d82bf0442ef10011cf67206e82a68 Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Sun, 1 May 2022 14:28:58 +0200 Subject: [PATCH 10/32] fix dask polyval unit tests --- xarray/tests/test_computation.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index dba57833ff7..d2eed817b0c 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -1933,6 +1933,7 @@ def test_where_attrs() -> None: assert actual.attrs == {} +@pytest.mark.parametrize("use_dask", [False, True]) @pytest.mark.parametrize( ["x", "coeffs", "expected"], [ @@ -1984,12 +1985,6 @@ def test_where_attrs() -> None: xr.Dataset({"a": ("x", [1, 2, 3]), "b": ("y", [3, 4, 5])}), id="dataset-dataset", ), - pytest.param( - xr.DataArray([1, 2, 3], dims="x"), - xr.DataArray([2, 3, 4], dims="degree").chunk({"degree": 2}), - xr.DataArray([9, 2 + 6 + 16, 2 + 9 + 36], dims="x"), - id="dask", - ), pytest.param( xr.DataArray(pd.date_range("1970-01-01", freq="s", periods=3), dims="x"), xr.DataArray([0, 1], dims="degree"), @@ -2002,9 +1997,11 @@ def test_where_attrs() -> None: ), ], ) -def test_polyval(x, coeffs, expected) -> None: - if coeffs.chunks is not None and not has_dask: +def test_polyval(use_dask, x, coeffs, expected) -> None: + if use_dask: + if not has_dask: pytest.skip("requires dask") + coeffs = coeffs.chunk({"degree": 2}) actual = xr.polyval(x, coeffs) xr.testing.assert_allclose(actual, expected) From a0964ed9fceee8c2af6438abf6476f699259e9bc Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Sun, 1 May 2022 16:09:51 +0200 Subject: [PATCH 11/32] fix bug in polyval unit tests --- xarray/tests/test_computation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index d2eed817b0c..a752db04fe8 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -2000,7 +2000,7 @@ def test_where_attrs() -> None: def test_polyval(use_dask, x, coeffs, expected) -> None: if use_dask: if not has_dask: - pytest.skip("requires dask") + pytest.skip("requires dask") coeffs = coeffs.chunk({"degree": 2}) actual = xr.polyval(x, coeffs) xr.testing.assert_allclose(actual, expected) From 82db39459e3c2f40b9b04062d8665939075f977d Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Sun, 1 May 2022 18:05:06 +0200 Subject: [PATCH 12/32] add polyval benchmark --- asv_bench/benchmarks/polyfit.py | 39 +++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 asv_bench/benchmarks/polyfit.py diff --git a/asv_bench/benchmarks/polyfit.py b/asv_bench/benchmarks/polyfit.py new file mode 100644 index 00000000000..2309d68234d --- /dev/null +++ b/asv_bench/benchmarks/polyfit.py @@ -0,0 +1,39 @@ +import numpy as np +import xarray as xr + +from . import parameterized, randn, requires_dask + +ndegs = (2, 5, 20) +nxs = (10, 100, 1000, 5000) + +xs = { + nx: xr.DataArray(randn((nx, nx)), dims=("x", "y")) for nx in nxs +} +coeffs = { + ndeg: xr.DataArray(randn((ndeg)), dims="degree") for ndeg in ndegs +} + +class Polyval: + def setup(self, *args, **kwargs): + self.coeffs = coeffs + self.xs = xs + + @parameterized(["nx", "ndeg"], [nxs, ndegs]) + def time_polyval(self, nx, ndeg): + x = self.xs[nx] + c = self.coeffs[ndeg] + xr.polyval(x, c) + + @parameterized(["nx", "ndeg"], [nxs, ndegs]) + def peakmem_polyval(self, nx, ndeg): + x = self.xs[nx] + c = self.coeffs[ndeg] + xr.polyval(x, c) + +class PolyvalDask(Polyval): + def setup(self, *args, **kwargs): + requires_dask() + super().setup(*args, **kwargs) + self.xs = { + nx: self.xs[nx].chunk({"x": 100, "y": 100}) for nx in nxs + } From ca5a7f73ac471f7cb8c7cd6140201293ff502592 Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Sun, 1 May 2022 18:09:01 +0200 Subject: [PATCH 13/32] add breaking change of polyval tp whats-new --- doc/whats-new.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index b850d2d0278..dfa36a4dcb0 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -57,6 +57,10 @@ Breaking changes - Xarray's ufuncs have been removed, now that they can be replaced by numpy's ufuncs in all supported versions of numpy. By `Maximilian Roos `_. +- :py:meth:`xr.polyval` now uses the ``coord`` argument directly instead of its index coordinate. + (:pull:`6548`) + By `Michael Niklas `_. + Deprecations ~~~~~~~~~~~~ From 7a708315597879b08843f85494ff9314b6d5c602 Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Sun, 1 May 2022 18:16:45 +0200 Subject: [PATCH 14/32] move _ensure_numeric to its own function --- xarray/core/computation.py | 51 ++++++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 18 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index a1f71cafa6b..c34adac3cff 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1893,31 +1893,16 @@ def polyval( deg_idx_sorted = np.argsort(deg_coord.values) max_deg = int(deg_coord[deg_idx_sorted[-1]]) - def ensure_numeric(data: DataArray) -> DataArray: - if data.dtype.kind in "mM": - return DataArray( - datetime_to_numeric( - data, offset=np.datetime64("1970-01-01"), datetime_unit="ns" - ), - dims=data.dims, - coords=data.coords, - attrs=data.attrs, - ) - return data - - if isinstance(coord, Dataset): - coord = coord.map(ensure_numeric) - else: - coord = ensure_numeric(coord) + x = _ensure_numeric(coord) # using Horner's method # https://en.wikipedia.org/wiki/Horner%27s_method res = coeffs.isel({degree_dim: int(deg_idx_sorted[-1])}, drop=True) + zeros_like( - coord + x ) deg_idx = len(deg_coord) - 2 for deg in range(max_deg - 1, -1, -1): - res *= coord + res *= x if deg_idx >= 0 and deg == int(deg_coord[deg_idx_sorted[deg_idx]]): # this degrees coefficient is provided, if not assume 0 res += coeffs.isel({degree_dim: int(deg_idx_sorted[deg_idx])}, drop=True) @@ -1926,6 +1911,36 @@ def ensure_numeric(data: DataArray) -> DataArray: return res +def _ensure_numeric(data: T_Xarray) -> T_Xarray: + """Converts all datetime64 variables to float64 + + Parameters + ---------- + data : DataArray or Dataset + Variables with possible datetime dtypes. + + Returns + ------- + DataArray or Dataset + Variables with datetime64 dtypes converted to float64. + """ + def to_floatable(x: DataArray) -> DataArray: + if x.dtype.kind in "mM": + return x.copy( + data=datetime_to_numeric( + x, + offset=np.datetime64("1970-01-01"), + datetime_unit="ns", + ), + ) + return x + + if isinstance(data, Dataset): + return data.map(to_floatable) + else: + return to_floatable(data) + + def _calc_idxminmax( *, array, From 401e1268d4cbd63844e474b42458b848c75069a1 Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Sun, 1 May 2022 18:18:49 +0200 Subject: [PATCH 15/32] fix import error in _ensure_numeric --- xarray/core/computation.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index c34adac3cff..e61ae44127b 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1885,9 +1885,6 @@ def polyval( xarray.DataArray.polyfit numpy.polynomial.polynomial.polyval """ - from .dataarray import DataArray - from .dataset import Dataset - deg_coord = coeffs[degree_dim] deg_idx_sorted = np.argsort(deg_coord.values) @@ -1924,6 +1921,8 @@ def _ensure_numeric(data: T_Xarray) -> T_Xarray: DataArray or Dataset Variables with datetime64 dtypes converted to float64. """ + from .dataset import Dataset + def to_floatable(x: DataArray) -> DataArray: if x.dtype.kind in "mM": return x.copy( From 3c21a64aa0c10e99020d723d79270f049af8d251 Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Sun, 1 May 2022 18:19:15 +0200 Subject: [PATCH 16/32] add raise_if_dask_computes to polyval unit tests --- xarray/tests/test_computation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index a752db04fe8..3ecc01f94d5 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -2002,7 +2002,8 @@ def test_polyval(use_dask, x, coeffs, expected) -> None: if not has_dask: pytest.skip("requires dask") coeffs = coeffs.chunk({"degree": 2}) - actual = xr.polyval(x, coeffs) + with raise_if_dask_computes(): + actual = xr.polyval(x, coeffs) xr.testing.assert_allclose(actual, expected) From ff37fe2e21edd0d0a88a82d0c7845445eba7db87 Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Sun, 1 May 2022 18:23:49 +0200 Subject: [PATCH 17/32] chunk coord arg as well for polyval unit tests --- xarray/tests/test_computation.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 3ecc01f94d5..78f67d660b1 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -2002,6 +2002,7 @@ def test_polyval(use_dask, x, coeffs, expected) -> None: if not has_dask: pytest.skip("requires dask") coeffs = coeffs.chunk({"degree": 2}) + x = x.chunk({"x": 2}) with raise_if_dask_computes(): actual = xr.polyval(x, coeffs) xr.testing.assert_allclose(actual, expected) From 63ed13793a7933c00cf8c4d685565260629d60f5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 1 May 2022 16:25:43 +0000 Subject: [PATCH 18/32] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- asv_bench/benchmarks/polyfit.py | 17 +++++++---------- xarray/core/computation.py | 6 ++---- 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/asv_bench/benchmarks/polyfit.py b/asv_bench/benchmarks/polyfit.py index 2309d68234d..e7e24741d47 100644 --- a/asv_bench/benchmarks/polyfit.py +++ b/asv_bench/benchmarks/polyfit.py @@ -1,4 +1,5 @@ import numpy as np + import xarray as xr from . import parameterized, randn, requires_dask @@ -6,12 +7,9 @@ ndegs = (2, 5, 20) nxs = (10, 100, 1000, 5000) -xs = { - nx: xr.DataArray(randn((nx, nx)), dims=("x", "y")) for nx in nxs -} -coeffs = { - ndeg: xr.DataArray(randn((ndeg)), dims="degree") for ndeg in ndegs -} +xs = {nx: xr.DataArray(randn((nx, nx)), dims=("x", "y")) for nx in nxs} +coeffs = {ndeg: xr.DataArray(randn(ndeg), dims="degree") for ndeg in ndegs} + class Polyval: def setup(self, *args, **kwargs): @@ -23,17 +21,16 @@ def time_polyval(self, nx, ndeg): x = self.xs[nx] c = self.coeffs[ndeg] xr.polyval(x, c) - + @parameterized(["nx", "ndeg"], [nxs, ndegs]) def peakmem_polyval(self, nx, ndeg): x = self.xs[nx] c = self.coeffs[ndeg] xr.polyval(x, c) + class PolyvalDask(Polyval): def setup(self, *args, **kwargs): requires_dask() super().setup(*args, **kwargs) - self.xs = { - nx: self.xs[nx].chunk({"x": 100, "y": 100}) for nx in nxs - } + self.xs = {nx: self.xs[nx].chunk({"x": 100, "y": 100}) for nx in nxs} diff --git a/xarray/core/computation.py b/xarray/core/computation.py index e61ae44127b..41d97a704d0 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1894,9 +1894,7 @@ def polyval( # using Horner's method # https://en.wikipedia.org/wiki/Horner%27s_method - res = coeffs.isel({degree_dim: int(deg_idx_sorted[-1])}, drop=True) + zeros_like( - x - ) + res = coeffs.isel({degree_dim: int(deg_idx_sorted[-1])}, drop=True) + zeros_like(x) deg_idx = len(deg_coord) - 2 for deg in range(max_deg - 1, -1, -1): res *= x @@ -1933,7 +1931,7 @@ def to_floatable(x: DataArray) -> DataArray: ), ) return x - + if isinstance(data, Dataset): return data.map(to_floatable) else: From 8343b29349bd4639ccd247acb4d415639cb27b89 Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Sun, 1 May 2022 19:03:16 +0200 Subject: [PATCH 19/32] simplify polyval algo with sortby --- xarray/core/computation.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index e61ae44127b..67640fb98ad 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1885,24 +1885,22 @@ def polyval( xarray.DataArray.polyfit numpy.polynomial.polynomial.polyval """ - deg_coord = coeffs[degree_dim] - deg_idx_sorted = np.argsort(deg_coord.values) - max_deg = int(deg_coord[deg_idx_sorted[-1]]) + coeffs = coeffs.sortby(degree_dim) + deg_coord = coeffs[degree_dim] + max_deg = int(deg_coord[-1]) x = _ensure_numeric(coord) # using Horner's method # https://en.wikipedia.org/wiki/Horner%27s_method - res = coeffs.isel({degree_dim: int(deg_idx_sorted[-1])}, drop=True) + zeros_like( - x - ) - deg_idx = len(deg_coord) - 2 + res = coeffs.isel({degree_dim: -1}, drop=True) + zeros_like(x) + deg_idx = len(deg_coord) - 2 # -2nd index for deg in range(max_deg - 1, -1, -1): res *= x - if deg_idx >= 0 and deg == int(deg_coord[deg_idx_sorted[deg_idx]]): + if deg_idx >= 0 and deg == int(deg_coord[deg_idx]): # this degrees coefficient is provided, if not assume 0 - res += coeffs.isel({degree_dim: int(deg_idx_sorted[deg_idx])}, drop=True) + res += coeffs.isel({degree_dim: deg_idx}, drop=True) deg_idx -= 1 return res From 0ef3462e6b6d6ed4e926de728685b0e169e85f13 Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Sun, 1 May 2022 19:09:36 +0200 Subject: [PATCH 20/32] comment some code until PR#6556 --- xarray/tests/test_computation.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 78f67d660b1..335062b4920 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -2003,8 +2003,10 @@ def test_polyval(use_dask, x, coeffs, expected) -> None: pytest.skip("requires dask") coeffs = coeffs.chunk({"degree": 2}) x = x.chunk({"x": 2}) - with raise_if_dask_computes(): - actual = xr.polyval(x, coeffs) + # uncomment with https://github.com/pydata/xarray/pull/6556 + # with raise_if_dask_computes(): + # actual = xr.polyval(x, coeffs) + actual = xr.polyval(x, coeffs) xr.testing.assert_allclose(actual, expected) From e6f4675ca323c79928ea39a8b050f74900401460 Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Sun, 1 May 2022 19:12:13 +0200 Subject: [PATCH 21/32] Revert "comment some code until PR#6556" This reverts commit 0ef3462e6b6d6ed4e926de728685b0e169e85f13. --- xarray/tests/test_computation.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 335062b4920..78f67d660b1 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -2003,10 +2003,8 @@ def test_polyval(use_dask, x, coeffs, expected) -> None: pytest.skip("requires dask") coeffs = coeffs.chunk({"degree": 2}) x = x.chunk({"x": 2}) - # uncomment with https://github.com/pydata/xarray/pull/6556 - # with raise_if_dask_computes(): - # actual = xr.polyval(x, coeffs) - actual = xr.polyval(x, coeffs) + with raise_if_dask_computes(): + actual = xr.polyval(x, coeffs) xr.testing.assert_allclose(actual, expected) From 3b88386fc5201c0798b55f368289eb4e8f36cf90 Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Sun, 1 May 2022 19:12:46 +0200 Subject: [PATCH 22/32] fix dask issues with polyval unit tests --- xarray/core/computation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 994d0136a5b..6911dcdb1da 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1925,7 +1925,7 @@ def to_floatable(x: DataArray) -> DataArray: if x.dtype.kind in "mM": return x.copy( data=datetime_to_numeric( - x, + x.data, offset=np.datetime64("1970-01-01"), datetime_unit="ns", ), From ce392a7f54cc1700492e0466df50115d4b62ed03 Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Sun, 1 May 2022 19:19:01 +0200 Subject: [PATCH 23/32] remove unused import --- asv_bench/benchmarks/polyfit.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/asv_bench/benchmarks/polyfit.py b/asv_bench/benchmarks/polyfit.py index e7e24741d47..1d28b0bcc24 100644 --- a/asv_bench/benchmarks/polyfit.py +++ b/asv_bench/benchmarks/polyfit.py @@ -1,5 +1,3 @@ -import numpy as np - import xarray as xr from . import parameterized, randn, requires_dask From 4e8b72e049c260ad0e1cdc9a8a30622e7f87b282 Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Sun, 1 May 2022 20:20:12 +0200 Subject: [PATCH 24/32] make polyval benchmark backwards compatible --- asv_bench/benchmarks/polyfit.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/asv_bench/benchmarks/polyfit.py b/asv_bench/benchmarks/polyfit.py index 1d28b0bcc24..58c71b6c50c 100644 --- a/asv_bench/benchmarks/polyfit.py +++ b/asv_bench/benchmarks/polyfit.py @@ -3,10 +3,10 @@ from . import parameterized, randn, requires_dask ndegs = (2, 5, 20) -nxs = (10, 100, 1000, 5000) +nxs = (10**2, 10**4, 10**6, 10**8) -xs = {nx: xr.DataArray(randn((nx, nx)), dims=("x", "y")) for nx in nxs} -coeffs = {ndeg: xr.DataArray(randn(ndeg), dims="degree") for ndeg in ndegs} +xs = {nx: xr.DataArray(randn((nx,)), dims="x") for nx in nxs} +coeffs = {ndeg: xr.DataArray(randn((ndeg,)), dims="degree") for ndeg in ndegs} class Polyval: @@ -31,4 +31,4 @@ class PolyvalDask(Polyval): def setup(self, *args, **kwargs): requires_dask() super().setup(*args, **kwargs) - self.xs = {nx: self.xs[nx].chunk({"x": 100, "y": 100}) for nx in nxs} + self.xs = {nx: self.xs[nx].chunk({"x": 10000}) for nx in nxs} From 70c4419bb71957bf0c61260da4ec56b924020ca3 Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Sun, 1 May 2022 20:55:01 +0200 Subject: [PATCH 25/32] another bugfix for polyval benchmark --- asv_bench/benchmarks/polyfit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/polyfit.py b/asv_bench/benchmarks/polyfit.py index 58c71b6c50c..9dc43287c88 100644 --- a/asv_bench/benchmarks/polyfit.py +++ b/asv_bench/benchmarks/polyfit.py @@ -5,7 +5,7 @@ ndegs = (2, 5, 20) nxs = (10**2, 10**4, 10**6, 10**8) -xs = {nx: xr.DataArray(randn((nx,)), dims="x") for nx in nxs} +xs = {nx: xr.DataArray(randn((nx,)), dims="x", name="x") for nx in nxs} coeffs = {ndeg: xr.DataArray(randn((ndeg,)), dims="degree") for ndeg in ndegs} From c4ced87b83326a4baa577f1f985667944d475bca Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sun, 1 May 2022 20:56:48 -0600 Subject: [PATCH 26/32] Update asv_bench/benchmarks/polyfit.py --- asv_bench/benchmarks/polyfit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/polyfit.py b/asv_bench/benchmarks/polyfit.py index 9dc43287c88..118d04871f8 100644 --- a/asv_bench/benchmarks/polyfit.py +++ b/asv_bench/benchmarks/polyfit.py @@ -3,7 +3,7 @@ from . import parameterized, randn, requires_dask ndegs = (2, 5, 20) -nxs = (10**2, 10**4, 10**6, 10**8) +nxs = (10**2, 10**6) xs = {nx: xr.DataArray(randn((nx,)), dims="x", name="x") for nx in nxs} coeffs = {ndeg: xr.DataArray(randn((ndeg,)), dims="degree") for ndeg in ndegs} From 7a73a4202aa1872ac25e86609e487834dba0d870 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sun, 1 May 2022 21:23:12 -0600 Subject: [PATCH 27/32] Actually compute dask arrays in benchmark. --- asv_bench/benchmarks/polyfit.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/asv_bench/benchmarks/polyfit.py b/asv_bench/benchmarks/polyfit.py index 118d04871f8..255b37fa572 100644 --- a/asv_bench/benchmarks/polyfit.py +++ b/asv_bench/benchmarks/polyfit.py @@ -18,13 +18,13 @@ def setup(self, *args, **kwargs): def time_polyval(self, nx, ndeg): x = self.xs[nx] c = self.coeffs[ndeg] - xr.polyval(x, c) + xr.polyval(x, c).compute() @parameterized(["nx", "ndeg"], [nxs, ndegs]) def peakmem_polyval(self, nx, ndeg): x = self.xs[nx] c = self.coeffs[ndeg] - xr.polyval(x, c) + xr.polyval(x, c).compute() class PolyvalDask(Polyval): From a824ad2b70dfedd186c4070e482119ba7234b22d Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 2 May 2022 10:54:05 -0600 Subject: [PATCH 28/32] Minor cleanup --- asv_bench/benchmarks/polyfit.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/asv_bench/benchmarks/polyfit.py b/asv_bench/benchmarks/polyfit.py index 255b37fa572..7d933e43834 100644 --- a/asv_bench/benchmarks/polyfit.py +++ b/asv_bench/benchmarks/polyfit.py @@ -2,25 +2,24 @@ from . import parameterized, randn, requires_dask -ndegs = (2, 5, 20) -nxs = (10**2, 10**6) - -xs = {nx: xr.DataArray(randn((nx,)), dims="x", name="x") for nx in nxs} -coeffs = {ndeg: xr.DataArray(randn((ndeg,)), dims="degree") for ndeg in ndegs} +NDEGS = (2, 5, 20) +NX = (10**2, 10**6) class Polyval: def setup(self, *args, **kwargs): - self.coeffs = coeffs - self.xs = xs + self.xs = {nx: xr.DataArray(randn((nx,)), dims="x", name="x") for nx in NX} + self.coeffs = { + ndeg: xr.DataArray(randn((ndeg,)), dims="degree") for ndeg in NDEGS + } - @parameterized(["nx", "ndeg"], [nxs, ndegs]) + @parameterized(["nx", "ndeg"], [NX, NDEGS]) def time_polyval(self, nx, ndeg): x = self.xs[nx] c = self.coeffs[ndeg] xr.polyval(x, c).compute() - @parameterized(["nx", "ndeg"], [nxs, ndegs]) + @parameterized(["nx", "ndeg"], [NX, NDEGS]) def peakmem_polyval(self, nx, ndeg): x = self.xs[nx] c = self.coeffs[ndeg] @@ -31,4 +30,4 @@ class PolyvalDask(Polyval): def setup(self, *args, **kwargs): requires_dask() super().setup(*args, **kwargs) - self.xs = {nx: self.xs[nx].chunk({"x": 10000}) for nx in nxs} + self.xs = {k: v.chunk({"x": 10000}) for k, v in self.xs.items()} From 047cd04788c4db66b0c4192bcff07110a21aae31 Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Tue, 3 May 2022 17:51:00 +0200 Subject: [PATCH 29/32] simplify polyval algo using reindex --- xarray/core/computation.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 6911dcdb1da..f429fd0f437 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1886,22 +1886,16 @@ def polyval( numpy.polynomial.polynomial.polyval """ - coeffs = coeffs.sortby(degree_dim) - deg_coord = coeffs[degree_dim] - max_deg = int(deg_coord[-1]) - - x = _ensure_numeric(coord) + max_deg = coeffs[degree_dim].max().item() + coeffs = coeffs.reindex({degree_dim: np.arange(max_deg + 1)}, fill_value=0) + coord = _ensure_numeric(coord) # using Horner's method # https://en.wikipedia.org/wiki/Horner%27s_method - res = coeffs.isel({degree_dim: -1}, drop=True) + zeros_like(x) - deg_idx = len(deg_coord) - 2 # -2nd index + res = coeffs.isel({degree_dim: max_deg}, drop=True) + zeros_like(coord) for deg in range(max_deg - 1, -1, -1): - res *= x - if deg_idx >= 0 and deg == int(deg_coord[deg_idx]): - # this degrees coefficient is provided, if not assume 0 - res += coeffs.isel({degree_dim: deg_idx}, drop=True) - deg_idx -= 1 + res *= coord + res += coeffs.isel({degree_dim: deg}, drop=True) return res From 0d0bb8e3ac5e3c7618154a36ab25d80485ecf09c Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Tue, 3 May 2022 18:05:59 +0200 Subject: [PATCH 30/32] don't copy coeffs if not necessary --- xarray/core/computation.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index f429fd0f437..2be42ea5a33 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1887,7 +1887,9 @@ def polyval( """ max_deg = coeffs[degree_dim].max().item() - coeffs = coeffs.reindex({degree_dim: np.arange(max_deg + 1)}, fill_value=0) + coeffs = coeffs.reindex( + {degree_dim: np.arange(max_deg + 1)}, fill_value=0, copy=False + ) coord = _ensure_numeric(coord) # using Horner's method From 05e02661113c3c0c6737af75141d24f5776a0b92 Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 4 May 2022 10:03:43 -0600 Subject: [PATCH 31/32] Make sure degree_dim is an indexed coordinate of int dtype --- xarray/core/computation.py | 8 ++++++++ xarray/tests/test_computation.py | 35 +++++++++++++++++++++++++------- 2 files changed, 36 insertions(+), 7 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 2be42ea5a33..1a32cda512c 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1886,6 +1886,14 @@ def polyval( numpy.polynomial.polynomial.polyval """ + if degree_dim not in coeffs._indexes: + raise ValueError( + f"Dimension `{degree_dim}` should be a coordinate variable with labels." + ) + if not np.issubdtype(coeffs[degree_dim].dtype, int): + raise ValueError( + f"Dimension `{degree_dim}` should be of integer dtype. Received {coeffs[degree_dim].dtype} instead." + ) max_deg = coeffs[degree_dim].max().item() coeffs = coeffs.reindex( {degree_dim: np.arange(max_deg + 1)}, fill_value=0, copy=False diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 78f67d660b1..127fdc5404f 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -1939,19 +1939,25 @@ def test_where_attrs() -> None: [ pytest.param( xr.DataArray([1, 2, 3], dims="x"), - xr.DataArray([2, 3, 4], dims="degree"), + xr.DataArray([2, 3, 4], dims="degree", coords={"degree": [0, 1, 2]}), xr.DataArray([9, 2 + 6 + 16, 2 + 9 + 36], dims="x"), id="simple", ), pytest.param( xr.DataArray([1, 2, 3], dims="x"), - xr.DataArray([[0, 1], [0, 1]], dims=("y", "degree")), + xr.DataArray( + [[0, 1], [0, 1]], dims=("y", "degree"), coords={"degree": [0, 1]} + ), xr.DataArray([[1, 2, 3], [1, 2, 3]], dims=("y", "x")), id="broadcast-x", ), pytest.param( xr.DataArray([1, 2, 3], dims="x"), - xr.DataArray([[0, 1], [1, 0], [1, 1]], dims=("x", "degree")), + xr.DataArray( + [[0, 1], [1, 0], [1, 1]], + dims=("x", "degree"), + coords={"degree": [0, 1]}, + ), xr.DataArray([1, 1, 1 + 3], dims="x"), id="shared-dim", ), @@ -1969,25 +1975,31 @@ def test_where_attrs() -> None: ), pytest.param( xr.DataArray([1, 2, 3], dims="x"), - xr.Dataset({"a": ("degree", [0, 1]), "b": ("degree", [1, 0])}), + xr.Dataset( + {"a": ("degree", [0, 1]), "b": ("degree", [1, 0])}, + coords={"degree": [0, 1]}, + ), xr.Dataset({"a": ("x", [1, 2, 3]), "b": ("x", [1, 1, 1])}), id="array-dataset", ), pytest.param( xr.Dataset({"a": ("x", [1, 2, 3]), "b": ("x", [2, 3, 4])}), - xr.DataArray([1, 1], dims="degree"), + xr.DataArray([1, 1], dims="degree", coords={"degree": [0, 1]}), xr.Dataset({"a": ("x", [2, 3, 4]), "b": ("x", [3, 4, 5])}), id="dataset-array", ), pytest.param( xr.Dataset({"a": ("x", [1, 2, 3]), "b": ("y", [2, 3, 4])}), - xr.Dataset({"a": ("degree", [0, 1]), "b": ("degree", [1, 1])}), + xr.Dataset( + {"a": ("degree", [0, 1]), "b": ("degree", [1, 1])}, + coords={"degree": [0, 1]}, + ), xr.Dataset({"a": ("x", [1, 2, 3]), "b": ("y", [3, 4, 5])}), id="dataset-dataset", ), pytest.param( xr.DataArray(pd.date_range("1970-01-01", freq="s", periods=3), dims="x"), - xr.DataArray([0, 1], dims="degree"), + xr.DataArray([0, 1], dims="degree", coords={"degree": [0, 1]}), xr.DataArray( [0, 1e9, 2e9], dims="x", @@ -2008,6 +2020,15 @@ def test_polyval(use_dask, x, coeffs, expected) -> None: xr.testing.assert_allclose(actual, expected) +def test_polyval_degree_dim_checks(): + x = (xr.DataArray([1, 2, 3], dims="x"),) + coeffs = xr.DataArray([2, 3, 4], dims="degree", coords={"degree": [0, 1, 2]}) + with pytest.raises(ValueError): + xr.polyval(x, coeffs.drop_vars("degree")) + with pytest.raises(ValueError): + xr.polyval(x, coeffs.assign_coords(degree=coeffs.degree.astype(float))) + + @pytest.mark.parametrize("use_dask", [False, True]) @pytest.mark.parametrize( "a, b, ae, be, dim, axis", From bd3dd81d52212cac72b359bbd2e50897d70efd16 Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 4 May 2022 10:40:03 -0600 Subject: [PATCH 32/32] Fix benchmark --- asv_bench/benchmarks/polyfit.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/polyfit.py b/asv_bench/benchmarks/polyfit.py index 7d933e43834..429ffa19baa 100644 --- a/asv_bench/benchmarks/polyfit.py +++ b/asv_bench/benchmarks/polyfit.py @@ -1,3 +1,5 @@ +import numpy as np + import xarray as xr from . import parameterized, randn, requires_dask @@ -10,7 +12,10 @@ class Polyval: def setup(self, *args, **kwargs): self.xs = {nx: xr.DataArray(randn((nx,)), dims="x", name="x") for nx in NX} self.coeffs = { - ndeg: xr.DataArray(randn((ndeg,)), dims="degree") for ndeg in NDEGS + ndeg: xr.DataArray( + randn((ndeg,)), dims="degree", coords={"degree": np.arange(ndeg)} + ) + for ndeg in NDEGS } @parameterized(["nx", "ndeg"], [NX, NDEGS])