diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py index 306083e9c22b2..5f8cdb2a0bdac 100644 --- a/asv_bench/benchmarks/rolling.py +++ b/asv_bench/benchmarks/rolling.py @@ -50,20 +50,24 @@ class Engine: ["int", "float"], [np.sum, lambda x: np.sum(x) + 5], ["cython", "numba"], + ["sum", "max", "min", "median", "mean"], ) - param_names = ["constructor", "dtype", "function", "engine"] + param_names = ["constructor", "dtype", "function", "engine", "method"] - def setup(self, constructor, dtype, function, engine): + def setup(self, constructor, dtype, function, engine, method): N = 10 ** 3 arr = (100 * np.random.random(N)).astype(dtype) self.data = getattr(pd, constructor)(arr) - def time_rolling_apply(self, constructor, dtype, function, engine): + def time_rolling_apply(self, constructor, dtype, function, engine, method): self.data.rolling(10).apply(function, raw=True, engine=engine) - def time_expanding_apply(self, constructor, dtype, function, engine): + def time_expanding_apply(self, constructor, dtype, function, engine, method): self.data.expanding().apply(function, raw=True, engine=engine) + def time_rolling_methods(self, constructor, dtype, function, engine, method): + getattr(self.data.rolling(10), method)(engine=engine) + class ExpandingMethods: diff --git a/doc/source/user_guide/window.rst b/doc/source/user_guide/window.rst index 08641bc5b17ae..9db4a4bb873bd 100644 --- a/doc/source/user_guide/window.rst +++ b/doc/source/user_guide/window.rst @@ -321,6 +321,10 @@ Numba will be applied in potentially two routines: #. If ``func`` is a standard Python function, the engine will `JIT `__ the passed function. ``func`` can also be a JITed function in which case the engine will not JIT the function again. #. The engine will JIT the for loop where the apply function is applied to each window. +.. versionadded:: 1.3 + +``mean``, ``median``, ``max``, ``min``, and ``sum`` also support the ``engine`` and ``engine_kwargs`` arguments. + The ``engine_kwargs`` argument is a dictionary of keyword arguments that will be passed into the `numba.jit decorator `__. These keyword arguments will be applied to *both* the passed function (if a standard Python function) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index b4b98ec0403a8..1760e773c7b93 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -52,6 +52,7 @@ Other enhancements - Improved integer type mapping from pandas to SQLAlchemy when using :meth:`DataFrame.to_sql` (:issue:`35076`) - :func:`to_numeric` now supports downcasting of nullable ``ExtensionDtype`` objects (:issue:`33013`) - :func:`pandas.read_excel` can now auto detect .xlsb files (:issue:`35416`) +- :meth:`.Rolling.sum`, :meth:`.Expanding.sum`, :meth:`.Rolling.mean`, :meth:`.Expanding.mean`, :meth:`.Rolling.median`, :meth:`.Expanding.median`, :meth:`.Rolling.max`, :meth:`.Expanding.max`, :meth:`.Rolling.min`, and :meth:`.Expanding.min` now support ``Numba`` execution with the ``engine`` keyword (:issue:`38895`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py index 81aa6699c3c61..1f0c16fb5aa8f 100644 --- a/pandas/core/window/expanding.py +++ b/pandas/core/window/expanding.py @@ -172,33 +172,33 @@ def apply( @Substitution(name="expanding") @Appender(_shared_docs["sum"]) - def sum(self, *args, **kwargs): + def sum(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_expanding_func("sum", args, kwargs) - return super().sum(*args, **kwargs) + return super().sum(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs) @Substitution(name="expanding", func_name="max") @Appender(_doc_template) @Appender(_shared_docs["max"]) - def max(self, *args, **kwargs): + def max(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_expanding_func("max", args, kwargs) - return super().max(*args, **kwargs) + return super().max(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs) @Substitution(name="expanding") @Appender(_shared_docs["min"]) - def min(self, *args, **kwargs): + def min(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_expanding_func("min", args, kwargs) - return super().min(*args, **kwargs) + return super().min(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs) @Substitution(name="expanding") @Appender(_shared_docs["mean"]) - def mean(self, *args, **kwargs): + def mean(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_expanding_func("mean", args, kwargs) - return super().mean(*args, **kwargs) + return super().mean(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs) @Substitution(name="expanding") @Appender(_shared_docs["median"]) - def median(self, **kwargs): - return super().median(**kwargs) + def median(self, engine=None, engine_kwargs=None, **kwargs): + return super().median(engine=engine, engine_kwargs=engine_kwargs, **kwargs) @Substitution(name="expanding", versionadded="") @Appender(_shared_docs["std"]) @@ -256,9 +256,16 @@ def kurt(self, **kwargs): @Substitution(name="expanding") @Appender(_shared_docs["quantile"]) - def quantile(self, quantile, interpolation="linear", **kwargs): + def quantile( + self, + quantile, + interpolation="linear", + **kwargs, + ): return super().quantile( - quantile=quantile, interpolation=interpolation, **kwargs + quantile=quantile, + interpolation=interpolation, + **kwargs, ) @Substitution(name="expanding", func_name="cov") diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index db8a48300206b..7ae1e61d426b9 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1241,6 +1241,7 @@ def count(self): objects instead. If you are just applying a NumPy reduction function this will achieve much better performance. + engine : str, default None * ``'cython'`` : Runs rolling apply through C-extensions from cython. * ``'numba'`` : Runs rolling apply through JIT compiled code from numba. @@ -1351,8 +1352,21 @@ def apply_func(values, begin, end, min_periods, raw=raw): return apply_func - def sum(self, *args, **kwargs): + def sum(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_window_func("sum", args, kwargs) + if maybe_use_numba(engine): + if self.method == "table": + raise NotImplementedError("method='table' is not supported.") + # Once numba supports np.nansum with axis, args will be relevant. + # https://github.com/numba/numba/issues/6610 + args = () if self.method == "single" else (0,) + return self.apply( + np.nansum, + raw=True, + engine=engine, + engine_kwargs=engine_kwargs, + args=args, + ) window_func = window_aggregations.roll_sum return self._apply(window_func, name="sum", **kwargs) @@ -1362,13 +1376,43 @@ def sum(self, *args, **kwargs): Parameters ---------- - *args, **kwargs - Arguments and keyword arguments to be passed into func. + engine : str, default None + * ``'cython'`` : Runs rolling max through C-extensions from cython. + * ``'numba'`` : Runs rolling max through JIT compiled code from numba. + * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba`` + + .. versionadded:: 1.3.0 + + engine_kwargs : dict, default None + * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` + * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` + and ``parallel`` dictionary keys. The values must either be ``True`` or + ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is + ``{'nopython': True, 'nogil': False, 'parallel': False}`` + + .. versionadded:: 1.3.0 + + **kwargs + For compatibility with other %(name)s methods. Has no effect on + the result. """ ) - def max(self, *args, **kwargs): + def max(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_window_func("max", args, kwargs) + if maybe_use_numba(engine): + if self.method == "table": + raise NotImplementedError("method='table' is not supported.") + # Once numba supports np.nanmax with axis, args will be relevant. + # https://github.com/numba/numba/issues/6610 + args = () if self.method == "single" else (0,) + return self.apply( + np.nanmax, + raw=True, + engine=engine, + engine_kwargs=engine_kwargs, + args=args, + ) window_func = window_aggregations.roll_max return self._apply(window_func, name="max", **kwargs) @@ -1378,8 +1422,25 @@ def max(self, *args, **kwargs): Parameters ---------- + engine : str, default None + * ``'cython'`` : Runs rolling min through C-extensions from cython. + * ``'numba'`` : Runs rolling min through JIT compiled code from numba. + * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba`` + + .. versionadded:: 1.3.0 + + engine_kwargs : dict, default None + * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` + * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` + and ``parallel`` dictionary keys. The values must either be ``True`` or + ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is + ``{'nopython': True, 'nogil': False, 'parallel': False}`` + + .. versionadded:: 1.3.0 + **kwargs - Under Review. + For compatibility with other %(name)s methods. Has no effect on + the result. Returns ------- @@ -1409,13 +1470,39 @@ def max(self, *args, **kwargs): """ ) - def min(self, *args, **kwargs): + def min(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_window_func("min", args, kwargs) + if maybe_use_numba(engine): + if self.method == "table": + raise NotImplementedError("method='table' is not supported.") + # Once numba supports np.nanmin with axis, args will be relevant. + # https://github.com/numba/numba/issues/6610 + args = () if self.method == "single" else (0,) + return self.apply( + np.nanmin, + raw=True, + engine=engine, + engine_kwargs=engine_kwargs, + args=args, + ) window_func = window_aggregations.roll_min return self._apply(window_func, name="min", **kwargs) - def mean(self, *args, **kwargs): + def mean(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_window_func("mean", args, kwargs) + if maybe_use_numba(engine): + if self.method == "table": + raise NotImplementedError("method='table' is not supported.") + # Once numba supports np.nanmean with axis, args will be relevant. + # https://github.com/numba/numba/issues/6610 + args = () if self.method == "single" else (0,) + return self.apply( + np.nanmean, + raw=True, + engine=engine, + engine_kwargs=engine_kwargs, + args=args, + ) window_func = window_aggregations.roll_mean return self._apply(window_func, name="mean", **kwargs) @@ -1425,9 +1512,25 @@ def mean(self, *args, **kwargs): Parameters ---------- + engine : str, default None + * ``'cython'`` : Runs rolling median through C-extensions from cython. + * ``'numba'`` : Runs rolling median through JIT compiled code from numba. + * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba`` + + .. versionadded:: 1.3.0 + + engine_kwargs : dict, default None + * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` + * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` + and ``parallel`` dictionary keys. The values must either be ``True`` or + ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is + ``{'nopython': True, 'nogil': False, 'parallel': False}`` + + .. versionadded:: 1.3.0 + **kwargs For compatibility with other %(name)s methods. Has no effect - on the computed median. + on the computed result. Returns ------- @@ -1456,10 +1559,21 @@ def mean(self, *args, **kwargs): """ ) - def median(self, **kwargs): + def median(self, engine=None, engine_kwargs=None, **kwargs): + if maybe_use_numba(engine): + if self.method == "table": + raise NotImplementedError("method='table' is not supported.") + # Once numba supports np.nanmedian with axis, args will be relevant. + # https://github.com/numba/numba/issues/6610 + args = () if self.method == "single" else (0,) + return self.apply( + np.nanmedian, + raw=True, + engine=engine, + engine_kwargs=engine_kwargs, + args=args, + ) window_func = window_aggregations.roll_median_c - # GH 32865. Move max window size calculation to - # the median function implementation return self._apply(window_func, name="median", **kwargs) def std(self, ddof: int = 1, *args, **kwargs): @@ -1492,7 +1606,8 @@ def var(self, ddof: int = 1, *args, **kwargs): Parameters ---------- **kwargs - Keyword arguments to be passed into func. + For compatibility with other %(name)s methods. Has no effect on + the result. """ def skew(self, **kwargs): @@ -1512,7 +1627,8 @@ def skew(self, **kwargs): Parameters ---------- **kwargs - Under Review. + For compatibility with other %(name)s methods. Has no effect on + the result. Returns ------- @@ -1604,6 +1720,7 @@ def kurt(self, **kwargs): ---------- quantile : float Quantile to compute. 0 <= quantile <= 1. + interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} This optional parameter specifies the interpolation method to use, when the desired quantile lies between two data points `i` and `j`: @@ -1614,6 +1731,23 @@ def kurt(self, **kwargs): * higher: `j`. * nearest: `i` or `j` whichever is nearest. * midpoint: (`i` + `j`) / 2. + + engine : str, default None + * ``'cython'`` : Runs rolling quantile through C-extensions from cython. + * ``'numba'`` : Runs rolling quantile through JIT compiled code from numba. + * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba`` + + .. versionadded:: 1.3.0 + + engine_kwargs : dict, default None + * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` + * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` + and ``parallel`` dictionary keys. The values must either be ``True`` or + ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is + ``{'nopython': True, 'nogil': False, 'parallel': False}`` + + .. versionadded:: 1.3.0 + **kwargs For compatibility with other %(name)s methods. Has no effect on the result. @@ -1995,33 +2129,33 @@ def apply( @Substitution(name="rolling") @Appender(_shared_docs["sum"]) - def sum(self, *args, **kwargs): + def sum(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_rolling_func("sum", args, kwargs) - return super().sum(*args, **kwargs) + return super().sum(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs) @Substitution(name="rolling", func_name="max") @Appender(_doc_template) @Appender(_shared_docs["max"]) - def max(self, *args, **kwargs): + def max(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_rolling_func("max", args, kwargs) - return super().max(*args, **kwargs) + return super().max(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs) @Substitution(name="rolling") @Appender(_shared_docs["min"]) - def min(self, *args, **kwargs): + def min(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_rolling_func("min", args, kwargs) - return super().min(*args, **kwargs) + return super().min(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs) @Substitution(name="rolling") @Appender(_shared_docs["mean"]) - def mean(self, *args, **kwargs): + def mean(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_rolling_func("mean", args, kwargs) - return super().mean(*args, **kwargs) + return super().mean(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs) @Substitution(name="rolling") @Appender(_shared_docs["median"]) - def median(self, **kwargs): - return super().median(**kwargs) + def median(self, engine=None, engine_kwargs=None, **kwargs): + return super().median(engine=engine, engine_kwargs=engine_kwargs, **kwargs) @Substitution(name="rolling", versionadded="") @Appender(_shared_docs["std"]) @@ -2081,7 +2215,9 @@ def kurt(self, **kwargs): @Appender(_shared_docs["quantile"]) def quantile(self, quantile, interpolation="linear", **kwargs): return super().quantile( - quantile=quantile, interpolation=interpolation, **kwargs + quantile=quantile, + interpolation=interpolation, + **kwargs, ) @Substitution(name="rolling", func_name="cov") diff --git a/pandas/tests/window/conftest.py b/pandas/tests/window/conftest.py index a765f268cfb07..70bead489d2c6 100644 --- a/pandas/tests/window/conftest.py +++ b/pandas/tests/window/conftest.py @@ -47,12 +47,26 @@ def win_types_special(request): "kurt", "skew", "count", + "sem", ] ) def arithmetic_win_operators(request): return request.param +@pytest.fixture( + params=[ + "sum", + "mean", + "median", + "max", + "min", + ] +) +def arithmetic_numba_supported_operators(request): + return request.param + + @pytest.fixture(params=["right", "left", "both", "neither"]) def closed(request): return request.param diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py index 4d22495e6c69a..9d9c216801d73 100644 --- a/pandas/tests/window/test_numba.py +++ b/pandas/tests/window/test_numba.py @@ -12,9 +12,9 @@ @td.skip_if_no("numba", "0.46.0") @pytest.mark.filterwarnings("ignore:\\nThe keyword argument") # Filter warnings when parallel=True and the function can't be parallelized by Numba -class TestRollingApply: +class TestEngine: @pytest.mark.parametrize("jit", [True, False]) - def test_numba_vs_cython(self, jit, nogil, parallel, nopython, center): + def test_numba_vs_cython_apply(self, jit, nogil, parallel, nopython, center): def f(x, *args): arg_sum = 0 for arg in args: @@ -38,8 +38,47 @@ def f(x, *args): ) tm.assert_series_equal(result, expected) + def test_numba_vs_cython_rolling_methods( + self, nogil, parallel, nopython, arithmetic_numba_supported_operators + ): + + method = arithmetic_numba_supported_operators + + engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} + + df = DataFrame(np.eye(5)) + roll = df.rolling(2) + result = getattr(roll, method)(engine="numba", engine_kwargs=engine_kwargs) + expected = getattr(roll, method)(engine="cython") + + # Check the cache + assert (getattr(np, f"nan{method}"), "Rolling_apply_single") in NUMBA_FUNC_CACHE + + tm.assert_frame_equal(result, expected) + + def test_numba_vs_cython_expanding_methods( + self, nogil, parallel, nopython, arithmetic_numba_supported_operators + ): + + method = arithmetic_numba_supported_operators + + engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} + + df = DataFrame(np.eye(5)) + expand = df.expanding() + result = getattr(expand, method)(engine="numba", engine_kwargs=engine_kwargs) + expected = getattr(expand, method)(engine="cython") + + # Check the cache + assert ( + getattr(np, f"nan{method}"), + "Expanding_apply_single", + ) in NUMBA_FUNC_CACHE + + tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("jit", [True, False]) - def test_cache(self, jit, nogil, parallel, nopython): + def test_cache_apply(self, jit, nogil, parallel, nopython): # Test that the functions are cached correctly if we switch functions def func_1(x): return np.mean(x) + 4 @@ -138,7 +177,27 @@ def f(x): f, engine="numba", raw=True ) - def test_table_method_rolling(self, axis, nogil, parallel, nopython): + @pytest.mark.xfail( + raises=NotImplementedError, reason="method='table' is not supported." + ) + def test_table_method_rolling_methods( + self, axis, nogil, parallel, nopython, arithmetic_numba_supported_operators + ): + method = arithmetic_numba_supported_operators + + engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} + + df = DataFrame(np.eye(3)) + + result = getattr( + df.rolling(2, method="table", axis=axis, min_periods=0), method + )(engine_kwargs=engine_kwargs, engine="numba") + expected = getattr( + df.rolling(2, method="single", axis=axis, min_periods=0), method + )(engine_kwargs=engine_kwargs, engine="numba") + tm.assert_frame_equal(result, expected) + + def test_table_method_rolling_apply(self, axis, nogil, parallel, nopython): engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} def f(x): @@ -173,7 +232,7 @@ def weighted_mean(x): ) tm.assert_frame_equal(result, expected) - def test_table_method_expanding(self, axis, nogil, parallel, nopython): + def test_table_method_expanding_apply(self, axis, nogil, parallel, nopython): engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} def f(x): @@ -187,3 +246,23 @@ def f(x): f, raw=True, engine_kwargs=engine_kwargs, engine="numba" ) tm.assert_frame_equal(result, expected) + + @pytest.mark.xfail( + raises=NotImplementedError, reason="method='table' is not supported." + ) + def test_table_method_expanding_methods( + self, axis, nogil, parallel, nopython, arithmetic_numba_supported_operators + ): + method = arithmetic_numba_supported_operators + + engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} + + df = DataFrame(np.eye(3)) + + result = getattr(df.expanding(method="table", axis=axis), method)( + engine_kwargs=engine_kwargs, engine="numba" + ) + expected = getattr(df.expanding(method="single", axis=axis), method)( + engine_kwargs=engine_kwargs, engine="numba" + ) + tm.assert_frame_equal(result, expected)