Skip to content

Commit

Permalink
API: rolling.apply will pass Series to function
Browse files Browse the repository at this point in the history
  • Loading branch information
jreback committed Apr 15, 2018
1 parent b16974a commit 5c5abe5
Show file tree
Hide file tree
Showing 4 changed files with 477 additions and 306 deletions.
32 changes: 32 additions & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,35 @@ The :func:`get_dummies` now accepts a ``dtype`` argument, which specifies a dtyp
pd.get_dummies(df, columns=['c'], dtype=bool).dtypes


.. _whatsnew_0230.enhancements.window_raw:

Rolling/Expanding.apply() accepts a ``raw`` keyword to pass a ``Series`` to the function
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

:func:`Series.rolling().apply() <pandas.core.window.Rolling.apply>`, :func:`DataFrame.rolling().apply() <pandas.core.window.Rolling.apply>`,
:func:`Series.expanding().apply() <pandas.core.window.Expanding.apply>`, and :func:`DataFrame.expanding().apply() <pandas.core.window.Expanding.apply>` have gained a ``raw=None`` parameter.
This is similar to :func:`DataFrame.apply`. This parameter, if ``True``, allows one to send a ``np.ndarray`` to the applied function. If ``False``, a ``Series`` will be passed. The
default is ``None``, which preserves backward compatibility, so this will default to ``True``, sending an ``np.ndarray``.
In a future version the default will be changed to ``False``, sending a ``Series``. (:issue:`5071`)

.. ipython:: python

s = pd.Series(np.arange(5), np.arange(5) + 1)
s

Pass a ``Series``:

.. ipython:: python

s.rolling(2, min_periods=1).apply(lambda x: x.iloc[-1], raw=False)

Mimic the original behavior of passing an ndarray:

.. ipython:: python

s.rolling(2, min_periods=1).apply(lambda x: x[-1], raw=True)


.. _whatsnew_0230.enhancements.merge_on_columns_and_levels:

Merging on a combination of columns and index levels
Expand Down Expand Up @@ -843,6 +872,9 @@ Deprecations
- ``Index.summary()`` is deprecated and will be removed in a future version (:issue:`18217`)
- ``NDFrame.get_ftype_counts()`` is deprecated and will be removed in a future version (:issue:`18243`)
- The ``convert_datetime64`` parameter in :func:`DataFrame.to_records` has been deprecated and will be removed in a future version. The NumPy bug motivating this parameter has been resolved. The default value for this parameter has also changed from ``True`` to ``None`` (:issue:`18160`).
- :func:`Series.rolling().apply() <pandas.core.window.Rolling.apply>`, :func:`DataFrame.rolling().apply() <pandas.core.window.Rolling.apply>`,
:func:`Series.expanding().apply() <pandas.core.window.Expanding.apply>`, and :func:`DataFrame.expanding().apply() <pandas.core.window.Expanding.apply>` will show a ``FutureWarning`` if the new
``raw`` parameter is not explicitly passed (:issue:`20584`)

.. _whatsnew_0230.prior_deprecations:

Expand Down
46 changes: 32 additions & 14 deletions pandas/_libs/window.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1432,39 +1432,44 @@ def roll_quantile(ndarray[float64_t, cast=True] input, int64_t win,
return output


def roll_generic(ndarray[float64_t, cast=True] input,
def roll_generic(object obj,
int64_t win, int64_t minp, object index, object closed,
int offset, object func,
int offset, object func, bint raw,
object args, object kwargs):
cdef:
ndarray[double_t] output, counts, bufarr
ndarray[float64_t, cast=True] arr
float64_t *buf
float64_t *oldbuf
int64_t nobs = 0, i, j, s, e, N
bint is_variable
ndarray[int64_t] start, end

if not input.flags.c_contiguous:
input = input.copy('C')

n = len(input)
n = len(obj)
if n == 0:
return input
return obj

arr = np.asarray(obj)

# ndarray input
if raw:
if not arr.flags.c_contiguous:
arr = arr.copy('C')

counts = roll_sum(np.concatenate([np.isfinite(input).astype(float),
counts = roll_sum(np.concatenate([np.isfinite(arr).astype(float),
np.array([0.] * offset)]),
win, minp, index, closed)[offset:]

start, end, N, win, minp, is_variable = get_window_indexer(input, win,
start, end, N, win, minp, is_variable = get_window_indexer(arr, win,
minp, index,
closed,
floor=0)

output = np.empty(N, dtype=float)

if is_variable:
# variable window arr or series

# variable window
if offset != 0:
raise ValueError("unable to roll_generic with a non-zero offset")

Expand All @@ -1473,7 +1478,20 @@ def roll_generic(ndarray[float64_t, cast=True] input,
e = end[i]

if counts[i] >= minp:
output[i] = func(input[s:e], *args, **kwargs)
if raw:
output[i] = func(arr[s:e], *args, **kwargs)
else:
output[i] = func(obj.iloc[s:e], *args, **kwargs)
else:
output[i] = NaN

elif not raw:
# series
for i from 0 <= i < N:
if counts[i] >= minp:
sl = slice(int_max(i + offset - win + 1, 0),
int_min(i + offset + 1, N))
output[i] = func(obj.iloc[sl], *args, **kwargs)
else:
output[i] = NaN

Expand All @@ -1482,12 +1500,12 @@ def roll_generic(ndarray[float64_t, cast=True] input,
# truncated windows at the beginning, through first full-length window
for i from 0 <= i < (int_min(win, N) - offset):
if counts[i] >= minp:
output[i] = func(input[0: (i + offset + 1)], *args, **kwargs)
output[i] = func(arr[0: (i + offset + 1)], *args, **kwargs)
else:
output[i] = NaN

# remaining full-length windows
buf = <float64_t *> input.data
buf = <float64_t *> arr.data
bufarr = np.empty(win, dtype=float)
oldbuf = <float64_t *> bufarr.data
for i from (win - offset) <= i < (N - offset):
Expand All @@ -1502,7 +1520,7 @@ def roll_generic(ndarray[float64_t, cast=True] input,
# truncated windows at the end
for i from int_max(N - offset, 0) <= i < N:
if counts[i] >= minp:
output[i] = func(input[int_max(i + offset - win + 1, 0): N],
output[i] = func(arr[int_max(i + offset - win + 1, 0): N],
*args,
**kwargs)
else:
Expand Down
54 changes: 43 additions & 11 deletions pandas/core/window.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ def _center_window(self, result, window):
def aggregate(self, arg, *args, **kwargs):
result, how = self._aggregate(arg, *args, **kwargs)
if result is None:
return self.apply(arg, args=args, kwargs=kwargs)
return self.apply(arg, raw=False, args=args, kwargs=kwargs)
return result

agg = aggregate
Expand Down Expand Up @@ -954,23 +954,53 @@ def count(self):
Parameters
----------
func : function
Must produce a single value from an ndarray input
\*args and \*\*kwargs are passed to the function""")
Must produce a single value from an ndarray input if raw=True
or a Series if raw=False
raw : bool, default None
* ``False`` : passes each row or column as a Series to the
function.
* ``True`` or ``None`` : the passed function will receive ndarray
objects instead.
If you are just applying a NumPy reduction function this will
achieve much better performance.
The raw parameter is required and will show a FutureWarning if
not passed. In the futures raw will default to False.
.. versionadded:: 0.23.0
\*args and \*\*kwargs are passed to the function""")

def apply(self, func, raw=None, args=(), kwargs={}):
from pandas import Series

def apply(self, func, args=(), kwargs={}):
# TODO: _level is unused?
_level = kwargs.pop('_level', None) # noqa
window = self._get_window()
offset = _offset(window, self.center)
index, indexi = self._get_index()

# TODO: default is for backward compat
# change to False in the future
if raw is None:
warnings.warn(
"raw defaults to False "
"meaning a Series will be passed to the "
"applied function. In the future raw will default to True "
"meaning a ndarray is passed to the "
"applied function", FutureWarning, stacklevel=3)
raw = True

def f(arg, window, min_periods, closed):
minp = _use_window(min_periods, window)
return _window.roll_generic(arg, window, minp, indexi, closed,
offset, func, args, kwargs)
if not raw:
arg = Series(arg, index=self.obj.index)
return _window.roll_generic(
arg, window, minp, indexi,
closed, offset, func, raw, args, kwargs)

return self._apply(f, func, args=args, kwargs=kwargs,
center=False)
center=False, raw=raw)

def sum(self, *args, **kwargs):
nv.validate_window_func('sum', args, kwargs)
Expand Down Expand Up @@ -1498,8 +1528,9 @@ def count(self):
@Substitution(name='rolling')
@Appender(_doc_template)
@Appender(_shared_docs['apply'])
def apply(self, func, args=(), kwargs={}):
return super(Rolling, self).apply(func, args=args, kwargs=kwargs)
def apply(self, func, raw=None, args=(), kwargs={}):
return super(Rolling, self).apply(
func, raw=raw, args=args, kwargs=kwargs)

@Substitution(name='rolling')
@Appender(_shared_docs['sum'])
Expand Down Expand Up @@ -1756,8 +1787,9 @@ def count(self, **kwargs):
@Substitution(name='expanding')
@Appender(_doc_template)
@Appender(_shared_docs['apply'])
def apply(self, func, args=(), kwargs={}):
return super(Expanding, self).apply(func, args=args, kwargs=kwargs)
def apply(self, func, raw=None, args=(), kwargs={}):
return super(Expanding, self).apply(
func, raw=raw, args=args, kwargs=kwargs)

@Substitution(name='expanding')
@Appender(_shared_docs['sum'])
Expand Down
Loading

0 comments on commit 5c5abe5

Please sign in to comment.