Skip to content

Commit

Permalink
API: rolling.apply will pass Series to function
Browse files Browse the repository at this point in the history
  • Loading branch information
jreback committed Apr 11, 2018
1 parent 2794474 commit f131bba
Show file tree
Hide file tree
Showing 4 changed files with 228 additions and 96 deletions.
30 changes: 29 additions & 1 deletion doc/source/whatsnew/v0.23.0.txt
Expand Up @@ -60,6 +60,34 @@ The :func:`get_dummies` now accepts a ``dtype`` argument, which specifies a dtyp
pd.get_dummies(df, columns=['c'], dtype=bool).dtypes


.. _whatsnew_0230.enhancements.window_raw:

Rolling/Expanding.apply() accepts a ``raw`` keyword to pass a ``Series`` to the function
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

The :func:`Series.rolling`, :func:`DataFrame.rolling`, :func:`Series.expanding`, and :func:`DataFrame.expanding` methods, when used with ``.apply()``, have gained a ``raw=None`` parameter.
This is similar to :func:`DataFrame.apply`. This parameter, if ``True``, allows one to send a ``np.ndarray`` to the applied function. If ``False``, a ``Series`` will be passed. The
default is ``None``, which preserves backward compatibility, so this will default to ``True``, sending an ``np.ndarray``.
In a future version the default will be changed to ``False``, sending a ``Series``. (:issue:`5071`)

.. ipython:: python

s = pd.Series(np.arange(5), np.arange(5) + 1)
s

Pass a ``Series``:

.. ipython:: python

s.rolling(2, min_periods=1).apply(lambda x: x.iloc[-1], raw=False)

Mimic the original behavior of passing an ndarray:

.. ipython:: python

s.rolling(2, min_periods=1).apply(lambda x: x[-1], raw=True)


.. _whatsnew_0230.enhancements.merge_on_columns_and_levels:

Merging on a combination of columns and index levels
Expand Down Expand Up @@ -407,7 +435,7 @@ Other Enhancements
- Updated ``to_gbq`` and ``read_gbq`` signature and documentation to reflect changes from
the Pandas-GBQ library version 0.4.0. Adds intersphinx mapping to Pandas-GBQ
library. (:issue:`20564`)

.. _whatsnew_0230.api_breaking:

Backwards incompatible API changes
Expand Down
46 changes: 32 additions & 14 deletions pandas/_libs/window.pyx
Expand Up @@ -1432,39 +1432,44 @@ def roll_quantile(ndarray[float64_t, cast=True] input, int64_t win,
return output


def roll_generic(ndarray[float64_t, cast=True] input,
def roll_generic(object obj,
int64_t win, int64_t minp, object index, object closed,
int offset, object func,
int offset, object func, bint raw,
object args, object kwargs):
cdef:
ndarray[double_t] output, counts, bufarr
ndarray[float64_t, cast=True] arr
float64_t *buf
float64_t *oldbuf
int64_t nobs = 0, i, j, s, e, N
bint is_variable
ndarray[int64_t] start, end

if not input.flags.c_contiguous:
input = input.copy('C')

n = len(input)
n = len(obj)
if n == 0:
return input
return obj

arr = np.asarray(obj)

# ndarray input
if raw:
if not arr.flags.c_contiguous:
arr = arr.copy('C')

counts = roll_sum(np.concatenate([np.isfinite(input).astype(float),
counts = roll_sum(np.concatenate([np.isfinite(arr).astype(float),
np.array([0.] * offset)]),
win, minp, index, closed)[offset:]

start, end, N, win, minp, is_variable = get_window_indexer(input, win,
start, end, N, win, minp, is_variable = get_window_indexer(arr, win,
minp, index,
closed,
floor=0)

output = np.empty(N, dtype=float)

if is_variable:
# variable window arr or series

# variable window
if offset != 0:
raise ValueError("unable to roll_generic with a non-zero offset")

Expand All @@ -1473,7 +1478,20 @@ def roll_generic(ndarray[float64_t, cast=True] input,
e = end[i]

if counts[i] >= minp:
output[i] = func(input[s:e], *args, **kwargs)
if raw:
output[i] = func(arr[s:e], *args, **kwargs)
else:
output[i] = func(obj.iloc[s:e], *args, **kwargs)
else:
output[i] = NaN

elif not raw:
# series
for i from 0 <= i < N:
if counts[i] >= minp:
sl = slice(int_max(i + offset - win + 1, 0),
int_min(i + offset + 1, N))
output[i] = func(obj.iloc[sl], *args, **kwargs)
else:
output[i] = NaN

Expand All @@ -1482,12 +1500,12 @@ def roll_generic(ndarray[float64_t, cast=True] input,
# truncated windows at the beginning, through first full-length window
for i from 0 <= i < (int_min(win, N) - offset):
if counts[i] >= minp:
output[i] = func(input[0: (i + offset + 1)], *args, **kwargs)
output[i] = func(arr[0: (i + offset + 1)], *args, **kwargs)
else:
output[i] = NaN

# remaining full-length windows
buf = <float64_t *> input.data
buf = <float64_t *> arr.data
bufarr = np.empty(win, dtype=float)
oldbuf = <float64_t *> bufarr.data
for i from (win - offset) <= i < (N - offset):
Expand All @@ -1502,7 +1520,7 @@ def roll_generic(ndarray[float64_t, cast=True] input,
# truncated windows at the end
for i from int_max(N - offset, 0) <= i < N:
if counts[i] >= minp:
output[i] = func(input[int_max(i + offset - win + 1, 0): N],
output[i] = func(arr[int_max(i + offset - win + 1, 0): N],
*args,
**kwargs)
else:
Expand Down
48 changes: 38 additions & 10 deletions pandas/core/window.py
Expand Up @@ -314,7 +314,7 @@ def _center_window(self, result, window):
def aggregate(self, arg, *args, **kwargs):
result, how = self._aggregate(arg, *args, **kwargs)
if result is None:
return self.apply(arg, args=args, kwargs=kwargs)
return self.apply(arg, raw=False, args=args, kwargs=kwargs)
return result

agg = aggregate
Expand Down Expand Up @@ -955,22 +955,48 @@ def count(self):
----------
func : function
Must produce a single value from an ndarray input
\*args and \*\*kwargs are passed to the function""")
raw : bool, default None
* ``False`` : passes each row or column as a Series to the
function.
* ``True`` or ``None`` : the passed function will receive ndarray
objects instead.
If you are just applying a NumPy reduction function this will
achieve much better performance.
.. versionadded:: 0.23.0
\*args and \*\*kwargs are passed to the function""")

def apply(self, func, raw=None, args=(), kwargs={}):
from pandas import Series

def apply(self, func, args=(), kwargs={}):
# TODO: _level is unused?
_level = kwargs.pop('_level', None) # noqa
window = self._get_window()
offset = _offset(window, self.center)
index, indexi = self._get_index()

# TODO: default is for backward compat
# change to False in the future
if raw is None:
warnings.warn(
"pass the raw keyword to remain backward compatible "
"for .apply().\nIn the future, this will default to "
"False, meaning a Series will be passed to the "
"applied function. Not passing raw, defaults "
"raw=True, meaning a ndarray is passed to the "
"applied function", FutureWarning, stacklevel=3)
raw = True

def f(arg, window, min_periods, closed):
minp = _use_window(min_periods, window)
return _window.roll_generic(arg, window, minp, indexi, closed,
offset, func, args, kwargs)
if not raw:
arg = Series(arg, index=self.obj.index)
return _window.roll_generic(
arg, window, minp, indexi,
closed, offset, func, raw, args, kwargs)

return self._apply(f, func, args=args, kwargs=kwargs,
center=False)
center=False, raw=raw)

def sum(self, *args, **kwargs):
nv.validate_window_func('sum', args, kwargs)
Expand Down Expand Up @@ -1498,8 +1524,9 @@ def count(self):
@Substitution(name='rolling')
@Appender(_doc_template)
@Appender(_shared_docs['apply'])
def apply(self, func, args=(), kwargs={}):
return super(Rolling, self).apply(func, args=args, kwargs=kwargs)
def apply(self, func, raw=None, args=(), kwargs={}):
return super(Rolling, self).apply(
func, raw=raw, args=args, kwargs=kwargs)

@Substitution(name='rolling')
@Appender(_shared_docs['sum'])
Expand Down Expand Up @@ -1756,8 +1783,9 @@ def count(self, **kwargs):
@Substitution(name='expanding')
@Appender(_doc_template)
@Appender(_shared_docs['apply'])
def apply(self, func, args=(), kwargs={}):
return super(Expanding, self).apply(func, args=args, kwargs=kwargs)
def apply(self, func, raw=None, args=(), kwargs={}):
return super(Expanding, self).apply(
func, raw=raw, args=args, kwargs=kwargs)

@Substitution(name='expanding')
@Appender(_shared_docs['sum'])
Expand Down

0 comments on commit f131bba

Please sign in to comment.