Skip to content

Commit

Permalink
COMPAT: Expand compatibility with fromnumeric.py
Browse files Browse the repository at this point in the history
Expands compatibility with fromnumeric.py in tslib.pyx and
puts checks in window.py, groupby.py, and resample.py to
ensure that pandas functions such as 'mean' are not called
via the numpy library.

Closes gh-12811.
  • Loading branch information
gfyoung committed May 19, 2016
1 parent 070e877 commit eb4762c
Show file tree
Hide file tree
Showing 12 changed files with 316 additions and 49 deletions.
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v0.18.2.txt
Expand Up @@ -46,7 +46,8 @@ API changes


- Non-convertible dates in an excel date column will be returned without conversion and the column will be ``object`` dtype, rather than raising an exception (:issue:`10001`)

- Compatibility with NumPy array methods has been expanded to timestamps (:issue:`12811`)
- An ``UnsupportedFunctionCall`` error is now raised if groupby or resample functions like ``mean`` are called via NumPy (:issue:`12811`)

.. _whatsnew_0182.api.tolist:

Expand Down
76 changes: 75 additions & 1 deletion pandas/compat/numpy/function.py
Expand Up @@ -21,7 +21,7 @@
from numpy import ndarray
from pandas.util.validators import (validate_args, validate_kwargs,
validate_args_and_kwargs)
from pandas.core.common import is_integer
from pandas.core.common import is_integer, UnsupportedFunctionCall
from pandas.compat import OrderedDict


Expand Down Expand Up @@ -245,3 +245,77 @@ def validate_transpose_for_generic(inst, kwargs):
msg += " for {klass} instances".format(klass=klass)

raise ValueError(msg)


# Keyword names that the window validators treat as evidence of a
# numpy-style call; their presence in ``kwargs`` triggers rejection.
_NUMPY_SIGNATURE_KWARGS = ('axis', 'dtype', 'out')


def _reject_numpy_call(args, kwargs, msg):
    """
    Shared check behind the window / rolling / expanding validators.

    Parameters
    ----------
    args : tuple
        Extra positional arguments collected by the calling method.
    kwargs : dict
        Extra keyword arguments collected by the calling method.
    msg : str
        Error message to attach to the exception.

    Raises
    ------
    UnsupportedFunctionCall
        If any positional argument was passed, or if ``kwargs`` contains
        one of the names in ``_NUMPY_SIGNATURE_KWARGS``. Other keyword
        arguments are deliberately let through.
    """
    if args or any(key in kwargs for key in _NUMPY_SIGNATURE_KWARGS):
        raise UnsupportedFunctionCall(msg)


def validate_window_func(name, args, kwargs):
    """
    Reject numpy-style arguments passed to the window method `name`.

    Returns None when the call is acceptable; raises
    UnsupportedFunctionCall (a ValueError subclass) otherwise.
    """
    _reject_numpy_call(
        args, kwargs,
        ("numpy operations are not "
         "valid with window objects. "
         "Use .{func}() directly instead ".format(func=name)))


def validate_rolling_func(name, args, kwargs):
    """
    Reject numpy-style arguments passed to the rolling method `name`.

    Returns None when the call is acceptable; raises
    UnsupportedFunctionCall (a ValueError subclass) otherwise.
    """
    _reject_numpy_call(
        args, kwargs,
        ("numpy operations are not "
         "valid with window objects. "
         "Use .rolling(...).{func}() instead ".format(func=name)))


def validate_expanding_func(name, args, kwargs):
    """
    Reject numpy-style arguments passed to the expanding method `name`.

    Returns None when the call is acceptable; raises
    UnsupportedFunctionCall (a ValueError subclass) otherwise.
    """
    _reject_numpy_call(
        args, kwargs,
        ("numpy operations are not "
         "valid with window objects. "
         "Use .expanding(...).{func}() instead ".format(func=name)))


def validate_groupby_func(name, args, kwargs):
    """
    Reject any extra arguments passed to the groupby method `name`.

    'args' and 'kwargs' should be empty because all of
    their necessary parameters are explicitly listed in
    the function signature

    Parameters
    ----------
    name : str
        Name of the groupby method, used in the error message.
    args : tuple
        Extra positional arguments collected by the method.
    kwargs : dict
        Extra keyword arguments collected by the method.

    Raises
    ------
    UnsupportedFunctionCall
        If either `args` or `kwargs` is non-empty.
    """
    if args or kwargs:
        raise UnsupportedFunctionCall((
            "numpy operations are not valid "
            "with groupby. Use .groupby(...)."
            "{func}() instead".format(func=name)))

# Resampler methods that get the numpy-specific error message when
# called with extra arguments; any other method gets a plain TypeError.
RESAMPLER_NUMPY_OPS = ('min', 'max', 'sum', 'prod',
                       'mean', 'std', 'var')


def validate_resampler_func(method, args, kwargs):
    """
    Reject any extra arguments passed to the resampler method `method`.

    'args' and 'kwargs' should be empty because all of
    their necessary parameters are explicitly listed in
    the function signature

    Parameters
    ----------
    method : str
        Name of the resampler method being validated.
    args : tuple
        Extra positional arguments collected by the method.
    kwargs : dict
        Extra keyword arguments collected by the method.

    Raises
    ------
    UnsupportedFunctionCall
        If extra arguments were passed and `method` is listed in
        RESAMPLER_NUMPY_OPS.
    TypeError
        If extra arguments were passed to any other method.
    """
    if not (args or kwargs):
        return

    if method in RESAMPLER_NUMPY_OPS:
        raise UnsupportedFunctionCall((
            "numpy operations are not valid "
            "with resample. Use .resample(...)."
            "{func}() instead".format(func=method)))

    raise TypeError("too many arguments passed in")
4 changes: 4 additions & 0 deletions pandas/core/common.py
Expand Up @@ -41,6 +41,10 @@ class AmbiguousIndexError(PandasError, KeyError):
pass


class UnsupportedFunctionCall(ValueError):
    """
    Error for a pandas method invoked with arguments it does not support,
    e.g. when reached through a numpy function instead of being called
    directly. Subclasses ValueError, so ``except ValueError`` catches it.
    """


class AbstractMethodError(NotImplementedError):
"""Raise this error instead of NotImplementedError for abstract methods
while keeping compatibility with Python 2 and Python 3.
Expand Down
8 changes: 4 additions & 4 deletions pandas/core/generic.py
Expand Up @@ -5299,7 +5299,7 @@ def _make_stat_function(cls, name, name1, name2, axis_descr, desc, f):
@Appender(_num_doc)
def stat_func(self, axis=None, skipna=None, level=None, numeric_only=None,
**kwargs):
nv.validate_stat_func(tuple(), kwargs)
nv.validate_stat_func(tuple(), kwargs, fname=name)
if skipna is None:
skipna = True
if axis is None:
Expand All @@ -5319,7 +5319,7 @@ def _make_stat_function_ddof(cls, name, name1, name2, axis_descr, desc, f):
@Appender(_num_ddof_doc)
def stat_func(self, axis=None, skipna=None, level=None, ddof=1,
numeric_only=None, **kwargs):
nv.validate_stat_ddof_func(tuple(), kwargs)
nv.validate_stat_ddof_func(tuple(), kwargs, fname=name)
if skipna is None:
skipna = True
if axis is None:
Expand All @@ -5340,7 +5340,7 @@ def _make_cum_function(cls, name, name1, name2, axis_descr, desc, accum_func,
@Appender("Return cumulative {0} over requested axis.".format(name) +
_cnum_doc)
def cum_func(self, axis=None, dtype=None, out=None, skipna=True, **kwargs):
nv.validate_cum_func(tuple(), kwargs)
nv.validate_cum_func(tuple(), kwargs, fname=name)
if axis is None:
axis = self._stat_axis_number
else:
Expand Down Expand Up @@ -5374,7 +5374,7 @@ def _make_logical_function(cls, name, name1, name2, axis_descr, desc, f):
@Appender(_bool_doc)
def logical_func(self, axis=None, bool_only=None, skipna=None, level=None,
**kwargs):
nv.validate_logical_func(tuple(), kwargs)
nv.validate_logical_func(tuple(), kwargs, fname=name)
if skipna is None:
skipna = True
if axis is None:
Expand Down
19 changes: 12 additions & 7 deletions pandas/core/groupby.py
Expand Up @@ -11,6 +11,7 @@
callable, map
)
from pandas import compat
from pandas.compat.numpy import function as nv
from pandas.compat.numpy import _np_version_under1p8
from pandas.core.base import (PandasObject, SelectionMixin, GroupByError,
DataError, SpecificationError)
Expand Down Expand Up @@ -954,12 +955,13 @@ def count(self):

@Substitution(name='groupby')
@Appender(_doc_template)
def mean(self):
def mean(self, *args, **kwargs):
"""
Compute mean of groups, excluding missing values
For multiple groupings, the result index will be a MultiIndex
"""
nv.validate_groupby_func('mean', args, kwargs)
try:
return self._cython_agg_general('mean')
except GroupByError:
Expand Down Expand Up @@ -993,7 +995,7 @@ def f(x):

@Substitution(name='groupby')
@Appender(_doc_template)
def std(self, ddof=1):
def std(self, ddof=1, *args, **kwargs):
"""
Compute standard deviation of groups, excluding missing values
Expand All @@ -1005,12 +1007,13 @@ def std(self, ddof=1):
degrees of freedom
"""

# todo, implement at cython level?
# TODO: implement at Cython level?
nv.validate_groupby_func('std', args, kwargs)
return np.sqrt(self.var(ddof=ddof))

@Substitution(name='groupby')
@Appender(_doc_template)
def var(self, ddof=1):
def var(self, ddof=1, *args, **kwargs):
"""
Compute variance of groups, excluding missing values
Expand All @@ -1021,7 +1024,7 @@ def var(self, ddof=1):
ddof : integer, default 1
degrees of freedom
"""

nv.validate_groupby_func('var', args, kwargs)
if ddof == 1:
return self._cython_agg_general('var')
else:
Expand Down Expand Up @@ -1317,17 +1320,19 @@ def cumcount(self, ascending=True):

@Substitution(name='groupby')
@Appender(_doc_template)
def cumprod(self, axis=0):
def cumprod(self, axis=0, *args, **kwargs):
"""Cumulative product for each group"""
nv.validate_groupby_func('cumprod', args, kwargs)
if axis != 0:
return self.apply(lambda x: x.cumprod(axis=axis))

return self._cython_transform('cumprod')

@Substitution(name='groupby')
@Appender(_doc_template)
def cumsum(self, axis=0):
def cumsum(self, axis=0, *args, **kwargs):
"""Cumulative sum for each group"""
nv.validate_groupby_func('cumsum', args, kwargs)
if axis != 0:
return self.apply(lambda x: x.cumprod(axis=axis))

Expand Down

0 comments on commit eb4762c

Please sign in to comment.