Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Floor and ceil methods during pandas.eval which are provided by numexpr #24355

Merged
merged 17 commits into from
Dec 30, 2018
Merged
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1440,7 +1440,7 @@ Numeric
- :meth:`Series.agg` can now handle numpy NaN-aware methods like :func:`numpy.nansum` (:issue:`19629`)
- Bug in :meth:`Series.rank` and :meth:`DataFrame.rank` when ``pct=True`` and more than 2:sup:`24` rows are present resulted in percentages greater than 1.0 (:issue:`18271`)
- Calls such as :meth:`DataFrame.round` with a non-unique :meth:`CategoricalIndex` now return expected data. Previously, data would be improperly duplicated (:issue:`21809`).
- Added ``log10`` to the list of supported functions in :meth:`DataFrame.eval` (:issue:`24139`)
- Added ``log10``, ``floor`` and ``ceil`` to the list of supported functions in :meth:`DataFrame.eval` (:issue:`24139`, :issue:`24353`)
- Logical operations ``&, |, ^`` between :class:`Series` and :class:`Index` will no longer raise ``ValueError`` (:issue:`22092`)
- Checking PEP 3141 numbers in :func:`~pandas.api.types.is_scalar` function returns ``True`` (:issue:`22903`)
- Reduction methods like :meth:`Series.sum` now accept the default value of ``keepdims=False`` when called from a NumPy ufunc, rather than raising a ``TypeError``. Full support for ``keepdims`` has not been implemented (:issue:`24356`).
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/computation/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@

_NUMEXPR_INSTALLED = False
_MIN_NUMEXPR_VERSION = "2.6.1"
_NUMEXPR_VERSION = None

try:
import numexpr as ne
ver = LooseVersion(ne.__version__)
_NUMEXPR_INSTALLED = ver >= LooseVersion(_MIN_NUMEXPR_VERSION)
_NUMEXPR_VERSION = ver

if not _NUMEXPR_INSTALLED:
warnings.warn(
Expand All @@ -19,4 +21,4 @@
except ImportError: # pragma: no cover
pass

__all__ = ['_NUMEXPR_INSTALLED']
__all__ = ['_NUMEXPR_INSTALLED', '_NUMEXPR_VERSION']
16 changes: 13 additions & 3 deletions pandas/core/computation/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
"""

from datetime import datetime
from distutils.version import LooseVersion
from functools import partial
import operator as op

Expand All @@ -23,8 +24,11 @@

# Names of the single-argument math functions that FuncNode accepts.
_unary_math_ops = ('sin', 'cos', 'exp', 'log', 'expm1', 'log1p',
                   'sqrt', 'sinh', 'cosh', 'tanh', 'arcsin', 'arccos',
                   'arctan', 'arccosh', 'arcsinh', 'arctanh', 'abs', 'log10',
                   'floor', 'ceil')
# Names of the two-argument math functions.
_binary_math_ops = ('arctan2',)

# Every function name that FuncNode accepts.
_mathops = _unary_math_ops + _binary_math_ops


Expand Down Expand Up @@ -539,11 +543,17 @@ def __unicode__(self):


class FuncNode(object):

def __init__(self, name):
    """
    Validate ``name`` and bind the numpy function it refers to.

    Parameters
    ----------
    name : str
        Name of the math function; it must be listed in ``_mathops``.

    Raises
    ------
    ValueError
        If ``name`` is not in ``_mathops``, or if it is ``floor`` or
        ``ceil`` while the usable numexpr is older than 2.6.9.
    """
    from pandas.core.computation.check import (_NUMEXPR_INSTALLED,
                                               _NUMEXPR_VERSION)

    # The version is only compared once _NUMEXPR_INSTALLED is known to be
    # true, so a missing numexpr never reaches the comparison.
    needs_newer_numexpr = (
        name in ('floor', 'ceil') and
        _NUMEXPR_INSTALLED and
        _NUMEXPR_VERSION < LooseVersion('2.6.9')
    )
    if name not in _mathops or needs_newer_numexpr:
        raise ValueError(
            "\"{0}\" is not a supported function".format(name))

    self.name = name
    # Resolved by name on numpy.
    self.func = getattr(np, name)

Expand Down
36 changes: 34 additions & 2 deletions pandas/tests/computation/test_eval.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import warnings
import operator
from distutils.version import LooseVersion
from itertools import product

import pytest
Expand All @@ -14,6 +15,7 @@
from pandas.util.testing import makeCustomDataframe as mkdf

from pandas.core.computation import pytables
from pandas.core.computation.check import _NUMEXPR_VERSION
from pandas.core.computation.engines import _engines, NumExprClobberingError
from pandas.core.computation.expr import PythonExprVisitor, PandasExprVisitor
from pandas.core.computation.expressions import (
Expand All @@ -32,6 +34,7 @@
assert_produces_warning)
from pandas.compat import PY3, reduce


_series_frame_incompatible = _bool_ops_syms
_scalar_skip = 'in', 'not in'

Expand All @@ -54,6 +57,25 @@ def parser(request):
return request.param


@pytest.fixture
def ne_lt_2_6_9():
    """
    Engine name for tests that target numexpr releases older than 2.6.9.

    Skips the requesting test when a usable numexpr of version 2.6.9 or
    newer is present; otherwise returns ``'numexpr'``.
    """
    too_new = (_NUMEXPR_INSTALLED and
               _NUMEXPR_VERSION >= LooseVersion('2.6.9'))
    if too_new:
        pytest.skip("numexpr is >= 2.6.9")
    return 'numexpr'


@pytest.fixture
def unary_fns_for_ne():
    """
    Unary math function names to exercise against the available numexpr.

    Skips the requesting test when numexpr is not usable. ``floor`` and
    ``ceil`` are left out for numexpr older than 2.6.9.
    """
    if not _NUMEXPR_INSTALLED:
        pytest.skip("numexpr is not present")
    if _NUMEXPR_VERSION >= LooseVersion('2.6.9'):
        return _unary_math_ops
    return tuple(fn for fn in _unary_math_ops
                 if fn not in ("floor", "ceil"))


def engine_has_neg_frac(engine):
    """Return ``has_neg_frac`` of the entry registered under ``engine``."""
    registered = _engines[engine]
    return registered.has_neg_frac

Expand Down Expand Up @@ -1622,16 +1644,26 @@ def eval(self, *args, **kwargs):
kwargs['level'] = kwargs.pop('level', 0) + 1
return pd.eval(*args, **kwargs)

def test_unary_functions(self, unary_fns_for_ne):
    """Each unary function from the fixture must match numpy's result."""
    df = DataFrame({'a': np.random.randn(10)})
    # ``a`` is referenced by name inside the evaluated expression below.
    a = df.a

    for fn in unary_fns_for_ne:
        expr = "{0}(a)".format(fn)
        got = self.eval(expr)
        # Presumably silences numpy warnings for inputs outside a
        # function's domain, since the data is random.
        with np.errstate(all='ignore'):
            expect = getattr(np, fn)(a)
        tm.assert_series_equal(got, expect, check_names=False)

def test_floor_and_ceil_functions_raise_error(self,
                                              ne_lt_2_6_9,
                                              unary_fns_for_ne):
    """
    ``floor`` and ``ceil`` must be rejected with a ``ValueError``.

    The two fixtures are requested for the skips they may trigger;
    their return values are not used here.
    """
    for name in ('floor', 'ceil'):
        msg = "\"{0}\" is not a supported function".format(name)
        expr = "{0}(100)".format(name)
        with pytest.raises(ValueError, match=msg):
            self.eval(expr)

def test_binary_functions(self):
df = DataFrame({'a': np.random.randn(10),
'b': np.random.randn(10)})
Expand Down