Skip to content

Commit

Permalink
TST: fix and test index division by zero
Browse files Browse the repository at this point in the history
Related: #19336

Author: Brock Mendel <jbrockmendel@gmail.com>

Closes #19347 from jbrockmendel/div_zero2 and squashes the following commits:

be1e2e1 [Brock Mendel] move fixture to conftest
64b0c08 [Brock Mendel] Merge branch 'master' of https://github.com/pandas-dev/pandas into div_zero2
aa969f8 [Brock Mendel] Merge branch 'master' of https://github.com/pandas-dev/pandas into div_zero2
000aefd [Brock Mendel] fix long again
9de356a [Brock Mendel] revert fixture to fix test_range failures
b8cf21d [Brock Mendel] flake8 remove unused import
afedba9 [Brock Mendel] whatsnew clarification
b51c2e1 [Brock Mendel] fixturize
37efd51 [Brock Mendel] make zero a fixture
965f721 [Brock Mendel] Merge branch 'master' of https://github.com/pandas-dev/pandas into div_zero2
d648ef6 [Brock Mendel] requested edits
1ef3a6c [Brock Mendel] Merge branch 'master' of https://github.com/pandas-dev/pandas into div_zero2
78de1a4 [Brock Mendel] Merge branch 'master' of https://github.com/pandas-dev/pandas into div_zero2
0277d9f [Brock Mendel] add ipython output to whatsnew
5d7e3ea [Brock Mendel] Merge branch 'master' of https://github.com/pandas-dev/pandas into div_zero2
ea75c3c [Brock Mendel] ipython block
6fc61bd [Brock Mendel] elaborate docstring
ca3bf42 [Brock Mendel] Whatsnew section
cd54349 [Brock Mendel] move dispatch_missing to core.missing
06df02a [Brock Mendel] py3 fix
84c74c5 [Brock Mendel] remove operator.div for py3
6acc2f7 [Brock Mendel] fix missing import
e0e89b9 [Brock Mendel] fix and and tests for divmod
969f342 [Brock Mendel] fix and test index division by zero
  • Loading branch information
jbrockmendel authored and jreback committed Feb 6, 2018
1 parent a01f74c commit ed10bf6
Show file tree
Hide file tree
Showing 6 changed files with 200 additions and 19 deletions.
44 changes: 44 additions & 0 deletions doc/source/whatsnew/v0.23.0.txt
Expand Up @@ -204,6 +204,50 @@ Please note that the string `index` is not supported with the round trip format,
new_df
print(new_df.index.name)

.. _whatsnew_0230.enhancements.index_division_by_zero:

Index Division By Zero Fills Correctly
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Division operations on ``Index`` and subclasses will now fill division of positive numbers by zero with ``np.inf``, division of negative numbers by zero with ``-np.inf`` and `0 / 0` with ``np.nan``. This matches existing ``Series`` behavior. (:issue:`19322`, :issue:`19347`)

Previous Behavior:

.. code-block:: ipython

In [6]: index = pd.Int64Index([-1, 0, 1])

In [7]: index / 0
Out[7]: Int64Index([0, 0, 0], dtype='int64')

# Previous behavior yielded different results depending on the type of zero in the divisor
In [8]: index / 0.0
Out[8]: Float64Index([-inf, nan, inf], dtype='float64')

In [9]: index = pd.UInt64Index([0, 1])

In [10]: index / np.array([0, 0], dtype=np.uint64)
Out[10]: UInt64Index([0, 0], dtype='uint64')

In [11]: pd.RangeIndex(1, 5) / 0
ZeroDivisionError: integer division or modulo by zero

Current Behavior:

.. ipython:: python

index = pd.Int64Index([-1, 0, 1])
# division by zero gives -infinity where negative, +infinity where positive, and NaN for 0 / 0
index / 0

# The result of division by zero should not depend on whether the zero is int or float
index / 0.0

index = pd.UInt64Index([0, 1])
index / np.array([0, 0], dtype=np.uint64)

pd.RangeIndex(1, 5) / 0

.. _whatsnew_0230.enhancements.other:

Other Enhancements
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/indexes/base.py
Expand Up @@ -4040,6 +4040,8 @@ def _evaluate_numeric_binop(self, other):
attrs = self._maybe_update_attributes(attrs)
with np.errstate(all='ignore'):
result = op(values, other)

result = missing.dispatch_missing(op, values, other, result)
return constructor(result, **attrs)

return _evaluate_numeric_binop
Expand Down
31 changes: 13 additions & 18 deletions pandas/core/indexes/range.py
Expand Up @@ -550,7 +550,7 @@ def __getitem__(self, key):
return super_getitem(key)

def __floordiv__(self, other):
if is_integer(other):
if is_integer(other) and other != 0:
if (len(self) == 0 or
self._start % other == 0 and
self._step % other == 0):
Expand Down Expand Up @@ -592,26 +592,27 @@ def _evaluate_numeric_binop(self, other):
attrs = self._get_attributes_dict()
attrs = self._maybe_update_attributes(attrs)

left, right = self, other
if reversed:
self, other = other, self
left, right = right, left

try:
# apply if we have an override
if step:
with np.errstate(all='ignore'):
rstep = step(self._step, other)
rstep = step(left._step, right)

# we don't have a representable op
# so return a base index
if not is_integer(rstep) or not rstep:
raise ValueError

else:
rstep = self._step
rstep = left._step

with np.errstate(all='ignore'):
rstart = op(self._start, other)
rstop = op(self._stop, other)
rstart = op(left._start, right)
rstop = op(left._stop, right)

result = RangeIndex(rstart,
rstop,
Expand All @@ -627,18 +628,12 @@ def _evaluate_numeric_binop(self, other):

return result

except (ValueError, TypeError, AttributeError):
pass

# convert to Int64Index ops
if isinstance(self, RangeIndex):
self = self.values
if isinstance(other, RangeIndex):
other = other.values

with np.errstate(all='ignore'):
results = op(self, other)
return Index(results, **attrs)
except (ValueError, TypeError, AttributeError,
ZeroDivisionError):
# Defer to Int64Index implementation
if reversed:
return op(other, self._int64index)
return op(self._int64index, other)

return _evaluate_numeric_binop

Expand Down
82 changes: 82 additions & 0 deletions pandas/core/missing.py
@@ -1,6 +1,7 @@
"""
Routines for filling missing data
"""
import operator

import numpy as np
from distutils.version import LooseVersion
Expand Down Expand Up @@ -650,6 +651,87 @@ def fill_zeros(result, x, y, name, fill):
return result


def mask_zero_div_zero(x, y, result, copy=False):
"""
Set results of 0 / 0 or 0 // 0 to np.nan, regardless of the dtypes
of the numerator or the denominator.
Parameters
----------
x : ndarray
y : ndarray
result : ndarray
copy : bool (default False)
Whether to always create a new array or try to fill in the existing
array if possible.
Returns
-------
filled_result : ndarray
Examples
--------
>>> x = np.array([1, 0, -1], dtype=np.int64)
>>> y = 0 # int 0; numpy behavior is different with float
>>> result = x / y
>>> result # raw numpy result does not fill division by zero
array([0, 0, 0])
>>> mask_zero_div_zero(x, y, result)
array([ inf, nan, -inf])
"""
if is_scalar(y):
y = np.array(y)

zmask = y == 0
if zmask.any():
shape = result.shape

nan_mask = (zmask & (x == 0)).ravel()
neginf_mask = (zmask & (x < 0)).ravel()
posinf_mask = (zmask & (x > 0)).ravel()

if nan_mask.any() or neginf_mask.any() or posinf_mask.any():
# Fill negative/0 with -inf, positive/0 with +inf, 0/0 with NaN
result = result.astype('float64', copy=copy).ravel()

np.putmask(result, nan_mask, np.nan)
np.putmask(result, posinf_mask, np.inf)
np.putmask(result, neginf_mask, -np.inf)

result = result.reshape(shape)

return result


def dispatch_missing(op, left, right, result):
"""
Fill nulls caused by division by zero, casting to a diffferent dtype
if necessary.
Parameters
----------
op : function (operator.add, operator.div, ...)
left : object (Index for non-reversed ops)
right : object (Index fof reversed ops)
result : ndarray
Returns
-------
result : ndarray
"""
opstr = '__{opname}__'.format(opname=op.__name__).replace('____', '__')
if op in [operator.truediv, operator.floordiv,
getattr(operator, 'div', None)]:
result = mask_zero_div_zero(left, right, result)
elif op is operator.mod:
result = fill_zeros(result, left, right, opstr, np.nan)
elif op is divmod:
res0 = mask_zero_div_zero(left, right, result[0])
res1 = fill_zeros(result[1], left, right, opstr, np.nan)
result = (res0, res1)
return result


def _interp_limit(invalid, fw_limit, bw_limit):
"""
Get indexers of values that won't be filled
Expand Down
18 changes: 17 additions & 1 deletion pandas/tests/indexes/conftest.py
@@ -1,9 +1,10 @@
import pytest
import numpy as np
import pandas as pd

import pandas.util.testing as tm
from pandas.core.indexes.api import Index, MultiIndex
from pandas.compat import lzip
from pandas.compat import lzip, long


@pytest.fixture(params=[tm.makeUnicodeIndex(100),
Expand All @@ -29,3 +30,18 @@ def indices(request):
def one(request):
# zero-dim integer array behaves like an integer
return request.param


zeros = [box([0] * 5, dtype=dtype)
for box in [pd.Index, np.array]
for dtype in [np.int64, np.uint64, np.float64]]
zeros.extend([np.array(0, dtype=dtype)
for dtype in [np.int64, np.uint64, np.float64]])
zeros.extend([0, 0.0, long(0)])


@pytest.fixture(params=zeros)
def zero(request):
# For testing division by (or of) zero for Index with length 5, this
# gives several scalar-zeros and length-5 vector-zeros
return request.param
42 changes: 42 additions & 0 deletions pandas/tests/indexes/test_numeric.py
Expand Up @@ -157,6 +157,48 @@ def test_divmod_series(self):
for r, e in zip(result, expected):
tm.assert_series_equal(r, e)

def test_div_zero(self, zero):
idx = self.create_index()

expected = Index([np.nan, np.inf, np.inf, np.inf, np.inf],
dtype=np.float64)
result = idx / zero
tm.assert_index_equal(result, expected)
ser_compat = Series(idx).astype('i8') / np.array(zero).astype('i8')
tm.assert_series_equal(ser_compat, Series(result))

def test_floordiv_zero(self, zero):
idx = self.create_index()
expected = Index([np.nan, np.inf, np.inf, np.inf, np.inf],
dtype=np.float64)

result = idx // zero
tm.assert_index_equal(result, expected)
ser_compat = Series(idx).astype('i8') // np.array(zero).astype('i8')
tm.assert_series_equal(ser_compat, Series(result))

def test_mod_zero(self, zero):
idx = self.create_index()

expected = Index([np.nan, np.nan, np.nan, np.nan, np.nan],
dtype=np.float64)
result = idx % zero
tm.assert_index_equal(result, expected)
ser_compat = Series(idx).astype('i8') % np.array(zero).astype('i8')
tm.assert_series_equal(ser_compat, Series(result))

def test_divmod_zero(self, zero):
idx = self.create_index()

exleft = Index([np.nan, np.inf, np.inf, np.inf, np.inf],
dtype=np.float64)
exright = Index([np.nan, np.nan, np.nan, np.nan, np.nan],
dtype=np.float64)

result = divmod(idx, zero)
tm.assert_index_equal(result[0], exleft)
tm.assert_index_equal(result[1], exright)

def test_explicit_conversions(self):

# GH 8608
Expand Down

0 comments on commit ed10bf6

Please sign in to comment.