Skip to content

Commit

Permalink
BUG: DataFrame.replace with out of bound datetime causing RecursionEr…
Browse files Browse the repository at this point in the history
…ror (#22108)
  • Loading branch information
minggli authored and jreback committed Aug 1, 2018
1 parent b62c324 commit 57c7daa
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 41 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -652,7 +652,7 @@ Reshaping
- Bug in :meth:`Series.combine_first` with ``datetime64[ns, tz]`` dtype which would return tz-naive result (:issue:`21469`)
- Bug in :meth:`Series.where` and :meth:`DataFrame.where` with ``datetime64[ns, tz]`` dtype (:issue:`21546`)
- Bug in :meth:`Series.mask` and :meth:`DataFrame.mask` with ``list`` conditionals (:issue:`21891`)
-
- Bug in :meth:`DataFrame.replace` which raised ``RecursionError`` when converting out-of-bounds ``datetime64[ns, tz]`` values (:issue:`20380`)
-

Build Changes
Expand Down
10 changes: 8 additions & 2 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import warnings

from pandas._libs import tslib, lib, tslibs
from pandas._libs.tslibs import iNaT
from pandas._libs.tslibs import iNaT, OutOfBoundsDatetime
from pandas.compat import string_types, text_type, PY3
from .common import (ensure_object, is_bool, is_integer, is_float,
is_complex, is_datetimetz, is_categorical_dtype,
Expand Down Expand Up @@ -838,7 +838,13 @@ def soft_convert_objects(values, datetime=True, numeric=True, timedelta=True,

# Soft conversions
if datetime:
values = lib.maybe_convert_objects(values, convert_datetime=datetime)
# GH 20380: a datetime beyond year 2262 is outside the bounds of
# nanosecond-resolution 64-bit integers; leave values unconverted.
try:
values = lib.maybe_convert_objects(values,
convert_datetime=datetime)
except OutOfBoundsDatetime:
pass

if timedelta and is_object_dtype(values.dtype):
# Object check to ensure only run if previous did not convert
Expand Down
10 changes: 6 additions & 4 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -802,12 +802,14 @@ def replace(self, to_replace, value, inplace=False, filter=None,
copy=not inplace) for b in blocks]
return blocks
except (TypeError, ValueError):

# try again with a compatible block
block = self.astype(object)
return block.replace(
to_replace=original_to_replace, value=value, inplace=inplace,
filter=filter, regex=regex, convert=convert)
return block.replace(to_replace=original_to_replace,
value=value,
inplace=inplace,
filter=filter,
regex=regex,
convert=convert)

def _replace_single(self, *args, **kwargs):
""" no-op on a non-ObjectBlock """
Expand Down
65 changes: 31 additions & 34 deletions pandas/tests/frame/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -755,40 +755,37 @@ def test_replace_for_new_dtypes(self):
result = tsframe.fillna(method='bfill')
assert_frame_equal(result, tsframe.fillna(method='bfill'))

def test_replace_dtypes(self):
# int
df = DataFrame({'ints': [1, 2, 3]})
result = df.replace(1, 0)
expected = DataFrame({'ints': [0, 2, 3]})
assert_frame_equal(result, expected)

df = DataFrame({'ints': [1, 2, 3]}, dtype=np.int32)
result = df.replace(1, 0)
expected = DataFrame({'ints': [0, 2, 3]}, dtype=np.int32)
assert_frame_equal(result, expected)

df = DataFrame({'ints': [1, 2, 3]}, dtype=np.int16)
result = df.replace(1, 0)
expected = DataFrame({'ints': [0, 2, 3]}, dtype=np.int16)
assert_frame_equal(result, expected)

# bools
df = DataFrame({'bools': [True, False, True]})
result = df.replace(False, True)
assert result.values.all()

# complex blocks
df = DataFrame({'complex': [1j, 2j, 3j]})
result = df.replace(1j, 0j)
expected = DataFrame({'complex': [0j, 2j, 3j]})
assert_frame_equal(result, expected)

# datetime blocks
prev = datetime.today()
now = datetime.today()
df = DataFrame({'datetime64': Index([prev, now, prev])})
result = df.replace(prev, now)
expected = DataFrame({'datetime64': Index([now] * 3)})
@pytest.mark.parametrize('frame, to_replace, value, expected', [
(DataFrame({'ints': [1, 2, 3]}), 1, 0,
DataFrame({'ints': [0, 2, 3]})),
(DataFrame({'ints': [1, 2, 3]}, dtype=np.int32), 1, 0,
DataFrame({'ints': [0, 2, 3]}, dtype=np.int32)),
(DataFrame({'ints': [1, 2, 3]}, dtype=np.int16), 1, 0,
DataFrame({'ints': [0, 2, 3]}, dtype=np.int16)),
(DataFrame({'bools': [True, False, True]}), False, True,
DataFrame({'bools': [True, True, True]})),
(DataFrame({'complex': [1j, 2j, 3j]}), 1j, 0,
DataFrame({'complex': [0j, 2j, 3j]})),
(DataFrame({'datetime64': Index([datetime(2018, 5, 28),
datetime(2018, 7, 28),
datetime(2018, 5, 28)])}),
datetime(2018, 5, 28), datetime(2018, 7, 28),
DataFrame({'datetime64': Index([datetime(2018, 7, 28)] * 3)})),
# GH 20380
(DataFrame({'dt': [datetime(3017, 12, 20)], 'str': ['foo']}),
'foo', 'bar',
DataFrame({'dt': [datetime(3017, 12, 20)], 'str': ['bar']})),
(DataFrame({'A': date_range('20130101', periods=3, tz='US/Eastern'),
'B': [0, np.nan, 2]}),
Timestamp('20130102', tz='US/Eastern'),
Timestamp('20130104', tz='US/Eastern'),
DataFrame({'A': [Timestamp('20130101', tz='US/Eastern'),
Timestamp('20130104', tz='US/Eastern'),
Timestamp('20130103', tz='US/Eastern')],
'B': [0, np.nan, 2]}))
])
def test_replace_dtypes(self, frame, to_replace, value, expected):
result = getattr(frame, 'replace')(to_replace, value)
assert_frame_equal(result, expected)

def test_replace_input_formats_listlike(self):
Expand Down

0 comments on commit 57c7daa

Please sign in to comment.