diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index e3e1b35f89cbb..e9d4225c3dbd9 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -652,7 +652,7 @@ Reshaping - Bug in :meth:`Series.combine_first` with ``datetime64[ns, tz]`` dtype which would return tz-naive result (:issue:`21469`) - Bug in :meth:`Series.where` and :meth:`DataFrame.where` with ``datetime64[ns, tz]`` dtype (:issue:`21546`) - Bug in :meth:`Series.mask` and :meth:`DataFrame.mask` with ``list`` conditionals (:issue:`21891`) -- +- Bug in :meth:`DataFrame.replace` raising ``RecursionError`` when converting out-of-bounds ``datetime64[ns, tz]`` values (:issue:`20380`) - Build Changes diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index e369679d2146f..3971e90e64a14 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -6,7 +6,7 @@ import warnings from pandas._libs import tslib, lib, tslibs -from pandas._libs.tslibs import iNaT +from pandas._libs.tslibs import iNaT, OutOfBoundsDatetime from pandas.compat import string_types, text_type, PY3 from .common import (ensure_object, is_bool, is_integer, is_float, is_complex, is_datetimetz, is_categorical_dtype, @@ -838,7 +838,13 @@ def soft_convert_objects(values, datetime=True, numeric=True, timedelta=True, # Soft conversions if datetime: - values = lib.maybe_convert_objects(values, convert_datetime=datetime) + # GH 20380: datetimes beyond year 2262 fall outside the + # bounds of nanosecond-resolution 64-bit integers. 
+ try: + values = lib.maybe_convert_objects(values, + convert_datetime=datetime) + except OutOfBoundsDatetime: + pass if timedelta and is_object_dtype(values.dtype): # Object check to ensure only run if previous did not convert diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 0f3ffb8055330..8ee91ded4ab7a 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -802,12 +802,14 @@ def replace(self, to_replace, value, inplace=False, filter=None, copy=not inplace) for b in blocks] return blocks except (TypeError, ValueError): - # try again with a compatible block block = self.astype(object) - return block.replace( - to_replace=original_to_replace, value=value, inplace=inplace, - filter=filter, regex=regex, convert=convert) + return block.replace(to_replace=original_to_replace, + value=value, + inplace=inplace, + filter=filter, + regex=regex, + convert=convert) def _replace_single(self, *args, **kwargs): """ no-op on a non-ObjectBlock """ diff --git a/pandas/tests/frame/test_replace.py b/pandas/tests/frame/test_replace.py index 68d799c55637c..227484abb82c1 100644 --- a/pandas/tests/frame/test_replace.py +++ b/pandas/tests/frame/test_replace.py @@ -755,40 +755,37 @@ def test_replace_for_new_dtypes(self): result = tsframe.fillna(method='bfill') assert_frame_equal(result, tsframe.fillna(method='bfill')) - def test_replace_dtypes(self): - # int - df = DataFrame({'ints': [1, 2, 3]}) - result = df.replace(1, 0) - expected = DataFrame({'ints': [0, 2, 3]}) - assert_frame_equal(result, expected) - - df = DataFrame({'ints': [1, 2, 3]}, dtype=np.int32) - result = df.replace(1, 0) - expected = DataFrame({'ints': [0, 2, 3]}, dtype=np.int32) - assert_frame_equal(result, expected) - - df = DataFrame({'ints': [1, 2, 3]}, dtype=np.int16) - result = df.replace(1, 0) - expected = DataFrame({'ints': [0, 2, 3]}, dtype=np.int16) - assert_frame_equal(result, expected) - - # bools - df = DataFrame({'bools': [True, False, True]}) 
- result = df.replace(False, True) - assert result.values.all() - - # complex blocks - df = DataFrame({'complex': [1j, 2j, 3j]}) - result = df.replace(1j, 0j) - expected = DataFrame({'complex': [0j, 2j, 3j]}) - assert_frame_equal(result, expected) - - # datetime blocks - prev = datetime.today() - now = datetime.today() - df = DataFrame({'datetime64': Index([prev, now, prev])}) - result = df.replace(prev, now) - expected = DataFrame({'datetime64': Index([now] * 3)}) + @pytest.mark.parametrize('frame, to_replace, value, expected', [ + (DataFrame({'ints': [1, 2, 3]}), 1, 0, + DataFrame({'ints': [0, 2, 3]})), + (DataFrame({'ints': [1, 2, 3]}, dtype=np.int32), 1, 0, + DataFrame({'ints': [0, 2, 3]}, dtype=np.int32)), + (DataFrame({'ints': [1, 2, 3]}, dtype=np.int16), 1, 0, + DataFrame({'ints': [0, 2, 3]}, dtype=np.int16)), + (DataFrame({'bools': [True, False, True]}), False, True, + DataFrame({'bools': [True, True, True]})), + (DataFrame({'complex': [1j, 2j, 3j]}), 1j, 0, + DataFrame({'complex': [0j, 2j, 3j]})), + (DataFrame({'datetime64': Index([datetime(2018, 5, 28), + datetime(2018, 7, 28), + datetime(2018, 5, 28)])}), + datetime(2018, 5, 28), datetime(2018, 7, 28), + DataFrame({'datetime64': Index([datetime(2018, 7, 28)] * 3)})), + # GH 20380 + (DataFrame({'dt': [datetime(3017, 12, 20)], 'str': ['foo']}), + 'foo', 'bar', + DataFrame({'dt': [datetime(3017, 12, 20)], 'str': ['bar']})), + (DataFrame({'A': date_range('20130101', periods=3, tz='US/Eastern'), + 'B': [0, np.nan, 2]}), + Timestamp('20130102', tz='US/Eastern'), + Timestamp('20130104', tz='US/Eastern'), + DataFrame({'A': [Timestamp('20130101', tz='US/Eastern'), + Timestamp('20130104', tz='US/Eastern'), + Timestamp('20130103', tz='US/Eastern')], + 'B': [0, np.nan, 2]})) + ]) + def test_replace_dtypes(self, frame, to_replace, value, expected): + result = getattr(frame, 'replace')(to_replace, value) assert_frame_equal(result, expected) def test_replace_input_formats_listlike(self):