diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index b1dc78bbf8020..58730ab38cb31 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -737,6 +737,7 @@ Other Deprecations - Deprecated allowing ``fill_value`` that cannot be held in the original dtype (excepting NA values for integer and bool dtypes) in :meth:`Series.shift` and :meth:`DataFrame.shift` (:issue:`53802`) - Deprecated backward-compatibility behavior for :meth:`DataFrame.select_dtypes` matching "str" dtype when ``np.object_`` is specified (:issue:`61916`) - Deprecated option "future.no_silent_downcasting", as it is no longer used. In a future version accessing this option will raise (:issue:`59502`) +- Deprecated reindexing with a ``fill_value`` that cannot be held by the original object's dtype; explicitly cast before reindexing instead (:issue:`53910`) - Deprecated silent casting of non-datetime 'other' to datetime in :meth:`Series.combine_first` (:issue:`62931`) - Deprecated slicing on a :class:`Series` or :class:`DataFrame` with a :class:`DatetimeIndex` using a ``datetime.date`` object, explicitly cast to :class:`Timestamp` instead (:issue:`35830`) - Deprecated the 'inplace' keyword from :meth:`Resampler.interpolate`, as passing ``True`` raises ``AttributeError`` (:issue:`58690`) diff --git a/pandas/core/array_algos/take.py b/pandas/core/array_algos/take.py index 4d33b01f616cc..8dd801e656333 100644 --- a/pandas/core/array_algos/take.py +++ b/pandas/core/array_algos/take.py @@ -6,6 +6,7 @@ cast, overload, ) +import warnings import numpy as np @@ -13,6 +14,8 @@ algos as libalgos, lib, ) +from pandas.errors import Pandas4Warning +from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.cast import maybe_promote from pandas.core.dtypes.common import ( @@ -94,6 +97,15 @@ def take_nd( fill_value = na_value_for_dtype(arr.dtype, compat=False) elif lib.is_np_dtype(arr.dtype, "mM"): dtype, fill_value = maybe_promote(arr.dtype, fill_value) + if dtype != arr.dtype: + # GH#53910 + warnings.warn( + "reindexing with a fill_value that cannot be held by the " + "original dtype is deprecated. Explicitly cast to a common " + f"dtype (in this case {dtype}) instead.", + Pandas4Warning, + stacklevel=find_stack_level(), + ) if arr.dtype != dtype: # EA.take is strict about returning a new object of the same type # so for that case cast upfront @@ -185,6 +197,10 @@ def take_2d_multi( indexer = row_idx, col_idx mask_info = None + if lib.is_float(fill_value) and fill_value.is_integer(): + # Avoid warning if possible + fill_value = int(fill_value) + # check for promotion based on types only (do this first because # it's faster than computing a mask) dtype, fill_value = maybe_promote(arr.dtype, fill_value) @@ -202,6 +218,20 @@ def take_2d_multi( # to crash when trying to cast it to dtype) dtype, fill_value = arr.dtype, arr.dtype.type() + if dtype != arr.dtype and not ( + arr.dtype.kind in "iub" + and lib.is_float(fill_value) + and np.isnan(fill_value) + ): + # GH#53910 + warnings.warn( + "reindexing with a fill_value that cannot be held by the " + "original dtype is deprecated. Explicitly cast to a common " + f"dtype (in this case {dtype}) instead.", + Pandas4Warning, + stacklevel=find_stack_level(), + ) + # at this point, it's guaranteed that dtype can hold both the arr values # and the fill_value out_shape = len(row_idx), len(col_idx) @@ -528,8 +558,20 @@ def _take_preprocess_indexer_and_fill_value( else: # check for promotion based on types only (do this first because # it's faster than computing a mask) + if lib.is_float(fill_value) and fill_value.is_integer(): + # Avoid warning if possible + fill_value = int(fill_value) dtype, fill_value = maybe_promote(arr.dtype, fill_value) if dtype != arr.dtype: + if not (lib.is_float(fill_value) and np.isnan(fill_value)): + # GH#53910 + warnings.warn( + "reindexing with a fill_value that cannot be held by the " + "original dtype is deprecated. Explicitly cast to a common " + f"dtype (in this case {dtype}) instead.", + Pandas4Warning, + stacklevel=find_stack_level(), + ) # check if promotion is actually required based on indexer if mask is not None: needs_masking = True diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b542ca1f431c3..035e40cfa43eb 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5292,14 +5292,6 @@ def reindex( IE10 404 0.08 Chrome 200 0.02 - >>> df.reindex(new_index, fill_value="missing") - http_status response_time - Safari 404 0.07 - Iceweasel missing missing - Comodo Dragon missing missing - IE10 404 0.08 - Chrome 200 0.02 - We can also reindex the columns. >>> df.reindex(columns=["http_status", "user_agent"]) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 93b615fe7b101..fe9055641ef54 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -69,6 +69,7 @@ from pandas.errors import ( DuplicateLabelError, InvalidIndexError, + Pandas4Warning, ) from pandas.util._decorators import ( Appender, @@ -7888,11 +7889,17 @@ def get_values_for_csv( """ if isinstance(values, Categorical) and values.categories.dtype.kind in "Mm": # GH#40754 Convert categorical datetimes to datetime array - values = algos.take_nd( - values.categories._values, - ensure_platform_int(values._codes), - fill_value=na_rep, - ) + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + "reindexing with a fill_value that cannot be held", + Pandas4Warning, + ) + values = algos.take_nd( + values.categories._values, + ensure_platform_int(values._codes), + fill_value=na_rep, + ) values = ensure_wrapped_if_datetimelike(values) diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index 3973f47aed240..982b02d7a5cab 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -168,7 +168,10 @@ def test_reindex_date_fill_value(self): ts = df.iloc[0, 0] fv = ts.date() - res = df.reindex(index=range(4), columns=["A", "B", "C"], fill_value=fv) + msg = "reindexing with a fill_value that cannot be held" + + with tm.assert_produces_warning(Pandas4Warning, match=msg): + res = df.reindex(index=range(4), columns=["A", "B", "C"], fill_value=fv) expected = DataFrame( {"A": df["A"].tolist() + [fv], "B": df["B"].tolist() + [fv], "C": [fv] * 4}, @@ -177,7 +180,8 @@ def test_reindex_date_fill_value(self): tm.assert_frame_equal(res, expected) # only reindexing rows - res = df.reindex(index=range(4), fill_value=fv) + with tm.assert_produces_warning(Pandas4Warning, match=msg): + res = df.reindex(index=range(4), fill_value=fv) tm.assert_frame_equal(res, expected[["A", "B"]]) # same with a datetime-castable str @@ -796,7 +800,9 @@ def test_reindex_fill_value(self): # other dtypes df["foo"] = "foo" - result = df.reindex(range(15), fill_value="0") + msg = "reindexing with a fill_value that cannot be held" + with tm.assert_produces_warning(Pandas4Warning, match=msg): + result = df.reindex(range(15), fill_value="0") expected = df.reindex(range(15)).fillna("0") tm.assert_frame_equal(result, expected) @@ -1227,7 +1233,9 @@ def test_reindex_datetimelike_to_object(self, dtype): index = df.index.append(Index([1])) columns = df.columns.append(Index(["foo"])) - res = df.reindex(index=index, columns=columns, fill_value=fv) + msg = "reindexing with a fill_value that cannot be held" + with tm.assert_produces_warning(Pandas4Warning, match=msg): + res = df.reindex(index=index, columns=columns, fill_value=fv) expected = DataFrame( { diff --git a/pandas/tests/series/methods/test_reindex.py b/pandas/tests/series/methods/test_reindex.py index cd48db82d7fe7..11126c227316c 100644 --- a/pandas/tests/series/methods/test_reindex.py +++ b/pandas/tests/series/methods/test_reindex.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas.errors import Pandas4Warning + from pandas import ( NA, Categorical, @@ -314,7 +316,9 @@ def test_reindex_fill_value_datetimelike_upcast(dtype, fill_value): ser = Series([NaT], dtype=dtype) - result = ser.reindex([0, 1], fill_value=fill_value) + msg = "reindexing with a fill_value that cannot be held" + with tm.assert_produces_warning(Pandas4Warning, match=msg): + result = ser.reindex([0, 1], fill_value=fill_value) expected = Series([NaT, fill_value], index=range(2), dtype=object) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/test_take.py b/pandas/tests/test_take.py index 451ef42fff3d1..e21f447e90634 100644 --- a/pandas/tests/test_take.py +++ b/pandas/tests/test_take.py @@ -4,6 +4,7 @@ import pytest from pandas._libs import iNaT +from pandas.errors import Pandas4Warning from pandas import array import pandas._testing as tm @@ -15,7 +16,7 @@ (np.int8, np.int16(127), np.int8), (np.int8, np.int16(128), np.int16), (np.int32, 1, np.int32), - (np.int32, 2.0, np.float64), + (np.int32, 2.0, np.int32), (np.int32, 3.0 + 4.0j, np.complex128), (np.int32, True, np.object_), (np.int32, "", np.object_), @@ -43,75 +44,104 @@ def dtype_fill_out_dtype(request): class TestTake: def test_1d_fill_nonna(self, dtype_fill_out_dtype): dtype, fill_value, out_dtype = dtype_fill_out_dtype + + warn = None + if out_dtype != dtype: + warn = Pandas4Warning + msg = "reindexing with a fill_value that cannot be held" + data = np.random.default_rng(2).integers(0, 2, 4).astype(dtype) indexer = [2, 1, 0, -1] - result = algos.take_nd(data, indexer, fill_value=fill_value) + with tm.assert_produces_warning(warn, match=msg): + result = algos.take_nd(data, indexer, fill_value=fill_value) assert (result[[0, 1, 2]] == data[[2, 1, 0]]).all() assert result[3] == fill_value assert result.dtype == out_dtype indexer = [2, 1, 0, 1] - result = algos.take_nd(data, indexer, fill_value=fill_value) + with tm.assert_produces_warning(warn, match=msg): + result = algos.take_nd(data, indexer, fill_value=fill_value) assert (result[[0, 1, 2, 3]] == data[indexer]).all() assert result.dtype == dtype def test_2d_fill_nonna(self, dtype_fill_out_dtype): dtype, fill_value, out_dtype = dtype_fill_out_dtype + + warn = None + if out_dtype != dtype: + warn = Pandas4Warning + msg = "reindexing with a fill_value that cannot be held" + data = np.random.default_rng(2).integers(0, 2, (5, 3)).astype(dtype) indexer = [2, 1, 0, -1] - result = algos.take_nd(data, indexer, axis=0, fill_value=fill_value) + with tm.assert_produces_warning(warn, match=msg): + result = algos.take_nd(data, indexer, axis=0, fill_value=fill_value) assert (result[[0, 1, 2], :] == data[[2, 1, 0], :]).all() assert (result[3, :] == fill_value).all() assert result.dtype == out_dtype - result = algos.take_nd(data, indexer, axis=1, fill_value=fill_value) + with tm.assert_produces_warning(warn, match=msg): + result = algos.take_nd(data, indexer, axis=1, fill_value=fill_value) assert (result[:, [0, 1, 2]] == data[:, [2, 1, 0]]).all() assert (result[:, 3] == fill_value).all() assert result.dtype == out_dtype indexer = [2, 1, 0, 1] - result = algos.take_nd(data, indexer, axis=0, fill_value=fill_value) + with tm.assert_produces_warning(warn, match=msg): + result = algos.take_nd(data, indexer, axis=0, fill_value=fill_value) assert (result[[0, 1, 2, 3], :] == data[indexer, :]).all() assert result.dtype == dtype - result = algos.take_nd(data, indexer, axis=1, fill_value=fill_value) + with tm.assert_produces_warning(warn, match=msg): + result = algos.take_nd(data, indexer, axis=1, fill_value=fill_value) assert (result[:, [0, 1, 2, 3]] == data[:, indexer]).all() assert result.dtype == dtype def test_3d_fill_nonna(self, dtype_fill_out_dtype): dtype, fill_value, out_dtype = dtype_fill_out_dtype + warn = None + if out_dtype != dtype: + warn = Pandas4Warning + msg = "reindexing with a fill_value that cannot be held" + data = np.random.default_rng(2).integers(0, 2, (5, 4, 3)).astype(dtype) indexer = [2, 1, 0, -1] - result = algos.take_nd(data, indexer, axis=0, fill_value=fill_value) + with tm.assert_produces_warning(warn, match=msg): + result = algos.take_nd(data, indexer, axis=0, fill_value=fill_value) assert (result[[0, 1, 2], :, :] == data[[2, 1, 0], :, :]).all() assert (result[3, :, :] == fill_value).all() assert result.dtype == out_dtype - result = algos.take_nd(data, indexer, axis=1, fill_value=fill_value) + with tm.assert_produces_warning(warn, match=msg): + result = algos.take_nd(data, indexer, axis=1, fill_value=fill_value) assert (result[:, [0, 1, 2], :] == data[:, [2, 1, 0], :]).all() assert (result[:, 3, :] == fill_value).all() assert result.dtype == out_dtype - result = algos.take_nd(data, indexer, axis=2, fill_value=fill_value) + with tm.assert_produces_warning(warn, match=msg): + result = algos.take_nd(data, indexer, axis=2, fill_value=fill_value) assert (result[:, :, [0, 1, 2]] == data[:, :, [2, 1, 0]]).all() assert (result[:, :, 3] == fill_value).all() assert result.dtype == out_dtype indexer = [2, 1, 0, 1] - result = algos.take_nd(data, indexer, axis=0, fill_value=fill_value) + with tm.assert_produces_warning(warn, match=msg): + result = algos.take_nd(data, indexer, axis=0, fill_value=fill_value) assert (result[[0, 1, 2, 3], :, :] == data[indexer, :, :]).all() assert result.dtype == dtype - result = algos.take_nd(data, indexer, axis=1, fill_value=fill_value) + with tm.assert_produces_warning(warn, match=msg): + result = algos.take_nd(data, indexer, axis=1, fill_value=fill_value) assert (result[:, [0, 1, 2, 3], :] == data[:, indexer, :]).all() assert result.dtype == dtype - result = algos.take_nd(data, indexer, axis=2, fill_value=fill_value) + with tm.assert_produces_warning(warn, match=msg): + result = algos.take_nd(data, indexer, axis=2, fill_value=fill_value) assert (result[:, :, [0, 1, 2, 3]] == data[:, :, indexer]).all() assert result.dtype == dtype