Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -737,6 +737,7 @@ Other Deprecations
- Deprecated allowing ``fill_value`` that cannot be held in the original dtype (excepting NA values for integer and bool dtypes) in :meth:`Series.shift` and :meth:`DataFrame.shift` (:issue:`53802`)
- Deprecated backward-compatibility behavior for :meth:`DataFrame.select_dtypes` matching "str" dtype when ``np.object_`` is specified (:issue:`61916`)
- Deprecated option "future.no_silent_downcasting", as it is no longer used. In a future version accessing this option will raise (:issue:`59502`)
- Deprecated reindexing with a ``fill_value`` that cannot be held by the original object's dtype; explicitly cast before reindexing instead (:issue:`53910`)
- Deprecated slicing on a :class:`Series` or :class:`DataFrame` with a :class:`DatetimeIndex` using a ``datetime.date`` object; explicitly cast to :class:`Timestamp` instead (:issue:`35830`)
- Deprecated the 'inplace' keyword from :meth:`Resampler.interpolate`, as passing ``True`` raises ``AttributeError`` (:issue:`58690`)

Expand Down
42 changes: 42 additions & 0 deletions pandas/core/array_algos/take.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,16 @@
cast,
overload,
)
import warnings

import numpy as np

from pandas._libs import (
algos as libalgos,
lib,
)
from pandas.errors import Pandas4Warning
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.cast import maybe_promote
from pandas.core.dtypes.common import (
Expand Down Expand Up @@ -94,6 +97,15 @@ def take_nd(
fill_value = na_value_for_dtype(arr.dtype, compat=False)
elif lib.is_np_dtype(arr.dtype, "mM"):
dtype, fill_value = maybe_promote(arr.dtype, fill_value)
if dtype != arr.dtype:
# GH#53910
warnings.warn(
"reindexing with a fill_value that cannot be held by the "
"original dtype is deprecated. Explicitly cast to a common "
f"dtype (in this case {dtype}) instead.",
Pandas4Warning,
stacklevel=find_stack_level(),
)
if arr.dtype != dtype:
# EA.take is strict about returning a new object of the same type
# so for that case cast upfront
Expand Down Expand Up @@ -185,6 +197,10 @@ def take_2d_multi(
indexer = row_idx, col_idx
mask_info = None

if lib.is_float(fill_value) and fill_value.is_integer():
# Avoid warning if possible
fill_value = int(fill_value)

# check for promotion based on types only (do this first because
# it's faster than computing a mask)
dtype, fill_value = maybe_promote(arr.dtype, fill_value)
Expand All @@ -202,6 +218,20 @@ def take_2d_multi(
# to crash when trying to cast it to dtype)
dtype, fill_value = arr.dtype, arr.dtype.type()

if dtype != arr.dtype and not (
arr.dtype.kind in "iub"
and lib.is_float(fill_value)
and np.isnan(fill_value)
):
# GH#53910
warnings.warn(
"reindexing with a fill_value that cannot be held by the "
"original dtype is deprecated. Explicitly cast to a common "
f"dtype (in this case {dtype}) instead.",
Pandas4Warning,
stacklevel=find_stack_level(),
)

# at this point, it's guaranteed that dtype can hold both the arr values
# and the fill_value
out_shape = len(row_idx), len(col_idx)
Expand Down Expand Up @@ -528,8 +558,20 @@ def _take_preprocess_indexer_and_fill_value(
else:
# check for promotion based on types only (do this first because
# it's faster than computing a mask)
if lib.is_float(fill_value) and fill_value.is_integer():
# Avoid warning if possible
fill_value = int(fill_value)
dtype, fill_value = maybe_promote(arr.dtype, fill_value)
if dtype != arr.dtype:
if not (lib.is_float(fill_value) and np.isnan(fill_value)):
# GH#53910
warnings.warn(
"reindexing with a fill_value that cannot be held by the "
"original dtype is deprecated. Explicitly cast to a common "
f"dtype (in this case {dtype}) instead.",
Pandas4Warning,
stacklevel=find_stack_level(),
)
# check if promotion is actually required based on indexer
if mask is not None:
needs_masking = True
Expand Down
17 changes: 12 additions & 5 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@
from pandas.errors import (
DuplicateLabelError,
InvalidIndexError,
Pandas4Warning,
)
from pandas.util._decorators import (
Appender,
Expand Down Expand Up @@ -7888,11 +7889,17 @@ def get_values_for_csv(
"""
if isinstance(values, Categorical) and values.categories.dtype.kind in "Mm":
# GH#40754 Convert categorical datetimes to datetime array
values = algos.take_nd(
values.categories._values,
ensure_platform_int(values._codes),
fill_value=na_rep,
)
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
"reindexing with a fill_value that cannot be held",
Pandas4Warning,
)
values = algos.take_nd(
values.categories._values,
ensure_platform_int(values._codes),
fill_value=na_rep,
)

values = ensure_wrapped_if_datetimelike(values)

Expand Down
16 changes: 12 additions & 4 deletions pandas/tests/frame/methods/test_reindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,10 @@ def test_reindex_date_fill_value(self):
ts = df.iloc[0, 0]
fv = ts.date()

res = df.reindex(index=range(4), columns=["A", "B", "C"], fill_value=fv)
msg = "reindexing with a fill_value that cannot be held"

with tm.assert_produces_warning(Pandas4Warning, match=msg):
res = df.reindex(index=range(4), columns=["A", "B", "C"], fill_value=fv)

expected = DataFrame(
{"A": df["A"].tolist() + [fv], "B": df["B"].tolist() + [fv], "C": [fv] * 4},
Expand All @@ -177,7 +180,8 @@ def test_reindex_date_fill_value(self):
tm.assert_frame_equal(res, expected)

# only reindexing rows
res = df.reindex(index=range(4), fill_value=fv)
with tm.assert_produces_warning(Pandas4Warning, match=msg):
res = df.reindex(index=range(4), fill_value=fv)
tm.assert_frame_equal(res, expected[["A", "B"]])

# same with a datetime-castable str
Expand Down Expand Up @@ -796,7 +800,9 @@ def test_reindex_fill_value(self):

# other dtypes
df["foo"] = "foo"
result = df.reindex(range(15), fill_value="0")
msg = "reindexing with a fill_value that cannot be held"
with tm.assert_produces_warning(Pandas4Warning, match=msg):
result = df.reindex(range(15), fill_value="0")
expected = df.reindex(range(15)).fillna("0")
tm.assert_frame_equal(result, expected)

Expand Down Expand Up @@ -1227,7 +1233,9 @@ def test_reindex_datetimelike_to_object(self, dtype):
index = df.index.append(Index([1]))
columns = df.columns.append(Index(["foo"]))

res = df.reindex(index=index, columns=columns, fill_value=fv)
msg = "reindexing with a fill_value that cannot be held"
with tm.assert_produces_warning(Pandas4Warning, match=msg):
res = df.reindex(index=index, columns=columns, fill_value=fv)

expected = DataFrame(
{
Expand Down
6 changes: 5 additions & 1 deletion pandas/tests/series/methods/test_reindex.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import numpy as np
import pytest

from pandas.errors import Pandas4Warning

from pandas import (
NA,
Categorical,
Expand Down Expand Up @@ -314,7 +316,9 @@ def test_reindex_fill_value_datetimelike_upcast(dtype, fill_value):

ser = Series([NaT], dtype=dtype)

result = ser.reindex([0, 1], fill_value=fill_value)
msg = "reindexing with a fill_value that cannot be held"
with tm.assert_produces_warning(Pandas4Warning, match=msg):
result = ser.reindex([0, 1], fill_value=fill_value)
expected = Series([NaT, fill_value], index=range(2), dtype=object)
tm.assert_series_equal(result, expected)

Expand Down
56 changes: 43 additions & 13 deletions pandas/tests/test_take.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pytest

from pandas._libs import iNaT
from pandas.errors import Pandas4Warning

from pandas import array
import pandas._testing as tm
Expand All @@ -15,7 +16,7 @@
(np.int8, np.int16(127), np.int8),
(np.int8, np.int16(128), np.int16),
(np.int32, 1, np.int32),
(np.int32, 2.0, np.float64),
(np.int32, 2.0, np.int32),
(np.int32, 3.0 + 4.0j, np.complex128),
(np.int32, True, np.object_),
(np.int32, "", np.object_),
Expand Down Expand Up @@ -43,75 +44,104 @@ def dtype_fill_out_dtype(request):
class TestTake:
def test_1d_fill_nonna(self, dtype_fill_out_dtype):
    """
    Check ``algos.take_nd`` on a 1-D array with a non-NA ``fill_value``.

    The fixture supplies ``(dtype, fill_value, out_dtype)``. A
    ``Pandas4Warning`` is expected exactly when ``out_dtype`` differs
    from ``dtype``; otherwise no warning may be raised.
    """
    dtype, fill_value, out_dtype = dtype_fill_out_dtype

    expected_warning = Pandas4Warning if out_dtype != dtype else None
    msg = "reindexing with a fill_value that cannot be held"

    rng = np.random.default_rng(2)
    data = rng.integers(0, 2, 4).astype(dtype)

    # Indexer with a -1 entry: the last position is filled with fill_value
    # and the result dtype is asserted to be out_dtype.
    indexer = [2, 1, 0, -1]
    with tm.assert_produces_warning(expected_warning, match=msg):
        result = algos.take_nd(data, indexer, fill_value=fill_value)
    assert (result[:3] == data[[2, 1, 0]]).all()
    assert result[3] == fill_value
    assert result.dtype == out_dtype

    # Indexer without any -1 entry: the result is asserted to keep the
    # input dtype, while the same warning expectation is still applied.
    indexer = [2, 1, 0, 1]
    with tm.assert_produces_warning(expected_warning, match=msg):
        result = algos.take_nd(data, indexer, fill_value=fill_value)
    assert (result[:4] == data[indexer]).all()
    assert result.dtype == dtype

def test_2d_fill_nonna(self, dtype_fill_out_dtype):
    """
    Check ``algos.take_nd`` along both axes of a 2-D array with a non-NA
    ``fill_value``.

    The fixture supplies ``(dtype, fill_value, out_dtype)``. A
    ``Pandas4Warning`` is expected exactly when ``out_dtype`` differs
    from ``dtype``; otherwise no warning may be raised.
    """
    dtype, fill_value, out_dtype = dtype_fill_out_dtype

    expected_warning = Pandas4Warning if out_dtype != dtype else None
    msg = "reindexing with a fill_value that cannot be held"

    rng = np.random.default_rng(2)
    data = rng.integers(0, 2, (5, 3)).astype(dtype)

    # Indexer with a -1 entry: position 3 along the taken axis is filled
    # with fill_value and the result dtype is asserted to be out_dtype.
    indexer = [2, 1, 0, -1]
    for axis in (0, 1):
        with tm.assert_produces_warning(expected_warning, match=msg):
            result = algos.take_nd(data, indexer, axis=axis, fill_value=fill_value)
        taken = result.take([0, 1, 2], axis=axis)
        assert (taken == data.take([2, 1, 0], axis=axis)).all()
        assert (result.take(3, axis=axis) == fill_value).all()
        assert result.dtype == out_dtype

    # Indexer without any -1 entry: the result is asserted to keep the
    # input dtype, while the same warning expectation is still applied.
    indexer = [2, 1, 0, 1]
    for axis in (0, 1):
        with tm.assert_produces_warning(expected_warning, match=msg):
            result = algos.take_nd(data, indexer, axis=axis, fill_value=fill_value)
        taken = result.take([0, 1, 2, 3], axis=axis)
        assert (taken == data.take(indexer, axis=axis)).all()
        assert result.dtype == dtype

def test_3d_fill_nonna(self, dtype_fill_out_dtype):
    """
    Check ``algos.take_nd`` along each axis of a 3-D array with a non-NA
    ``fill_value``.

    The fixture supplies ``(dtype, fill_value, out_dtype)``. A
    ``Pandas4Warning`` is expected exactly when ``out_dtype`` differs
    from ``dtype``; otherwise no warning may be raised.
    """
    dtype, fill_value, out_dtype = dtype_fill_out_dtype

    expected_warning = Pandas4Warning if out_dtype != dtype else None
    msg = "reindexing with a fill_value that cannot be held"

    rng = np.random.default_rng(2)
    data = rng.integers(0, 2, (5, 4, 3)).astype(dtype)

    # Indexer with a -1 entry: position 3 along the taken axis is filled
    # with fill_value and the result dtype is asserted to be out_dtype.
    indexer = [2, 1, 0, -1]
    for axis in (0, 1, 2):
        with tm.assert_produces_warning(expected_warning, match=msg):
            result = algos.take_nd(data, indexer, axis=axis, fill_value=fill_value)
        taken = result.take([0, 1, 2], axis=axis)
        assert (taken == data.take([2, 1, 0], axis=axis)).all()
        assert (result.take(3, axis=axis) == fill_value).all()
        assert result.dtype == out_dtype

    # Indexer without any -1 entry: the result is asserted to keep the
    # input dtype, while the same warning expectation is still applied.
    indexer = [2, 1, 0, 1]
    for axis in (0, 1, 2):
        with tm.assert_produces_warning(expected_warning, match=msg):
            result = algos.take_nd(data, indexer, axis=axis, fill_value=fill_value)
        taken = result.take([0, 1, 2, 3], axis=axis)
        assert (taken == data.take(indexer, axis=axis)).all()
        assert result.dtype == dtype

Expand Down
Loading