Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CLEAN: Enforce pdep6 #59007

Merged
merged 14 commits into from
Jul 8, 2024
Merged
9 changes: 3 additions & 6 deletions asv_bench/benchmarks/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,12 +546,9 @@ def time_chained_indexing(self, mode):


class Block:
params = [
(True, "True"),
(np.array(True), "np.array(True)"),
]
params = [True]
mroeschke marked this conversation as resolved.
Show resolved Hide resolved

def setup(self, true_value, mode):
MarcoGorelli marked this conversation as resolved.
Show resolved Hide resolved
def setup(self, true_value):
self.df = DataFrame(
False,
columns=np.arange(500).astype(str),
Expand All @@ -560,7 +557,7 @@ def setup(self, true_value, mode):

self.true_value = true_value

def time_test(self, true_value, mode):
def time_test(self, true_value):
start = datetime(2010, 5, 1)
end = datetime(2010, 9, 1)
self.df.loc[start:end, :] = true_value
Expand Down
2 changes: 1 addition & 1 deletion doc/source/user_guide/categorical.rst
Original file line number Diff line number Diff line change
Expand Up @@ -793,7 +793,7 @@ Assigning a ``Categorical`` to parts of a column of other types will use the val
:okwarning:

df = pd.DataFrame({"a": [1, 1, 1, 1, 1], "b": ["a", "a", "a", "a", "a"]})
df.loc[1:2, "a"] = pd.Categorical(["b", "b"], categories=["a", "b"])
df.loc[1:2, "a"] = pd.Categorical([2, 2], categories=[2, 3])
df.loc[2:3, "b"] = pd.Categorical(["b", "b"], categories=["a", "b"])
df
df.dtypes
Expand Down
16 changes: 5 additions & 11 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
)
from pandas.errors.cow import _chained_assignment_msg
from pandas.util._decorators import doc
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.cast import (
can_hold_element,
Expand Down Expand Up @@ -2124,14 +2123,14 @@ def _setitem_single_column(self, loc: int, value, plane_indexer) -> None:
self.obj._mgr.column_setitem(
loc, plane_indexer, value, inplace_only=True
)
except (ValueError, TypeError, LossySetitemError):
except (ValueError, TypeError, LossySetitemError) as exc:
# If we're setting an entire column and we can't do it inplace,
# then we can use value's dtype (or inferred dtype)
# instead of object
dtype = self.obj.dtypes.iloc[loc]
if dtype not in (np.void, object) and not self.obj.empty:
# - Exclude np.void, as that is a special case for expansion.
# We want to warn for
# We want to raise for
# df = pd.DataFrame({'a': [1, 2]})
# df.loc[:, 'a'] = .3
# but not for
Expand All @@ -2140,14 +2139,9 @@ def _setitem_single_column(self, loc: int, value, plane_indexer) -> None:
# - Exclude `object`, as then no upcasting happens.
# - Exclude empty initial object with enlargement,
# as then there's nothing to be inconsistent with.
warnings.warn(
f"Setting an item of incompatible dtype is deprecated "
"and will raise in a future error of pandas. "
f"Value '{value}' has dtype incompatible with {dtype}, "
"please explicitly cast to a compatible dtype first.",
FutureWarning,
stacklevel=find_stack_level(),
)
raise TypeError(
f"Invalid value '{value}' for dtype '{dtype}'"
) from exc
self.obj.isetitem(loc, value)
else:
# set value into the column (first attempting to operate inplace, then
Expand Down
31 changes: 12 additions & 19 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,7 @@ def split_and_operate(self, func, *args, **kwargs) -> list[Block]:
# Up/Down-casting

@final
def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block:
def coerce_to_target_dtype(self, other, raise_on_upcast: bool = False) -> Block:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are there any usages of this method outside this file that do not set this to `raise_on_upcast=False`?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's no usage of this method at all outside this file.

I've removed the default `=False` anyway; it's better to be explicit about this one.

"""
coerce the current block to a dtype compat for other
we will return a block, possibly object, and not raise
Expand All @@ -455,25 +455,18 @@ def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block:
isinstance(other, (np.datetime64, np.timedelta64)) and np.isnat(other)
)
):
warn_on_upcast = False
raise_on_upcast = False
elif (
isinstance(other, np.ndarray)
and other.ndim == 1
and is_integer_dtype(self.values.dtype)
and is_float_dtype(other.dtype)
and lib.has_only_ints_or_nan(other)
):
warn_on_upcast = False

if warn_on_upcast:
warnings.warn(
f"Setting an item of incompatible dtype is deprecated "
"and will raise an error in a future version of pandas. "
f"Value '{other}' has dtype incompatible with {self.values.dtype}, "
"please explicitly cast to a compatible dtype first.",
FutureWarning,
stacklevel=find_stack_level(),
)
raise_on_upcast = False

if raise_on_upcast:
raise TypeError(f"Invalid value '{other}' for dtype '{self.values.dtype}'")
if self.values.dtype == new_dtype:
raise AssertionError(
f"Did not expect new dtype {new_dtype} to equal self.dtype "
Expand Down Expand Up @@ -1105,7 +1098,7 @@ def setitem(self, indexer, value) -> Block:
casted = np_can_hold_element(values.dtype, value)
except LossySetitemError:
# current dtype cannot store value, coerce to common dtype
nb = self.coerce_to_target_dtype(value, warn_on_upcast=True)
nb = self.coerce_to_target_dtype(value, raise_on_upcast=True)
return nb.setitem(indexer, value)
else:
if self.dtype == _dtype_obj:
Expand Down Expand Up @@ -1176,7 +1169,7 @@ def putmask(self, mask, new) -> list[Block]:
if not is_list_like(new):
# using just new[indexer] can't save us the need to cast
return self.coerce_to_target_dtype(
new, warn_on_upcast=True
new, raise_on_upcast=True
).putmask(mask, new)
else:
indexer = mask.nonzero()[0]
Expand Down Expand Up @@ -1637,11 +1630,11 @@ def setitem(self, indexer, value):
except (ValueError, TypeError):
if isinstance(self.dtype, IntervalDtype):
# see TestSetitemFloatIntervalWithIntIntervalValues
nb = self.coerce_to_target_dtype(orig_value, warn_on_upcast=True)
nb = self.coerce_to_target_dtype(orig_value, raise_on_upcast=True)
return nb.setitem(orig_indexer, orig_value)

elif isinstance(self, NDArrayBackedExtensionBlock):
nb = self.coerce_to_target_dtype(orig_value, warn_on_upcast=True)
nb = self.coerce_to_target_dtype(orig_value, raise_on_upcast=True)
return nb.setitem(orig_indexer, orig_value)

else:
Expand Down Expand Up @@ -1737,13 +1730,13 @@ def putmask(self, mask, new) -> list[Block]:
if isinstance(self.dtype, IntervalDtype):
# Discussion about what we want to support in the general
# case GH#39584
blk = self.coerce_to_target_dtype(orig_new, warn_on_upcast=True)
blk = self.coerce_to_target_dtype(orig_new, raise_on_upcast=True)
return blk.putmask(orig_mask, orig_new)

elif isinstance(self, NDArrayBackedExtensionBlock):
# NB: not (yet) the same as
# isinstance(values, NDArrayBackedExtensionArray)
blk = self.coerce_to_target_dtype(orig_new, warn_on_upcast=True)
blk = self.coerce_to_target_dtype(orig_new, raise_on_upcast=True)
return blk.putmask(orig_mask, orig_new)

else:
Expand Down
26 changes: 11 additions & 15 deletions pandas/tests/copy_view/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -725,15 +725,13 @@ def test_column_as_series_set_with_upcast(backend):
with pytest.raises(TypeError, match="Invalid value"):
s[0] = "foo"
expected = Series([1, 2, 3], name="a")
tm.assert_series_equal(s, expected)
tm.assert_frame_equal(df, df_orig)
# ensure cached series on getitem is not the changed series
tm.assert_series_equal(df["a"], df_orig["a"])
else:
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
with pytest.raises(TypeError, match="Invalid value"):
s[0] = "foo"
expected = Series(["foo", 2, 3], dtype=object, name="a")

tm.assert_series_equal(s, expected)
tm.assert_frame_equal(df, df_orig)
# ensure cached series on getitem is not the changed series
tm.assert_series_equal(df["a"], df_orig["a"])


@pytest.mark.parametrize(
Expand Down Expand Up @@ -805,16 +803,14 @@ def test_set_value_copy_only_necessary_column(indexer_func, indexer, val, col):
view = df[:]

if val == "a":
with tm.assert_produces_warning(
FutureWarning, match="Setting an item of incompatible dtype is deprecated"
):
with pytest.raises(TypeError, match="Invalid value"):
indexer_func(df)[indexer] = val
else:
indexer_func(df)[indexer] = val

indexer_func(df)[indexer] = val

assert np.shares_memory(get_array(df, "b"), get_array(view, "b"))
assert not np.shares_memory(get_array(df, "a"), get_array(view, "a"))
tm.assert_frame_equal(view, df_orig)
assert np.shares_memory(get_array(df, "b"), get_array(view, "b"))
assert not np.shares_memory(get_array(df, "a"), get_array(view, "a"))
tm.assert_frame_equal(view, df_orig)


def test_series_midx_slice():
Expand Down
26 changes: 13 additions & 13 deletions pandas/tests/copy_view/test_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -1105,26 +1105,26 @@ def test_putmask_aligns_rhs_no_reference(dtype):
assert np.shares_memory(arr_a, get_array(df, "a"))


@pytest.mark.parametrize(
"val, exp, warn", [(5.5, True, FutureWarning), (5, False, None)]
)
def test_putmask_dont_copy_some_blocks(val, exp, warn):
@pytest.mark.parametrize("val, exp, raises", [(5.5, True, True), (5, False, False)])
def test_putmask_dont_copy_some_blocks(val, exp, raises: bool):
df = DataFrame({"a": [1, 2], "b": 1, "c": 1.5})
view = df[:]
df_orig = df.copy()
indexer = DataFrame(
[[True, False, False], [True, False, False]], columns=list("abc")
)
with tm.assert_produces_warning(warn, match="incompatible dtype"):
if raises:
with pytest.raises(TypeError, match="Invalid value"):
df[indexer] = val
else:
df[indexer] = val

assert not np.shares_memory(get_array(view, "a"), get_array(df, "a"))
# TODO(CoW): Could split blocks to avoid copying the whole block
assert np.shares_memory(get_array(view, "b"), get_array(df, "b")) is exp
assert np.shares_memory(get_array(view, "c"), get_array(df, "c"))
assert df._mgr._has_no_reference(1) is not exp
assert not df._mgr._has_no_reference(2)
tm.assert_frame_equal(view, df_orig)
assert not np.shares_memory(get_array(view, "a"), get_array(df, "a"))
# TODO(CoW): Could split blocks to avoid copying the whole block
assert np.shares_memory(get_array(view, "b"), get_array(df, "b")) is exp
assert np.shares_memory(get_array(view, "c"), get_array(df, "c"))
assert df._mgr._has_no_reference(1) is not exp
assert not df._mgr._has_no_reference(2)
tm.assert_frame_equal(view, df_orig)


@pytest.mark.parametrize("dtype", ["int64", "Int64"])
Expand Down
36 changes: 6 additions & 30 deletions pandas/tests/frame/indexing/test_coercion.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,35 +49,19 @@ def test_loc_setitem_multiindex_columns(self, consolidate):
def test_37477():
# fixed by GH#45121
orig = DataFrame({"A": [1, 2, 3], "B": [3, 4, 5]})
expected = DataFrame({"A": [1, 2, 3], "B": [3, 1.2, 5]})

df = orig.copy()
with tm.assert_produces_warning(
FutureWarning, match="Setting an item of incompatible dtype"
):
with pytest.raises(TypeError, match="Invalid value"):
df.at[1, "B"] = 1.2
tm.assert_frame_equal(df, expected)

df = orig.copy()
with tm.assert_produces_warning(
FutureWarning, match="Setting an item of incompatible dtype"
):
with pytest.raises(TypeError, match="Invalid value"):
df.loc[1, "B"] = 1.2
tm.assert_frame_equal(df, expected)

df = orig.copy()
with tm.assert_produces_warning(
FutureWarning, match="Setting an item of incompatible dtype"
):
with pytest.raises(TypeError, match="Invalid value"):
df.iat[1, 1] = 1.2
tm.assert_frame_equal(df, expected)

df = orig.copy()
with tm.assert_produces_warning(
FutureWarning, match="Setting an item of incompatible dtype"
):
with pytest.raises(TypeError, match="Invalid value"):
df.iloc[1, 1] = 1.2
tm.assert_frame_equal(df, expected)


def test_6942(indexer_al):
Expand Down Expand Up @@ -107,19 +91,11 @@ def test_26395(indexer_al):
expected = DataFrame({"D": [0, 0, 2]}, index=["A", "B", "C"], dtype=np.int64)
tm.assert_frame_equal(df, expected)

with tm.assert_produces_warning(
FutureWarning, match="Setting an item of incompatible dtype"
):
with pytest.raises(TypeError, match="Invalid value"):
indexer_al(df)["C", "D"] = 44.5
expected = DataFrame({"D": [0, 0, 44.5]}, index=["A", "B", "C"], dtype=np.float64)
tm.assert_frame_equal(df, expected)

with tm.assert_produces_warning(
FutureWarning, match="Setting an item of incompatible dtype"
):
with pytest.raises(TypeError, match="Invalid value"):
indexer_al(df)["C", "D"] = "hello"
expected = DataFrame({"D": [0, 0, "hello"]}, index=["A", "B", "C"], dtype=object)
tm.assert_frame_equal(df, expected)


@pytest.mark.xfail(reason="unwanted upcast")
Expand Down
Loading