CLEAN: Enforce pdep6 #59007

Merged: 14 commits, Jul 8, 2024
13 changes: 3 additions & 10 deletions asv_bench/benchmarks/indexing.py
@@ -546,24 +546,17 @@ def time_chained_indexing(self, mode):


 class Block:
-    params = [
-        (True, "True"),
-        (np.array(True), "np.array(True)"),
-    ]
-
-    def setup(self, true_value, mode):
+    def setup(self):
         self.df = DataFrame(
             False,
             columns=np.arange(500).astype(str),
             index=date_range("2010-01-01", "2011-01-01"),
         )

-        self.true_value = true_value
-
-    def time_test(self, true_value, mode):
+    def time_test(self):
         start = datetime(2010, 5, 1)
         end = datetime(2010, 9, 1)
-        self.df.loc[start:end, :] = true_value
+        self.df.loc[start:end, :] = True


 from .pandas_vb_common import setup  # noqa: F401 isort:skip
2 changes: 1 addition & 1 deletion doc/source/user_guide/categorical.rst
@@ -793,7 +793,7 @@ Assigning a ``Categorical`` to parts of a column of other types will use the val
     :okwarning:

     df = pd.DataFrame({"a": [1, 1, 1, 1, 1], "b": ["a", "a", "a", "a", "a"]})
-    df.loc[1:2, "a"] = pd.Categorical(["b", "b"], categories=["a", "b"])
+    df.loc[1:2, "a"] = pd.Categorical([2, 2], categories=[2, 3])
     df.loc[2:3, "b"] = pd.Categorical(["b", "b"], categories=["a", "b"])
     df
     df.dtypes
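Note on the documentation change above (a reviewer sketch, not part of the diff): the example switches to a Categorical whose values fit the int64 column, because pushing string categories into an integer column is exactly the upcast this PR forbids. Assuming a pandas build that includes this change:

    import pandas as pd

    df = pd.DataFrame({"a": [1, 1, 1, 1, 1], "b": ["a", "a", "a", "a", "a"]})

    # The values 2, 2 can be held by the int64 column, so only the values are set.
    df.loc[1:2, "a"] = pd.Categorical([2, 2], categories=[2, 3])

    # String categories would require upcasting "a" to object, which now raises.
    try:
        df.loc[1:2, "a"] = pd.Categorical(["b", "b"], categories=["a", "b"])
    except TypeError as err:
        print(err)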
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
@@ -360,6 +360,7 @@ Other Removals
 - Changed the default value of ``na_action`` in :meth:`Categorical.map` to ``None`` (:issue:`51645`)
 - Changed the default value of ``observed`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby` to ``True`` (:issue:`51811`)
 - Enforce deprecation in :func:`testing.assert_series_equal` and :func:`testing.assert_frame_equal` with object dtype and mismatched null-like values, which are now considered not-equal (:issue:`18463`)
+- Enforce banning of upcasting in in-place setitem-like operations (:issue:`59007`) (see `PDEP6 <https://pandas.pydata.org/pdeps/0006-ban-upcasting.html>`_)
 - Enforced deprecation ``all`` and ``any`` reductions with ``datetime64``, :class:`DatetimeTZDtype`, and :class:`PeriodDtype` dtypes (:issue:`58029`)
 - Enforced deprecation disallowing ``float`` "periods" in :func:`date_range`, :func:`period_range`, :func:`timedelta_range`, :func:`interval_range`, (:issue:`56036`)
 - Enforced deprecation disallowing parsing datetimes with mixed time zones unless user passes ``utc=True`` to :func:`to_datetime` (:issue:`57275`)
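A quick illustration of the enforced behavior behind the new whatsnew entry (a reviewer-style sketch assuming a build with this PR, not code from the diff): setting a value that cannot be held by a column's dtype used to upcast with a FutureWarning and now raises a TypeError.

    import pandas as pd

    ser = pd.Series([1, 2, 3])  # int64

    # pandas 2.x: FutureWarning, then silent upcast to object.
    # With this PR: TypeError, and the Series is left untouched.
    try:
        ser[0] = "foo"
    except TypeError as err:
        print(err)  # e.g. "Invalid value 'foo' for dtype 'int64'"

    print(ser.dtype)  # still int64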
16 changes: 5 additions & 11 deletions pandas/core/indexing.py
@@ -25,7 +25,6 @@
 )
 from pandas.errors.cow import _chained_assignment_msg
 from pandas.util._decorators import doc
-from pandas.util._exceptions import find_stack_level

 from pandas.core.dtypes.cast import (
     can_hold_element,
@@ -2124,14 +2123,14 @@ def _setitem_single_column(self, loc: int, value, plane_indexer) -> None:
                 self.obj._mgr.column_setitem(
                     loc, plane_indexer, value, inplace_only=True
                 )
-            except (ValueError, TypeError, LossySetitemError):
+            except (ValueError, TypeError, LossySetitemError) as exc:
                 # If we're setting an entire column and we can't do it inplace,
                 # then we can use value's dtype (or inferred dtype)
                 # instead of object
                 dtype = self.obj.dtypes.iloc[loc]
                 if dtype not in (np.void, object) and not self.obj.empty:
                     # - Exclude np.void, as that is a special case for expansion.
-                    # We want to warn for
+                    # We want to raise for
                     # df = pd.DataFrame({'a': [1, 2]})
                     # df.loc[:, 'a'] = .3
                     # but not for
@@ -2140,14 +2139,9 @@ def _setitem_single_column(self, loc: int, value, plane_indexer) -> None:
                     # - Exclude `object`, as then no upcasting happens.
                     # - Exclude empty initial object with enlargement,
                     #   as then there's nothing to be inconsistent with.
-                    warnings.warn(
-                        f"Setting an item of incompatible dtype is deprecated "
-                        "and will raise in a future error of pandas. "
-                        f"Value '{value}' has dtype incompatible with {dtype}, "
-                        "please explicitly cast to a compatible dtype first.",
-                        FutureWarning,
-                        stacklevel=find_stack_level(),
-                    )
+                    raise TypeError(
+                        f"Invalid value '{value}' for dtype '{dtype}'"
+                    ) from exc
                 self.obj.isetitem(loc, value)
         else:
             # set value into the column (first attempting to operate inplace, then
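To make the code comments above concrete (my own sketch, not code from the PR): the full-column branch raises only when the existing column has a real, non-object dtype that cannot hold the value; object columns are left alone because no upcasting is involved.

    import pandas as pd

    # Raises under this PR: int64 column, lossy scalar, whole-column setitem.
    df = pd.DataFrame({"a": [1, 2]})
    try:
        df.loc[:, "a"] = 0.3
    except TypeError as err:
        print(err)  # Invalid value '0.3' for dtype 'int64'

    # Not an error: the column is already object dtype, so nothing is upcast.
    obj = pd.DataFrame({"a": [1, "x"]})  # mixed values -> object dtype
    obj.loc[:, "a"] = 0.3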
41 changes: 17 additions & 24 deletions pandas/core/internals/blocks.py
@@ -428,7 +428,7 @@ def split_and_operate(self, func, *args, **kwargs) -> list[Block]:
     # Up/Down-casting

     @final
-    def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block:
+    def coerce_to_target_dtype(self, other, raise_on_upcast: bool) -> Block:
         """
         coerce the current block to a dtype compat for other
         we will return a block, possibly object, and not raise
@@ -455,25 +455,18 @@ def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block:
                 isinstance(other, (np.datetime64, np.timedelta64)) and np.isnat(other)
             )
         ):
-            warn_on_upcast = False
+            raise_on_upcast = False
         elif (
             isinstance(other, np.ndarray)
             and other.ndim == 1
             and is_integer_dtype(self.values.dtype)
             and is_float_dtype(other.dtype)
             and lib.has_only_ints_or_nan(other)
         ):
-            warn_on_upcast = False
-
-        if warn_on_upcast:
-            warnings.warn(
-                f"Setting an item of incompatible dtype is deprecated "
-                "and will raise an error in a future version of pandas. "
-                f"Value '{other}' has dtype incompatible with {self.values.dtype}, "
-                "please explicitly cast to a compatible dtype first.",
-                FutureWarning,
-                stacklevel=find_stack_level(),
-            )
+            raise_on_upcast = False

+        if raise_on_upcast:
+            raise TypeError(f"Invalid value '{other}' for dtype '{self.values.dtype}'")
         if self.values.dtype == new_dtype:
             raise AssertionError(
                 f"Did not expect new dtype {new_dtype} to equal self.dtype "
@@ -720,7 +713,7 @@ def replace(
         if value is None or value is NA:
             blk = self.astype(np.dtype(object))
         else:
-            blk = self.coerce_to_target_dtype(value)
+            blk = self.coerce_to_target_dtype(value, raise_on_upcast=False)
         return blk.replace(
             to_replace=to_replace,
             value=value,
@@ -1105,7 +1098,7 @@ def setitem(self, indexer, value) -> Block:
             casted = np_can_hold_element(values.dtype, value)
         except LossySetitemError:
             # current dtype cannot store value, coerce to common dtype
-            nb = self.coerce_to_target_dtype(value, warn_on_upcast=True)
+            nb = self.coerce_to_target_dtype(value, raise_on_upcast=True)
             return nb.setitem(indexer, value)
         else:
             if self.dtype == _dtype_obj:
@@ -1176,7 +1169,7 @@ def putmask(self, mask, new) -> list[Block]:
                 if not is_list_like(new):
                     # using just new[indexer] can't save us the need to cast
                     return self.coerce_to_target_dtype(
-                        new, warn_on_upcast=True
+                        new, raise_on_upcast=True
                     ).putmask(mask, new)
                 else:
                     indexer = mask.nonzero()[0]
@@ -1244,7 +1237,7 @@ def where(self, other, cond) -> list[Block]:
            if self.ndim == 1 or self.shape[0] == 1:
                # no need to split columns

-                block = self.coerce_to_target_dtype(other)
+                block = self.coerce_to_target_dtype(other, raise_on_upcast=False)
                return block.where(orig_other, cond)

            else:
@@ -1438,7 +1431,7 @@ def shift(self, periods: int, fill_value: Any = None) -> list[Block]:
                 fill_value,
             )
         except LossySetitemError:
-            nb = self.coerce_to_target_dtype(fill_value)
+            nb = self.coerce_to_target_dtype(fill_value, raise_on_upcast=False)
             return nb.shift(periods, fill_value=fill_value)

         else:
@@ -1637,11 +1630,11 @@ def setitem(self, indexer, value):
         except (ValueError, TypeError):
             if isinstance(self.dtype, IntervalDtype):
                 # see TestSetitemFloatIntervalWithIntIntervalValues
-                nb = self.coerce_to_target_dtype(orig_value, warn_on_upcast=True)
+                nb = self.coerce_to_target_dtype(orig_value, raise_on_upcast=True)
                 return nb.setitem(orig_indexer, orig_value)

             elif isinstance(self, NDArrayBackedExtensionBlock):
-                nb = self.coerce_to_target_dtype(orig_value, warn_on_upcast=True)
+                nb = self.coerce_to_target_dtype(orig_value, raise_on_upcast=True)
                 return nb.setitem(orig_indexer, orig_value)

             else:
@@ -1676,13 +1669,13 @@ def where(self, other, cond) -> list[Block]:
             if self.ndim == 1 or self.shape[0] == 1:
                 if isinstance(self.dtype, IntervalDtype):
                     # TestSetitemFloatIntervalWithIntIntervalValues
-                    blk = self.coerce_to_target_dtype(orig_other)
+                    blk = self.coerce_to_target_dtype(orig_other, raise_on_upcast=False)
                     return blk.where(orig_other, orig_cond)

                 elif isinstance(self, NDArrayBackedExtensionBlock):
                     # NB: not (yet) the same as
                     # isinstance(values, NDArrayBackedExtensionArray)
-                    blk = self.coerce_to_target_dtype(orig_other)
+                    blk = self.coerce_to_target_dtype(orig_other, raise_on_upcast=False)
                     return blk.where(orig_other, orig_cond)

                 else:
@@ -1737,13 +1730,13 @@ def putmask(self, mask, new) -> list[Block]:
                 if isinstance(self.dtype, IntervalDtype):
                     # Discussion about what we want to support in the general
                     # case GH#39584
-                    blk = self.coerce_to_target_dtype(orig_new, warn_on_upcast=True)
+                    blk = self.coerce_to_target_dtype(orig_new, raise_on_upcast=True)
                     return blk.putmask(orig_mask, orig_new)

                 elif isinstance(self, NDArrayBackedExtensionBlock):
                     # NB: not (yet) the same as
                     # isinstance(values, NDArrayBackedExtensionArray)
-                    blk = self.coerce_to_target_dtype(orig_new, warn_on_upcast=True)
+                    blk = self.coerce_to_target_dtype(orig_new, raise_on_upcast=True)
                     return blk.putmask(orig_mask, orig_new)

                 else:
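A note on the carve-outs at the top of coerce_to_target_dtype (a hedged sketch of my reading, not code from the PR): NA-like values reset raise_on_upcast to False, so those assignments still upcast silently, while genuinely lossy values now raise instead of warning.

    import numpy as np
    import pandas as pd

    ser = pd.Series([1, 2, 3], dtype="int64")

    # NA-like scalars hit the carve-out above, so this still upcasts
    # int64 -> float64 without an error.
    ser.iloc[0] = np.nan
    print(ser.dtype)  # float64

    # A lossy value, by contrast, now raises.
    ser2 = pd.Series([1, 2, 3], dtype="int64")
    try:
        ser2.iloc[0] = 3.5
    except TypeError as err:
        print(err)  # Invalid value '3.5' for dtype 'int64'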
26 changes: 11 additions & 15 deletions pandas/tests/copy_view/test_indexing.py
@@ -725,15 +725,13 @@ def test_column_as_series_set_with_upcast(backend):
         with pytest.raises(TypeError, match="Invalid value"):
             s[0] = "foo"
         expected = Series([1, 2, 3], name="a")
+        tm.assert_series_equal(s, expected)
+        tm.assert_frame_equal(df, df_orig)
+        # ensure cached series on getitem is not the changed series
+        tm.assert_series_equal(df["a"], df_orig["a"])
     else:
-        with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
+        with pytest.raises(TypeError, match="Invalid value"):
             s[0] = "foo"
-        expected = Series(["foo", 2, 3], dtype=object, name="a")
-
-    tm.assert_series_equal(s, expected)
-    tm.assert_frame_equal(df, df_orig)
-    # ensure cached series on getitem is not the changed series
-    tm.assert_series_equal(df["a"], df_orig["a"])


 @pytest.mark.parametrize(
@@ -805,16 +803,14 @@ def test_set_value_copy_only_necessary_column(indexer_func, indexer, val, col):
     view = df[:]

     if val == "a":
-        with tm.assert_produces_warning(
-            FutureWarning, match="Setting an item of incompatible dtype is deprecated"
-        ):
+        with pytest.raises(TypeError, match="Invalid value"):
             indexer_func(df)[indexer] = val
     else:
         indexer_func(df)[indexer] = val

-    assert np.shares_memory(get_array(df, "b"), get_array(view, "b"))
-    assert not np.shares_memory(get_array(df, "a"), get_array(view, "a"))
-    tm.assert_frame_equal(view, df_orig)
+        assert np.shares_memory(get_array(df, "b"), get_array(view, "b"))
+        assert not np.shares_memory(get_array(df, "a"), get_array(view, "a"))
+        tm.assert_frame_equal(view, df_orig)


 def test_series_midx_slice():
26 changes: 13 additions & 13 deletions pandas/tests/copy_view/test_methods.py
@@ -1105,26 +1105,26 @@ def test_putmask_aligns_rhs_no_reference(dtype):
     assert np.shares_memory(arr_a, get_array(df, "a"))


-@pytest.mark.parametrize(
-    "val, exp, warn", [(5.5, True, FutureWarning), (5, False, None)]
-)
-def test_putmask_dont_copy_some_blocks(val, exp, warn):
+@pytest.mark.parametrize("val, exp, raises", [(5.5, True, True), (5, False, False)])
+def test_putmask_dont_copy_some_blocks(val, exp, raises: bool):
     df = DataFrame({"a": [1, 2], "b": 1, "c": 1.5})
     view = df[:]
     df_orig = df.copy()
     indexer = DataFrame(
         [[True, False, False], [True, False, False]], columns=list("abc")
     )
-    with tm.assert_produces_warning(warn, match="incompatible dtype"):
+    if raises:
+        with pytest.raises(TypeError, match="Invalid value"):
+            df[indexer] = val
+    else:
         df[indexer] = val
-
-    assert not np.shares_memory(get_array(view, "a"), get_array(df, "a"))
-    # TODO(CoW): Could split blocks to avoid copying the whole block
-    assert np.shares_memory(get_array(view, "b"), get_array(df, "b")) is exp
-    assert np.shares_memory(get_array(view, "c"), get_array(df, "c"))
-    assert df._mgr._has_no_reference(1) is not exp
-    assert not df._mgr._has_no_reference(2)
-    tm.assert_frame_equal(view, df_orig)
+        assert not np.shares_memory(get_array(view, "a"), get_array(df, "a"))
+        # TODO(CoW): Could split blocks to avoid copying the whole block
+        assert np.shares_memory(get_array(view, "b"), get_array(df, "b")) is exp
+        assert np.shares_memory(get_array(view, "c"), get_array(df, "c"))
+        assert df._mgr._has_no_reference(1) is not exp
+        assert not df._mgr._has_no_reference(2)
+        tm.assert_frame_equal(view, df_orig)


 @pytest.mark.parametrize("dtype", ["int64", "Int64"])
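The re-parametrized test above maps to this user-facing behavior (my sketch, assuming a build with this PR): a masked assignment whose value fits the column dtypes still happens in place, while one that would force an upcast raises.

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2], "b": 1, "c": 1.5})
    mask = pd.DataFrame(
        [[True, False, False], [True, False, False]], columns=list("abc")
    )

    # 5 can be held by the int64 column "a", so the masked setitem succeeds.
    df[mask] = 5

    # 5.5 would require upcasting "a" to float64, so it now raises instead.
    try:
        df[mask] = 5.5
    except TypeError as err:
        print(err)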
36 changes: 6 additions & 30 deletions pandas/tests/frame/indexing/test_coercion.py
@@ -49,35 +49,19 @@ def test_loc_setitem_multiindex_columns(self, consolidate):
 def test_37477():
     # fixed by GH#45121
     orig = DataFrame({"A": [1, 2, 3], "B": [3, 4, 5]})
-    expected = DataFrame({"A": [1, 2, 3], "B": [3, 1.2, 5]})

     df = orig.copy()
-    with tm.assert_produces_warning(
-        FutureWarning, match="Setting an item of incompatible dtype"
-    ):
+    with pytest.raises(TypeError, match="Invalid value"):
         df.at[1, "B"] = 1.2
-    tm.assert_frame_equal(df, expected)

     df = orig.copy()
-    with tm.assert_produces_warning(
-        FutureWarning, match="Setting an item of incompatible dtype"
-    ):
+    with pytest.raises(TypeError, match="Invalid value"):
         df.loc[1, "B"] = 1.2
-    tm.assert_frame_equal(df, expected)

     df = orig.copy()
-    with tm.assert_produces_warning(
-        FutureWarning, match="Setting an item of incompatible dtype"
-    ):
+    with pytest.raises(TypeError, match="Invalid value"):
         df.iat[1, 1] = 1.2
-    tm.assert_frame_equal(df, expected)

     df = orig.copy()
-    with tm.assert_produces_warning(
-        FutureWarning, match="Setting an item of incompatible dtype"
-    ):
+    with pytest.raises(TypeError, match="Invalid value"):
         df.iloc[1, 1] = 1.2
-    tm.assert_frame_equal(df, expected)


 def test_6942(indexer_al):
@@ -107,19 +91,11 @@ def test_26395(indexer_al):
     expected = DataFrame({"D": [0, 0, 2]}, index=["A", "B", "C"], dtype=np.int64)
     tm.assert_frame_equal(df, expected)

-    with tm.assert_produces_warning(
-        FutureWarning, match="Setting an item of incompatible dtype"
-    ):
+    with pytest.raises(TypeError, match="Invalid value"):
         indexer_al(df)["C", "D"] = 44.5
-    expected = DataFrame({"D": [0, 0, 44.5]}, index=["A", "B", "C"], dtype=np.float64)
-    tm.assert_frame_equal(df, expected)

-    with tm.assert_produces_warning(
-        FutureWarning, match="Setting an item of incompatible dtype"
-    ):
+    with pytest.raises(TypeError, match="Invalid value"):
         indexer_al(df)["C", "D"] = "hello"
-    expected = DataFrame({"D": [0, 0, "hello"]}, index=["A", "B", "C"], dtype=object)
-    tm.assert_frame_equal(df, expected)


 @pytest.mark.xfail(reason="unwanted upcast")
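As the updated tests above show, .at, .loc, .iat and .iloc all go through the same check now; the supported way to keep the old result is an explicit cast before assigning. A reviewer sketch, not part of the diff:

    import pandas as pd

    df = pd.DataFrame({"A": [1, 2, 3], "B": [3, 4, 5]})

    try:
        df.at[1, "B"] = 1.2          # .loc / .iat / .iloc fail the same way
    except TypeError as err:
        print(err)                   # Invalid value '1.2' for dtype 'int64'

    # Cast the column explicitly first, then assign without any upcast-on-set.
    df["B"] = df["B"].astype("float64")
    df.at[1, "B"] = 1.2
    print(df["B"].tolist())          # [3.0, 1.2, 5.0]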