CLEAN: Enforce pdep6 (#59007)
* enforce pdep6

* fixup Block.time_test benchmark

* update comment

* update warn to raise

* add missing assertion

* simplify

* remove default value for `raise_on_upcast`

* add whatsnew
MarcoGorelli committed Jul 8, 2024
1 parent a93e2e2 commit 6090042
Showing 29 changed files with 383 additions and 655 deletions.
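
The change this commit enforces is PDEP6: in-place, setitem-like operations may no longer silently upcast a column to hold an incompatible value; instead of the old ``FutureWarning`` they now raise. A minimal sketch of the enforced behavior, assuming pandas 3.0 (the exact error message may differ):

```python
import pandas as pd

df = pd.DataFrame({"A": [1, 2, 3], "B": [3, 4, 5]})  # both columns are int64

# Previously this upcast "B" to float64 and emitted a FutureWarning;
# with PDEP6 enforced it raises instead.
try:
    df.at[1, "B"] = 1.2
except TypeError as exc:
    print(exc)  # e.g. "Invalid value '1.2' for dtype 'int64'"

# The supported pattern: cast to a compatible dtype explicitly, then set.
df["B"] = df["B"].astype("float64")
df.at[1, "B"] = 1.2
```

The whatsnew entry and the updated tests below (for example ``test_37477`` and ``test_26395``) exercise this same pattern.
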
13 changes: 3 additions & 10 deletions asv_bench/benchmarks/indexing.py
@@ -546,24 +546,17 @@ def time_chained_indexing(self, mode):


class Block:
params = [
(True, "True"),
(np.array(True), "np.array(True)"),
]

def setup(self, true_value, mode):
def setup(self):
self.df = DataFrame(
False,
columns=np.arange(500).astype(str),
index=date_range("2010-01-01", "2011-01-01"),
)

self.true_value = true_value

def time_test(self, true_value, mode):
def time_test(self):
start = datetime(2010, 5, 1)
end = datetime(2010, 9, 1)
self.df.loc[start:end, :] = true_value
self.df.loc[start:end, :] = True


from .pandas_vb_common import setup # noqa: F401 isort:skip
2 changes: 1 addition & 1 deletion doc/source/user_guide/categorical.rst
@@ -793,7 +793,7 @@ Assigning a ``Categorical`` to parts of a column of other types will use the val
:okwarning:
df = pd.DataFrame({"a": [1, 1, 1, 1, 1], "b": ["a", "a", "a", "a", "a"]})
df.loc[1:2, "a"] = pd.Categorical(["b", "b"], categories=["a", "b"])
df.loc[1:2, "a"] = pd.Categorical([2, 2], categories=[2, 3])
df.loc[2:3, "b"] = pd.Categorical(["b", "b"], categories=["a", "b"])
df
df.dtypes
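
The documentation change above replaces an assignment that would now require upcasting (string categories into an integer column) with one whose values already fit the column. A hedged sketch of the distinction, assuming pandas with PDEP6 enforced:

```python
import pandas as pd

df = pd.DataFrame({"a": [1, 1, 1, 1, 1], "b": ["a", "a", "a", "a", "a"]})

# The values 2 and 2 already fit the integer column "a", so only the values
# are used and the column does not become categorical.
df.loc[1:2, "a"] = pd.Categorical([2, 2], categories=[2, 3])

# String categories cannot be stored in an integer column without upcasting,
# so the assignment removed from the docs above would now raise.
try:
    df.loc[1:2, "a"] = pd.Categorical(["b", "b"], categories=["a", "b"])
except TypeError:
    pass
```
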
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
@@ -360,6 +360,7 @@ Other Removals
- Changed the default value of ``na_action`` in :meth:`Categorical.map` to ``None`` (:issue:`51645`)
- Changed the default value of ``observed`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby` to ``True`` (:issue:`51811`)
- Enforce deprecation in :func:`testing.assert_series_equal` and :func:`testing.assert_frame_equal` with object dtype and mismatched null-like values, which are now considered not-equal (:issue:`18463`)
- Enforce banning of upcasting in in-place setitem-like operations (:issue:`59007`) (see `PDEP6 <https://pandas.pydata.org/pdeps/0006-ban-upcasting.html>`_)
- Enforced deprecation ``all`` and ``any`` reductions with ``datetime64``, :class:`DatetimeTZDtype`, and :class:`PeriodDtype` dtypes (:issue:`58029`)
- Enforced deprecation disallowing ``float`` "periods" in :func:`date_range`, :func:`period_range`, :func:`timedelta_range`, :func:`interval_range`, (:issue:`56036`)
- Enforced deprecation disallowing parsing datetimes with mixed time zones unless user passes ``utc=True`` to :func:`to_datetime` (:issue:`57275`)
16 changes: 5 additions & 11 deletions pandas/core/indexing.py
@@ -25,7 +25,6 @@
)
from pandas.errors.cow import _chained_assignment_msg
from pandas.util._decorators import doc
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.cast import (
can_hold_element,
@@ -2124,14 +2123,14 @@ def _setitem_single_column(self, loc: int, value, plane_indexer) -> None:
self.obj._mgr.column_setitem(
loc, plane_indexer, value, inplace_only=True
)
except (ValueError, TypeError, LossySetitemError):
except (ValueError, TypeError, LossySetitemError) as exc:
# If we're setting an entire column and we can't do it inplace,
# then we can use value's dtype (or inferred dtype)
# instead of object
dtype = self.obj.dtypes.iloc[loc]
if dtype not in (np.void, object) and not self.obj.empty:
# - Exclude np.void, as that is a special case for expansion.
# We want to warn for
# We want to raise for
# df = pd.DataFrame({'a': [1, 2]})
# df.loc[:, 'a'] = .3
# but not for
@@ -2140,14 +2139,9 @@ def _setitem_single_column(self, loc: int, value, plane_indexer) -> None:
# - Exclude `object`, as then no upcasting happens.
# - Exclude empty initial object with enlargement,
# as then there's nothing to be inconsistent with.
warnings.warn(
f"Setting an item of incompatible dtype is deprecated "
"and will raise in a future error of pandas. "
f"Value '{value}' has dtype incompatible with {dtype}, "
"please explicitly cast to a compatible dtype first.",
FutureWarning,
stacklevel=find_stack_level(),
)
raise TypeError(
f"Invalid value '{value}' for dtype '{dtype}'"
) from exc
self.obj.isetitem(loc, value)
else:
# set value into the column (first attempting to operate inplace, then
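
The comment in ``_setitem_single_column`` singles out whole-column in-place assignment, which now raises, as distinct from plain column replacement, which does not go through this path and is unaffected. A small sketch of that distinction (assuming pandas 3.0; the message text comes from the ``raise`` above):

```python
import pandas as pd

df = pd.DataFrame({"a": [1, 2]})  # int64, as in the comment above

# In-place, setitem-like: the value is written into the existing int64
# block, so an incompatible float now raises.
try:
    df.loc[:, "a"] = 0.3
except TypeError as exc:
    print(exc)  # e.g. "Invalid value '0.3' for dtype 'int64'"

# Column replacement is not an in-place operation; it simply swaps in a
# new float64 column and remains allowed.
df["a"] = 0.3
print(df["a"].dtype)  # float64
```
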
41 changes: 17 additions & 24 deletions pandas/core/internals/blocks.py
@@ -428,7 +428,7 @@ def split_and_operate(self, func, *args, **kwargs) -> list[Block]:
# Up/Down-casting

@final
def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block:
def coerce_to_target_dtype(self, other, raise_on_upcast: bool) -> Block:
"""
coerce the current block to a dtype compat for other
we will return a block, possibly object, and not raise
@@ -455,25 +455,18 @@ def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block:
isinstance(other, (np.datetime64, np.timedelta64)) and np.isnat(other)
)
):
warn_on_upcast = False
raise_on_upcast = False
elif (
isinstance(other, np.ndarray)
and other.ndim == 1
and is_integer_dtype(self.values.dtype)
and is_float_dtype(other.dtype)
and lib.has_only_ints_or_nan(other)
):
warn_on_upcast = False

if warn_on_upcast:
warnings.warn(
f"Setting an item of incompatible dtype is deprecated "
"and will raise an error in a future version of pandas. "
f"Value '{other}' has dtype incompatible with {self.values.dtype}, "
"please explicitly cast to a compatible dtype first.",
FutureWarning,
stacklevel=find_stack_level(),
)
raise_on_upcast = False

if raise_on_upcast:
raise TypeError(f"Invalid value '{other}' for dtype '{self.values.dtype}'")
if self.values.dtype == new_dtype:
raise AssertionError(
f"Did not expect new dtype {new_dtype} to equal self.dtype "
@@ -720,7 +713,7 @@ def replace(
if value is None or value is NA:
blk = self.astype(np.dtype(object))
else:
blk = self.coerce_to_target_dtype(value)
blk = self.coerce_to_target_dtype(value, raise_on_upcast=False)
return blk.replace(
to_replace=to_replace,
value=value,
@@ -1105,7 +1098,7 @@ def setitem(self, indexer, value) -> Block:
casted = np_can_hold_element(values.dtype, value)
except LossySetitemError:
# current dtype cannot store value, coerce to common dtype
nb = self.coerce_to_target_dtype(value, warn_on_upcast=True)
nb = self.coerce_to_target_dtype(value, raise_on_upcast=True)
return nb.setitem(indexer, value)
else:
if self.dtype == _dtype_obj:
@@ -1176,7 +1169,7 @@ def putmask(self, mask, new) -> list[Block]:
if not is_list_like(new):
# using just new[indexer] can't save us the need to cast
return self.coerce_to_target_dtype(
new, warn_on_upcast=True
new, raise_on_upcast=True
).putmask(mask, new)
else:
indexer = mask.nonzero()[0]
@@ -1244,7 +1237,7 @@ def where(self, other, cond) -> list[Block]:
if self.ndim == 1 or self.shape[0] == 1:
# no need to split columns

block = self.coerce_to_target_dtype(other)
block = self.coerce_to_target_dtype(other, raise_on_upcast=False)
return block.where(orig_other, cond)

else:
@@ -1438,7 +1431,7 @@ def shift(self, periods: int, fill_value: Any = None) -> list[Block]:
fill_value,
)
except LossySetitemError:
nb = self.coerce_to_target_dtype(fill_value)
nb = self.coerce_to_target_dtype(fill_value, raise_on_upcast=False)
return nb.shift(periods, fill_value=fill_value)

else:
@@ -1637,11 +1630,11 @@ def setitem(self, indexer, value):
except (ValueError, TypeError):
if isinstance(self.dtype, IntervalDtype):
# see TestSetitemFloatIntervalWithIntIntervalValues
nb = self.coerce_to_target_dtype(orig_value, warn_on_upcast=True)
nb = self.coerce_to_target_dtype(orig_value, raise_on_upcast=True)
return nb.setitem(orig_indexer, orig_value)

elif isinstance(self, NDArrayBackedExtensionBlock):
nb = self.coerce_to_target_dtype(orig_value, warn_on_upcast=True)
nb = self.coerce_to_target_dtype(orig_value, raise_on_upcast=True)
return nb.setitem(orig_indexer, orig_value)

else:
@@ -1676,13 +1669,13 @@ def where(self, other, cond) -> list[Block]:
if self.ndim == 1 or self.shape[0] == 1:
if isinstance(self.dtype, IntervalDtype):
# TestSetitemFloatIntervalWithIntIntervalValues
blk = self.coerce_to_target_dtype(orig_other)
blk = self.coerce_to_target_dtype(orig_other, raise_on_upcast=False)
return blk.where(orig_other, orig_cond)

elif isinstance(self, NDArrayBackedExtensionBlock):
# NB: not (yet) the same as
# isinstance(values, NDArrayBackedExtensionArray)
blk = self.coerce_to_target_dtype(orig_other)
blk = self.coerce_to_target_dtype(orig_other, raise_on_upcast=False)
return blk.where(orig_other, orig_cond)

else:
@@ -1737,13 +1730,13 @@ def putmask(self, mask, new) -> list[Block]:
if isinstance(self.dtype, IntervalDtype):
# Discussion about what we want to support in the general
# case GH#39584
blk = self.coerce_to_target_dtype(orig_new, warn_on_upcast=True)
blk = self.coerce_to_target_dtype(orig_new, raise_on_upcast=True)
return blk.putmask(orig_mask, orig_new)

elif isinstance(self, NDArrayBackedExtensionBlock):
# NB: not (yet) the same as
# isinstance(values, NDArrayBackedExtensionArray)
blk = self.coerce_to_target_dtype(orig_new, warn_on_upcast=True)
blk = self.coerce_to_target_dtype(orig_new, raise_on_upcast=True)
return blk.putmask(orig_mask, orig_new)

else:
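
The rewritten ``coerce_to_target_dtype`` keeps two carve-outs in which ``raise_on_upcast`` is switched off: a scalar NA (other than a datetime/timedelta NaT) set into an integer block, and a 1-D float array containing only whole numbers or NaN. A hedged sketch of what those carve-outs mean in practice, assuming pandas 3.0 behavior:

```python
import numpy as np
import pandas as pd

ser = pd.Series([1, 2, 3])  # int64

# First carve-out: a scalar NA set into an integer block does not raise;
# the block is still coerced, here to float64 with the value stored as NaN.
ser.iloc[0] = np.nan
print(ser.dtype)  # float64

# Second carve-out: a 1-D float array holding only whole numbers or NaN
# set into an integer block does not raise either.
ser2 = pd.Series([1, 2, 3])
ser2.iloc[:] = np.array([4.0, np.nan, 6.0])
print(ser2.dtype)  # float64

# Anything else that would require upcasting now raises.
ser3 = pd.Series([1, 2, 3])
try:
    ser3.iloc[0] = "foo"
except TypeError as exc:
    print(exc)  # e.g. "Invalid value 'foo' for dtype 'int64'"
```
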
26 changes: 11 additions & 15 deletions pandas/tests/copy_view/test_indexing.py
@@ -725,15 +725,13 @@ def test_column_as_series_set_with_upcast(backend):
with pytest.raises(TypeError, match="Invalid value"):
s[0] = "foo"
expected = Series([1, 2, 3], name="a")
tm.assert_series_equal(s, expected)
tm.assert_frame_equal(df, df_orig)
# ensure cached series on getitem is not the changed series
tm.assert_series_equal(df["a"], df_orig["a"])
else:
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
with pytest.raises(TypeError, match="Invalid value"):
s[0] = "foo"
expected = Series(["foo", 2, 3], dtype=object, name="a")

tm.assert_series_equal(s, expected)
tm.assert_frame_equal(df, df_orig)
# ensure cached series on getitem is not the changed series
tm.assert_series_equal(df["a"], df_orig["a"])


@pytest.mark.parametrize(
@@ -805,16 +803,14 @@ def test_set_value_copy_only_necessary_column(indexer_func, indexer, val, col):
view = df[:]

if val == "a":
with tm.assert_produces_warning(
FutureWarning, match="Setting an item of incompatible dtype is deprecated"
):
with pytest.raises(TypeError, match="Invalid value"):
indexer_func(df)[indexer] = val
else:
indexer_func(df)[indexer] = val

indexer_func(df)[indexer] = val

assert np.shares_memory(get_array(df, "b"), get_array(view, "b"))
assert not np.shares_memory(get_array(df, "a"), get_array(view, "a"))
tm.assert_frame_equal(view, df_orig)
assert np.shares_memory(get_array(df, "b"), get_array(view, "b"))
assert not np.shares_memory(get_array(df, "a"), get_array(view, "a"))
tm.assert_frame_equal(view, df_orig)


def test_series_midx_slice():
26 changes: 13 additions & 13 deletions pandas/tests/copy_view/test_methods.py
@@ -1105,26 +1105,26 @@ def test_putmask_aligns_rhs_no_reference(dtype):
assert np.shares_memory(arr_a, get_array(df, "a"))


@pytest.mark.parametrize(
"val, exp, warn", [(5.5, True, FutureWarning), (5, False, None)]
)
def test_putmask_dont_copy_some_blocks(val, exp, warn):
@pytest.mark.parametrize("val, exp, raises", [(5.5, True, True), (5, False, False)])
def test_putmask_dont_copy_some_blocks(val, exp, raises: bool):
df = DataFrame({"a": [1, 2], "b": 1, "c": 1.5})
view = df[:]
df_orig = df.copy()
indexer = DataFrame(
[[True, False, False], [True, False, False]], columns=list("abc")
)
with tm.assert_produces_warning(warn, match="incompatible dtype"):
if raises:
with pytest.raises(TypeError, match="Invalid value"):
df[indexer] = val
else:
df[indexer] = val

assert not np.shares_memory(get_array(view, "a"), get_array(df, "a"))
# TODO(CoW): Could split blocks to avoid copying the whole block
assert np.shares_memory(get_array(view, "b"), get_array(df, "b")) is exp
assert np.shares_memory(get_array(view, "c"), get_array(df, "c"))
assert df._mgr._has_no_reference(1) is not exp
assert not df._mgr._has_no_reference(2)
tm.assert_frame_equal(view, df_orig)
assert not np.shares_memory(get_array(view, "a"), get_array(df, "a"))
# TODO(CoW): Could split blocks to avoid copying the whole block
assert np.shares_memory(get_array(view, "b"), get_array(df, "b")) is exp
assert np.shares_memory(get_array(view, "c"), get_array(df, "c"))
assert df._mgr._has_no_reference(1) is not exp
assert not df._mgr._has_no_reference(2)
tm.assert_frame_equal(view, df_orig)


@pytest.mark.parametrize("dtype", ["int64", "Int64"])
36 changes: 6 additions & 30 deletions pandas/tests/frame/indexing/test_coercion.py
@@ -49,35 +49,19 @@ def test_loc_setitem_multiindex_columns(self, consolidate):
def test_37477():
# fixed by GH#45121
orig = DataFrame({"A": [1, 2, 3], "B": [3, 4, 5]})
expected = DataFrame({"A": [1, 2, 3], "B": [3, 1.2, 5]})

df = orig.copy()
with tm.assert_produces_warning(
FutureWarning, match="Setting an item of incompatible dtype"
):
with pytest.raises(TypeError, match="Invalid value"):
df.at[1, "B"] = 1.2
tm.assert_frame_equal(df, expected)

df = orig.copy()
with tm.assert_produces_warning(
FutureWarning, match="Setting an item of incompatible dtype"
):
with pytest.raises(TypeError, match="Invalid value"):
df.loc[1, "B"] = 1.2
tm.assert_frame_equal(df, expected)

df = orig.copy()
with tm.assert_produces_warning(
FutureWarning, match="Setting an item of incompatible dtype"
):
with pytest.raises(TypeError, match="Invalid value"):
df.iat[1, 1] = 1.2
tm.assert_frame_equal(df, expected)

df = orig.copy()
with tm.assert_produces_warning(
FutureWarning, match="Setting an item of incompatible dtype"
):
with pytest.raises(TypeError, match="Invalid value"):
df.iloc[1, 1] = 1.2
tm.assert_frame_equal(df, expected)


def test_6942(indexer_al):
@@ -107,19 +91,11 @@ def test_26395(indexer_al):
expected = DataFrame({"D": [0, 0, 2]}, index=["A", "B", "C"], dtype=np.int64)
tm.assert_frame_equal(df, expected)

with tm.assert_produces_warning(
FutureWarning, match="Setting an item of incompatible dtype"
):
with pytest.raises(TypeError, match="Invalid value"):
indexer_al(df)["C", "D"] = 44.5
expected = DataFrame({"D": [0, 0, 44.5]}, index=["A", "B", "C"], dtype=np.float64)
tm.assert_frame_equal(df, expected)

with tm.assert_produces_warning(
FutureWarning, match="Setting an item of incompatible dtype"
):
with pytest.raises(TypeError, match="Invalid value"):
indexer_al(df)["C", "D"] = "hello"
expected = DataFrame({"D": [0, 0, "hello"]}, index=["A", "B", "C"], dtype=object)
tm.assert_frame_equal(df, expected)


@pytest.mark.xfail(reason="unwanted upcast")
