CLEAN: Enforce pdep6 #59007

Merged: 14 commits, Jul 8, 2024
13 changes: 3 additions & 10 deletions asv_bench/benchmarks/indexing.py
@@ -546,24 +546,17 @@ def time_chained_indexing(self, mode):


 class Block:
-    params = [
-        (True, "True"),
-        (np.array(True), "np.array(True)"),
-    ]
-
-    def setup(self, true_value, mode):
+    def setup(self):
         self.df = DataFrame(
             False,
             columns=np.arange(500).astype(str),
             index=date_range("2010-01-01", "2011-01-01"),
         )

-        self.true_value = true_value
-
-    def time_test(self, true_value, mode):
+    def time_test(self):
         start = datetime(2010, 5, 1)
         end = datetime(2010, 9, 1)
-        self.df.loc[start:end, :] = true_value
+        self.df.loc[start:end, :] = True


 from .pandas_vb_common import setup  # noqa: F401 isort:skip
2 changes: 1 addition & 1 deletion doc/source/user_guide/categorical.rst
@@ -793,7 +793,7 @@ Assigning a ``Categorical`` to parts of a column of other types will use the val
     :okwarning:

     df = pd.DataFrame({"a": [1, 1, 1, 1, 1], "b": ["a", "a", "a", "a", "a"]})
-    df.loc[1:2, "a"] = pd.Categorical(["b", "b"], categories=["a", "b"])
+    df.loc[1:2, "a"] = pd.Categorical([2, 2], categories=[2, 3])
     df.loc[2:3, "b"] = pd.Categorical(["b", "b"], categories=["a", "b"])
     df
     df.dtypes
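Note on the documentation change above (a reviewer sketch, not part of the diff): the example switches to a Categorical whose values fit the int64 column, because pushing string categories into an integer column is exactly the upcast this PR forbids. Assuming a pandas build that includes this change:

    import pandas as pd

    df = pd.DataFrame({"a": [1, 1, 1, 1, 1], "b": ["a", "a", "a", "a", "a"]})

    # The values 2, 2 can be held by the int64 column, so only the values are set.
    df.loc[1:2, "a"] = pd.Categorical([2, 2], categories=[2, 3])

    # String categories would require upcasting "a" to object, which now raises.
    try:
        df.loc[1:2, "a"] = pd.Categorical(["b", "b"], categories=["a", "b"])
    except TypeError as err:
        print(err)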
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
@@ -360,6 +360,7 @@ Other Removals
 - Changed the default value of ``na_action`` in :meth:`Categorical.map` to ``None`` (:issue:`51645`)
 - Changed the default value of ``observed`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby` to ``True`` (:issue:`51811`)
 - Enforce deprecation in :func:`testing.assert_series_equal` and :func:`testing.assert_frame_equal` with object dtype and mismatched null-like values, which are now considered not-equal (:issue:`18463`)
+- Enforce banning of upcasting in in-place setitem-like operations (:issue:`59007`) (see `PDEP6 <https://pandas.pydata.org/pdeps/0006-ban-upcasting.html>`_)
 - Enforced deprecation ``all`` and ``any`` reductions with ``datetime64``, :class:`DatetimeTZDtype`, and :class:`PeriodDtype` dtypes (:issue:`58029`)
 - Enforced deprecation disallowing ``float`` "periods" in :func:`date_range`, :func:`period_range`, :func:`timedelta_range`, :func:`interval_range`, (:issue:`56036`)
 - Enforced deprecation disallowing parsing datetimes with mixed time zones unless user passes ``utc=True`` to :func:`to_datetime` (:issue:`57275`)
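A quick illustration of the enforced behavior behind the new whatsnew entry (a reviewer-style sketch assuming a build with this PR, not code from the diff): setting a value that cannot be held by a column's dtype used to upcast with a FutureWarning and now raises a TypeError.

    import pandas as pd

    ser = pd.Series([1, 2, 3])  # int64

    # pandas 2.x: FutureWarning, then silent upcast to object.
    # With this PR: TypeError, and the Series is left untouched.
    try:
        ser[0] = "foo"
    except TypeError as err:
        print(err)  # e.g. "Invalid value 'foo' for dtype 'int64'"

    print(ser.dtype)  # still int64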
16 changes: 5 additions & 11 deletions pandas/core/indexing.py
@@ -25,7 +25,6 @@
 )
 from pandas.errors.cow import _chained_assignment_msg
 from pandas.util._decorators import doc
-from pandas.util._exceptions import find_stack_level

 from pandas.core.dtypes.cast import (
     can_hold_element,
@@ -2124,14 +2123,14 @@ def _setitem_single_column(self, loc: int, value, plane_indexer) -> None:
                 self.obj._mgr.column_setitem(
                     loc, plane_indexer, value, inplace_only=True
                 )
-            except (ValueError, TypeError, LossySetitemError):
+            except (ValueError, TypeError, LossySetitemError) as exc:
                 # If we're setting an entire column and we can't do it inplace,
                 # then we can use value's dtype (or inferred dtype)
                 # instead of object
                 dtype = self.obj.dtypes.iloc[loc]
                 if dtype not in (np.void, object) and not self.obj.empty:
                     # - Exclude np.void, as that is a special case for expansion.
-                    # We want to warn for
+                    # We want to raise for
                     # df = pd.DataFrame({'a': [1, 2]})
                     # df.loc[:, 'a'] = .3
                     # but not for
@@ -2140,14 +2139,9 @@ def _setitem_single_column(self, loc: int, value, plane_indexer) -> None:
                     # - Exclude `object`, as then no upcasting happens.
                     # - Exclude empty initial object with enlargement,
                     #   as then there's nothing to be inconsistent with.
-                    warnings.warn(
-                        f"Setting an item of incompatible dtype is deprecated "
-                        "and will raise in a future error of pandas. "
-                        f"Value '{value}' has dtype incompatible with {dtype}, "
-                        "please explicitly cast to a compatible dtype first.",
-                        FutureWarning,
-                        stacklevel=find_stack_level(),
-                    )
+                    raise TypeError(
+                        f"Invalid value '{value}' for dtype '{dtype}'"
+                    ) from exc
                 self.obj.isetitem(loc, value)
         else:
             # set value into the column (first attempting to operate inplace, then
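To make the code comments above concrete (my own sketch, not code from the PR): the full-column branch raises only when the existing column has a real, non-object dtype that cannot hold the value; object columns are left alone because no upcasting is involved.

    import pandas as pd

    # Raises under this PR: int64 column, lossy scalar, whole-column setitem.
    df = pd.DataFrame({"a": [1, 2]})
    try:
        df.loc[:, "a"] = 0.3
    except TypeError as err:
        print(err)  # Invalid value '0.3' for dtype 'int64'

    # Not an error: the column is already object dtype, so nothing is upcast.
    obj = pd.DataFrame({"a": [1, "x"]})  # mixed values -> object dtype
    obj.loc[:, "a"] = 0.3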
41 changes: 17 additions & 24 deletions pandas/core/internals/blocks.py
@@ -428,7 +428,7 @@ def split_and_operate(self, func, *args, **kwargs) -> list[Block]:
     # Up/Down-casting

     @final
-    def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block:
+    def coerce_to_target_dtype(self, other, raise_on_upcast: bool) -> Block:
         """
         coerce the current block to a dtype compat for other
         we will return a block, possibly object, and not raise
@@ -455,25 +455,18 @@ def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block:
                 isinstance(other, (np.datetime64, np.timedelta64)) and np.isnat(other)
             )
         ):
-            warn_on_upcast = False
+            raise_on_upcast = False
         elif (
             isinstance(other, np.ndarray)
             and other.ndim == 1
             and is_integer_dtype(self.values.dtype)
             and is_float_dtype(other.dtype)
             and lib.has_only_ints_or_nan(other)
         ):
-            warn_on_upcast = False
-
-        if warn_on_upcast:
-            warnings.warn(
-                f"Setting an item of incompatible dtype is deprecated "
-                "and will raise an error in a future version of pandas. "
-                f"Value '{other}' has dtype incompatible with {self.values.dtype}, "
-                "please explicitly cast to a compatible dtype first.",
-                FutureWarning,
-                stacklevel=find_stack_level(),
-            )
+            raise_on_upcast = False

+        if raise_on_upcast:
+            raise TypeError(f"Invalid value '{other}' for dtype '{self.values.dtype}'")
         if self.values.dtype == new_dtype:
             raise AssertionError(
                 f"Did not expect new dtype {new_dtype} to equal self.dtype "
@@ -720,7 +713,7 @@ def replace(
         if value is None or value is NA:
             blk = self.astype(np.dtype(object))
         else:
-            blk = self.coerce_to_target_dtype(value)
+            blk = self.coerce_to_target_dtype(value, raise_on_upcast=False)
         return blk.replace(
             to_replace=to_replace,
             value=value,
@@ -1105,7 +1098,7 @@ def setitem(self, indexer, value) -> Block:
             casted = np_can_hold_element(values.dtype, value)
         except LossySetitemError:
             # current dtype cannot store value, coerce to common dtype
-            nb = self.coerce_to_target_dtype(value, warn_on_upcast=True)
+            nb = self.coerce_to_target_dtype(value, raise_on_upcast=True)
             return nb.setitem(indexer, value)
         else:
             if self.dtype == _dtype_obj:
@@ -1176,7 +1169,7 @@ def putmask(self, mask, new) -> list[Block]:
                 if not is_list_like(new):
                     # using just new[indexer] can't save us the need to cast
                     return self.coerce_to_target_dtype(
-                        new, warn_on_upcast=True
+                        new, raise_on_upcast=True
                     ).putmask(mask, new)
                 else:
                     indexer = mask.nonzero()[0]
@@ -1244,7 +1237,7 @@ def where(self, other, cond) -> list[Block]:
            if self.ndim == 1 or self.shape[0] == 1:
                # no need to split columns

-                block = self.coerce_to_target_dtype(other)
+                block = self.coerce_to_target_dtype(other, raise_on_upcast=False)
                return block.where(orig_other, cond)

            else:
@@ -1438,7 +1431,7 @@ def shift(self, periods: int, fill_value: Any = None) -> list[Block]:
                 fill_value,
             )
         except LossySetitemError:
-            nb = self.coerce_to_target_dtype(fill_value)
+            nb = self.coerce_to_target_dtype(fill_value, raise_on_upcast=False)
             return nb.shift(periods, fill_value=fill_value)

         else:
@@ -1637,11 +1630,11 @@ def setitem(self, indexer, value):
         except (ValueError, TypeError):
             if isinstance(self.dtype, IntervalDtype):
                 # see TestSetitemFloatIntervalWithIntIntervalValues
-                nb = self.coerce_to_target_dtype(orig_value, warn_on_upcast=True)
+                nb = self.coerce_to_target_dtype(orig_value, raise_on_upcast=True)
                 return nb.setitem(orig_indexer, orig_value)

             elif isinstance(self, NDArrayBackedExtensionBlock):
-                nb = self.coerce_to_target_dtype(orig_value, warn_on_upcast=True)
+                nb = self.coerce_to_target_dtype(orig_value, raise_on_upcast=True)
                 return nb.setitem(orig_indexer, orig_value)

             else:
@@ -1676,13 +1669,13 @@ def where(self, other, cond) -> list[Block]:
             if self.ndim == 1 or self.shape[0] == 1:
                 if isinstance(self.dtype, IntervalDtype):
                     # TestSetitemFloatIntervalWithIntIntervalValues
-                    blk = self.coerce_to_target_dtype(orig_other)
+                    blk = self.coerce_to_target_dtype(orig_other, raise_on_upcast=False)
                     return blk.where(orig_other, orig_cond)

                 elif isinstance(self, NDArrayBackedExtensionBlock):
                     # NB: not (yet) the same as
                     # isinstance(values, NDArrayBackedExtensionArray)
-                    blk = self.coerce_to_target_dtype(orig_other)
+                    blk = self.coerce_to_target_dtype(orig_other, raise_on_upcast=False)
                     return blk.where(orig_other, orig_cond)

                 else:
@@ -1737,13 +1730,13 @@ def putmask(self, mask, new) -> list[Block]:
                 if isinstance(self.dtype, IntervalDtype):
                     # Discussion about what we want to support in the general
                     # case GH#39584
-                    blk = self.coerce_to_target_dtype(orig_new, warn_on_upcast=True)
+                    blk = self.coerce_to_target_dtype(orig_new, raise_on_upcast=True)
                     return blk.putmask(orig_mask, orig_new)

                 elif isinstance(self, NDArrayBackedExtensionBlock):
                     # NB: not (yet) the same as
                     # isinstance(values, NDArrayBackedExtensionArray)
-                    blk = self.coerce_to_target_dtype(orig_new, warn_on_upcast=True)
+                    blk = self.coerce_to_target_dtype(orig_new, raise_on_upcast=True)
                     return blk.putmask(orig_mask, orig_new)

                 else:
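A note on the carve-outs at the top of coerce_to_target_dtype (a hedged sketch of my reading, not code from the PR): NA-like values reset raise_on_upcast to False, so those assignments still upcast silently, while genuinely lossy values now raise instead of warning.

    import numpy as np
    import pandas as pd

    ser = pd.Series([1, 2, 3], dtype="int64")

    # NA-like scalars hit the carve-out above, so this still upcasts
    # int64 -> float64 without an error.
    ser.iloc[0] = np.nan
    print(ser.dtype)  # float64

    # A lossy value, by contrast, now raises.
    ser2 = pd.Series([1, 2, 3], dtype="int64")
    try:
        ser2.iloc[0] = 3.5
    except TypeError as err:
        print(err)  # Invalid value '3.5' for dtype 'int64'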
26 changes: 11 additions & 15 deletions pandas/tests/copy_view/test_indexing.py
@@ -725,15 +725,13 @@ def test_column_as_series_set_with_upcast(backend):
         with pytest.raises(TypeError, match="Invalid value"):
             s[0] = "foo"
         expected = Series([1, 2, 3], name="a")
+        tm.assert_series_equal(s, expected)
+        tm.assert_frame_equal(df, df_orig)
+        # ensure cached series on getitem is not the changed series
+        tm.assert_series_equal(df["a"], df_orig["a"])
     else:
-        with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
+        with pytest.raises(TypeError, match="Invalid value"):
             s[0] = "foo"
-        expected = Series(["foo", 2, 3], dtype=object, name="a")
-
-    tm.assert_series_equal(s, expected)
-    tm.assert_frame_equal(df, df_orig)
-    # ensure cached series on getitem is not the changed series
-    tm.assert_series_equal(df["a"], df_orig["a"])


 @pytest.mark.parametrize(
@@ -805,16 +803,14 @@ def test_set_value_copy_only_necessary_column(indexer_func, indexer, val, col):
     view = df[:]

     if val == "a":
-        with tm.assert_produces_warning(
-            FutureWarning, match="Setting an item of incompatible dtype is deprecated"
-        ):
+        with pytest.raises(TypeError, match="Invalid value"):
             indexer_func(df)[indexer] = val
     else:
         indexer_func(df)[indexer] = val

-    assert np.shares_memory(get_array(df, "b"), get_array(view, "b"))
-    assert not np.shares_memory(get_array(df, "a"), get_array(view, "a"))
-    tm.assert_frame_equal(view, df_orig)
+        assert np.shares_memory(get_array(df, "b"), get_array(view, "b"))
+        assert not np.shares_memory(get_array(df, "a"), get_array(view, "a"))
+        tm.assert_frame_equal(view, df_orig)


 def test_series_midx_slice():
26 changes: 13 additions & 13 deletions pandas/tests/copy_view/test_methods.py
@@ -1105,26 +1105,26 @@ def test_putmask_aligns_rhs_no_reference(dtype):
     assert np.shares_memory(arr_a, get_array(df, "a"))


-@pytest.mark.parametrize(
-    "val, exp, warn", [(5.5, True, FutureWarning), (5, False, None)]
-)
-def test_putmask_dont_copy_some_blocks(val, exp, warn):
+@pytest.mark.parametrize("val, exp, raises", [(5.5, True, True), (5, False, False)])
+def test_putmask_dont_copy_some_blocks(val, exp, raises: bool):
     df = DataFrame({"a": [1, 2], "b": 1, "c": 1.5})
     view = df[:]
     df_orig = df.copy()
     indexer = DataFrame(
         [[True, False, False], [True, False, False]], columns=list("abc")
     )
-    with tm.assert_produces_warning(warn, match="incompatible dtype"):
+    if raises:
+        with pytest.raises(TypeError, match="Invalid value"):
+            df[indexer] = val
+    else:
         df[indexer] = val
-
-    assert not np.shares_memory(get_array(view, "a"), get_array(df, "a"))
-    # TODO(CoW): Could split blocks to avoid copying the whole block
-    assert np.shares_memory(get_array(view, "b"), get_array(df, "b")) is exp
-    assert np.shares_memory(get_array(view, "c"), get_array(df, "c"))
-    assert df._mgr._has_no_reference(1) is not exp
-    assert not df._mgr._has_no_reference(2)
-    tm.assert_frame_equal(view, df_orig)
+        assert not np.shares_memory(get_array(view, "a"), get_array(df, "a"))
+        # TODO(CoW): Could split blocks to avoid copying the whole block
+        assert np.shares_memory(get_array(view, "b"), get_array(df, "b")) is exp
+        assert np.shares_memory(get_array(view, "c"), get_array(df, "c"))
+        assert df._mgr._has_no_reference(1) is not exp
+        assert not df._mgr._has_no_reference(2)
+        tm.assert_frame_equal(view, df_orig)


 @pytest.mark.parametrize("dtype", ["int64", "Int64"])
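The re-parametrized test above maps to this user-facing behavior (my sketch, assuming a build with this PR): a masked assignment whose value fits the column dtypes still happens in place, while one that would force an upcast raises.

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2], "b": 1, "c": 1.5})
    mask = pd.DataFrame(
        [[True, False, False], [True, False, False]], columns=list("abc")
    )

    # 5 can be held by the int64 column "a", so the masked setitem succeeds.
    df[mask] = 5

    # 5.5 would require upcasting "a" to float64, so it now raises instead.
    try:
        df[mask] = 5.5
    except TypeError as err:
        print(err)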
36 changes: 6 additions & 30 deletions pandas/tests/frame/indexing/test_coercion.py
@@ -49,35 +49,19 @@ def test_loc_setitem_multiindex_columns(self, consolidate):
 def test_37477():
     # fixed by GH#45121
     orig = DataFrame({"A": [1, 2, 3], "B": [3, 4, 5]})
-    expected = DataFrame({"A": [1, 2, 3], "B": [3, 1.2, 5]})

     df = orig.copy()
-    with tm.assert_produces_warning(
-        FutureWarning, match="Setting an item of incompatible dtype"
-    ):
+    with pytest.raises(TypeError, match="Invalid value"):
         df.at[1, "B"] = 1.2
-    tm.assert_frame_equal(df, expected)

     df = orig.copy()
-    with tm.assert_produces_warning(
-        FutureWarning, match="Setting an item of incompatible dtype"
-    ):
+    with pytest.raises(TypeError, match="Invalid value"):
         df.loc[1, "B"] = 1.2
-    tm.assert_frame_equal(df, expected)

     df = orig.copy()
-    with tm.assert_produces_warning(
-        FutureWarning, match="Setting an item of incompatible dtype"
-    ):
+    with pytest.raises(TypeError, match="Invalid value"):
         df.iat[1, 1] = 1.2
-    tm.assert_frame_equal(df, expected)

     df = orig.copy()
-    with tm.assert_produces_warning(
-        FutureWarning, match="Setting an item of incompatible dtype"
-    ):
+    with pytest.raises(TypeError, match="Invalid value"):
         df.iloc[1, 1] = 1.2
-    tm.assert_frame_equal(df, expected)


 def test_6942(indexer_al):
@@ -107,19 +91,11 @@ def test_26395(indexer_al):
     expected = DataFrame({"D": [0, 0, 2]}, index=["A", "B", "C"], dtype=np.int64)
     tm.assert_frame_equal(df, expected)

-    with tm.assert_produces_warning(
-        FutureWarning, match="Setting an item of incompatible dtype"
-    ):
+    with pytest.raises(TypeError, match="Invalid value"):
         indexer_al(df)["C", "D"] = 44.5
-    expected = DataFrame({"D": [0, 0, 44.5]}, index=["A", "B", "C"], dtype=np.float64)
-    tm.assert_frame_equal(df, expected)

-    with tm.assert_produces_warning(
-        FutureWarning, match="Setting an item of incompatible dtype"
-    ):
+    with pytest.raises(TypeError, match="Invalid value"):
         indexer_al(df)["C", "D"] = "hello"
-    expected = DataFrame({"D": [0, 0, "hello"]}, index=["A", "B", "C"], dtype=object)
-    tm.assert_frame_equal(df, expected)


 @pytest.mark.xfail(reason="unwanted upcast")
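As the updated tests above show, .at, .loc, .iat and .iloc all go through the same check now; the supported way to keep the old result is an explicit cast before assigning. A reviewer sketch, not part of the diff:

    import pandas as pd

    df = pd.DataFrame({"A": [1, 2, 3], "B": [3, 4, 5]})

    try:
        df.at[1, "B"] = 1.2          # .loc / .iat / .iloc fail the same way
    except TypeError as err:
        print(err)                   # Invalid value '1.2' for dtype 'int64'

    # Cast the column explicitly first, then assign without any upcast-on-set.
    df["B"] = df["B"].astype("float64")
    df.at[1, "B"] = 1.2
    print(df["B"].tolist())          # [3.0, 1.2, 5.0]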