From fd866b77553d1a3fa09aa8af0c14a2f47e148459 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Thu, 19 Mar 2020 11:09:58 -0700
Subject: [PATCH] BUG: ExtensionBlock.set not setting values inplace

---
 pandas/core/internals/blocks.py          | 52 ++++++++++++------------
 pandas/tests/indexing/test_iloc.py       | 11 +++++
 pandas/tests/internals/test_internals.py | 17 ++++++++
 3 files changed, 55 insertions(+), 25 deletions(-)

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index adeb1ae04a58d..ce9e4d2db85fa 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -11,6 +11,7 @@
 import pandas._libs.internals as libinternals
 from pandas._libs.tslibs import Timedelta, conversion
 from pandas._libs.tslibs.timezones import tz_compare
+from pandas._typing import ArrayLike
 from pandas.util._validators import validate_bool_kwarg
 
 from pandas.core.dtypes.cast import (
@@ -349,11 +350,12 @@ def iget(self, i):
 
     def set(self, locs, values):
         """
-        Modify Block in-place with new item value
+        Modify block values in-place with new item value.
 
-        Returns
-        -------
-        None
+        Notes
+        -----
+        `set` never creates a new array or new Block, whereas `setitem` _may_
+        create a new array and always creates a new Block.
         """
         self.values[locs] = values
 
@@ -802,7 +804,7 @@ def _replace_single(self, *args, **kwargs):
 
     def setitem(self, indexer, value):
         """
-        Set the value inplace, returning a a maybe different typed block.
+        Attempt self.values[indexer] = value, possibly creating a new array.
 
         Parameters
         ----------
@@ -1644,12 +1646,15 @@ def iget(self, col):
                 raise IndexError(f"{self} only contains one item")
             return self.values
 
-    def should_store(self, value):
+    def should_store(self, value: ArrayLike) -> bool:
+        """
+        Can we set the given array-like value inplace?
+        """
         return isinstance(value, self._holder)
 
-    def set(self, locs, values, check=False):
+    def set(self, locs, values):
         assert locs.tolist() == [0]
-        self.values = values
+        self.values[:] = values
 
     def putmask(
         self, mask, new, align=True, inplace=False, axis=0, transpose=False,
@@ -1760,7 +1765,7 @@ def is_numeric(self):
 
     def setitem(self, indexer, value):
         """
-        Set the value inplace, returning a same-typed block.
+        Attempt self.values[indexer] = value, possibly creating a new array.
 
         This differs from Block.setitem by not allowing setitem to change
         the dtype of the Block.
@@ -2070,7 +2075,7 @@ def to_native_types(
         )
         return formatter.get_result_as_array()
 
-    def should_store(self, value) -> bool:
+    def should_store(self, value: ArrayLike) -> bool:
         # when inserting a column should not coerce integers to floats
         # unnecessarily
         return issubclass(value.dtype.type, np.floating) and value.dtype == self.dtype
@@ -2088,7 +2093,7 @@ def _can_hold_element(self, element: Any) -> bool:
             element, (float, int, complex, np.float_, np.int_)
         ) and not isinstance(element, (bool, np.bool_))
 
-    def should_store(self, value) -> bool:
+    def should_store(self, value: ArrayLike) -> bool:
         return issubclass(value.dtype.type, np.complexfloating)
 
 
@@ -2107,7 +2112,7 @@ def _can_hold_element(self, element: Any) -> bool:
             )
         return is_integer(element)
 
-    def should_store(self, value) -> bool:
+    def should_store(self, value: ArrayLike) -> bool:
         return is_integer_dtype(value) and value.dtype == self.dtype
 
 
@@ -2118,6 +2123,9 @@ class DatetimeLikeBlockMixin:
     def _holder(self):
         return DatetimeArray
 
+    def should_store(self, value):
+        return is_dtype_equal(self.dtype, value.dtype)
+
     @property
     def fill_value(self):
         return np.datetime64("NaT", "ns")
@@ -2254,16 +2262,9 @@ def to_native_types(
         ).reshape(i8values.shape)
         return np.atleast_2d(result)
 
-    def should_store(self, value) -> bool:
-        return is_datetime64_dtype(value.dtype)
-
     def set(self, locs, values):
         """
-        Modify Block in-place with new item value
-
-        Returns
-        -------
-        None
+        See Block.set.__doc__
         """
         values = conversion.ensure_datetime64ns(values, copy=False)
 
@@ -2287,6 +2288,7 @@ class DatetimeTZBlock(ExtensionBlock, DatetimeBlock):
     _can_hold_element = DatetimeBlock._can_hold_element
     to_native_types = DatetimeBlock.to_native_types
     fill_value = np.datetime64("NaT", "ns")
+    should_store = DatetimeBlock.should_store
 
     @property
     def _holder(self):
@@ -2496,9 +2498,6 @@ def fillna(self, value, **kwargs):
             )
         return super().fillna(value, **kwargs)
 
-    def should_store(self, value) -> bool:
-        return is_timedelta64_dtype(value.dtype)
-
     def to_native_types(self, slicer=None, na_rep=None, quoting=None, **kwargs):
         """ convert to our native types format, slicing if desired """
         values = self.values
@@ -2540,7 +2539,7 @@ def _can_hold_element(self, element: Any) -> bool:
             return issubclass(tipo.type, np.bool_)
         return isinstance(element, (bool, np.bool_))
 
-    def should_store(self, value) -> bool:
+    def should_store(self, value: ArrayLike) -> bool:
         return issubclass(value.dtype.type, np.bool_) and not is_extension_array_dtype(
             value
         )
@@ -2632,7 +2631,7 @@ def _maybe_downcast(self, blocks: List["Block"], downcast=None) -> List["Block"]
     def _can_hold_element(self, element: Any) -> bool:
         return True
 
-    def should_store(self, value) -> bool:
+    def should_store(self, value: ArrayLike) -> bool:
         return not (
             issubclass(
                 value.dtype.type,
@@ -2881,6 +2880,9 @@ def __init__(self, values, placement, ndim=None):
     def _holder(self):
         return Categorical
 
+    def should_store(self, arr: ArrayLike):
+        return isinstance(arr, self._holder) and is_dtype_equal(self.dtype, arr.dtype)
+
     def to_native_types(self, slicer=None, na_rep="", quoting=None, **kwargs):
         """ convert to our native types format, slicing if desired """
         values = self.values
diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py
index f6b9e9a44ba14..9664f8d7212ad 100644
--- a/pandas/tests/indexing/test_iloc.py
+++ b/pandas/tests/indexing/test_iloc.py
@@ -694,6 +694,17 @@ def test_series_indexing_zerodim_np_array(self):
         result = s.iloc[np.array(0)]
         assert result == 1
 
+    def test_iloc_setitem_categorical_updates_inplace(self):
+        # Mixed dtype ensures we go through take_split_path in setitem_with_indexer
+        cat = pd.Categorical(["A", "B", "C"])
+        df = pd.DataFrame({1: cat, 2: [1, 2, 3]})
+
+        # This should modify our original values in-place
+        df.iloc[:, 0] = cat[::-1]
+
+        expected = pd.Categorical(["C", "B", "A"])
+        tm.assert_categorical_equal(cat, expected)
+
 
 class TestILocSetItemDuplicateColumns:
     def test_iloc_setitem_scalar_duplicate_columns(self):
diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py
index 1a7d5839d9a11..b97bca9bbdd4f 100644
--- a/pandas/tests/internals/test_internals.py
+++ b/pandas/tests/internals/test_internals.py
@@ -1154,6 +1154,23 @@ def test_binop_other(self, op, value, dtype):
             tm.assert_series_equal(result, expected)
 
 
+class TestShouldStore:
+    def test_should_store_categorical(self):
+        cat = pd.Categorical(["A", "B", "C"])
+        df = pd.DataFrame(cat)
+        blk = df._data.blocks[0]
+
+        # matching dtype
+        assert blk.should_store(cat)
+        assert blk.should_store(cat[:-1])
+
+        # different dtype
+        assert not blk.should_store(cat.as_ordered())
+
+        # ndarray instead of Categorical
+        assert not blk.should_store(np.asarray(cat))
+
+
 @pytest.mark.parametrize(
     "typestr, holder",
     [