Bug in iloc aligned objects (#37728)

pandas-dev · Nov 19, 2020 · 3d259e6 · 3d259e6
1 parent 708b7b6
commit 3d259e6
Show file tree

Hide file tree

Showing 5 changed files with 78 additions and 35 deletions.
diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
@@ -608,6 +608,7 @@ Indexing
 - Bug in :meth:`DataFrame.xs` ignored ``droplevel=False`` for columns (:issue:`19056`)
 - Bug in :meth:`DataFrame.reindex` raising ``IndexingError`` wrongly for empty :class:`DataFrame` with ``tolerance`` not None or ``method="nearest"`` (:issue:`27315`)
 - Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`CategoricalIndex` using listlike indexer that contains elements that are in the index's ``categories`` but not in the index itself failing to raise ``KeyError`` (:issue:`37901`)
+- Bug in :meth:`DataFrame.iloc` and :meth:`Series.iloc` aligning objects in ``__setitem__`` (:issue:`22046`)
 
 Missing
 ^^^^^^^

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -681,7 +681,7 @@ def __setitem__(self, key, value):
         self._has_valid_setitem_indexer(key)
 
         iloc = self if self.name == "iloc" else self.obj.iloc
-        iloc._setitem_with_indexer(indexer, value)
+        iloc._setitem_with_indexer(indexer, value, self.name)
 
     def _validate_key(self, key, axis: int):
         """
@@ -1517,7 +1517,7 @@ def _get_setitem_indexer(self, key):
 
     # -------------------------------------------------------------------
 
-    def _setitem_with_indexer(self, indexer, value):
+    def _setitem_with_indexer(self, indexer, value, name="iloc"):
         """
         _setitem_with_indexer is for setting values on a Series/DataFrame
         using positional indexers.
@@ -1593,7 +1593,7 @@ def _setitem_with_indexer(self, indexer, value):
                         new_indexer = convert_from_missing_indexer_tuple(
                             indexer, self.obj.axes
                         )
-                        self._setitem_with_indexer(new_indexer, value)
+                        self._setitem_with_indexer(new_indexer, value, name)
 
                         return
 
@@ -1624,11 +1624,11 @@ def _setitem_with_indexer(self, indexer, value):
         # align and set the values
         if take_split_path:
             # We have to operate column-wise
-            self._setitem_with_indexer_split_path(indexer, value)
+            self._setitem_with_indexer_split_path(indexer, value, name)
         else:
-            self._setitem_single_block(indexer, value)
+            self._setitem_single_block(indexer, value, name)
 
-    def _setitem_with_indexer_split_path(self, indexer, value):
+    def _setitem_with_indexer_split_path(self, indexer, value, name: str):
         """
         Setitem column-wise.
         """
@@ -1642,7 +1642,7 @@ def _setitem_with_indexer_split_path(self, indexer, value):
         if isinstance(indexer[0], np.ndarray) and indexer[0].ndim > 2:
             raise ValueError(r"Cannot set values with ndim > 2")
 
-        if isinstance(value, ABCSeries):
+        if isinstance(value, ABCSeries) and name != "iloc":
             value = self._align_series(indexer, value)
 
         # Ensure we have something we can iterate over
@@ -1657,7 +1657,7 @@ def _setitem_with_indexer_split_path(self, indexer, value):
         if is_list_like_indexer(value) and getattr(value, "ndim", 1) > 0:
 
             if isinstance(value, ABCDataFrame):
-                self._setitem_with_indexer_frame_value(indexer, value)
+                self._setitem_with_indexer_frame_value(indexer, value, name)
 
             elif np.ndim(value) == 2:
                 self._setitem_with_indexer_2d_value(indexer, value)
@@ -1714,7 +1714,7 @@ def _setitem_with_indexer_2d_value(self, indexer, value):
             # setting with a list, re-coerces
             self._setitem_single_column(loc, value[:, i].tolist(), pi)
 
-    def _setitem_with_indexer_frame_value(self, indexer, value: "DataFrame"):
+    def _setitem_with_indexer_frame_value(self, indexer, value: "DataFrame", name: str):
         ilocs = self._ensure_iterable_column_indexer(indexer[1])
 
         sub_indexer = list(indexer)
@@ -1724,7 +1724,13 @@ def _setitem_with_indexer_frame_value(self, indexer, value: "DataFrame"):
 
         unique_cols = value.columns.is_unique
 
-        if not unique_cols and value.columns.equals(self.obj.columns):
+        # We do not want to align the value in case of iloc GH#37728
+        if name == "iloc":
+            for i, loc in enumerate(ilocs):
+                val = value.iloc[:, i]
+                self._setitem_single_column(loc, val, pi)
+
+        elif not unique_cols and value.columns.equals(self.obj.columns):
             # We assume we are already aligned, see
             # test_iloc_setitem_frame_duplicate_columns_multiple_blocks
             for loc in ilocs:
@@ -1787,7 +1793,7 @@ def _setitem_single_column(self, loc: int, value, plane_indexer):
         # reset the sliced object if unique
         self.obj._iset_item(loc, ser)
 
-    def _setitem_single_block(self, indexer, value):
+    def _setitem_single_block(self, indexer, value, name: str):
         """
         _setitem_with_indexer for the case when we have a single Block.
         """
@@ -1815,14 +1821,13 @@ def _setitem_single_block(self, indexer, value):
                 return
 
             indexer = maybe_convert_ix(*indexer)
-
-        if isinstance(value, (ABCSeries, dict)):
+        if isinstance(value, ABCSeries) and name != "iloc" or isinstance(value, dict):
             # TODO(EA): ExtensionBlock.setitem this causes issues with
             # setting for extensionarrays that store dicts. Need to decide
             # if it's worth supporting that.
             value = self._align_series(indexer, Series(value))
 
-        elif isinstance(value, ABCDataFrame):
+        elif isinstance(value, ABCDataFrame) and name != "iloc":
             value = self._align_frame(indexer, value)
 
         # check for chained assignment
@@ -1854,7 +1859,7 @@ def _setitem_with_indexer_missing(self, indexer, value):
             if index.is_unique:
                 new_indexer = index.get_indexer([new_index[-1]])
                 if (new_indexer != -1).any():
-                    return self._setitem_with_indexer(new_indexer, value)
+                    return self._setitem_with_indexer(new_indexer, value, "loc")
 
             # this preserves dtype of the value
             new_values = Series([value])._values

diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py
@@ -289,15 +289,6 @@ def test_setitem_periodindex(self):
         assert isinstance(rs.index, PeriodIndex)
         tm.assert_index_equal(rs.index, rng)
 
-    @pytest.mark.parametrize("klass", [list, np.array])
-    def test_iloc_setitem_bool_indexer(self, klass):
-        # GH: 36741
-        df = DataFrame({"flag": ["x", "y", "z"], "value": [1, 3, 4]})
-        indexer = klass([True, False, False])
-        df.iloc[indexer, 1] = df.iloc[indexer, 1] * 2
-        expected = DataFrame({"flag": ["x", "y", "z"], "value": [2, 3, 4]})
-        tm.assert_frame_equal(df, expected)
-
 
 class TestDataFrameSetItemSlicing:
     def test_setitem_slice_position(self):

diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py
@@ -801,6 +801,39 @@ def test_iloc_setitem_empty_frame_raises_with_3d_ndarray(self):
         with pytest.raises(ValueError, match=msg):
             obj.iloc[nd3] = 0
 
+    def test_iloc_assign_series_to_df_cell(self):
+        # GH 37593
+        df = DataFrame(columns=["a"], index=[0])
+        df.iloc[0, 0] = Series([1, 2, 3])
+        expected = DataFrame({"a": [Series([1, 2, 3])]}, columns=["a"], index=[0])
+        tm.assert_frame_equal(df, expected)
+
+    @pytest.mark.parametrize("klass", [list, np.array])
+    def test_iloc_setitem_bool_indexer(self, klass):
+        # GH#36741
+        df = DataFrame({"flag": ["x", "y", "z"], "value": [1, 3, 4]})
+        indexer = klass([True, False, False])
+        df.iloc[indexer, 1] = df.iloc[indexer, 1] * 2
+        expected = DataFrame({"flag": ["x", "y", "z"], "value": [2, 3, 4]})
+        tm.assert_frame_equal(df, expected)
+
+    @pytest.mark.parametrize("indexer", [[1], slice(1, 2)])
+    def test_setitem_iloc_pure_position_based(self, indexer):
+        # GH#22046
+        df1 = DataFrame({"a2": [11, 12, 13], "b2": [14, 15, 16]})
+        df2 = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
+        df2.iloc[:, indexer] = df1.iloc[:, [0]]
+        expected = DataFrame({"a": [1, 2, 3], "b": [11, 12, 13], "c": [7, 8, 9]})
+        tm.assert_frame_equal(df2, expected)
+
+    def test_setitem_iloc_dictionary_value(self):
+        # GH#37728
+        df = DataFrame({"x": [1, 2], "y": [2, 2]})
+        rhs = dict(x=9, y=99)
+        df.iloc[1] = rhs
+        expected = DataFrame({"x": [1, 9], "y": [2, 99]})
+        tm.assert_frame_equal(df, expected)
+
 
 class TestILocErrors:
     # NB: this test should work for _any_ Series we can pass as
@@ -966,3 +999,11 @@ def test_iloc(self):
     def test_iloc_getitem_nonunique(self):
         ser = Series([0, 1, 2], index=[0, 1, 0])
         assert ser.iloc[2] == 2
+
+    def test_setitem_iloc_pure_position_based(self):
+        # GH#22046
+        ser1 = Series([1, 2, 3])
+        ser2 = Series([4, 5, 6], index=[1, 0, 2])
+        ser1.iloc[1:3] = ser2.iloc[1:3]
+        expected = Series([1, 5, 6])
+        tm.assert_series_equal(ser1, expected)
diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py
@@ -668,43 +668,48 @@ def test_float_index_at_iat(self):
     def test_rhs_alignment(self):
         # GH8258, tests that both rows & columns are aligned to what is
         # assigned to. covers both uniform data-type & multi-type cases
-        def run_tests(df, rhs, right):
+        def run_tests(df, rhs, right_loc, right_iloc):
             # label, index, slice
             lbl_one, idx_one, slice_one = list("bcd"), [1, 2, 3], slice(1, 4)
             lbl_two, idx_two, slice_two = ["joe", "jolie"], [1, 2], slice(1, 3)
 
             left = df.copy()
             left.loc[lbl_one, lbl_two] = rhs
-            tm.assert_frame_equal(left, right)
+            tm.assert_frame_equal(left, right_loc)
 
             left = df.copy()
             left.iloc[idx_one, idx_two] = rhs
-            tm.assert_frame_equal(left, right)
+            tm.assert_frame_equal(left, right_iloc)
 
             left = df.copy()
             left.iloc[slice_one, slice_two] = rhs
-            tm.assert_frame_equal(left, right)
+            tm.assert_frame_equal(left, right_iloc)
 
         xs = np.arange(20).reshape(5, 4)
         cols = ["jim", "joe", "jolie", "joline"]
-        df = DataFrame(xs, columns=cols, index=list("abcde"))
+        df = DataFrame(xs, columns=cols, index=list("abcde"), dtype="int64")
 
         # right hand side; permute the indices and multiplpy by -2
         rhs = -2 * df.iloc[3:0:-1, 2:0:-1]
 
         # expected `right` result; just multiply by -2
-        right = df.copy()
-        right.iloc[1:4, 1:3] *= -2
+        right_iloc = df.copy()
+        right_iloc["joe"] = [1, 14, 10, 6, 17]
+        right_iloc["jolie"] = [2, 13, 9, 5, 18]
+        right_iloc.iloc[1:4, 1:3] *= -2
+        right_loc = df.copy()
+        right_loc.iloc[1:4, 1:3] *= -2
 
         # run tests with uniform dtypes
-        run_tests(df, rhs, right)
+        run_tests(df, rhs, right_loc, right_iloc)
 
         # make frames multi-type & re-run tests
-        for frame in [df, rhs, right]:
+        for frame in [df, rhs, right_loc, right_iloc]:
             frame["joe"] = frame["joe"].astype("float64")
             frame["jolie"] = frame["jolie"].map("@{}".format)
-
-        run_tests(df, rhs, right)
+        right_iloc["joe"] = [1.0, "@-28", "@-20", "@-12", 17.0]
+        right_iloc["jolie"] = ["@2", -26.0, -18.0, -10.0, "@18"]
+        run_tests(df, rhs, right_loc, right_iloc)
 
     def test_str_label_slicing_with_negative_step(self):
         SLC = pd.IndexSlice