Skip to content

Commit

Permalink
Bug in iloc aligned objects (#37728)
Browse files Browse the repository at this point in the history
  • Loading branch information
phofl committed Nov 19, 2020
1 parent 708b7b6 commit 3d259e6
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 35 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -608,6 +608,7 @@ Indexing
- Bug in :meth:`DataFrame.xs` ignored ``droplevel=False`` for columns (:issue:`19056`)
- Bug in :meth:`DataFrame.reindex` raising ``IndexingError`` wrongly for empty :class:`DataFrame` with ``tolerance`` not None or ``method="nearest"`` (:issue:`27315`)
- Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`CategoricalIndex` using listlike indexer that contains elements that are in the index's ``categories`` but not in the index itself failing to raise ``KeyError`` (:issue:`37901`)
- Bug in :meth:`DataFrame.iloc` and :meth:`Series.iloc` aligning objects in ``__setitem__`` (:issue:`22046`)

Missing
^^^^^^^
Expand Down
35 changes: 20 additions & 15 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -681,7 +681,7 @@ def __setitem__(self, key, value):
self._has_valid_setitem_indexer(key)

iloc = self if self.name == "iloc" else self.obj.iloc
iloc._setitem_with_indexer(indexer, value)
iloc._setitem_with_indexer(indexer, value, self.name)

def _validate_key(self, key, axis: int):
"""
Expand Down Expand Up @@ -1517,7 +1517,7 @@ def _get_setitem_indexer(self, key):

# -------------------------------------------------------------------

def _setitem_with_indexer(self, indexer, value):
def _setitem_with_indexer(self, indexer, value, name="iloc"):
"""
_setitem_with_indexer is for setting values on a Series/DataFrame
using positional indexers.
Expand Down Expand Up @@ -1593,7 +1593,7 @@ def _setitem_with_indexer(self, indexer, value):
new_indexer = convert_from_missing_indexer_tuple(
indexer, self.obj.axes
)
self._setitem_with_indexer(new_indexer, value)
self._setitem_with_indexer(new_indexer, value, name)

return

Expand Down Expand Up @@ -1624,11 +1624,11 @@ def _setitem_with_indexer(self, indexer, value):
# align and set the values
if take_split_path:
# We have to operate column-wise
self._setitem_with_indexer_split_path(indexer, value)
self._setitem_with_indexer_split_path(indexer, value, name)
else:
self._setitem_single_block(indexer, value)
self._setitem_single_block(indexer, value, name)

def _setitem_with_indexer_split_path(self, indexer, value):
def _setitem_with_indexer_split_path(self, indexer, value, name: str):
"""
Setitem column-wise.
"""
Expand All @@ -1642,7 +1642,7 @@ def _setitem_with_indexer_split_path(self, indexer, value):
if isinstance(indexer[0], np.ndarray) and indexer[0].ndim > 2:
raise ValueError(r"Cannot set values with ndim > 2")

if isinstance(value, ABCSeries):
if isinstance(value, ABCSeries) and name != "iloc":
value = self._align_series(indexer, value)

# Ensure we have something we can iterate over
Expand All @@ -1657,7 +1657,7 @@ def _setitem_with_indexer_split_path(self, indexer, value):
if is_list_like_indexer(value) and getattr(value, "ndim", 1) > 0:

if isinstance(value, ABCDataFrame):
self._setitem_with_indexer_frame_value(indexer, value)
self._setitem_with_indexer_frame_value(indexer, value, name)

elif np.ndim(value) == 2:
self._setitem_with_indexer_2d_value(indexer, value)
Expand Down Expand Up @@ -1714,7 +1714,7 @@ def _setitem_with_indexer_2d_value(self, indexer, value):
# setting with a list, re-coerces
self._setitem_single_column(loc, value[:, i].tolist(), pi)

def _setitem_with_indexer_frame_value(self, indexer, value: "DataFrame"):
def _setitem_with_indexer_frame_value(self, indexer, value: "DataFrame", name: str):
ilocs = self._ensure_iterable_column_indexer(indexer[1])

sub_indexer = list(indexer)
Expand All @@ -1724,7 +1724,13 @@ def _setitem_with_indexer_frame_value(self, indexer, value: "DataFrame"):

unique_cols = value.columns.is_unique

if not unique_cols and value.columns.equals(self.obj.columns):
# We do not want to align the value in case of iloc GH#37728
if name == "iloc":
for i, loc in enumerate(ilocs):
val = value.iloc[:, i]
self._setitem_single_column(loc, val, pi)

elif not unique_cols and value.columns.equals(self.obj.columns):
# We assume we are already aligned, see
# test_iloc_setitem_frame_duplicate_columns_multiple_blocks
for loc in ilocs:
Expand Down Expand Up @@ -1787,7 +1793,7 @@ def _setitem_single_column(self, loc: int, value, plane_indexer):
# reset the sliced object if unique
self.obj._iset_item(loc, ser)

def _setitem_single_block(self, indexer, value):
def _setitem_single_block(self, indexer, value, name: str):
"""
_setitem_with_indexer for the case when we have a single Block.
"""
Expand Down Expand Up @@ -1815,14 +1821,13 @@ def _setitem_single_block(self, indexer, value):
return

indexer = maybe_convert_ix(*indexer)

if isinstance(value, (ABCSeries, dict)):
if isinstance(value, ABCSeries) and name != "iloc" or isinstance(value, dict):
# TODO(EA): ExtensionBlock.setitem this causes issues with
# setting for extensionarrays that store dicts. Need to decide
# if it's worth supporting that.
value = self._align_series(indexer, Series(value))

elif isinstance(value, ABCDataFrame):
elif isinstance(value, ABCDataFrame) and name != "iloc":
value = self._align_frame(indexer, value)

# check for chained assignment
Expand Down Expand Up @@ -1854,7 +1859,7 @@ def _setitem_with_indexer_missing(self, indexer, value):
if index.is_unique:
new_indexer = index.get_indexer([new_index[-1]])
if (new_indexer != -1).any():
return self._setitem_with_indexer(new_indexer, value)
return self._setitem_with_indexer(new_indexer, value, "loc")

# this preserves dtype of the value
new_values = Series([value])._values
Expand Down
9 changes: 0 additions & 9 deletions pandas/tests/frame/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,15 +289,6 @@ def test_setitem_periodindex(self):
assert isinstance(rs.index, PeriodIndex)
tm.assert_index_equal(rs.index, rng)

@pytest.mark.parametrize("klass", [list, np.array])
def test_iloc_setitem_bool_indexer(self, klass):
# GH: 36741
df = DataFrame({"flag": ["x", "y", "z"], "value": [1, 3, 4]})
indexer = klass([True, False, False])
df.iloc[indexer, 1] = df.iloc[indexer, 1] * 2
expected = DataFrame({"flag": ["x", "y", "z"], "value": [2, 3, 4]})
tm.assert_frame_equal(df, expected)


class TestDataFrameSetItemSlicing:
def test_setitem_slice_position(self):
Expand Down
41 changes: 41 additions & 0 deletions pandas/tests/indexing/test_iloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -801,6 +801,39 @@ def test_iloc_setitem_empty_frame_raises_with_3d_ndarray(self):
with pytest.raises(ValueError, match=msg):
obj.iloc[nd3] = 0

def test_iloc_assign_series_to_df_cell(self):
# GH 37593
df = DataFrame(columns=["a"], index=[0])
df.iloc[0, 0] = Series([1, 2, 3])
expected = DataFrame({"a": [Series([1, 2, 3])]}, columns=["a"], index=[0])
tm.assert_frame_equal(df, expected)

@pytest.mark.parametrize("klass", [list, np.array])
def test_iloc_setitem_bool_indexer(self, klass):
# GH#36741
df = DataFrame({"flag": ["x", "y", "z"], "value": [1, 3, 4]})
indexer = klass([True, False, False])
df.iloc[indexer, 1] = df.iloc[indexer, 1] * 2
expected = DataFrame({"flag": ["x", "y", "z"], "value": [2, 3, 4]})
tm.assert_frame_equal(df, expected)

@pytest.mark.parametrize("indexer", [[1], slice(1, 2)])
def test_setitem_iloc_pure_position_based(self, indexer):
# GH#22046
df1 = DataFrame({"a2": [11, 12, 13], "b2": [14, 15, 16]})
df2 = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
df2.iloc[:, indexer] = df1.iloc[:, [0]]
expected = DataFrame({"a": [1, 2, 3], "b": [11, 12, 13], "c": [7, 8, 9]})
tm.assert_frame_equal(df2, expected)

def test_setitem_iloc_dictionary_value(self):
# GH#37728
df = DataFrame({"x": [1, 2], "y": [2, 2]})
rhs = dict(x=9, y=99)
df.iloc[1] = rhs
expected = DataFrame({"x": [1, 9], "y": [2, 99]})
tm.assert_frame_equal(df, expected)


class TestILocErrors:
# NB: this test should work for _any_ Series we can pass as
Expand Down Expand Up @@ -966,3 +999,11 @@ def test_iloc(self):
def test_iloc_getitem_nonunique(self):
ser = Series([0, 1, 2], index=[0, 1, 0])
assert ser.iloc[2] == 2

def test_setitem_iloc_pure_position_based(self):
# GH#22046
ser1 = Series([1, 2, 3])
ser2 = Series([4, 5, 6], index=[1, 0, 2])
ser1.iloc[1:3] = ser2.iloc[1:3]
expected = Series([1, 5, 6])
tm.assert_series_equal(ser1, expected)
27 changes: 16 additions & 11 deletions pandas/tests/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -668,43 +668,48 @@ def test_float_index_at_iat(self):
def test_rhs_alignment(self):
# GH8258, tests that both rows & columns are aligned to what is
# assigned to. covers both uniform data-type & multi-type cases
def run_tests(df, rhs, right):
def run_tests(df, rhs, right_loc, right_iloc):
# label, index, slice
lbl_one, idx_one, slice_one = list("bcd"), [1, 2, 3], slice(1, 4)
lbl_two, idx_two, slice_two = ["joe", "jolie"], [1, 2], slice(1, 3)

left = df.copy()
left.loc[lbl_one, lbl_two] = rhs
tm.assert_frame_equal(left, right)
tm.assert_frame_equal(left, right_loc)

left = df.copy()
left.iloc[idx_one, idx_two] = rhs
tm.assert_frame_equal(left, right)
tm.assert_frame_equal(left, right_iloc)

left = df.copy()
left.iloc[slice_one, slice_two] = rhs
tm.assert_frame_equal(left, right)
tm.assert_frame_equal(left, right_iloc)

xs = np.arange(20).reshape(5, 4)
cols = ["jim", "joe", "jolie", "joline"]
df = DataFrame(xs, columns=cols, index=list("abcde"))
df = DataFrame(xs, columns=cols, index=list("abcde"), dtype="int64")

# right hand side; permute the indices and multiplpy by -2
rhs = -2 * df.iloc[3:0:-1, 2:0:-1]

# expected `right` result; just multiply by -2
right = df.copy()
right.iloc[1:4, 1:3] *= -2
right_iloc = df.copy()
right_iloc["joe"] = [1, 14, 10, 6, 17]
right_iloc["jolie"] = [2, 13, 9, 5, 18]
right_iloc.iloc[1:4, 1:3] *= -2
right_loc = df.copy()
right_loc.iloc[1:4, 1:3] *= -2

# run tests with uniform dtypes
run_tests(df, rhs, right)
run_tests(df, rhs, right_loc, right_iloc)

# make frames multi-type & re-run tests
for frame in [df, rhs, right]:
for frame in [df, rhs, right_loc, right_iloc]:
frame["joe"] = frame["joe"].astype("float64")
frame["jolie"] = frame["jolie"].map("@{}".format)

run_tests(df, rhs, right)
right_iloc["joe"] = [1.0, "@-28", "@-20", "@-12", 17.0]
right_iloc["jolie"] = ["@2", -26.0, -18.0, -10.0, "@18"]
run_tests(df, rhs, right_loc, right_iloc)

def test_str_label_slicing_with_negative_step(self):
SLC = pd.IndexSlice
Expand Down

0 comments on commit 3d259e6

Please sign in to comment.