Skip to content

Commit

Permalink
Fix an issue in setitem when RangeIndex is present and index does…
Browse files Browse the repository at this point in the history
…n't exist (#58)

Fixes: #13860

This PR fixes an issue in Series.setitem where if the index is a RangeIndex, we are corrupting the series data & index object by not properly handling RangeIndex separately and not making a copy.

Co-authored-by: Bradley Dice <bdice@bradleydice.com>
  • Loading branch information
galipremsagar and bdice committed Oct 11, 2023
1 parent cb6abc5 commit 66fdbe7
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 1 deletion.
23 changes: 22 additions & 1 deletion python/cudf/cudf/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,28 @@ def __setitem__(self, key, value):
and not isinstance(self._frame.index, cudf.MultiIndex)
and is_scalar(value)
):
_append_new_row_inplace(self._frame.index._values, key)
# TODO: Modifying index in place is bad because
# our index are immutable, but columns are not (which
# means our index are mutable with internal APIs).
# Get rid of the deep copy once columns too are
# immutable.
idx_copy = self._frame._index.copy(deep=True)
if (
isinstance(idx_copy, cudf.RangeIndex)
and isinstance(key, int)
and (key == idx_copy[-1] + idx_copy.step)
):
idx_copy = cudf.RangeIndex(
start=idx_copy.start,
stop=idx_copy.stop + idx_copy.step,
step=idx_copy.step,
name=idx_copy.name,
)
else:
if isinstance(idx_copy, cudf.RangeIndex):
idx_copy = idx_copy._as_int_index()
_append_new_row_inplace(idx_copy._values, key)
self._frame._index = idx_copy
_append_new_row_inplace(self._frame._column, value)
return
else:
Expand Down
41 changes: 41 additions & 0 deletions python/cudf/cudf/tests/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,3 +463,44 @@ def test_loc_setitem_series_index_alignment_13031(other_index):
cs.loc[["1", "3"]] = cother

assert_eq(s, cs)


@pytest.mark.parametrize(
"ps",
[
pd.Series([1, 2, 3], index=pd.RangeIndex(0, 3)),
pd.Series([1, 2, 3], index=pd.RangeIndex(start=2, stop=-1, step=-1)),
pd.Series([1, 2, 3], index=pd.RangeIndex(start=1, stop=6, step=2)),
pd.Series(
[1, 2, 3, 4, 5], index=pd.RangeIndex(start=1, stop=-9, step=-2)
),
pd.Series(
[1, 2, 3, 4, 5], index=pd.RangeIndex(start=1, stop=-12, step=-3)
),
pd.Series([1, 2, 3, 4], index=pd.RangeIndex(start=1, stop=14, step=4)),
pd.Series(
[1, 2, 3, 4], index=pd.RangeIndex(start=1, stop=-14, step=-4)
),
],
)
@pytest.mark.parametrize("arg", list(range(-20, 20)) + [5.6, 3.1])
def test_series_set_item_range_index(ps, arg):
gsr = cudf.from_pandas(ps)
psr = ps.copy(deep=True)
psr[arg] = 11
gsr[arg] = 11

assert_eq(psr, gsr, check_index_type=True)


def test_series_set_item_index_reference():
gs1 = cudf.Series([1], index=[7])
gs2 = cudf.Series([2], index=gs1.index)
gs1.loc[11] = 2

ps1 = pd.Series([1], index=[7])
ps2 = pd.Series([2], index=ps1.index)
ps1.loc[11] = 2

assert_eq(ps1, gs1)
assert_eq(ps2, gs2)

0 comments on commit 66fdbe7

Please sign in to comment.