Skip to content

Commit

Permalink
Backport PR #51659 on branch 2.0.x (ENH: Improve replace lazy copy fo…
Browse files Browse the repository at this point in the history
…r categoricals) (#51699)

Backport PR #51659: ENH: Improve replace lazy copy for categoricals

Co-authored-by: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
  • Loading branch information
meeseeksmachine and phofl committed Mar 1, 2023
1 parent c047d9c commit 3c5fb2e
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 2 deletions.
3 changes: 1 addition & 2 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -701,8 +701,7 @@ def replace_list(
# TODO: avoid special-casing
# GH49404
if using_cow and inplace:
# TODO(CoW): Optimize
blk = self.copy()
blk = self.copy(deep=self.refs.has_reference())
else:
blk = self if inplace else self.copy()
values = cast(Categorical, blk.values)
Expand Down
32 changes: 32 additions & 0 deletions pandas/tests/copy_view/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,38 @@ def test_replace_to_replace_wrong_dtype(using_copy_on_write):
assert not np.shares_memory(get_array(df, "b"), get_array(df2, "b"))


def test_replace_list_categorical(using_copy_on_write):
    """Check buffer sharing for list-like ``replace`` on a categorical column.

    An inplace replace on a frame with no other references is expected to
    keep using the original codes buffer, whereas a non-inplace replace must
    hand back new codes and leave the source frame unchanged.
    """
    df = DataFrame({"a": ["a", "b", "c"]}, dtype="category")
    original_values = get_array(df, "a")

    # Inplace, nothing else references the data: codes stay shared.
    df.replace(["c"], value="a", inplace=True)
    codes_after_inplace = get_array(df, "a").codes
    assert np.shares_memory(original_values.codes, codes_after_inplace)
    if using_copy_on_write:
        assert df._mgr._has_no_reference(0)

    # Non-inplace: the result must not alias the original codes, and the
    # frame we called replace on must be untouched.
    df_orig = df.copy()
    replaced = df.replace(["b"], value="a")
    codes_after_copy = get_array(replaced, "a").codes
    assert not np.shares_memory(original_values.codes, codes_after_copy)

    tm.assert_frame_equal(df, df_orig)


def test_replace_list_inplace_refs_categorical(using_copy_on_write):
    """Check inplace list-like ``replace`` on a categorical with a live view.

    While ``view`` still references the frame's data, the replaced frame is
    expected not to share its codes with the view. Under copy-on-write the
    view must additionally keep the pre-replace contents.
    """
    df = DataFrame({"a": ["a", "b", "c"]}, dtype="category")
    view = df[:]
    df_orig = df.copy()

    df.replace(["c"], value="a", inplace=True)

    # The same no-sharing check applies in both modes; without
    # copy-on-write this could be inplace, but currently is not.
    view_codes = get_array(view, "a").codes
    frame_codes = get_array(df, "a").codes
    assert not np.shares_memory(view_codes, frame_codes)

    if using_copy_on_write:
        tm.assert_frame_equal(df_orig, view)


@pytest.mark.parametrize("to_replace", [1.5, [1.5], []])
def test_replace_inplace(using_copy_on_write, to_replace):
df = DataFrame({"a": [1.5, 2, 3]})
Expand Down

0 comments on commit 3c5fb2e

Please sign in to comment.