Skip to content

Commit

Permalink
Backport PR #51430 on branch 2.0.x (BUG: transpose not respecting CoW) (#51564)
Browse files Browse the repository at this point in the history

Backport PR #51430: BUG: transpose not respecting CoW

Co-authored-by: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
  • Loading branch information
meeseeksmachine and phofl committed Feb 23, 2023
1 parent 1070d3a commit 94cfc4a
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 2 deletions.
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,8 @@ Copy-on-Write improvements
- :meth:`DataFrame.replace` will now respect the Copy-on-Write mechanism
when ``inplace=True``.

- :meth:`DataFrame.transpose` will now respect the Copy-on-Write mechanism.

- Arithmetic operations that can be inplace, e.g. ``ser *= 2`` will now respect the
Copy-on-Write mechanism.

Expand Down
8 changes: 6 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3545,10 +3545,14 @@ def transpose(self, *args, copy: bool = False) -> DataFrame:
if self._can_fast_transpose:
# Note: tests pass without this, but this improves perf quite a bit.
new_vals = self._values.T
if copy:
if copy and not using_copy_on_write():
new_vals = new_vals.copy()

result = self._constructor(new_vals, index=self.columns, columns=self.index)
result = self._constructor(
new_vals, index=self.columns, columns=self.index, copy=False
)
if using_copy_on_write() and len(self) > 0:
result._mgr.add_references(self._mgr) # type: ignore[arg-type]

elif (
self._is_homogeneous_type and dtypes and is_extension_array_dtype(dtypes[0])
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,9 @@ def add_references(self, mgr: BaseBlockManager) -> None:
Adds the references from one manager to another. We assume that both
managers have the same block structure.
"""
if len(self.blocks) != len(mgr.blocks):
# If block structure changes, then we made a copy
return
for i, blk in enumerate(self.blocks):
blk.refs = mgr.blocks[i].refs
# Argument 1 to "add_reference" of "BlockValuesRefs" has incompatible type
Expand Down
34 changes: 34 additions & 0 deletions pandas/tests/copy_view/test_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -1593,3 +1593,37 @@ def test_inplace_arithmetic_series_with_reference(using_copy_on_write):
tm.assert_series_equal(ser_orig, view)
else:
assert np.shares_memory(get_array(ser), get_array(view))


@pytest.mark.parametrize("copy", [True, False])
def test_transpose(using_copy_on_write, copy, using_array_manager):
    # Check whether the transpose of an all-integer frame shares memory with
    # its parent, and that writing to the transpose leaves the parent
    # unchanged when Copy-on-Write is enabled.
    frame = DataFrame({"a": [1, 2, 3], "b": 1})
    expected = frame.copy()
    transposed = frame.transpose(copy=copy)

    # Memory is expected to be shared either under Copy-on-Write, or when no
    # copy was requested and the array manager is not in use.
    view_expected = (not copy and not using_array_manager) or using_copy_on_write
    if view_expected:
        assert np.shares_memory(get_array(frame, "a"), get_array(transposed, 0))
    else:
        assert not np.shares_memory(
            get_array(frame, "a"), get_array(transposed, 0)
        )

    # Mutate the transpose; only the Copy-on-Write mode asserts on the parent.
    transposed.iloc[0, 0] = 100
    if using_copy_on_write:
        tm.assert_frame_equal(frame, expected)


def test_transpose_different_dtypes(using_copy_on_write):
    # Frame with one integer and one float column: the transpose must not
    # share memory with the parent's "a" column.
    frame = DataFrame({"a": [1, 2, 3], "b": 1.5})
    expected = frame.copy()
    transposed = frame.T

    assert not np.shares_memory(get_array(frame, "a"), get_array(transposed, 0))

    # Write into the transpose; the parent is only compared against its
    # pre-mutation copy when Copy-on-Write is enabled.
    transposed.iloc[0, 0] = 100
    if not using_copy_on_write:
        return
    tm.assert_frame_equal(frame, expected)


def test_transpose_ea_single_column(using_copy_on_write):
    # Single-column frame with the "Int64" dtype: the transpose must not
    # share memory with the parent's column.
    frame = DataFrame({"a": [1, 2, 3]}, dtype="Int64")
    transposed = frame.T

    parent_values = get_array(frame, "a")
    transposed_values = get_array(transposed, 0)
    assert not np.shares_memory(parent_values, transposed_values)

0 comments on commit 94cfc4a

Please sign in to comment.