Skip to content

Commit

Permalink
Backport PR #51239 on branch 2.0.x (API / CoW: constructing DataFrame…
Browse files Browse the repository at this point in the history
… from DataFrame/BlockManager creates lazy copy) (#51650)

Backport PR #51239: API / CoW: constructing DataFrame from DataFrame/BlockManager creates lazy copy

Co-authored-by: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
  • Loading branch information
meeseeksmachine and phofl committed Feb 26, 2023
1 parent 9086bde commit 8906d4a
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 0 deletions.
4 changes: 4 additions & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,10 @@ Copy-on-Write improvements
a modification to the data happens) when constructing a Series from an existing
Series with the default of ``copy=False`` (:issue:`50471`)

- The :class:`DataFrame` constructor will now create a lazy copy (deferring the copy until
a modification to the data happens) when constructing from an existing
:class:`DataFrame` with the default of ``copy=False`` (:issue:`51239`)

- The :class:`DataFrame` constructor, when constructing a DataFrame from a dictionary
of Series objects and specifying ``copy=False``, will now use a lazy copy
of those Series objects for the columns of the DataFrame (:issue:`50777`)
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -656,6 +656,8 @@ def __init__(
data = data.copy(deep=False)

if isinstance(data, (BlockManager, ArrayManager)):
if using_copy_on_write():
data = data.copy(deep=False)
# first check if a Manager is passed without any other arguments
# -> use fastpath (without checking Manager type)
if index is None and columns is None and dtype is None and not copy:
Expand Down
19 changes: 19 additions & 0 deletions pandas/tests/copy_view/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,25 @@ def test_series_from_series_with_reindex(using_copy_on_write):
assert not result._mgr.blocks[0].refs.has_reference()


@pytest.mark.parametrize("func", [lambda x: x, lambda x: x._mgr])
@pytest.mark.parametrize("columns", [None, ["a"]])
def test_dataframe_constructor_mgr_or_df(using_copy_on_write, columns, func):
    """Check DataFrame built from a DataFrame or its manager shares data lazily.

    ``func`` selects what is handed to the constructor: the frame itself or
    its ``_mgr`` attribute. Right after construction the new frame must share
    memory with the source. After writing into the new frame, the expectation
    depends on ``using_copy_on_write``: either the buffers have separated and
    the source is unchanged, or they are still shared and both frames match.
    """
    # NOTE(review): ``columns`` is parametrized but never forwarded to the
    # constructor below, so both of its values run the same scenario --
    # confirm whether ``DataFrame(..., columns=columns)`` was intended.
    source = DataFrame({"a": [1, 2, 3]})
    snapshot = source.copy()

    result = DataFrame(func(source))

    # Construction alone must not have copied the underlying data.
    assert np.shares_memory(get_array(source, "a"), get_array(result, "a"))

    # Write into the new frame, then re-check whether the buffers are shared.
    result.iloc[0] = 100
    still_shared = np.shares_memory(
        get_array(source, "a"), get_array(result, "a")
    )

    if not using_copy_on_write:
        # The write went through to the shared data, so both frames agree.
        assert still_shared
        tm.assert_frame_equal(source, result)
    else:
        # The write triggered a copy; the source keeps its original values.
        assert not still_shared
        tm.assert_frame_equal(source, snapshot)


@pytest.mark.parametrize("dtype", [None, "int64", "Int64"])
@pytest.mark.parametrize("index", [None, [0, 1, 2]])
@pytest.mark.parametrize("columns", [None, ["a", "b"], ["a", "b", "c"]])
Expand Down

0 comments on commit 8906d4a

Please sign in to comment.