Skip to content

Commit

Permalink
Backport PR #52031 on branch 2.0.x (BUG-CoW: DataFrame constructed fr…
Browse files Browse the repository at this point in the history
…om Series not respecting CoW) (#52274)

Backport PR #52031: BUG-CoW: DataFrame constructed from Series not respecting CoW

Co-authored-by: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
  • Loading branch information
meeseeksmachine and phofl committed Mar 29, 2023
1 parent 77627e3 commit 75d7af3
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 3 deletions.
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,9 @@ Copy-on-Write improvements
of Series objects and specifying ``copy=False``, will now use a lazy copy
of those Series objects for the columns of the DataFrame (:issue:`50777`)

- The :class:`DataFrame` constructor, when constructing a DataFrame from a
:class:`Series` and specifying ``copy=False``, will now respect Copy-on-Write.

- The :class:`DataFrame` constructor, when constructing from a NumPy array,
will now copy the array by default to avoid mutating the :class:`DataFrame`
when mutating the array. Specify ``copy=False`` to get the old behavior.
Expand Down
20 changes: 17 additions & 3 deletions pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,7 @@ def ndarray_to_mgr(
copy_on_sanitize = False if typ == "array" else copy

vdtype = getattr(values, "dtype", None)
refs = None
if is_1d_only_ea_dtype(vdtype) or is_1d_only_ea_dtype(dtype):
# GH#19157

Expand Down Expand Up @@ -290,7 +291,20 @@ def ndarray_to_mgr(
if values.ndim == 1:
values = values.reshape(-1, 1)

elif isinstance(values, (np.ndarray, ExtensionArray, ABCSeries, Index)):
elif isinstance(values, ABCSeries):
if not copy_on_sanitize and (
dtype is None or astype_is_view(values.dtype, dtype)
):
refs = values._references

if copy_on_sanitize:
values = values._values.copy()
else:
values = values._values

values = _ensure_2d(values)

elif isinstance(values, (np.ndarray, ExtensionArray, Index)):
# drop subclass info
_copy = (
copy_on_sanitize
Expand Down Expand Up @@ -360,11 +374,11 @@ def ndarray_to_mgr(
]
else:
bp = BlockPlacement(slice(len(columns)))
nb = new_block_2d(values, placement=bp)
nb = new_block_2d(values, placement=bp, refs=refs)
block_values = [nb]
else:
bp = BlockPlacement(slice(len(columns)))
nb = new_block_2d(values, placement=bp)
nb = new_block_2d(values, placement=bp, refs=refs)
block_values = [nb]

if len(columns) == 0:
Expand Down
32 changes: 32 additions & 0 deletions pandas/tests/copy_view/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,38 @@ def test_dataframe_from_dict_of_series_with_reindex(dtype):
assert np.shares_memory(arr_before, arr_after)


@pytest.mark.parametrize(
    "data, dtype",
    [
        ([1, 2], None),
        ([1, 2], "int64"),
        (["a", "b"], None),
    ],
)
def test_dataframe_from_series(using_copy_on_write, data, dtype):
    """Check a DataFrame built from a Series with the same dtype.

    The single column of the frame must share memory with the Series it was
    built from. When ``using_copy_on_write`` is true, the manager must also
    report that block 0 has a reference, and writing into the frame must
    leave the Series equal to the copy taken before the write.
    """
    source = Series(data, dtype=dtype)
    expected = source.copy()
    frame = DataFrame(source, dtype=dtype)

    series_values = get_array(source)
    frame_values = get_array(frame, 0)
    assert np.shares_memory(series_values, frame_values)
    if using_copy_on_write:
        has_reference = not frame._mgr._has_no_reference(0)
        assert has_reference

    # Write through the frame; the Series is only checked under CoW.
    frame.iloc[0, 0] = data[-1]
    if not using_copy_on_write:
        return
    tm.assert_series_equal(source, expected)


def test_dataframe_from_series_different_dtype(using_copy_on_write):
    """Check a DataFrame built from a Series with a different requested dtype.

    An ``int64`` Series is passed to the constructor with ``dtype="int32"``.
    The frame's column must not share memory with the Series, and when
    ``using_copy_on_write`` is true the manager must report that block 0
    has no reference.
    """
    source = Series([1, 2], dtype="int64")
    frame = DataFrame(source, dtype="int32")

    series_values = get_array(source)
    frame_values = get_array(frame, 0)
    shares_buffer = np.shares_memory(series_values, frame_values)
    assert not shares_buffer

    if not using_copy_on_write:
        return
    assert frame._mgr._has_no_reference(0)


def test_dataframe_from_series_infer_datetime(using_copy_on_write):
    """Check a DataFrame built from an object-dtype Series of Timestamps.

    The frame's column must not share memory with the Series, and when
    ``using_copy_on_write`` is true the manager must report that block 0
    has no reference.

    NOTE(review): the test name suggests the constructor infers a datetime
    dtype from the object values, which would explain the new buffer --
    confirm against the constructor implementation.
    """
    stamps = [Timestamp("2019-12-31"), Timestamp("2020-12-31")]
    source = Series(stamps, dtype=object)
    frame = DataFrame(source)

    series_values = get_array(source)
    frame_values = get_array(frame, 0)
    shares_buffer = np.shares_memory(series_values, frame_values)
    assert not shares_buffer

    if not using_copy_on_write:
        return
    assert frame._mgr._has_no_reference(0)


@pytest.mark.parametrize("index", [None, [0, 1, 2]])
def test_dataframe_from_dict_of_series_with_dtype(index):
# Variant of above, but now passing a dtype that causes a copy
Expand Down

0 comments on commit 75d7af3

Please sign in to comment.