Skip to content

Commit

Permalink
Backport PR #52276 on branch 2.0.x (API / CoW: Respect CoW for DataFrame(Index)) (#52289)
Browse files: Browse the repository at this point in the history

Backport PR #52276: API / CoW: Respect CoW for DataFrame(Index)

Co-authored-by: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
  • Loading branch information
meeseeksmachine and phofl committed Mar 30, 2023
1 parent bca610a commit f7917ca
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 13 deletions.
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,8 @@ Copy-on-Write improvements
of those Series objects for the columns of the DataFrame (:issue:`50777`)

- The :class:`DataFrame` constructor, when constructing a DataFrame from a
:class:`Series` and specifying ``copy=False``, will now respect Copy-on-Write.
:class:`Series` or :class:`Index` and specifying ``copy=False``, will
now respect Copy-on-Write.

- The :class:`DataFrame` and :class:`Series` constructors, when constructing from
a NumPy array, will now copy the array by default to avoid mutating
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ def ndarray_to_mgr(
if values.ndim == 1:
values = values.reshape(-1, 1)

elif isinstance(values, ABCSeries):
elif isinstance(values, (ABCSeries, Index)):
if not copy_on_sanitize and (
dtype is None or astype_is_view(values.dtype, dtype)
):
Expand All @@ -304,7 +304,7 @@ def ndarray_to_mgr(

values = _ensure_2d(values)

elif isinstance(values, (np.ndarray, ExtensionArray, Index)):
elif isinstance(values, (np.ndarray, ExtensionArray)):
# drop subclass info
_copy = (
copy_on_sanitize
Expand Down
22 changes: 12 additions & 10 deletions pandas/tests/copy_view/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,26 +260,28 @@ def test_dataframe_from_dict_of_series_with_reindex(dtype):
assert np.shares_memory(arr_before, arr_after)


@pytest.mark.parametrize("cons", [Series, Index])
@pytest.mark.parametrize(
"data, dtype", [([1, 2], None), ([1, 2], "int64"), (["a", "b"], None)]
)
def test_dataframe_from_series(using_copy_on_write, data, dtype):
ser = Series(data, dtype=dtype)
ser_orig = ser.copy()
df = DataFrame(ser, dtype=dtype)
assert np.shares_memory(get_array(ser), get_array(df, 0))
def test_dataframe_from_series_or_index(using_copy_on_write, data, dtype, cons):
obj = cons(data, dtype=dtype)
obj_orig = obj.copy()
df = DataFrame(obj, dtype=dtype)
assert np.shares_memory(get_array(obj), get_array(df, 0))
if using_copy_on_write:
assert not df._mgr._has_no_reference(0)

df.iloc[0, 0] = data[-1]
if using_copy_on_write:
tm.assert_series_equal(ser, ser_orig)
tm.assert_equal(obj, obj_orig)


def test_dataframe_from_series_different_dtype(using_copy_on_write):
ser = Series([1, 2], dtype="int64")
df = DataFrame(ser, dtype="int32")
assert not np.shares_memory(get_array(ser), get_array(df, 0))
@pytest.mark.parametrize("cons", [Series, Index])
def test_dataframe_from_series_or_index_different_dtype(using_copy_on_write, cons):
obj = cons([1, 2], dtype="int64")
df = DataFrame(obj, dtype="int32")
assert not np.shares_memory(get_array(obj), get_array(df, 0))
if using_copy_on_write:
assert df._mgr._has_no_reference(0)

Expand Down

0 comments on commit f7917ca

Please sign in to comment.