Skip to content

Commit

Permalink
Backport PR #51464 on branch 2.0.x (CoW: Ignore copy=True when copy_o…
Browse files Browse the repository at this point in the history
…n_write is enabled) (#51849)

Backport PR #51464: CoW: Ignore copy=True when copy_on_write is enabled

Co-authored-by: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
  • Loading branch information
meeseeksmachine and phofl committed Mar 8, 2023
1 parent 463c5a9 commit a5019e0
Show file tree
Hide file tree
Showing 13 changed files with 192 additions and 45 deletions.
4 changes: 2 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -11028,7 +11028,7 @@ def to_timestamp(
>>> df2.index
DatetimeIndex(['2023-01-31', '2024-01-31'], dtype='datetime64[ns]', freq=None)
"""
new_obj = self.copy(deep=copy)
new_obj = self.copy(deep=copy and not using_copy_on_write())

axis_name = self._get_axis_name(axis)
old_ax = getattr(self, axis_name)
Expand Down Expand Up @@ -11085,7 +11085,7 @@ def to_period(
>>> idx.to_period("Y")
PeriodIndex(['2001', '2002', '2003'], dtype='period[A-DEC]')
"""
new_obj = self.copy(deep=copy)
new_obj = self.copy(deep=copy and not using_copy_on_write())

axis_name = self._get_axis_name(axis)
old_ax = getattr(self, axis_name)
Expand Down
33 changes: 23 additions & 10 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,7 +442,7 @@ def set_flags(
>>> df2.flags.allows_duplicate_labels
False
"""
df = self.copy(deep=copy)
df = self.copy(deep=copy and not using_copy_on_write())
if allows_duplicate_labels is not None:
df.flags["allows_duplicate_labels"] = allows_duplicate_labels
return df
Expand Down Expand Up @@ -713,7 +713,7 @@ def _set_axis_nocheck(
else:
# With copy=False, we create a new object but don't copy the
# underlying data.
obj = self.copy(deep=copy)
obj = self.copy(deep=copy and not using_copy_on_write())
setattr(obj, obj._get_axis_name(axis), labels)
return obj

Expand Down Expand Up @@ -742,7 +742,7 @@ def swapaxes(
j = self._get_axis_number(axis2)

if i == j:
return self.copy(deep=copy)
return self.copy(deep=copy and not using_copy_on_write())

mapping = {i: j, j: i}

Expand Down Expand Up @@ -999,7 +999,7 @@ def _rename(
index = mapper

self._check_inplace_and_allows_duplicate_labels(inplace)
result = self if inplace else self.copy(deep=copy)
result = self if inplace else self.copy(deep=copy and not using_copy_on_write())

for axis_no, replacements in enumerate((index, columns)):
if replacements is None:
Expand Down Expand Up @@ -1215,6 +1215,9 @@ class name

inplace = validate_bool_kwarg(inplace, "inplace")

if copy and using_copy_on_write():
copy = False

if mapper is not lib.no_default:
# Use v0.23 behavior if a scalar or list
non_mapper = is_scalar(mapper) or (
Expand Down Expand Up @@ -5322,6 +5325,8 @@ def reindex(

# if all axes that are requested to reindex are equal, then only copy
# if indicated must have index names equal here as well as values
if copy and using_copy_on_write():
copy = False
if all(
self._get_axis(axis_name).identical(ax)
for axis_name, ax in axes.items()
Expand Down Expand Up @@ -5416,10 +5421,14 @@ def _reindex_with_indexers(
# If we've made a copy once, no need to make another one
copy = False

if (copy or copy is None) and new_data is self._mgr:
if (
(copy or copy is None)
and new_data is self._mgr
and not using_copy_on_write()
):
new_data = new_data.copy(deep=copy)
elif using_copy_on_write() and new_data is self._mgr:
new_data = new_data.copy(deep=copy)
new_data = new_data.copy(deep=False)

return self._constructor(new_data).__finalize__(self)

Expand Down Expand Up @@ -6239,6 +6248,9 @@ def astype(
2 2020-01-03
dtype: datetime64[ns]
"""
if copy and using_copy_on_write():
copy = False

if is_dict_like(dtype):
if self.ndim == 1: # i.e. Series
if len(dtype) > 1 or self.name not in dtype:
Expand Down Expand Up @@ -9499,6 +9511,8 @@ def _align_series(
fill_axis: Axis = 0,
):
is_series = isinstance(self, ABCSeries)
if copy and using_copy_on_write():
copy = False

if (not is_series and axis is None) or axis not in [None, 0, 1]:
raise ValueError("Must specify axis=0 or 1")
Expand Down Expand Up @@ -10261,8 +10275,7 @@ def truncate(
if isinstance(ax, MultiIndex):
setattr(result, self._get_axis_name(axis), ax.truncate(before, after))

if copy or (copy is None and not using_copy_on_write()):
result = result.copy(deep=copy)
result = result.copy(deep=copy and not using_copy_on_write())

return result

Expand Down Expand Up @@ -10343,7 +10356,7 @@ def _tz_convert(ax, tz):
raise ValueError(f"The level {level} is not valid")
ax = _tz_convert(ax, tz)

result = self.copy(deep=copy)
result = self.copy(deep=copy and not using_copy_on_write())
result = result.set_axis(ax, axis=axis, copy=False)
return result.__finalize__(self, method="tz_convert")

Expand Down Expand Up @@ -10525,7 +10538,7 @@ def _tz_localize(ax, tz, ambiguous, nonexistent):
raise ValueError(f"The level {level} is not valid")
ax = _tz_localize(ax, tz, ambiguous, nonexistent)

result = self.copy(deep=copy)
result = self.copy(deep=copy and not using_copy_on_write())
result = result.set_axis(ax, axis=axis, copy=False)
return result.__finalize__(self, method="tz_localize")

Expand Down
4 changes: 4 additions & 0 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,8 @@ def astype(self: T, dtype, copy: bool | None = False, errors: str = "raise") ->
copy = False
else:
copy = True
elif using_copy_on_write():
copy = False

return self.apply(
"astype",
Expand All @@ -457,6 +459,8 @@ def convert(self: T, copy: bool | None) -> T:
copy = False
else:
copy = True
elif using_copy_on_write():
copy = False

return self.apply("convert", copy=copy, using_cow=using_copy_on_write())

Expand Down
2 changes: 2 additions & 0 deletions pandas/core/reshape/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,8 @@ def concat(
copy = False
else:
copy = True
elif copy and using_copy_on_write():
copy = False

op = _Concatenator(
objs,
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4129,7 +4129,7 @@ def swaplevel(
{examples}
"""
assert isinstance(self.index, MultiIndex)
result = self.copy(deep=copy)
result = self.copy(deep=copy and not using_copy_on_write())
result.index = self.index.swaplevel(i, j)
return result

Expand Down Expand Up @@ -5743,7 +5743,7 @@ def to_timestamp(
if not isinstance(self.index, PeriodIndex):
raise TypeError(f"unsupported Type {type(self.index).__name__}")

new_obj = self.copy(deep=copy)
new_obj = self.copy(deep=copy and not using_copy_on_write())
new_index = self.index.to_timestamp(freq=freq, how=how)
setattr(new_obj, "index", new_index)
return new_obj
Expand Down Expand Up @@ -5783,7 +5783,7 @@ def to_period(self, freq: str | None = None, copy: bool | None = None) -> Series
if not isinstance(self.index, DatetimeIndex):
raise TypeError(f"unsupported Type {type(self.index).__name__}")

new_obj = self.copy(deep=copy)
new_obj = self.copy(deep=copy and not using_copy_on_write())
new_index = self.index.to_period(freq=freq)
setattr(new_obj, "index", new_index)
return new_obj
Expand Down
30 changes: 30 additions & 0 deletions pandas/tests/copy_view/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,21 @@ def test_concat_mixed_series_frame(using_copy_on_write):
tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("copy", [True, None, False])
def test_concat_copy_keyword(using_copy_on_write, copy):
df = DataFrame({"a": [1, 2]})
df2 = DataFrame({"b": [1.5, 2.5]})

result = concat([df, df2], axis=1, copy=copy)

if using_copy_on_write or copy is False:
assert np.shares_memory(get_array(df, "a"), get_array(result, "a"))
assert np.shares_memory(get_array(df2, "b"), get_array(result, "b"))
else:
assert not np.shares_memory(get_array(df, "a"), get_array(result, "a"))
assert not np.shares_memory(get_array(df2, "b"), get_array(result, "b"))


@pytest.mark.parametrize(
"func",
[
Expand Down Expand Up @@ -280,3 +295,18 @@ def test_merge_on_key_enlarging_one(using_copy_on_write, func, how):
assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a"))
tm.assert_frame_equal(df1, df1_orig)
tm.assert_frame_equal(df2, df2_orig)


@pytest.mark.parametrize("copy", [True, None, False])
def test_merge_copy_keyword(using_copy_on_write, copy):
df = DataFrame({"a": [1, 2]})
df2 = DataFrame({"b": [3, 4.5]})

result = df.merge(df2, copy=copy, left_index=True, right_index=True)

if using_copy_on_write or copy is False:
assert np.shares_memory(get_array(df, "a"), get_array(result, "a"))
assert np.shares_memory(get_array(df2, "b"), get_array(result, "b"))
else:
assert not np.shares_memory(get_array(df, "a"), get_array(result, "a"))
assert not np.shares_memory(get_array(df2, "b"), get_array(result, "b"))
91 changes: 83 additions & 8 deletions pandas/tests/copy_view/test_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ def test_copy_shallow(using_copy_on_write):
lambda df, copy: df.rename(columns=str.lower, copy=copy),
lambda df, copy: df.reindex(columns=["a", "c"], copy=copy),
lambda df, copy: df.reindex_like(df, copy=copy),
lambda df, copy: df.align(df, copy=copy)[0],
lambda df, copy: df.set_axis(["a", "b", "c"], axis="index", copy=copy),
lambda df, copy: df.rename_axis(index="test", copy=copy),
lambda df, copy: df.rename_axis(columns="test", copy=copy),
Expand All @@ -84,6 +85,7 @@ def test_copy_shallow(using_copy_on_write):
"rename",
"reindex",
"reindex_like",
"align",
"set_axis",
"rename_axis0",
"rename_axis1",
Expand Down Expand Up @@ -115,22 +117,96 @@ def test_methods_copy_keyword(
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}, index=index)
df2 = method(df, copy=copy)

share_memory = (using_copy_on_write and copy is not True) or copy is False
share_memory = using_copy_on_write or copy is False

if request.node.callspec.id.startswith("reindex-"):
# TODO copy=False without CoW still returns a copy in this case
if not using_copy_on_write and not using_array_manager and copy is False:
share_memory = False
# TODO copy=True with CoW still returns a view
if using_copy_on_write:
share_memory = True

if share_memory:
assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
else:
assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))


@pytest.mark.parametrize("copy", [True, None, False])
@pytest.mark.parametrize(
"method",
[
lambda ser, copy: ser.rename(index={0: 100}, copy=copy),
lambda ser, copy: ser.reindex(index=ser.index, copy=copy),
lambda ser, copy: ser.reindex_like(ser, copy=copy),
lambda ser, copy: ser.align(ser, copy=copy)[0],
lambda ser, copy: ser.set_axis(["a", "b", "c"], axis="index", copy=copy),
lambda ser, copy: ser.rename_axis(index="test", copy=copy),
lambda ser, copy: ser.astype("int64", copy=copy),
lambda ser, copy: ser.swaplevel(0, 1, copy=copy),
lambda ser, copy: ser.swapaxes(0, 0, copy=copy),
lambda ser, copy: ser.truncate(0, 5, copy=copy),
lambda ser, copy: ser.infer_objects(copy=copy),
lambda ser, copy: ser.to_timestamp(copy=copy),
lambda ser, copy: ser.to_period(freq="D", copy=copy),
lambda ser, copy: ser.tz_localize("US/Central", copy=copy),
lambda ser, copy: ser.tz_convert("US/Central", copy=copy),
lambda ser, copy: ser.set_flags(allows_duplicate_labels=False, copy=copy),
],
ids=[
"rename",
"reindex",
"reindex_like",
"align",
"set_axis",
"rename_axis0",
"astype",
"swaplevel",
"swapaxes",
"truncate",
"infer_objects",
"to_timestamp",
"to_period",
"tz_localize",
"tz_convert",
"set_flags",
],
)
def test_methods_series_copy_keyword(request, method, copy, using_copy_on_write):
index = None
if "to_timestamp" in request.node.callspec.id:
index = period_range("2012-01-01", freq="D", periods=3)
elif "to_period" in request.node.callspec.id:
index = date_range("2012-01-01", freq="D", periods=3)
elif "tz_localize" in request.node.callspec.id:
index = date_range("2012-01-01", freq="D", periods=3)
elif "tz_convert" in request.node.callspec.id:
index = date_range("2012-01-01", freq="D", periods=3, tz="Europe/Brussels")
elif "swaplevel" in request.node.callspec.id:
index = MultiIndex.from_arrays([[1, 2, 3], [4, 5, 6]])

ser = Series([1, 2, 3], index=index)
ser2 = method(ser, copy=copy)

share_memory = using_copy_on_write or copy is False

if share_memory:
assert np.shares_memory(get_array(ser2), get_array(ser))
else:
assert not np.shares_memory(get_array(ser2), get_array(ser))


@pytest.mark.parametrize("copy", [True, None, False])
def test_transpose_copy_keyword(using_copy_on_write, copy, using_array_manager):
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
result = df.transpose(copy=copy)
share_memory = using_copy_on_write or copy is False or copy is None
share_memory = share_memory and not using_array_manager

if share_memory:
assert np.shares_memory(get_array(df, "a"), get_array(result, 0))
else:
assert not np.shares_memory(get_array(df, "a"), get_array(result, 0))


# -----------------------------------------------------------------------------
# DataFrame methods returning new DataFrame using shallow copy

Expand Down Expand Up @@ -1119,14 +1195,13 @@ def test_set_flags(using_copy_on_write):
tm.assert_series_equal(ser, expected)


@pytest.mark.parametrize("copy_kwargs", [{"copy": True}, {}])
@pytest.mark.parametrize("kwargs", [{"mapper": "test"}, {"index": "test"}])
def test_rename_axis(using_copy_on_write, kwargs, copy_kwargs):
def test_rename_axis(using_copy_on_write, kwargs):
df = DataFrame({"a": [1, 2, 3, 4]}, index=Index([1, 2, 3, 4], name="a"))
df_orig = df.copy()
df2 = df.rename_axis(**kwargs, **copy_kwargs)
df2 = df.rename_axis(**kwargs)

if using_copy_on_write and not copy_kwargs:
if using_copy_on_write:
assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
else:
assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
Expand Down
5 changes: 4 additions & 1 deletion pandas/tests/frame/methods/test_reindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,10 @@ def test_reindex_copies_ea(self, using_copy_on_write):

# pass both columns and index
result2 = df.reindex(columns=cols, index=df.index, copy=True)
assert not np.shares_memory(result2[0].array._data, df[0].array._data)
if using_copy_on_write:
assert np.shares_memory(result2[0].array._data, df[0].array._data)
else:
assert not np.shares_memory(result2[0].array._data, df[0].array._data)

@td.skip_array_manager_not_yet_implemented
def test_reindex_date_fill_value(self):
Expand Down

0 comments on commit a5019e0

Please sign in to comment.