Skip to content

Commit

Permalink
Backport PR #51834: CoW: Set copy=False in internal usages of Series/DataFrame constructors (#52012)
Browse files Browse the repository at this point in the history

CoW: Set copy=False in internal usages of Series/DataFrame constructors (#51834)

(cherry picked from commit c98b7c8)

Co-authored-by: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
  • Loading branch information
jorisvandenbossche and phofl committed Mar 16, 2023
1 parent 548c7f7 commit d075fda
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 33 deletions.
44 changes: 30 additions & 14 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1592,16 +1592,21 @@ def dot(self, other: AnyArrayLike | DataFrame) -> DataFrame | Series:

if isinstance(other, DataFrame):
return self._constructor(
np.dot(lvals, rvals), index=left.index, columns=other.columns
np.dot(lvals, rvals),
index=left.index,
columns=other.columns,
copy=False,
)
elif isinstance(other, Series):
return self._constructor_sliced(np.dot(lvals, rvals), index=left.index)
return self._constructor_sliced(
np.dot(lvals, rvals), index=left.index, copy=False
)
elif isinstance(rvals, (np.ndarray, Index)):
result = np.dot(lvals, rvals)
if result.ndim == 2:
return self._constructor(result, index=left.index)
return self._constructor(result, index=left.index, copy=False)
else:
return self._constructor_sliced(result, index=left.index)
return self._constructor_sliced(result, index=left.index, copy=False)
else: # pragma: no cover
raise TypeError(f"unsupported type: {type(other)}")

Expand Down Expand Up @@ -3571,9 +3576,15 @@ def transpose(self, *args, copy: bool = False) -> DataFrame:

else:
new_arr = self.values.T
if copy:
if copy and not using_copy_on_write():
new_arr = new_arr.copy()
result = self._constructor(new_arr, index=self.columns, columns=self.index)
result = self._constructor(
new_arr,
index=self.columns,
columns=self.index,
# We already made a copy (more than one block)
copy=False,
)

return result.__finalize__(self, method="transpose")

Expand Down Expand Up @@ -3795,7 +3806,7 @@ def _getitem_multilevel(self, key):
else:
new_values = self._values[:, loc]
result = self._constructor(
new_values, index=self.index, columns=result_columns
new_values, index=self.index, columns=result_columns, copy=False
)
if using_copy_on_write() and isinstance(loc, slice):
result._mgr.add_references(self._mgr) # type: ignore[arg-type]
Expand Down Expand Up @@ -4029,7 +4040,7 @@ def _setitem_frame(self, key, value):
if isinstance(key, np.ndarray):
if key.shape != self.shape:
raise ValueError("Array conditional must be same shape as self")
key = self._constructor(key, **self._construct_axes_dict())
key = self._constructor(key, **self._construct_axes_dict(), copy=False)

if key.size and not all(is_bool_dtype(dtype) for dtype in key.dtypes):
raise TypeError(
Expand Down Expand Up @@ -4939,7 +4950,9 @@ def _reindex_multi(
# condition more specific.
indexer = row_indexer, col_indexer
new_values = take_2d_multi(self.values, indexer, fill_value=fill_value)
return self._constructor(new_values, index=new_index, columns=new_columns)
return self._constructor(
new_values, index=new_index, columns=new_columns, copy=False
)
else:
return self._reindex_with_indexers(
{0: [new_index, row_indexer], 1: [new_columns, col_indexer]},
Expand Down Expand Up @@ -10060,7 +10073,7 @@ def corr(
f"'{method}' was supplied"
)

result = self._constructor(correl, index=idx, columns=cols)
result = self._constructor(correl, index=idx, columns=cols, copy=False)
return result.__finalize__(self, method="corr")

def cov(
Expand Down Expand Up @@ -10191,7 +10204,7 @@ def cov(
else:
base_cov = libalgos.nancorr(mat, cov=True, minp=min_periods)

result = self._constructor(base_cov, index=idx, columns=cols)
result = self._constructor(base_cov, index=idx, columns=cols, copy=False)
return result.__finalize__(self, method="cov")

def corrwith(
Expand Down Expand Up @@ -10304,7 +10317,9 @@ def c(x):
return nanops.nancorr(x[0], x[1], method=method)

correl = self._constructor_sliced(
map(c, zip(left.values.T, right.values.T)), index=left.columns
map(c, zip(left.values.T, right.values.T)),
index=left.columns,
copy=False,
)

else:
Expand Down Expand Up @@ -10415,7 +10430,7 @@ def count(self, axis: Axis = 0, numeric_only: bool = False):
series_counts = notna(frame).sum(axis=axis)
counts = series_counts._values
result = self._constructor_sliced(
counts, index=frame._get_agg_axis(axis)
counts, index=frame._get_agg_axis(axis), copy=False
)

return result.astype("int64").__finalize__(self, method="count")
Expand Down Expand Up @@ -10524,7 +10539,7 @@ def _reduce_axis1(self, name: str, func, skipna: bool) -> Series:
middle = func(arr, axis=0, skipna=skipna)
result = ufunc(result, middle)

res_ser = self._constructor_sliced(result, index=self.index)
res_ser = self._constructor_sliced(result, index=self.index, copy=False)
return res_ser

def nunique(self, axis: Axis = 0, dropna: bool = True) -> Series:
Expand Down Expand Up @@ -11206,6 +11221,7 @@ def isin(self, values: Series | DataFrame | Sequence | Mapping) -> DataFrame:
).reshape(self.shape),
self.index,
self.columns,
copy=False,
)
return result.__finalize__(self, method="isin")

Expand Down
8 changes: 6 additions & 2 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -779,6 +779,8 @@ def swapaxes(
return self._constructor(
new_values,
*new_axes,
# The no-copy case for CoW is handled above
copy=False,
).__finalize__(self, method="swapaxes")

@final
Expand Down Expand Up @@ -9629,7 +9631,7 @@ def _where(
cond = np.asanyarray(cond)
if cond.shape != self.shape:
raise ValueError("Array conditional must be same shape as self")
cond = self._constructor(cond, **self._construct_axes_dict())
cond = self._constructor(cond, **self._construct_axes_dict(), copy=False)

# make sure we are boolean
fill_value = bool(inplace)
Expand Down Expand Up @@ -9704,7 +9706,9 @@ def _where(

# we are the same shape, so create an actual object for alignment
else:
other = self._constructor(other, **self._construct_axes_dict())
other = self._constructor(
other, **self._construct_axes_dict(), copy=False
)

if axis is None:
axis = 0
Expand Down
38 changes: 21 additions & 17 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -840,7 +840,7 @@ def view(self, dtype: Dtype | None = None) -> Series:
# self.array instead of self._values so we piggyback on PandasArray
# implementation
res_values = self.array.view(dtype)
res_ser = self._constructor(res_values, index=self.index)
res_ser = self._constructor(res_values, index=self.index, copy=False)
if isinstance(res_ser._mgr, SingleBlockManager) and using_copy_on_write():
blk = res_ser._mgr._block
blk.refs = cast("BlockValuesRefs", self._references)
Expand Down Expand Up @@ -1073,7 +1073,7 @@ def _get_values_tuple(self, key: tuple):

# If key is contained, would have returned by now
indexer, new_index = self.index.get_loc_level(key)
new_ser = self._constructor(self._values[indexer], index=new_index)
new_ser = self._constructor(self._values[indexer], index=new_index, copy=False)
if using_copy_on_write() and isinstance(indexer, slice):
new_ser._mgr.add_references(self._mgr) # type: ignore[arg-type]
return new_ser.__finalize__(self)
Expand Down Expand Up @@ -1113,7 +1113,9 @@ def _get_value(self, label, takeable: bool = False):

new_index = mi[loc]
new_index = maybe_droplevels(new_index, label)
new_ser = self._constructor(new_values, index=new_index, name=self.name)
new_ser = self._constructor(
new_values, index=new_index, name=self.name, copy=False
)
if using_copy_on_write() and isinstance(loc, slice):
new_ser._mgr.add_references(self._mgr) # type: ignore[arg-type]
return new_ser.__finalize__(self)
Expand Down Expand Up @@ -1413,7 +1415,7 @@ def repeat(self, repeats: int | Sequence[int], axis: None = None) -> Series:
nv.validate_repeat((), {"axis": axis})
new_index = self.index.repeat(repeats)
new_values = self._values.repeat(repeats)
return self._constructor(new_values, index=new_index).__finalize__(
return self._constructor(new_values, index=new_index, copy=False).__finalize__(
self, method="repeat"
)

Expand Down Expand Up @@ -1579,7 +1581,7 @@ def reset_index(
self.index = new_index
else:
return self._constructor(
self._values.copy(), index=new_index
self._values.copy(), index=new_index, copy=False
).__finalize__(self, method="reset_index")
elif inplace:
raise TypeError(
Expand Down Expand Up @@ -2101,7 +2103,7 @@ def mode(self, dropna: bool = True) -> Series:

# Ensure index is type stable (should always use int index)
return self._constructor(
res_values, index=range(len(res_values)), name=self.name
res_values, index=range(len(res_values)), name=self.name, copy=False
)

def unique(self) -> ArrayLike: # pylint: disable=useless-parent-delegation
Expand Down Expand Up @@ -2365,7 +2367,7 @@ def duplicated(self, keep: DropKeep = "first") -> Series:
dtype: bool
"""
res = self._duplicated(keep=keep)
result = self._constructor(res, index=self.index)
result = self._constructor(res, index=self.index, copy=False)
return result.__finalize__(self, method="duplicated")

def idxmin(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashable:
Expand Down Expand Up @@ -2543,7 +2545,7 @@ def round(self, decimals: int = 0, *args, **kwargs) -> Series:
"""
nv.validate_round(args, kwargs)
result = self._values.round(decimals)
result = self._constructor(result, index=self.index).__finalize__(
result = self._constructor(result, index=self.index, copy=False).__finalize__(
self, method="round"
)

Expand Down Expand Up @@ -2844,7 +2846,7 @@ def diff(self, periods: int = 1) -> Series:
{examples}
"""
result = algorithms.diff(self._values, periods)
return self._constructor(result, index=self.index).__finalize__(
return self._constructor(result, index=self.index, copy=False).__finalize__(
self, method="diff"
)

Expand Down Expand Up @@ -2962,7 +2964,7 @@ def dot(self, other: AnyArrayLike) -> Series | np.ndarray:

if isinstance(other, ABCDataFrame):
return self._constructor(
np.dot(lvals, rvals), index=other.columns
np.dot(lvals, rvals), index=other.columns, copy=False
).__finalize__(self, method="dot")
elif isinstance(other, Series):
return np.dot(lvals, rvals)
Expand Down Expand Up @@ -3264,7 +3266,7 @@ def combine(
# try_float=False is to match agg_series
npvalues = lib.maybe_convert_objects(new_values, try_float=False)
res_values = maybe_cast_pointwise_result(npvalues, self.dtype, same_dtype=False)
return self._constructor(res_values, index=new_index, name=new_name)
return self._constructor(res_values, index=new_index, name=new_name, copy=False)

def combine_first(self, other) -> Series:
"""
Expand Down Expand Up @@ -3615,7 +3617,7 @@ def sort_values(
return self.copy(deep=None)

result = self._constructor(
self._values[sorted_index], index=self.index[sorted_index]
self._values[sorted_index], index=self.index[sorted_index], copy=False
)

if ignore_index:
Expand Down Expand Up @@ -3863,7 +3865,9 @@ def argsort(
else:
result = np.argsort(values, kind=kind)

res = self._constructor(result, index=self.index, name=self.name, dtype=np.intp)
res = self._constructor(
result, index=self.index, name=self.name, dtype=np.intp, copy=False
)
return res.__finalize__(self, method="argsort")

def nlargest(
Expand Down Expand Up @@ -4238,7 +4242,7 @@ def explode(self, ignore_index: bool = False) -> Series:
else:
index = self.index.repeat(counts)

return self._constructor(values, index=index, name=self.name)
return self._constructor(values, index=index, name=self.name, copy=False)

def unstack(self, level: IndexLabel = -1, fill_value: Hashable = None) -> DataFrame:
"""
Expand Down Expand Up @@ -4369,7 +4373,7 @@ def map(
dtype: object
"""
new_values = self._map_values(arg, na_action=na_action)
return self._constructor(new_values, index=self.index).__finalize__(
return self._constructor(new_values, index=self.index, copy=False).__finalize__(
self, method="map"
)

Expand Down Expand Up @@ -4663,7 +4667,7 @@ def _reindex_indexer(
new_values = algorithms.take_nd(
self._values, indexer, allow_fill=True, fill_value=None
)
return self._constructor(new_values, index=new_index)
return self._constructor(new_values, index=new_index, copy=False)

def _needs_reindex_multi(self, axes, method, level) -> bool:
"""
Expand Down Expand Up @@ -5378,7 +5382,7 @@ def isin(self, values) -> Series:
dtype: bool
"""
result = algorithms.isin(self._values, values)
return self._constructor(result, index=self.index).__finalize__(
return self._constructor(result, index=self.index, copy=False).__finalize__(
self, method="isin"
)

Expand Down

0 comments on commit d075fda

Please sign in to comment.