Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
156 changes: 91 additions & 65 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -886,6 +886,12 @@ def _maybe_squeeze_arg(self, arg: np.ndarray) -> np.ndarray:
"""
return arg

def _unwrap_setitem_indexer(self, indexer):
    """
    Return the setitem ``indexer`` exactly as it was passed in.

    This pass-through exists for compatibility with 1D-only
    ExtensionArrays.
    """
    return indexer

def setitem(self, indexer, value):
"""
Attempt self.values[indexer] = value, possibly creating a new array.
Expand Down Expand Up @@ -1344,6 +1350,45 @@ class EABackedBlock(Block):

values: ExtensionArray

def setitem(self, indexer, value):
    """
    Write ``value`` into ``self.values`` at ``indexer``.

    Unlike Block.setitem, the in-place path here never changes the dtype
    of this Block. A value that ``_can_hold_element`` rejects is instead
    handed to the block returned by ``coerce_to_target_dtype``.

    Parameters
    ----------
    indexer : tuple, list-like, array-like, slice, int
        The subset of self.values to set
    value : object
        The value being set

    Returns
    -------
    Block
        ``self`` after an in-place set, otherwise whatever ``setitem`` on
        the coerced block returns.

    Notes
    -----
    `indexer` is a direct slice/positional indexer. `value` must
    be a compatible shape.
    """
    if not self._can_hold_element(value):
        # see TestSetitemFloatIntervalWithIntIntervalValues
        return self.coerce_to_target_dtype(value).setitem(indexer, value)

    # Normalize both arguments through the per-class hooks before indexing.
    indexer = self._unwrap_setitem_indexer(indexer)
    value = self._maybe_squeeze_arg(value)

    arr = self.values
    # TODO: string[pyarrow] tests break if we transpose unconditionally
    target = arr.T if arr.ndim == 2 else arr
    check_setitem_lengths(indexer, value, target)
    target[indexer] = value
    return self

def where(self, other, cond) -> list[Block]:
arr = self.values.T

def _maybe_squeeze_arg(self, arg):
    """
    If necessary, squeeze a (N, 1) ndarray to (N,)

    Parameters
    ----------
    arg : object
        Candidate argument. An ndarray or ExtensionArray with exactly one
        more dimension than ``self.values``, or a DataFrame, is reduced to
        its first column; anything else is returned untouched.

    Returns
    -------
    object
        The squeezed array, or ``arg`` itself when no squeeze applies.
    """
    # e.g. if we are passed a 2D mask for putmask
    if (
        isinstance(arg, (np.ndarray, ExtensionArray))
        and arg.ndim == self.values.ndim + 1
    ):
        # TODO(EA2D): unnecessary with 2D EAs
        assert arg.shape[1] == 1
        # error: No overload variant of "__getitem__" of "ExtensionArray"
        # matches argument type "Tuple[slice, int]"
        arg = arg[:, 0]  # type:ignore[call-overload]
    elif isinstance(arg, ABCDataFrame):
        # 2022-01-06 only reached for setitem
        # TODO: should we avoid getting here with DataFrame?
        assert arg.shape[1] == 1
        arg = arg._ixs(0, axis=1)._values

    return arg

def _unwrap_setitem_indexer(self, indexer):
    """
    Adapt a 2D-indexer to our 1D values.

    This is intended for 'setitem', not 'iget' or '_slice'.

    Parameters
    ----------
    indexer : object
        Indexer as received by setitem. Only tuples of length 2 are
        rewritten; every other indexer is returned unchanged.

    Returns
    -------
    object
        For a length-2 tuple, the row component: ``first[:, 0]`` when both
        entries are 2D ndarrays, else ``indexer[0]``. Otherwise ``indexer``
        itself.

    Raises
    ------
    NotImplementedError
        If a length-2 tuple does not address column 0 only.
    """
    # TODO: ATM this doesn't work for iget/_slice, can we change that?

    if isinstance(indexer, tuple):
        # TODO(EA2D): not needed with 2D EAs
        #  Should never have length > 2.  Caller is responsible for checking.
        #  Length 1 is reached via setitem_single_block and
        #  setitem_single_column, each of which pass indexer=(pi,)
        if len(indexer) == 2:

            if all(isinstance(x, np.ndarray) and x.ndim == 2 for x in indexer):
                # GH#44703 went through indexing.maybe_convert_ix
                first, second = indexer
                if not (
                    second.size == 1 and (second == 0).all() and first.shape[1] == 1
                ):
                    raise NotImplementedError(
                        "This should not be reached. Please report a bug at "
                        "github.com/pandas-dev/pandas/"
                    )
                indexer = first[:, 0]

            elif lib.is_integer(indexer[1]) and indexer[1] == 0:
                # reached via setitem_single_block passing the whole indexer
                indexer = indexer[0]
            else:
                raise NotImplementedError(
                    "This should not be reached. Please report a bug at "
                    "github.com/pandas-dev/pandas/"
                )
    return indexer
@property
def is_view(self) -> bool:
    """
    Extension arrays are never treated as views.

    Returns
    -------
    bool
        Always False.
    """
    return False
@cache_readonly
def is_numeric(self):
    """
    Report the ``_is_numeric`` flag of the dtype of ``self.values``.
    """
    dtype = self.values.dtype
    return dtype._is_numeric

def take_nd(
self,
Expand Down Expand Up @@ -1789,18 +1827,6 @@ def is_view(self) -> bool:
# check the ndarray values of the DatetimeIndex values
return self.values._ndarray.base is not None

def setitem(self, indexer, value):
    """
    Write ``value`` into ``self.values`` at ``indexer``.

    A value that ``_can_hold_element`` rejects is handed to the block
    returned by ``coerce_to_target_dtype``; otherwise the set is done in
    place and ``self`` is returned.
    """
    if self._can_hold_element(value):
        arr = self.values
        # Don't transpose with ndim=1 bc we would fail to invalidate
        #  arr.freq
        target = arr.T if self.ndim > 1 else arr
        target[indexer] = value
        return self

    return self.coerce_to_target_dtype(value).setitem(indexer, value)

def diff(self, n: int, axis: int = 0) -> list[Block]:
"""
1st discrete difference.
Expand Down