Skip to content

Commit

Permalink
REF: better names, stricter typing for mgr/block indexing methods (#5…
Browse files Browse the repository at this point in the history
…3259)

* REF: tighter typing, better names for manager indexing methods

* REF: dont need _slice in SingleManager calls

* fix CoW test

* fix test
  • Loading branch information
jbrockmendel committed May 17, 2023
1 parent 3929ef0 commit 5c8c16e
Show file tree
Hide file tree
Showing 9 changed files with 52 additions and 56 deletions.
8 changes: 4 additions & 4 deletions pandas/_libs/internals.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import numpy as np

from pandas._typing import (
ArrayLike,
T,
Self,
npt,
)

Expand Down Expand Up @@ -76,12 +76,12 @@ class SharedBlock:
class NumpyBlock(SharedBlock):
values: np.ndarray
@final
def getitem_block_index(self: T, slicer: slice) -> T: ...
def slice_block_rows(self, slicer: slice) -> Self: ...

class NDArrayBackedBlock(SharedBlock):
values: NDArrayBackedExtensionArray
@final
def getitem_block_index(self: T, slicer: slice) -> T: ...
def slice_block_rows(self, slicer: slice) -> Self: ...

class Block(SharedBlock): ...

Expand All @@ -95,7 +95,7 @@ class BlockManager:
def __init__(
self, blocks: tuple[B, ...], axes: list[Index], verify_integrity=...
) -> None: ...
def get_slice(self: T, slobj: slice, axis: int = ...) -> T: ...
def get_slice(self, slobj: slice, axis: int = ...) -> Self: ...
def _rebuild_blknos_and_blklocs(self) -> None: ...

class BlockValuesRefs:
Expand Down
10 changes: 5 additions & 5 deletions pandas/_libs/internals.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -715,7 +715,7 @@ cdef class NumpyBlock(SharedBlock):
# set placement, ndim and refs
self.values = values

cpdef NumpyBlock getitem_block_index(self, slice slicer):
cpdef NumpyBlock slice_block_rows(self, slice slicer):
"""
Perform __getitem__-like specialized to slicing along index.
Expand Down Expand Up @@ -743,7 +743,7 @@ cdef class NDArrayBackedBlock(SharedBlock):
# set placement, ndim and refs
self.values = values

cpdef NDArrayBackedBlock getitem_block_index(self, slice slicer):
cpdef NDArrayBackedBlock slice_block_rows(self, slice slicer):
"""
Perform __getitem__-like specialized to slicing along index.
Expand Down Expand Up @@ -899,15 +899,15 @@ cdef class BlockManager:
# -------------------------------------------------------------------
# Indexing

cdef BlockManager _get_index_slice(self, slice slobj):
cdef BlockManager _slice_mgr_rows(self, slice slobj):
cdef:
SharedBlock blk, nb
BlockManager mgr
ndarray blknos, blklocs

nbs = []
for blk in self.blocks:
nb = blk.getitem_block_index(slobj)
nb = blk.slice_block_rows(slobj)
nbs.append(nb)

new_axes = [self.axes[0], self.axes[1]._getitem_slice(slobj)]
Expand All @@ -926,7 +926,7 @@ cdef class BlockManager:
if axis == 0:
new_blocks = self._slice_take_blocks_ax0(slobj)
elif axis == 1:
return self._get_index_slice(slobj)
return self._slice_mgr_rows(slobj)
else:
raise IndexError("Requested axis not found in manager")

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/internals/array_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -1275,7 +1275,7 @@ def get_slice(self, slobj: slice, axis: AxisInt = 0) -> SingleArrayManager:
new_index = self.index._getitem_slice(slobj)
return type(self)([new_array], [new_index], verify_integrity=False)

def getitem_mgr(self, indexer) -> SingleArrayManager:
def get_rows_with_mask(self, indexer: npt.NDArray[np.bool_]) -> SingleArrayManager:
new_array = self.array[indexer]
new_index = self.index[indexer]
return type(self)([new_array], [new_index])
Expand Down
36 changes: 21 additions & 15 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
FillnaOptions,
IgnoreRaise,
QuantileInterpolation,
Self,
Shape,
npt,
)
Expand Down Expand Up @@ -259,36 +260,41 @@ def __len__(self) -> int:
return len(self.values)

@final
def getitem_block(self, slicer: slice | npt.NDArray[np.intp]) -> Block:
def slice_block_columns(self, slc: slice) -> Self:
"""
Perform __getitem__-like, return result as block.
"""
new_mgr_locs = self._mgr_locs[slc]

new_values = self._slice(slc)
refs = self.refs
return type(self)(new_values, new_mgr_locs, self.ndim, refs=refs)

@final
def take_block_columns(self, indices: npt.NDArray[np.intp]) -> Self:
"""
Perform __getitem__-like, return result as block.
Only supports slices that preserve dimensionality.
"""
# Note: the only place where we are called with ndarray[intp]
# is from internals.concat, and we can verify that never happens
# with 1-column blocks, i.e. never for ExtensionBlock.
# Note: only called from is from internals.concat, and we can verify
# that never happens with 1-column blocks, i.e. never for ExtensionBlock.

new_mgr_locs = self._mgr_locs[slicer]
new_mgr_locs = self._mgr_locs[indices]

new_values = self._slice(slicer)
refs = self.refs if isinstance(slicer, slice) else None
return type(self)(new_values, new_mgr_locs, self.ndim, refs=refs)
new_values = self._slice(indices)
return type(self)(new_values, new_mgr_locs, self.ndim, refs=None)

@final
def getitem_block_columns(
self, slicer: slice, new_mgr_locs: BlockPlacement
) -> Block:
) -> Self:
"""
Perform __getitem__-like, return result as block.
Only supports slices that preserve dimensionality.
"""
new_values = self._slice(slicer)

if new_values.ndim != self.values.ndim:
raise ValueError("Only same dim slicing is allowed")

return type(self)(new_values, new_mgr_locs, self.ndim, refs=self.refs)

@final
Expand Down Expand Up @@ -1997,7 +2003,7 @@ def _slice(
-------
ExtensionArray
"""
# Notes: ndarray[bool] is only reachable when via getitem_mgr, which
# Notes: ndarray[bool] is only reachable when via get_rows_with_mask, which
# is only for Series, i.e. self.ndim == 1.

# return same dims as we currently have
Expand All @@ -2023,7 +2029,7 @@ def _slice(
return self.values[slicer]

@final
def getitem_block_index(self, slicer: slice) -> ExtensionBlock:
def slice_block_rows(self, slicer: slice) -> Self:
"""
Perform __getitem__-like specialized to slicing along index.
"""
Expand Down
5 changes: 4 additions & 1 deletion pandas/core/internals/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,10 @@ def _get_block_for_concat_plan(
slc = lib.maybe_indices_to_slice(ax0_blk_indexer, max_len)
# TODO: in all extant test cases 2023-04-08 we have a slice here.
# Will this always be the case?
nb = blk.getitem_block(slc)
if isinstance(slc, slice):
nb = blk.slice_block_columns(slc)
else:
nb = blk.take_block_columns(slc)

# assert nb.shape == (len(bp), mgr.shape[1])
return nb
Expand Down
20 changes: 5 additions & 15 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@
import pandas.core.algorithms as algos
from pandas.core.arrays import DatetimeArray
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
import pandas.core.common as com
from pandas.core.construction import (
ensure_wrapped_if_datetimelike,
extract_array,
Expand Down Expand Up @@ -1872,7 +1871,7 @@ def concat_horizontal(cls, mgrs: list[Self], axes: list[Index]) -> Self:
# We need to do getitem_block here otherwise we would be altering
# blk.mgr_locs in place, which would render it invalid. This is only
# relevant in the copy=False case.
nb = blk.getitem_block(slice(None))
nb = blk.slice_block_columns(slice(None))
nb._mgr_locs = nb._mgr_locs.add(offset)
blocks.append(nb)

Expand Down Expand Up @@ -2018,21 +2017,12 @@ def _blklocs(self):
"""compat with BlockManager"""
return None

def getitem_mgr(self, indexer: slice | np.ndarray) -> SingleBlockManager:
def get_rows_with_mask(self, indexer: npt.NDArray[np.bool_]) -> Self:
# similar to get_slice, but not restricted to slice indexer
blk = self._block
if (
using_copy_on_write()
and isinstance(indexer, np.ndarray)
and len(indexer) > 0
and com.is_bool_indexer(indexer)
and indexer.all()
):
if using_copy_on_write() and len(indexer) > 0 and indexer.all():
return type(self)(blk.copy(deep=False), self.index)
array = blk._slice(indexer)
if array.ndim > 1:
# This will be caught by Series._get_values
raise ValueError("dimension-expanding indexing not allowed")
array = blk.values[indexer]

bp = BlockPlacement(slice(0, len(array)))
# TODO(CoW) in theory only need to track reference if new_array is a view
Expand All @@ -2048,7 +2038,7 @@ def get_slice(self, slobj: slice, axis: AxisInt = 0) -> SingleBlockManager:
raise IndexError("Requested axis not found in manager")

blk = self._block
array = blk._slice(slobj)
array = blk.values[slobj]
bp = BlockPlacement(slice(0, len(array)))
# TODO this method is only used in groupby SeriesSplitter at the moment,
# so passing refs is not yet covered by the tests
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1004,7 +1004,7 @@ def __getitem__(self, key):
if com.is_bool_indexer(key):
key = check_bool_indexer(self.index, key)
key = np.asarray(key, dtype=bool)
return self._get_values(key)
return self._get_rows_with_mask(key)

return self._get_with(key)

Expand Down Expand Up @@ -1060,8 +1060,8 @@ def _get_values_tuple(self, key: tuple):
new_ser._mgr.add_references(self._mgr) # type: ignore[arg-type]
return new_ser.__finalize__(self)

def _get_values(self, indexer: slice | npt.NDArray[np.bool_]) -> Series:
new_mgr = self._mgr.getitem_mgr(indexer)
def _get_rows_with_mask(self, indexer: npt.NDArray[np.bool_]) -> Series:
new_mgr = self._mgr.get_rows_with_mask(indexer)
return self._constructor(new_mgr, fastpath=True).__finalize__(self)

def _get_value(self, label, takeable: bool = False):
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/extension/base/getitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ def test_getitem_slice(self, data):
assert isinstance(result, type(data))

def test_getitem_ellipsis_and_slice(self, data):
# GH#40353 this is called from getitem_block_index
# GH#40353 this is called from slice_block_rows
result = data[..., :]
self.assert_extension_array_equal(result, data)

Expand Down
19 changes: 8 additions & 11 deletions pandas/tests/internals/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -942,12 +942,17 @@ def assert_slice_ok(mgr, axis, slobj):

if isinstance(slobj, slice):
sliced = mgr.get_slice(slobj, axis=axis)
elif mgr.ndim == 1 and axis == 0:
sliced = mgr.getitem_mgr(slobj)
elif (
mgr.ndim == 1
and axis == 0
and isinstance(slobj, np.ndarray)
and slobj.dtype == bool
):
sliced = mgr.get_rows_with_mask(slobj)
else:
# BlockManager doesn't support non-slice, SingleBlockManager
# doesn't support axis > 0
return
raise TypeError(slobj)

mat_slobj = (slice(None),) * axis + (slobj,)
tm.assert_numpy_array_equal(
Expand Down Expand Up @@ -978,14 +983,6 @@ def assert_slice_ok(mgr, axis, slobj):
mgr, ax, np.array([True, True, False], dtype=np.bool_)
)

# fancy indexer
assert_slice_ok(mgr, ax, [])
assert_slice_ok(mgr, ax, list(range(mgr.shape[ax])))

if mgr.shape[ax] >= 3:
assert_slice_ok(mgr, ax, [0, 1, 2])
assert_slice_ok(mgr, ax, [-1, -2, -3])

@pytest.mark.parametrize("mgr", MANAGERS)
def test_take(self, mgr):
def assert_take_ok(mgr, axis, indexer):
Expand Down

0 comments on commit 5c8c16e

Please sign in to comment.