Skip to content

Commit

Permalink
REF: implement NDFrame._from_mgr (#52132)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel committed Jun 25, 2023
1 parent 54bf475 commit 21ff2fb
Show file tree
Hide file tree
Showing 11 changed files with 155 additions and 73 deletions.
2 changes: 1 addition & 1 deletion pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -735,7 +735,7 @@ def apply(self) -> DataFrame | Series:
with np.errstate(all="ignore"):
results = self.obj._mgr.apply("apply", func=self.func)
# _constructor will retain self.index and self.columns
return self.obj._constructor(data=results)
return self.obj._constructor_from_mgr(results, axes=results.axes)

# broadcasting
if self.result_type == "broadcast":
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arraylike.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,7 @@ def _reconstruct(result):
return result
if isinstance(result, BlockManager):
# we went through BlockManager.apply e.g. np.sqrt
result = self._constructor(result, **reconstruct_kwargs, copy=False)
result = self._constructor_from_mgr(result, axes=result.axes)
else:
# we converted an array, lost our axes
result = self._constructor(
Expand Down
65 changes: 43 additions & 22 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -637,8 +637,25 @@ class DataFrame(NDFrame, OpsMixin):
def _constructor(self) -> Callable[..., DataFrame]:
return DataFrame

def _constructor_from_mgr(self, mgr, axes):
    """
    Construct a frame-like result from a manager and its axes.

    Parameters
    ----------
    mgr : manager
        Manager holding the data for the result.
    axes : list
        Axes for the result.  On the overridden-``_constructor`` path this
        must be the very same object as ``mgr.axes`` (checked by assertion).

    Returns
    -------
    Whatever ``self._from_mgr`` returns when ``self._constructor`` is
    ``DataFrame``; otherwise the result of ``self._constructor(mgr)``.
    """
    if self._constructor is not DataFrame:
        # A subclass supplied its own _constructor: it only receives the
        # manager, so the axes passed in must be the manager's own.
        assert axes is mgr.axes
        return self._constructor(mgr)

    # we are pandas.DataFrame (or a subclass that doesn't override _constructor)
    # NOTE(review): this goes through ``self._from_mgr`` whereas
    # ``_sliced_from_mgr`` calls ``Series._from_mgr`` on the class itself --
    # confirm the asymmetry is intended for subclasses.
    return self._from_mgr(mgr, axes=axes)

_constructor_sliced: Callable[..., Series] = Series

def _sliced_from_mgr(self, mgr, axes) -> Series:
    """
    Build the sliced (``Series``) result for ``mgr`` and ``axes``.

    Delegates to ``Series._from_mgr``; ``self`` is not consulted.
    """
    sliced = Series._from_mgr(mgr, axes)
    return sliced

def _constructor_sliced_from_mgr(self, mgr, axes):
    """
    Construct the sliced result from a manager and its axes.

    Parameters
    ----------
    mgr : manager
        Manager holding the data for the result.
    axes : list
        Axes for the result.  When ``_constructor_sliced`` has been
        overridden this must be the very same object as ``mgr.axes``
        (checked by assertion).

    Returns
    -------
    The result of ``self._sliced_from_mgr(mgr, axes)`` when
    ``self._constructor_sliced`` is ``Series``; otherwise the result of
    ``self._constructor_sliced(mgr)``.
    """
    if self._constructor_sliced is not Series:
        # Overridden sliced constructor: it is handed only the manager, so
        # the caller's axes must be the manager's own.
        assert axes is mgr.axes
        return self._constructor_sliced(mgr)

    # Plain Series: use the dedicated from-manager path.
    return self._sliced_from_mgr(mgr, axes)

# ----------------------------------------------------------------------
# Constructors

Expand Down Expand Up @@ -3668,9 +3685,9 @@ def _ixs(self, i: int, axis: AxisInt = 0) -> Series:

# if we are a copy, mark as such
copy = isinstance(new_mgr.array, np.ndarray) and new_mgr.array.base is None
result = self._constructor_sliced(new_mgr, name=self.index[i]).__finalize__(
self
)
result = self._constructor_sliced_from_mgr(new_mgr, axes=new_mgr.axes)
result._name = self.index[i]
result = result.__finalize__(self)
result._set_is_copy(self, copy=copy)
return result

Expand Down Expand Up @@ -3723,7 +3740,7 @@ def _getitem_nocopy(self, key: list):
copy=False,
only_slice=True,
)
return self._constructor(new_mgr)
return self._constructor_from_mgr(new_mgr, axes=new_mgr.axes)

def __getitem__(self, key):
check_dict_or_set_indexers(key)
Expand Down Expand Up @@ -4259,9 +4276,10 @@ def _box_col_values(self, values: SingleDataManager, loc: int) -> Series:
# Lookup in columns so that if e.g. a str datetime was passed
# we attach the Timestamp object as the name.
name = self.columns[loc]
klass = self._constructor_sliced
# We get index=self.index because values is a SingleDataManager
return klass(values, name=name, fastpath=True).__finalize__(self)
obj = self._constructor_sliced_from_mgr(values, axes=values.axes)
obj._name = name
return obj.__finalize__(self)

# ----------------------------------------------------------------------
# Lookup Caching
Expand Down Expand Up @@ -4735,7 +4753,7 @@ def predicate(arr: ArrayLike) -> bool:
return True

mgr = self._mgr._get_data_subset(predicate).copy(deep=None)
return type(self)(mgr).__finalize__(self)
return self._constructor_from_mgr(mgr, axes=mgr.axes).__finalize__(self)

def insert(
self,
Expand Down Expand Up @@ -5547,7 +5565,7 @@ def shift(
fill_value=fill_value,
allow_dups=True,
)
res_df = self._constructor(mgr)
res_df = self._constructor_from_mgr(mgr, axes=mgr.axes)
return res_df.__finalize__(self, method="shift")

return super().shift(
Expand Down Expand Up @@ -6075,7 +6093,8 @@ class max type

@doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"])
def isna(self) -> DataFrame:
result = self._constructor(self._mgr.isna(func=isna))
res_mgr = self._mgr.isna(func=isna)
result = self._constructor_from_mgr(res_mgr, axes=res_mgr.axes)
return result.__finalize__(self, method="isna")

@doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"])
Expand Down Expand Up @@ -6787,7 +6806,7 @@ def sort_values(
self._get_block_manager_axis(axis), default_index(len(indexer))
)

result = self._constructor(new_data)
result = self._constructor_from_mgr(new_data, axes=new_data.axes)
if inplace:
return self._update_inplace(result)
else:
Expand Down Expand Up @@ -7481,7 +7500,7 @@ def _dispatch_frame_op(
if not is_list_like(right):
# i.e. scalar, faster than checking np.ndim(right) == 0
bm = self._mgr.apply(array_op, right=right)
return self._constructor(bm)
return self._constructor_from_mgr(bm, axes=bm.axes)

elif isinstance(right, DataFrame):
assert self.index.equals(right.index)
Expand All @@ -7501,7 +7520,7 @@ def _dispatch_frame_op(
right._mgr, # type: ignore[arg-type]
array_op,
)
return self._constructor(bm)
return self._constructor_from_mgr(bm, axes=bm.axes)

elif isinstance(right, Series) and axis == 1:
# axis=1 means we want to operate row-by-row
Expand Down Expand Up @@ -9480,7 +9499,8 @@ def diff(self, periods: int = 1, axis: Axis = 0) -> DataFrame:
axis = 0

new_data = self._mgr.diff(n=periods)
return self._constructor(new_data).__finalize__(self, "diff")
res_df = self._constructor_from_mgr(new_data, axes=new_data.axes)
return res_df.__finalize__(self, "diff")

# ----------------------------------------------------------------------
# Function application
Expand Down Expand Up @@ -10336,12 +10356,13 @@ def _series_round(ser: Series, decimals: int) -> Series:
# Dispatch to Block.round
# Argument "decimals" to "round" of "BaseBlockManager" has incompatible
# type "Union[int, integer[Any]]"; expected "int"
return self._constructor(
self._mgr.round(
decimals=decimals, # type: ignore[arg-type]
using_cow=using_copy_on_write(),
),
).__finalize__(self, method="round")
new_mgr = self._mgr.round(
decimals=decimals, # type: ignore[arg-type]
using_cow=using_copy_on_write(),
)
return self._constructor_from_mgr(new_mgr, axes=new_mgr.axes).__finalize__(
self, method="round"
)
else:
raise TypeError("decimals must be an integer, a dict-like or a Series")

Expand Down Expand Up @@ -10893,7 +10914,7 @@ def _get_data() -> DataFrame:
# After possibly _get_data and transposing, we are now in the
# simple case where we can use BlockManager.reduce
res = df._mgr.reduce(blk_func)
out = df._constructor(res).iloc[0]
out = df._constructor_from_mgr(res, axes=res.axes).iloc[0]
if out_dtype is not None:
out = out.astype(out_dtype)
elif (df._mgr.get_dtypes() == object).any():
Expand Down Expand Up @@ -11507,7 +11528,7 @@ def quantile(
res = data._mgr.take(indexer[q_idx], verify=False)
res.axes[1] = q

result = self._constructor(res)
result = self._constructor_from_mgr(res, axes=res.axes)
return result.__finalize__(self, method="quantile")

def to_timestamp(
Expand Down Expand Up @@ -11835,7 +11856,7 @@ def _to_dict_of_blocks(self, copy: bool = True):
mgr = mgr_to_mgr(mgr, "block")
mgr = cast(BlockManager, mgr)
return {
k: self._constructor(v).__finalize__(self)
k: self._constructor_from_mgr(v, axes=v.axes).__finalize__(self)
for k, v, in mgr.to_dict(copy=copy).items()
}

Expand Down

0 comments on commit 21ff2fb

Please sign in to comment.