Skip to content

Commit

Permalink
REF: NDFrame dont mixin SelectionMixin (#40857)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel committed Apr 10, 2021
1 parent 9373dbe commit 33fec60
Show file tree
Hide file tree
Showing 9 changed files with 108 additions and 117 deletions.
4 changes: 2 additions & 2 deletions pandas/_testing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -918,12 +918,12 @@ def external_error_raised(expected_exception: type[Exception]) -> ContextManager
return pytest.raises(expected_exception, match=None)


cython_table = pd.core.base.SelectionMixin._cython_table.items()
cython_table = pd.core.common._cython_table.items()


def get_cython_table_params(ndframe, func_names_and_expected):
"""
Combine frame, functions from SelectionMixin._cython_table
Combine frame, functions from com._cython_table
keys and expected result.
Parameters
Expand Down
53 changes: 46 additions & 7 deletions pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
from pandas.core.algorithms import safe_sort
from pandas.core.base import (
DataError,
SelectionMixin,
SpecificationError,
)
import pandas.core.common as com
Expand Down Expand Up @@ -173,7 +174,7 @@ def agg(self) -> FrameOrSeriesUnion | None:
return self.agg_list_like()

if callable(arg):
f = obj._get_cython_func(arg)
f = com.get_cython_func(arg)
if f and not args and not kwargs:
return getattr(obj, f)()

Expand Down Expand Up @@ -301,10 +302,10 @@ def transform_str_or_callable(self, func) -> FrameOrSeriesUnion:
kwargs = self.kwargs

if isinstance(func, str):
return obj._try_aggregate_string_function(func, *args, **kwargs)
return self._try_aggregate_string_function(obj, func, *args, **kwargs)

if not args and not kwargs:
f = obj._get_cython_func(func)
f = com.get_cython_func(func)
if f:
return getattr(obj, f)()

Expand All @@ -327,7 +328,10 @@ def agg_list_like(self) -> FrameOrSeriesUnion:
obj = self.obj
arg = cast(List[AggFuncTypeBase], self.f)

if obj._selected_obj.ndim == 1:
if not isinstance(obj, SelectionMixin):
# i.e. obj is Series or DataFrame
selected_obj = obj
elif obj._selected_obj.ndim == 1:
selected_obj = obj._selected_obj
else:
selected_obj = obj._obj_with_exclusions
Expand Down Expand Up @@ -406,13 +410,19 @@ def agg_dict_like(self) -> FrameOrSeriesUnion:
obj = self.obj
arg = cast(AggFuncTypeDict, self.f)

selected_obj = obj._selected_obj
if not isinstance(obj, SelectionMixin):
# i.e. obj is Series or DataFrame
selected_obj = obj
selection = None
else:
selected_obj = obj._selected_obj
selection = obj._selection

arg = self.normalize_dictlike_arg("agg", selected_obj, arg)

if selected_obj.ndim == 1:
# key only used for output
colg = obj._gotitem(obj._selection, ndim=1)
colg = obj._gotitem(selection, ndim=1)
results = {key: colg.agg(how) for key, how in arg.items()}
else:
# key used for column selection and output
Expand Down Expand Up @@ -486,7 +496,7 @@ def maybe_apply_str(self) -> FrameOrSeriesUnion | None:
self.kwargs["axis"] = self.axis
elif self.axis != 0:
raise ValueError(f"Operation {f} does not support axis=1")
return obj._try_aggregate_string_function(f, *self.args, **self.kwargs)
return self._try_aggregate_string_function(obj, f, *self.args, **self.kwargs)

def maybe_apply_multiple(self) -> FrameOrSeriesUnion | None:
"""
Expand Down Expand Up @@ -547,6 +557,35 @@ def normalize_dictlike_arg(
func = new_func
return func

def _try_aggregate_string_function(self, obj, arg: str, *args, **kwargs):
    """
    Resolve the string ``arg`` to an operation on ``obj`` and apply it.

    Resolution order:

    - an attribute of ``obj`` named ``arg``: called with ``*args`` and
      ``**kwargs`` when it is callable, otherwise returned as-is
    - a NumPy function named ``arg``, only when ``obj`` defines ``__array__``
    - otherwise raise

    Parameters
    ----------
    obj : object
        Object the operation is applied to.
    arg : str
        Name of the attribute of ``obj`` or of the NumPy function.
    *args, **kwargs
        Forwarded to the resolved callable.

    Returns
    -------
    object
        Result of the call, or the attribute value when the attribute is
        not callable.

    Raises
    ------
    AssertionError
        If positional arguments, or keyword arguments other than ``axis``,
        are passed together with a non-callable attribute.
    AttributeError
        If ``arg`` resolves neither on ``obj`` nor (for objects defining
        ``__array__``) on NumPy.
    """
    assert isinstance(arg, str)

    f = getattr(obj, arg, None)
    if f is not None:
        if callable(f):
            return f(*args, **kwargs)

        # people may try to aggregate on a non-callable attribute
        # but don't let them think they can pass args to it.
        # Raised explicitly rather than via ``assert`` so that the check is
        # not stripped when Python runs with -O.
        if args or any(kwarg != "axis" for kwarg in kwargs):
            raise AssertionError(
                f"'{arg}' is not callable and does not accept arguments"
            )
        return f

    f = getattr(np, arg, None)
    if f is not None and hasattr(obj, "__array__"):
        # in particular exclude Window
        return f(obj, *args, **kwargs)

    raise AttributeError(
        f"'{arg}' is not a valid function for '{type(obj).__name__}' object"
    )


class FrameApply(Apply):
obj: DataFrame
Expand Down
76 changes: 2 additions & 74 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,10 @@
Base and utility classes for pandas objects.
"""

import builtins
import textwrap
from typing import (
TYPE_CHECKING,
Any,
Callable,
Dict,
FrozenSet,
Optional,
Expand Down Expand Up @@ -176,36 +174,6 @@ class SelectionMixin:
_internal_names = ["_cache", "__setstate__"]
_internal_names_set = set(_internal_names)

# Python builtins mapped to the NumPy function used in their place
# (consulted by _is_builtin_func).
_builtin_table = {builtins.sum: np.sum, builtins.max: np.max, builtins.min: np.min}

# Builtin and NumPy reductions/accumulations mapped to a method name;
# each nan-aware NumPy variant shares the name of its plain counterpart
# (consulted by _get_cython_func).
_cython_table = {
builtins.sum: "sum",
builtins.max: "max",
builtins.min: "min",
np.all: "all",
np.any: "any",
np.sum: "sum",
np.nansum: "sum",
np.mean: "mean",
np.nanmean: "mean",
np.prod: "prod",
np.nanprod: "prod",
np.std: "std",
np.nanstd: "std",
np.var: "var",
np.nanvar: "var",
np.median: "median",
np.nanmedian: "median",
np.max: "max",
np.nanmax: "max",
np.min: "min",
np.nanmin: "min",
np.cumprod: "cumprod",
np.nancumprod: "cumprod",
np.cumsum: "cumsum",
np.nancumsum: "cumsum",
}

@property
def _selection_name(self):
"""
Expand All @@ -216,6 +184,7 @@ def _selection_name(self):
"""
return self._selection

@final
@property
def _selection_list(self):
if not isinstance(
Expand All @@ -240,6 +209,7 @@ def _selected_obj(self):
def ndim(self) -> int:
return self._selected_obj.ndim

@final
@cache_readonly
def _obj_with_exclusions(self):
# error: "SelectionMixin" has no attribute "obj"
Expand Down Expand Up @@ -308,48 +278,6 @@ def aggregate(self, func, *args, **kwargs):

agg = aggregate

def _try_aggregate_string_function(self, arg: str, *args, **kwargs):
    """
    Resolve the string ``arg`` to an operation on ``self`` and apply it.

    Resolution order:

    - an attribute of ``self`` named ``arg``: called with ``*args`` and
      ``**kwargs`` when it is callable, otherwise returned as-is
    - a NumPy function named ``arg``, only when ``self`` defines ``__array__``
    - otherwise raise

    Parameters
    ----------
    arg : str
        Name of the attribute of ``self`` or of the NumPy function.
    *args, **kwargs
        Forwarded to the resolved callable.

    Returns
    -------
    object
        Result of the call, or the attribute value when the attribute is
        not callable.

    Raises
    ------
    AssertionError
        If positional arguments, or keyword arguments other than ``axis``,
        are passed together with a non-callable attribute.
    AttributeError
        If ``arg`` resolves neither on ``self`` nor (for objects defining
        ``__array__``) on NumPy.
    """
    assert isinstance(arg, str)

    f = getattr(self, arg, None)
    if f is not None:
        if callable(f):
            return f(*args, **kwargs)

        # people may try to aggregate on a non-callable attribute
        # but don't let them think they can pass args to it.
        # Raised explicitly rather than via ``assert`` so that the check is
        # not stripped when Python runs with -O.
        if args or any(kwarg != "axis" for kwarg in kwargs):
            raise AssertionError(
                f"'{arg}' is not callable and does not accept arguments"
            )
        return f

    f = getattr(np, arg, None)
    if f is not None and hasattr(self, "__array__"):
        # in particular exclude Window
        return f(self, *args, **kwargs)

    raise AttributeError(
        f"'{arg}' is not a valid function for '{type(self).__name__}' object"
    )

def _get_cython_func(self, arg: Callable) -> Optional[str]:
    """
    Look ``arg`` up in ``self._cython_table``.

    Returns the name mapped to ``arg``, or None when the table has no
    entry for it.
    """
    internal_name = self._cython_table.get(arg, None)
    return internal_name

def _is_builtin_func(self, arg):
    """
    Swap ``arg`` for its replacement from ``self._builtin_table``.

    Returns the mapped function when ``arg`` is a key of the table;
    otherwise ``arg`` is handed back unchanged.
    """
    replacements = self._builtin_table
    return replacements[arg] if arg in replacements else arg


class IndexOpsMixin(OpsMixin):
"""
Expand Down
47 changes: 47 additions & 0 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"""
from __future__ import annotations

import builtins
from collections import (
abc,
defaultdict,
Expand Down Expand Up @@ -532,3 +533,49 @@ def require_length_match(data, index: Index):
"does not match length of index "
f"({len(index)})"
)


# Python builtins mapped to the NumPy function used in their place
# (consulted by is_builtin_func).
_builtin_table = {builtins.sum: np.sum, builtins.max: np.max, builtins.min: np.min}

# Builtin and NumPy reductions/accumulations mapped to a method name;
# each nan-aware NumPy variant shares the name of its plain counterpart
# (consulted by get_cython_func).
_cython_table = {
builtins.sum: "sum",
builtins.max: "max",
builtins.min: "min",
np.all: "all",
np.any: "any",
np.sum: "sum",
np.nansum: "sum",
np.mean: "mean",
np.nanmean: "mean",
np.prod: "prod",
np.nanprod: "prod",
np.std: "std",
np.nanstd: "std",
np.var: "var",
np.nanvar: "var",
np.median: "median",
np.nanmedian: "median",
np.max: "max",
np.nanmax: "max",
np.min: "min",
np.nanmin: "min",
np.cumprod: "cumprod",
np.nancumprod: "cumprod",
np.cumsum: "cumsum",
np.nancumsum: "cumsum",
}


def get_cython_func(arg: Callable) -> str | None:
    """
    Look ``arg`` up in the module-level ``_cython_table``.

    Returns the name mapped to ``arg``, or None when the table has no
    entry for it.
    """
    internal_name = _cython_table.get(arg, None)
    return internal_name


def is_builtin_func(arg):
    """
    Swap ``arg`` for its replacement from the module-level ``_builtin_table``.

    Returns the mapped function when ``arg`` is a key of the table;
    otherwise ``arg`` is handed back unchanged.
    """
    return _builtin_table[arg] if arg in _builtin_table else arg
19 changes: 2 additions & 17 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,10 +109,7 @@
)
import pandas.core.algorithms as algos
from pandas.core.arrays import ExtensionArray
from pandas.core.base import (
PandasObject,
SelectionMixin,
)
from pandas.core.base import PandasObject
import pandas.core.common as com
from pandas.core.construction import (
create_series_with_explicit_dtype,
Expand Down Expand Up @@ -187,7 +184,7 @@
bool_t = bool # Need alias because NDFrame has def bool:


class NDFrame(PandasObject, SelectionMixin, indexing.IndexingMixin):
class NDFrame(PandasObject, indexing.IndexingMixin):
"""
N-dimensional analogue of DataFrame. Store multi-dimensional in a
size-mutable, labeled data structure
Expand Down Expand Up @@ -684,18 +681,6 @@ def size(self) -> int:
# error: Incompatible return value type (got "number", expected "int")
return np.prod(self.shape) # type: ignore[return-value]

@final
@property
def _selected_obj(self: FrameOrSeries) -> FrameOrSeries:
""" Return this object itself; internal compat with SelectionMixin. """
return self

@final
@property
def _obj_with_exclusions(self: FrameOrSeries) -> FrameOrSeries:
""" Return this object itself; internal compat with SelectionMixin. """
return self

@overload
def set_axis(
self: FrameOrSeries, labels, axis: Axis = ..., inplace: Literal[False] = ...
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
if relabeling:
ret.columns = columns
else:
cyfunc = self._get_cython_func(func)
cyfunc = com.get_cython_func(func)
if cyfunc and not args and not kwargs:
return getattr(self, cyfunc)()

Expand Down Expand Up @@ -536,7 +536,7 @@ def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs):
result.ravel(), index=data.index, name=data.name
)

func = self._get_cython_func(func) or func
func = com.get_cython_func(func) or func

if not isinstance(func, str):
return self._transform_general(func, *args, **kwargs)
Expand Down Expand Up @@ -1440,7 +1440,7 @@ def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs):
return self.obj._constructor(result, index=data.index, columns=data.columns)

# optimized transforms
func = self._get_cython_func(func) or func
func = com.get_cython_func(func) or func

if not isinstance(func, str):
return self._transform_general(func, *args, **kwargs)
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -909,7 +909,7 @@ def __iter__(self) -> Iterator[tuple[Hashable, FrameOrSeries]]:
)
def apply(self, func, *args, **kwargs):

func = self._is_builtin_func(func)
func = com.is_builtin_func(func)

# this is needed so we don't try and wrap strings. If we could
# resolve functions to their callable functions prior, this
Expand Down Expand Up @@ -1205,7 +1205,7 @@ def _aggregate_with_numba(self, data, func, *args, engine_kwargs=None, **kwargs)

@final
def _python_agg_general(self, func, *args, **kwargs):
func = self._is_builtin_func(func)
func = com.is_builtin_func(func)
f = lambda x: func(x, *args, **kwargs)

# iterate through "columns" ex exclusions to populate output dict
Expand Down
Loading

0 comments on commit 33fec60

Please sign in to comment.