Skip to content

Commit

Permalink
REF: rename PandasArray->NumpyExtensionArray (#54101)
Browse files Browse the repository at this point in the history
* REF: rename PandasArray->NumpyExtensionArray

* document PandasDtype->NumpyEADtype
  • Loading branch information
jbrockmendel committed Jul 18, 2023
1 parent f77a0e6 commit 3713834
Show file tree
Hide file tree
Showing 48 changed files with 293 additions and 252 deletions.
2 changes: 1 addition & 1 deletion ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
pandas.api.indexers.VariableOffsetWindowIndexer \
pandas.api.extensions.ExtensionDtype \
pandas.api.extensions.ExtensionArray \
pandas.arrays.PandasArray \
pandas.arrays.NumpyExtensionArray \
pandas.api.extensions.ExtensionArray._accumulate \
pandas.api.extensions.ExtensionArray._concat_same_type \
pandas.api.extensions.ExtensionArray._formatter \
Expand Down
2 changes: 1 addition & 1 deletion doc/source/development/contributing_codebase.rst
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,7 @@ be located.

8) Is your test for one of the pandas-provided ExtensionArrays (``Categorical``,
``DatetimeArray``, ``TimedeltaArray``, ``PeriodArray``, ``IntervalArray``,
``PandasArray``, ``FloatArray``, ``BoolArray``, ``StringArray``)?
``NumpyExtensionArray``, ``FloatArray``, ``BoolArray``, ``StringArray``)?
This test likely belongs in one of:

- tests.arrays
Expand Down
2 changes: 1 addition & 1 deletion doc/source/reference/extensions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ objects.
:template: autosummary/class_without_autosummary.rst

api.extensions.ExtensionArray
arrays.PandasArray
arrays.NumpyExtensionArray

.. We need this autosummary so that methods and attributes are generated.
.. Separate block, since they aren't classes.
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,7 @@ See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for mor

Other API changes
^^^^^^^^^^^^^^^^^
- :class:`arrays.PandasArray` has been renamed ``NumpyExtensionArray``, and its attached dtype has been renamed from ``PandasDtype`` to ``NumpyEADtype``; importing ``PandasArray`` still works until the next major version (:issue:`53694`)
-

.. ---------------------------------------------------------------------------
Expand Down
4 changes: 2 additions & 2 deletions pandas/_testing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@
from pandas.core.arrays import (
BaseMaskedArray,
ExtensionArray,
PandasArray,
NumpyExtensionArray,
)
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
from pandas.core.construction import extract_array
Expand Down Expand Up @@ -307,7 +307,7 @@ def box_expected(expected, box_cls, transpose: bool = True):
if box_cls is pd.array:
if isinstance(expected, RangeIndex):
# pd.array would return an IntegerArray
expected = PandasArray(np.asarray(expected._values))
expected = NumpyExtensionArray(np.asarray(expected._values))
else:
expected = pd.array(expected, copy=False)
elif box_cls is Index:
Expand Down
6 changes: 3 additions & 3 deletions pandas/_testing/asserters.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
CategoricalDtype,
DatetimeTZDtype,
ExtensionDtype,
PandasDtype,
NumpyEADtype,
)
from pandas.core.dtypes.missing import array_equivalent

Expand Down Expand Up @@ -577,12 +577,12 @@ def raise_assert_detail(

if isinstance(left, np.ndarray):
left = pprint_thing(left)
elif isinstance(left, (CategoricalDtype, PandasDtype, StringDtype)):
elif isinstance(left, (CategoricalDtype, NumpyEADtype, StringDtype)):
left = repr(left)

if isinstance(right, np.ndarray):
right = pprint_thing(right)
elif isinstance(right, (CategoricalDtype, PandasDtype, StringDtype)):
elif isinstance(right, (CategoricalDtype, NumpyEADtype, StringDtype)):
right = repr(right)

msg += f"""
Expand Down
21 changes: 19 additions & 2 deletions pandas/arrays/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
FloatingArray,
IntegerArray,
IntervalArray,
PandasArray,
NumpyExtensionArray,
PeriodArray,
SparseArray,
StringArray,
Expand All @@ -28,9 +28,26 @@
"FloatingArray",
"IntegerArray",
"IntervalArray",
"PandasArray",
"NumpyExtensionArray",
"PeriodArray",
"SparseArray",
"StringArray",
"TimedeltaArray",
]


def __getattr__(name: str):
if name == "PandasArray":
# GH#53694
import warnings

from pandas.util._exceptions import find_stack_level

warnings.warn(
"PandasArray has been renamed NumpyExtensionArray. Use that "
"instead. This alias will be removed in a future version.",
FutureWarning,
stacklevel=find_stack_level(),
)
return NumpyExtensionArray
raise AttributeError(f"module 'pandas.arrays' has no attribute '{name}'")
6 changes: 3 additions & 3 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
BaseMaskedDtype,
CategoricalDtype,
ExtensionDtype,
PandasDtype,
NumpyEADtype,
)
from pandas.core.dtypes.generic import (
ABCDatetimeArray,
Expand Down Expand Up @@ -1439,8 +1439,8 @@ def diff(arr, n: int, axis: AxisInt = 0):
else:
op = operator.sub

if isinstance(dtype, PandasDtype):
# PandasArray cannot necessarily hold shifted versions of itself.
if isinstance(dtype, NumpyEADtype):
# NumpyExtensionArray cannot necessarily hold shifted versions of itself.
arr = arr.to_numpy()
dtype = arr.dtype

Expand Down
4 changes: 2 additions & 2 deletions pandas/core/arrays/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from pandas.core.arrays.integer import IntegerArray
from pandas.core.arrays.interval import IntervalArray
from pandas.core.arrays.masked import BaseMaskedArray
from pandas.core.arrays.numpy_ import PandasArray
from pandas.core.arrays.numpy_ import NumpyExtensionArray
from pandas.core.arrays.period import (
PeriodArray,
period_array,
Expand All @@ -34,7 +34,7 @@
"FloatingArray",
"IntegerArray",
"IntervalArray",
"PandasArray",
"NumpyExtensionArray",
"PeriodArray",
"period_array",
"SparseArray",
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/arrays/_mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,8 @@ def fillna(self, value=None, method=None, limit: int | None = None) -> Self:
func(npvalues, limit=limit, mask=mask.T)
npvalues = npvalues.T

# TODO: PandasArray didn't used to copy, need tests for this
# TODO: NumpyExtensionArray didn't use to copy, need tests
# for this
new_values = self._from_backing_data(npvalues)
else:
# fill with value
Expand Down
8 changes: 4 additions & 4 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -2645,18 +2645,18 @@ def _str_map(
# Optimization to apply the callable `f` to the categories once
# and rebuild the result by `take`ing from the result with the codes.
# Returns the same type as the object-dtype implementation though.
from pandas.core.arrays import PandasArray
from pandas.core.arrays import NumpyExtensionArray

categories = self.categories
codes = self.codes
result = PandasArray(categories.to_numpy())._str_map(f, na_value, dtype)
result = NumpyExtensionArray(categories.to_numpy())._str_map(f, na_value, dtype)
return take_nd(result, codes, fill_value=na_value)

def _str_get_dummies(self, sep: str = "|"):
# sep may not be in categories. Just bail on this.
from pandas.core.arrays import PandasArray
from pandas.core.arrays import NumpyExtensionArray

return PandasArray(self.astype(str))._str_get_dummies(sep)
return NumpyExtensionArray(self.astype(str))._str_get_dummies(sep)

# ------------------------------------------------------------------------
# GroupBy Methods
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -652,7 +652,8 @@ def _validate_listlike(self, value, allow_object: bool = False):
msg = self._validation_error_message(value, True)
raise TypeError(msg)

# Do type inference if necessary up front (after unpacking PandasArray)
# Do type inference if necessary up front (after unpacking
# NumpyExtensionArray)
# e.g. we passed PeriodIndex.values and got an ndarray of Periods
value = extract_array(value, extract_numpy=True)
value = pd_array(value)
Expand Down
44 changes: 24 additions & 20 deletions pandas/core/arrays/numpy_.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from pandas.core.dtypes.astype import astype_array
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
from pandas.core.dtypes.common import pandas_dtype
from pandas.core.dtypes.dtypes import PandasDtype
from pandas.core.dtypes.dtypes import NumpyEADtype
from pandas.core.dtypes.missing import isna

from pandas.core import (
Expand Down Expand Up @@ -48,7 +48,7 @@

# error: Definition of "_concat_same_type" in base class "NDArrayBacked" is
# incompatible with definition in base class "ExtensionArray"
class PandasArray( # type: ignore[misc]
class NumpyExtensionArray( # type: ignore[misc]
OpsMixin,
NDArrayBackedExtensionArray,
ObjectStringArrayMixin,
Expand Down Expand Up @@ -76,19 +76,21 @@ class PandasArray( # type: ignore[misc]
"""

# If you're wondering why pd.Series(cls) doesn't put the array in an
# ExtensionBlock, search for `ABCPandasArray`. We check for
# ExtensionBlock, search for `ABCNumpyExtensionArray`. We check for
# that _typ to ensure that users don't unnecessarily use EAs inside
# pandas internals, which turns off things like block consolidation.
_typ = "npy_extension"
__array_priority__ = 1000
_ndarray: np.ndarray
_dtype: PandasDtype
_dtype: NumpyEADtype
_internal_fill_value = np.nan

# ------------------------------------------------------------------------
# Constructors

def __init__(self, values: np.ndarray | PandasArray, copy: bool = False) -> None:
def __init__(
self, values: np.ndarray | NumpyExtensionArray, copy: bool = False
) -> None:
if isinstance(values, type(self)):
values = values._ndarray
if not isinstance(values, np.ndarray):
Expand All @@ -98,19 +100,19 @@ def __init__(self, values: np.ndarray | PandasArray, copy: bool = False) -> None

if values.ndim == 0:
# Technically we support 2, but do not advertise that fact.
raise ValueError("PandasArray must be 1-dimensional.")
raise ValueError("NumpyExtensionArray must be 1-dimensional.")

if copy:
values = values.copy()

dtype = PandasDtype(values.dtype)
dtype = NumpyEADtype(values.dtype)
super().__init__(values, dtype)

@classmethod
def _from_sequence(
cls, scalars, *, dtype: Dtype | None = None, copy: bool = False
) -> PandasArray:
if isinstance(dtype, PandasDtype):
) -> NumpyExtensionArray:
if isinstance(dtype, NumpyEADtype):
dtype = dtype._dtype

# error: Argument "dtype" to "asarray" has incompatible type
Expand All @@ -131,14 +133,14 @@ def _from_sequence(
result = result.copy()
return cls(result)

def _from_backing_data(self, arr: np.ndarray) -> PandasArray:
def _from_backing_data(self, arr: np.ndarray) -> NumpyExtensionArray:
return type(self)(arr)

# ------------------------------------------------------------------------
# Data

@property
def dtype(self) -> PandasDtype:
def dtype(self) -> NumpyEADtype:
return self._dtype

# ------------------------------------------------------------------------
Expand All @@ -151,7 +153,7 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
# Lightly modified version of
# https://numpy.org/doc/stable/reference/generated/numpy.lib.mixins.NDArrayOperatorsMixin.html
# The primary modification is not boxing scalar return values
# in PandasArray, since pandas' ExtensionArrays are 1-d.
# in NumpyExtensionArray, since pandas' ExtensionArrays are 1-d.
out = kwargs.get("out", ())

result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
Expand All @@ -175,10 +177,12 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
return result

# Defer to the implementation of the ufunc on unwrapped values.
inputs = tuple(x._ndarray if isinstance(x, PandasArray) else x for x in inputs)
inputs = tuple(
x._ndarray if isinstance(x, NumpyExtensionArray) else x for x in inputs
)
if out:
kwargs["out"] = tuple(
x._ndarray if isinstance(x, PandasArray) else x for x in out
x._ndarray if isinstance(x, NumpyExtensionArray) else x for x in out
)
result = getattr(ufunc, method)(*inputs, **kwargs)

Expand Down Expand Up @@ -499,20 +503,20 @@ def to_numpy(
# ------------------------------------------------------------------------
# Ops

def __invert__(self) -> PandasArray:
def __invert__(self) -> NumpyExtensionArray:
return type(self)(~self._ndarray)

def __neg__(self) -> PandasArray:
def __neg__(self) -> NumpyExtensionArray:
return type(self)(-self._ndarray)

def __pos__(self) -> PandasArray:
def __pos__(self) -> NumpyExtensionArray:
return type(self)(+self._ndarray)

def __abs__(self) -> PandasArray:
def __abs__(self) -> NumpyExtensionArray:
return type(self)(abs(self._ndarray))

def _cmp_method(self, other, op):
if isinstance(other, PandasArray):
if isinstance(other, NumpyExtensionArray):
other = other._ndarray

other = ops.maybe_prepare_scalar_for_op(other, (len(self),))
Expand All @@ -538,7 +542,7 @@ def _cmp_method(self, other, op):

def _wrap_ndarray_result(self, result: np.ndarray):
# If we have timedelta64[ns] result, return a TimedeltaArray instead
# of a PandasArray
# of a NumpyExtensionArray
if result.dtype.kind == "m" and is_supported_unit(
get_unit_from_dtype(result.dtype)
):
Expand Down

0 comments on commit 3713834

Please sign in to comment.