diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 07849702c646d..53432bbc84781 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -190,6 +190,7 @@ Backwards incompatible API changes Previously a ``UnsupportedFunctionCall`` was raised (``AssertionError`` if ``min_count`` passed into :meth:`~DataFrameGroupby.median`) (:issue:`31485`) - :meth:`DataFrame.at` and :meth:`Series.at` will raise a ``TypeError`` instead of a ``ValueError`` if an incompatible key is passed, and ``KeyError`` if a missing key is passed, matching the behavior of ``.loc[]`` (:issue:`31722`) - Passing an integer dtype other than ``int64`` to ``np.array(period_index, dtype=...)`` will now raise ``TypeError`` instead of incorrectly using ``int64`` (:issue:`32255`) +- Passing an invalid ``fill_value`` to :meth:`Categorical.take` raises a ``ValueError`` instead of ``TypeError`` (:issue:`33660`) ``MultiIndex.get_indexer`` interprets `method` argument differently ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py new file mode 100644 index 0000000000000..0ed9de804c55e --- /dev/null +++ b/pandas/core/arrays/_mixins.py @@ -0,0 +1,62 @@ +from typing import Any, Sequence, TypeVar + +import numpy as np + +from pandas.errors import AbstractMethodError + +from pandas.core.algorithms import take +from pandas.core.arrays.base import ExtensionArray + +_T = TypeVar("_T", bound="NDArrayBackedExtensionArray") + + +class NDArrayBackedExtensionArray(ExtensionArray): + """ + ExtensionArray that is backed by a single NumPy ndarray. + """ + + _ndarray: np.ndarray + + def _from_backing_data(self: _T, arr: np.ndarray) -> _T: + """ + Construct a new ExtensionArray `new_array` with `arr` as its _ndarray. + + This should round-trip: + self == self._from_backing_data(self._ndarray) + """ + raise AbstractMethodError(self) + + # ------------------------------------------------------------------------ + + def take( + self: _T, + indices: Sequence[int], + allow_fill: bool = False, + fill_value: Any = None, + ) -> _T: + if allow_fill: + fill_value = self._validate_fill_value(fill_value) + + new_data = take( + self._ndarray, indices, allow_fill=allow_fill, fill_value=fill_value, + ) + return self._from_backing_data(new_data) + + def _validate_fill_value(self, fill_value): + """ + If a fill_value is passed to `take` convert it to a representation + suitable for self._ndarray, raising ValueError if this is not possible. + + Parameters + ---------- + fill_value : object + + Returns + ------- + fill_value : native representation + + Raises + ------ + ValueError + """ + raise AbstractMethodError(self) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index cdd0717849e96..0bb6ca8315a3a 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -49,9 +49,10 @@ from pandas.core import ops from pandas.core.accessor import PandasDelegate, delegate_names import pandas.core.algorithms as algorithms -from pandas.core.algorithms import _get_data_algo, factorize, take, take_1d, unique1d +from pandas.core.algorithms import _get_data_algo, factorize, take_1d, unique1d from pandas.core.array_algos.transforms import shift -from pandas.core.arrays.base import ExtensionArray, _extension_array_shared_docs +from pandas.core.arrays._mixins import _T, NDArrayBackedExtensionArray +from pandas.core.arrays.base import _extension_array_shared_docs from pandas.core.base import NoNewAttributesMixin, PandasObject, _shared_docs import pandas.core.common as com from pandas.core.construction import array, extract_array, sanitize_array @@ -199,7 +200,7 @@ def contains(cat, key, container): return any(loc_ in container for loc_ in loc) -class Categorical(ExtensionArray, PandasObject): +class Categorical(NDArrayBackedExtensionArray, PandasObject): """ Represent a categorical variable in classic R / S-plus fashion. @@ -1238,7 +1239,7 @@ def shift(self, periods, fill_value=None): def _validate_fill_value(self, fill_value): """ - Convert a user-facing fill_value to a representation to use with our + Convert a user-facing fill_value to a representation to use with our underlying ndarray, raising ValueError if this is not possible. Parameters @@ -1768,7 +1769,7 @@ def fillna(self, value=None, method=None, limit=None): return self._constructor(codes, dtype=self.dtype, fastpath=True) - def take(self, indexer, allow_fill: bool = False, fill_value=None): + def take(self: _T, indexer, allow_fill: bool = False, fill_value=None) -> _T: """ Take elements from the Categorical. @@ -1837,16 +1838,23 @@ def take(self, indexer, allow_fill: bool = False, fill_value=None): Categories (2, object): [a, b] Specifying a fill value that's not in ``self.categories`` - will raise a ``TypeError``. + will raise a ``ValueError``. """ - indexer = np.asarray(indexer, dtype=np.intp) + return NDArrayBackedExtensionArray.take( + self, indexer, allow_fill=allow_fill, fill_value=fill_value + ) - if allow_fill: - # convert user-provided `fill_value` to codes - fill_value = self._validate_fill_value(fill_value) + # ------------------------------------------------------------------ + # NDArrayBackedExtensionArray compat - codes = take(self._codes, indexer, allow_fill=allow_fill, fill_value=fill_value) - return self._constructor(codes, dtype=self.dtype, fastpath=True) + @property + def _ndarray(self) -> np.ndarray: + return self._codes + + def _from_backing_data(self, arr: np.ndarray) -> "Categorical": + return self._constructor(arr, dtype=self.dtype, fastpath=True) + + # ------------------------------------------------------------------ def take_nd(self, indexer, allow_fill: bool = False, fill_value=None): # GH#27745 deprecate alias that other EAs dont have diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 27b2ed822a49f..f41964cc7b0c8 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -39,8 +39,9 @@ from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna from pandas.core import missing, nanops, ops -from pandas.core.algorithms import checked_add_with_arr, take, unique1d, value_counts +from pandas.core.algorithms import checked_add_with_arr, unique1d, value_counts from pandas.core.array_algos.transforms import shift +from pandas.core.arrays._mixins import _T, NDArrayBackedExtensionArray from pandas.core.arrays.base import ExtensionArray, ExtensionOpsMixin import pandas.core.common as com from pandas.core.construction import array, extract_array @@ -425,7 +426,9 @@ def _with_freq(self, freq): return self -class DatetimeLikeArrayMixin(ExtensionOpsMixin, AttributesMixin, ExtensionArray): +class DatetimeLikeArrayMixin( + ExtensionOpsMixin, AttributesMixin, NDArrayBackedExtensionArray +): """ Shared Base/Mixin class for DatetimeArray, TimedeltaArray, PeriodArray @@ -437,6 +440,20 @@ class DatetimeLikeArrayMixin(ExtensionOpsMixin, AttributesMixin, ExtensionArray) _generate_range """ + # ------------------------------------------------------------------ + # NDArrayBackedExtensionArray compat + + @property + def _ndarray(self) -> np.ndarray: + # NB: A bunch of Interval tests fail if we use ._data + return self.asi8 + + def _from_backing_data(self: _T, arr: np.ndarray) -> _T: + # Note: we do not retain `freq` + return type(self)(arr, dtype=self.dtype) # type: ignore + + # ------------------------------------------------------------------ + @property def ndim(self) -> int: return self._data.ndim @@ -665,16 +682,6 @@ def unique(self): result = unique1d(self.asi8) return type(self)(result, dtype=self.dtype) - def take(self, indices, allow_fill=False, fill_value=None): - if allow_fill: - fill_value = self._validate_fill_value(fill_value) - - new_values = take( - self.asi8, indices, allow_fill=allow_fill, fill_value=fill_value - ) - - return type(self)(new_values, dtype=self.dtype) - @classmethod def _concat_same_type(cls, to_concat, axis: int = 0):