Skip to content

BUG: DataFrame.sort_values on multiple columns raises NotImplementedError when one column is float16 #55624

@bkowshik

Description

@bkowshik

Pandas version checks

  • I have checked that this issue has not already been reported.

  • I have confirmed this bug exists on the latest version of pandas.

  • I have confirmed this bug exists on the main branch of pandas.

Reproducible Example

Example


>>> merged_df.dtypes

row_id                           int64
series_id                       object
step                             int32
event                           object
score                          float16
timestamp    datetime64[ns, UTC-04:00]
yyyymmdd                        object
dtype: object

Issue Description

>>> merged_df.sort_values(by=['score', 'yyyymmdd'])

---------------------------------------------------------------------------
NotImplementedError                       Traceback (most recent call last)
Cell In[17], line 1
----> 1 merged_df.sort_values(by=['score', 'yyyymmdd'])

File /opt/conda/lib/python3.10/site-packages/pandas/core/frame.py:6751, in DataFrame.sort_values(self, by, axis, ascending, inplace, kind, na_position, ignore_index, key)
   6743     if key is not None:
   6744         # error: List comprehension has incompatible type List[Series];
   6745         # expected List[ndarray]
   6746         keys = [
   6747             Series(k, name=name)  # type: ignore[misc]
   6748             for (k, name) in zip(keys, by)
   6749         ]
-> 6751     indexer = lexsort_indexer(
   6752         keys, orders=ascending, na_position=na_position, key=key
   6753     )
   6754 elif len(by):
   6755     # len(by) == 1
   6757     by = by[0]

File /opt/conda/lib/python3.10/site-packages/pandas/core/sorting.py:341, in lexsort_indexer(keys, orders, na_position, key)
    338 keys = [ensure_key_mapped(k, key) for k in keys]
    340 for k, order in zip(keys, orders):
--> 341     cat = Categorical(k, ordered=True)
    343     if na_position not in ["last", "first"]:
    344         raise ValueError(f"invalid na_position: {na_position}")

File /opt/conda/lib/python3.10/site-packages/pandas/core/arrays/categorical.py:438, in Categorical.__init__(self, values, categories, ordered, dtype, fastpath, copy)
    431             raise TypeError(
    432                 "'values' is not ordered, please "
    433                 "explicitly specify the categories order "
    434                 "by passing in a categories argument."
    435             ) from err
    437     # we're inferring from values
--> 438     dtype = CategoricalDtype(categories, dtype.ordered)
    440 elif is_categorical_dtype(values.dtype):
    441     old_codes = extract_array(values)._codes

File /opt/conda/lib/python3.10/site-packages/pandas/core/dtypes/dtypes.py:187, in CategoricalDtype.__init__(self, categories, ordered)
    186 def __init__(self, categories=None, ordered: Ordered = False) -> None:
--> 187     self._finalize(categories, ordered, fastpath=False)

File /opt/conda/lib/python3.10/site-packages/pandas/core/dtypes/dtypes.py:344, in CategoricalDtype._finalize(self, categories, ordered, fastpath)
    341     self.validate_ordered(ordered)
    343 if categories is not None:
--> 344     categories = self.validate_categories(categories, fastpath=fastpath)
    346 self._categories = categories
    347 self._ordered = ordered

File /opt/conda/lib/python3.10/site-packages/pandas/core/dtypes/dtypes.py:533, in CategoricalDtype.validate_categories(categories, fastpath)
    529     raise TypeError(
    530         f"Parameter 'categories' must be list-like, was {repr(categories)}"
    531     )
    532 if not isinstance(categories, ABCIndex):
--> 533     categories = Index._with_infer(categories, tupleize_cols=False)
    535 if not fastpath:
    536     if categories.hasnans:

File /opt/conda/lib/python3.10/site-packages/pandas/core/indexes/base.py:671, in Index._with_infer(cls, *args, **kwargs)
    665 @classmethod
    666 def _with_infer(cls, *args, **kwargs):
    667     """
    668     Constructor that uses the 1.0.x behavior inferring numeric dtypes
    669     for ndarray[object] inputs.
    670     """
--> 671     result = cls(*args, **kwargs)
    673     if result.dtype == _dtype_obj and not result._is_multi:
    674         # error: Argument 1 to "maybe_convert_objects" has incompatible type
    675         # "Union[ExtensionArray, ndarray[Any, Any]]"; expected
    676         # "ndarray[Any, Any]"
    677         values = lib.maybe_convert_objects(result._values)  # type: ignore[arg-type]

File /opt/conda/lib/python3.10/site-packages/pandas/core/indexes/base.py:562, in Index.__new__(cls, data, dtype, copy, name, tupleize_cols)
    558 arr = ensure_wrapped_if_datetimelike(arr)
    560 klass = cls._dtype_to_subclass(arr.dtype)
--> 562 arr = klass._ensure_array(arr, arr.dtype, copy=False)
    563 return klass._simple_new(arr, name, refs=refs)

File /opt/conda/lib/python3.10/site-packages/pandas/core/indexes/base.py:575, in Index._ensure_array(cls, data, dtype, copy)
    572     raise ValueError("Index data must be 1-dimensional")
    573 elif dtype == np.float16:
    574     # float16 not supported (no indexing engine)
--> 575     raise NotImplementedError("float16 indexes are not supported")
    577 if copy:
    578     # asarray_tuplesafe does not always copy underlying data,
    579     #  so need to make sure that this happens
    580     data = data.copy()

NotImplementedError: float16 indexes are not supported

Expected Behavior

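`DataFrame.sort_values` should support sorting by multiple columns when one of them has the float16 dtype, instead of raising `NotImplementedError: float16 indexes are not supported`.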

Installed Versions

INSTALLED VERSIONS

commit : 0f43794
python : 3.10.12.final.0
python-bits : 64
OS : Linux
OS-release : 5.15.133+
Version : #1 SMP Fri Oct 13 10:14:53 UTC 2023
machine : x86_64
processor : x86_64
byteorder : little
LC_ALL : C.UTF-8
LANG : C.UTF-8
LOCALE : en_US.UTF-8

pandas : 2.0.3
numpy : 1.23.5
pytz : 2023.3
dateutil : 2.8.2
setuptools : 68.0.0
pip : 23.1.2
Cython : 0.29.35
pytest : 7.4.1
hypothesis : None
sphinx : None
blosc : None
feather : 0.4.1
xlsxwriter : None
lxml.etree : 4.9.3
html5lib : 1.1
pymysql : None
psycopg2 : None
jinja2 : 3.1.2
IPython : 8.14.0
pandas_datareader: 0.10.0
bs4 : 4.12.2
bottleneck : None
brotli :
fastparquet : None
fsspec : 2023.9.0
gcsfs : 2023.6.0
matplotlib : 3.7.2
numba : 0.57.1
numexpr : 2.8.5
odfpy : None
openpyxl : 3.1.2
pandas_gbq : None
pyarrow : 9.0.0
pyreadstat : None
pyxlsb : None
s3fs : 2023.9.0
scipy : 1.11.2
snappy : None
sqlalchemy : 2.0.17
tables : 3.8.0
tabulate : 0.9.0
xarray : 2023.8.0
xlrd : None
zstandard : 0.19.0
tzdata : 2023.3
qtpy : None
pyqt5 : None

Metadata

Metadata

Assignees

No one assigned

    Labels

    Bug; Needs Info (Clarification about behavior needed to assess issue); Needs Triage (Issue that has not been reviewed by a pandas team member)

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions