-
-
Notifications
You must be signed in to change notification settings - Fork 19.3k
Description
Pandas version checks
-
I have checked that this issue has not already been reported.
-
I have confirmed this bug exists on the latest version of pandas.
-
I have confirmed this bug exists on the main branch of pandas.
Reproducible Example
Example
>>> merged_df.dtypes
row_id int64
series_id object
step int32
event object
score float16
timestamp datetime64[ns, UTC-04:00]
yyyymmdd object
dtype: object
Issue Description
>>> merged_df.sort_values(by=['score', 'yyyymmdd'])
---------------------------------------------------------------------------
NotImplementedError Traceback (most recent call last)
Cell In[17], line 1
----> 1 merged_df.sort_values(by=['score', 'yyyymmdd'])
File /opt/conda/lib/python3.10/site-packages/pandas/core/frame.py:6751, in DataFrame.sort_values(self, by, axis, ascending, inplace, kind, na_position, ignore_index, key)
6743 if key is not None:
6744 # error: List comprehension has incompatible type List[Series];
6745 # expected List[ndarray]
6746 keys = [
6747 Series(k, name=name) # type: ignore[misc]
6748 for (k, name) in zip(keys, by)
6749 ]
-> 6751 indexer = lexsort_indexer(
6752 keys, orders=ascending, na_position=na_position, key=key
6753 )
6754 elif len(by):
6755 # len(by) == 1
6757 by = by[0]
File /opt/conda/lib/python3.10/site-packages/pandas/core/sorting.py:341, in lexsort_indexer(keys, orders, na_position, key)
338 keys = [ensure_key_mapped(k, key) for k in keys]
340 for k, order in zip(keys, orders):
--> 341 cat = Categorical(k, ordered=True)
343 if na_position not in ["last", "first"]:
344 raise ValueError(f"invalid na_position: {na_position}")
File /opt/conda/lib/python3.10/site-packages/pandas/core/arrays/categorical.py:438, in Categorical.__init__(self, values, categories, ordered, dtype, fastpath, copy)
431 raise TypeError(
432 "'values' is not ordered, please "
433 "explicitly specify the categories order "
434 "by passing in a categories argument."
435 ) from err
437 # we're inferring from values
--> 438 dtype = CategoricalDtype(categories, dtype.ordered)
440 elif is_categorical_dtype(values.dtype):
441 old_codes = extract_array(values)._codes
File /opt/conda/lib/python3.10/site-packages/pandas/core/dtypes/dtypes.py:187, in CategoricalDtype.__init__(self, categories, ordered)
186 def __init__(self, categories=None, ordered: Ordered = False) -> None:
--> 187 self._finalize(categories, ordered, fastpath=False)
File /opt/conda/lib/python3.10/site-packages/pandas/core/dtypes/dtypes.py:344, in CategoricalDtype._finalize(self, categories, ordered, fastpath)
341 self.validate_ordered(ordered)
343 if categories is not None:
--> 344 categories = self.validate_categories(categories, fastpath=fastpath)
346 self._categories = categories
347 self._ordered = ordered
File /opt/conda/lib/python3.10/site-packages/pandas/core/dtypes/dtypes.py:533, in CategoricalDtype.validate_categories(categories, fastpath)
529 raise TypeError(
530 f"Parameter 'categories' must be list-like, was {repr(categories)}"
531 )
532 if not isinstance(categories, ABCIndex):
--> 533 categories = Index._with_infer(categories, tupleize_cols=False)
535 if not fastpath:
536 if categories.hasnans:
File /opt/conda/lib/python3.10/site-packages/pandas/core/indexes/base.py:671, in Index._with_infer(cls, *args, **kwargs)
665 @classmethod
666 def _with_infer(cls, *args, **kwargs):
667 """
668 Constructor that uses the 1.0.x behavior inferring numeric dtypes
669 for ndarray[object] inputs.
670 """
--> 671 result = cls(*args, **kwargs)
673 if result.dtype == _dtype_obj and not result._is_multi:
674 # error: Argument 1 to "maybe_convert_objects" has incompatible type
675 # "Union[ExtensionArray, ndarray[Any, Any]]"; expected
676 # "ndarray[Any, Any]"
677 values = lib.maybe_convert_objects(result._values) # type: ignore[arg-type]
File /opt/conda/lib/python3.10/site-packages/pandas/core/indexes/base.py:562, in Index.__new__(cls, data, dtype, copy, name, tupleize_cols)
558 arr = ensure_wrapped_if_datetimelike(arr)
560 klass = cls._dtype_to_subclass(arr.dtype)
--> 562 arr = klass._ensure_array(arr, arr.dtype, copy=False)
563 return klass._simple_new(arr, name, refs=refs)
File /opt/conda/lib/python3.10/site-packages/pandas/core/indexes/base.py:575, in Index._ensure_array(cls, data, dtype, copy)
572 raise ValueError("Index data must be 1-dimensional")
573 elif dtype == np.float16:
574 # float16 not supported (no indexing engine)
--> 575 raise NotImplementedError("float16 indexes are not supported")
577 if copy:
578 # asarray_tuplesafe does not always copy underlying data,
579 # so need to make sure that this happens
580 data = data.copy()
NotImplementedError: float16 indexes are not supported
Expected Behavior
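`DataFrame.sort_values` should also work when sorting by multiple columns and one of them has the float16 dtype, instead of raising NotImplementedError.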
Installed Versions
INSTALLED VERSIONS
commit : 0f43794
python : 3.10.12.final.0
python-bits : 64
OS : Linux
OS-release : 5.15.133+
Version : #1 SMP Fri Oct 13 10:14:53 UTC 2023
machine : x86_64
processor : x86_64
byteorder : little
LC_ALL : C.UTF-8
LANG : C.UTF-8
LOCALE : en_US.UTF-8
pandas : 2.0.3
numpy : 1.23.5
pytz : 2023.3
dateutil : 2.8.2
setuptools : 68.0.0
pip : 23.1.2
Cython : 0.29.35
pytest : 7.4.1
hypothesis : None
sphinx : None
blosc : None
feather : 0.4.1
xlsxwriter : None
lxml.etree : 4.9.3
html5lib : 1.1
pymysql : None
psycopg2 : None
jinja2 : 3.1.2
IPython : 8.14.0
pandas_datareader: 0.10.0
bs4 : 4.12.2
bottleneck : None
brotli :
fastparquet : None
fsspec : 2023.9.0
gcsfs : 2023.6.0
matplotlib : 3.7.2
numba : 0.57.1
numexpr : 2.8.5
odfpy : None
openpyxl : 3.1.2
pandas_gbq : None
pyarrow : 9.0.0
pyreadstat : None
pyxlsb : None
s3fs : 2023.9.0
scipy : 1.11.2
snappy : None
sqlalchemy : 2.0.17
tables : 3.8.0
tabulate : 0.9.0
xarray : 2023.8.0
xlrd : None
zstandard : 0.19.0
tzdata : 2023.3
qtpy : None
pyqt5 : None