Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

REF: add custom Exception for safe_sort #25569

Closed
wants to merge 12 commits into from
16 changes: 11 additions & 5 deletions pandas/core/algorithms.py
Expand Up @@ -616,7 +616,7 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
na_value=na_value)

if sort and len(uniques) > 0:
from pandas.core.sorting import safe_sort
from pandas.core.sorting import safe_sort, SortError
if na_sentinel == -1:
# GH-25409 take_1d only works for na_sentinels of -1
try:
Expand All @@ -626,13 +626,19 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
uniques = uniques.take(order)
except TypeError:
# Mixed types, where uniques.argsort fails.
try:
uniques, labels = safe_sort(uniques, labels,
na_sentinel=na_sentinel,
assume_unique=True)
except SortError as e:
raise TypeError(e) from e
else:
try:
uniques, labels = safe_sort(uniques, labels,
na_sentinel=na_sentinel,
assume_unique=True)
else:
uniques, labels = safe_sort(uniques, labels,
na_sentinel=na_sentinel,
assume_unique=True)
except SortError as e:
raise TypeError(e) from e

uniques = _reconstruct_data(uniques, dtype, original)

Expand Down
12 changes: 8 additions & 4 deletions pandas/core/indexes/base.py
Expand Up @@ -43,6 +43,7 @@
import pandas.core.missing as missing
from pandas.core.ops import get_op_result_name, make_invalid_op
import pandas.core.sorting as sorting
from pandas.core.sorting import SortError
from pandas.core.strings import StringMethods

from pandas.io.formats.printing import (
Expand Down Expand Up @@ -2345,7 +2346,7 @@ def union(self, other, sort=None):
if sort is None:
try:
result = sorting.safe_sort(result)
except TypeError as e:
except SortError as e:
warnings.warn("{}, sort order is undefined for "
"incomparable objects".format(e),
RuntimeWarning, stacklevel=3)
Expand Down Expand Up @@ -2432,7 +2433,10 @@ def intersection(self, other, sort=False):
taken = other.take(indexer)

if sort is None:
taken = sorting.safe_sort(taken.values)
try:
taken = sorting.safe_sort(taken.values)
except sorting.SortError as e:
raise TypeError(e) from e
if self.name != other.name:
name = None
else:
Expand Down Expand Up @@ -2504,7 +2508,7 @@ def difference(self, other, sort=None):
if sort is None:
try:
the_diff = sorting.safe_sort(the_diff)
except TypeError:
except SortError:
pass

return this._shallow_copy(the_diff, name=result_name, freq=None)
Expand Down Expand Up @@ -2580,7 +2584,7 @@ def symmetric_difference(self, other, result_name=None, sort=None):
if sort is None:
try:
the_diff = sorting.safe_sort(the_diff)
except TypeError:
except SortError:
pass

attribs = self._get_attributes_dict()
Expand Down
5 changes: 4 additions & 1 deletion pandas/core/reshape/merge.py
Expand Up @@ -1738,7 +1738,10 @@ def _sort_labels(uniques, left, right):
llength = len(left)
labels = np.concatenate([left, right])

_, new_labels = sorting.safe_sort(uniques, labels, na_sentinel=-1)
try:
_, new_labels = sorting.safe_sort(uniques, labels, na_sentinel=-1)
except sorting.SortError as e:
raise TypeError(e) from e
new_labels = ensure_int64(new_labels)
new_left, new_right = new_labels[:llength], new_labels[llength:]

Expand Down
19 changes: 15 additions & 4 deletions pandas/core/sorting.py
Expand Up @@ -17,6 +17,13 @@
_INT64_MAX = np.iinfo(np.int64).max


class SortError(TypeError):
    """
    Raised when values cannot be sorted because of the input data.

    Derives from ``TypeError``, so handlers written as ``except TypeError``
    also catch this exception.
    """


def get_group_index(labels, shape, sort, xnull):
"""
For the particular label_list, gets the offsets into the hypothetical list
Expand Down Expand Up @@ -437,8 +444,9 @@ def safe_sort(values, labels=None, na_sentinel=-1, assume_unique=False):
------
TypeError
* If ``values`` is not list-like or if ``labels`` is neither None
nor list-like
* If ``values`` cannot be sorted
nor list-like.
SortError
* If ``values`` cannot be sorted.
ValueError
* If ``labels`` is not None and ``values`` contain duplicates.
"""
Expand All @@ -456,8 +464,11 @@ def sort_mixed(values):
# order ints before strings, safe in py3
str_pos = np.array([isinstance(x, string_types) for x in values],
dtype=bool)
nums = np.sort(values[~str_pos])
strs = np.sort(values[str_pos])
try:
nums = np.sort(values[~str_pos])
strs = np.sort(values[str_pos])
except TypeError as e:
raise SortError(e) from e
return np.concatenate([nums, np.asarray(strs, dtype=object)])

sorter = None
Expand Down
7 changes: 3 additions & 4 deletions pandas/tests/test_algos.py
Expand Up @@ -24,6 +24,7 @@
import pandas.core.algorithms as algos
from pandas.core.arrays import DatetimeArray
import pandas.core.common as com
from pandas.core.sorting import SortError
import pandas.util.testing as tm
from pandas.util.testing import assert_almost_equal

Expand Down Expand Up @@ -228,11 +229,9 @@ def test_complex_sorting(self):
# gh 12666 - check no segfault
x17 = np.array([complex(i) for i in range(17)], dtype=object)

msg = ("unorderable types: .* [<>] .*"
"|" # the above case happens for numpy < 1.14
"'[<>]' not supported between instances of .*")
with pytest.raises(TypeError, match=msg):
with pytest.raises(TypeError, match="complex") as excinfo:
algos.factorize(x17[::-1], sort=True)
assert type(excinfo.value.__cause__) == SortError

def test_float64_factorize(self, writable):
data = np.array([1.0, 1e8, 1.0, 1e-8, 1e8, 1.0], dtype=np.float64)
Expand Down
8 changes: 3 additions & 5 deletions pandas/tests/test_sorting.py
Expand Up @@ -10,7 +10,7 @@
DataFrame, MultiIndex, Series, compat, concat, merge, to_datetime)
from pandas.core import common as com
from pandas.core.sorting import (
decons_group_index, get_group_index, is_int64_overflow_possible,
SortError, decons_group_index, get_group_index, is_int64_overflow_possible,
lexsort_indexer, nargsort, safe_sort)
from pandas.util import testing as tm
from pandas.util.testing import assert_frame_equal, assert_series_equal
Expand Down Expand Up @@ -413,10 +413,8 @@ def test_mixed_integer_from_list(self):
def test_unsortable(self):
# GH 13714
arr = np.array([1, 2, datetime.now(), 0, 3], dtype=object)
msg = ("unorderable types: .* [<>] .*"
"|" # the above case happens for numpy < 1.14
"'[<>]' not supported between instances of .*")
with pytest.raises(TypeError, match=msg):
msg = "int.*datetime|datetime.*int"
with pytest.raises(SortError, match=msg):
safe_sort(arr)

def test_exceptions(self):
Expand Down