Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

API: Make Series.searchsorted return a scalar, when supplied a scalar #23801

Merged
merged 4 commits into from
Dec 21, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1096,6 +1096,7 @@ Other API Changes
has an improved ``KeyError`` message, and will not fail on duplicate column names with ``drop=True``. (:issue:`22484`)
- Slicing a single row of a DataFrame with multiple ExtensionArrays of the same type now preserves the dtype, rather than coercing to object (:issue:`22784`)
- :class:`DateOffset` attribute `_cacheable` and method `_should_cache` have been removed (:issue:`23118`)
- :meth:`Series.searchsorted`, when supplied a scalar value to search for, now returns a scalar instead of an array (:issue:`23801`).
- :meth:`Categorical.searchsorted`, when supplied a scalar value to search for, now returns a scalar instead of an array (:issue:`23466`).
- :meth:`Categorical.searchsorted` now raises a ``KeyError`` rather than a ``ValueError``, if a searched for key is not found in its categories (:issue:`23466`).
- :meth:`Index.hasnans` and :meth:`Series.hasnans` now always return a python boolean. Previously, a python or a numpy boolean could be returned, depending on circumstances (:issue:`23294`).
Expand Down
14 changes: 10 additions & 4 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1350,8 +1350,14 @@ def factorize(self, sort=False, na_sentinel=-1):

Returns
-------
indices : array of ints
Array of insertion points with the same shape as `value`.
int or array of int
A scalar or array of insertion points with the
same shape as `value`.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@datapythonista is this the correct format?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

looks good, but can you use int or array of int (I'd like to parse at some point the types from these, so I prefer the type name int over ints)


.. versionchanged :: 0.24.0
If `value` is a scalar, an int is now always returned.
Previously, scalar inputs returned a 1-item array for
:class:`Series` and :class:`Categorical`.

See Also
--------
Expand All @@ -1372,7 +1378,7 @@ def factorize(self, sort=False, na_sentinel=-1):
dtype: int64

>>> x.searchsorted(4)
array([3])
3

>>> x.searchsorted([0, 4])
array([0, 3])
Expand All @@ -1389,7 +1395,7 @@ def factorize(self, sort=False, na_sentinel=-1):
Categories (4, object): [apple < bread < cheese < milk]

>>> x.searchsorted('bread')
array([1]) # Note: an array, not a scalar
1

>>> x.searchsorted(['bread'], side='right')
array([3])
Expand Down
6 changes: 4 additions & 2 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2215,8 +2215,10 @@ def __rmatmul__(self, other):
def searchsorted(self, value, side='left', sorter=None):
if sorter is not None:
sorter = ensure_platform_int(sorter)
return self._values.searchsorted(Series(value)._values,
side=side, sorter=sorter)
result = self._values.searchsorted(Series(value)._values,
side=side, sorter=sorter)

return result[0] if is_scalar(value) else result

# -------------------------------------------------------------------
# Combination
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/arrays/categorical/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from pandas.compat import PYPY

from pandas import Categorical, Index, Series
from pandas.api.types import is_scalar
import pandas.util.testing as tm


Expand Down Expand Up @@ -86,9 +87,11 @@ def test_searchsorted(self):
# Searching for single item argument, side='left' (default)
res_cat = c1.searchsorted('apple')
assert res_cat == 2
assert is_scalar(res_cat)

res_ser = s1.searchsorted('apple')
assert res_ser == 2
assert is_scalar(res_ser)

# Searching for single item array, side='left' (default)
res_cat = c1.searchsorted(['bread'])
Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/indexes/multi/test_monotonic.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import pandas as pd
from pandas import Index, IntervalIndex, MultiIndex
from pandas.api.types import is_scalar


def test_is_monotonic_increasing():
Expand Down Expand Up @@ -182,22 +183,28 @@ def test_searchsorted_monotonic(indices):
# test searchsorted only for increasing
if indices.is_monotonic_increasing:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pls don't import is_scalar from testing, import it from the canonical location

pandas.api.types.is_scalar.

ssm_left = indices._searchsorted_monotonic(value, side='left')
assert is_scalar(ssm_left)
assert expected_left == ssm_left

ssm_right = indices._searchsorted_monotonic(value, side='right')
assert is_scalar(ssm_right)
assert expected_right == ssm_right

ss_left = indices.searchsorted(value, side='left')
assert is_scalar(ss_left)
assert expected_left == ss_left

ss_right = indices.searchsorted(value, side='right')
assert is_scalar(ss_right)
assert expected_right == ss_right

elif indices.is_monotonic_decreasing:
ssm_left = indices._searchsorted_monotonic(value, side='left')
assert is_scalar(ssm_left)
assert expected_left == ssm_left

ssm_right = indices._searchsorted_monotonic(value, side='right')
assert is_scalar(ssm_right)
assert expected_right == ssm_right

else:
Expand Down
19 changes: 11 additions & 8 deletions pandas/tests/series/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from pandas import (
Categorical, CategoricalIndex, DataFrame, Series, bdate_range, compat,
date_range, isna, notna)
from pandas.api.types import is_scalar
from pandas.core.index import MultiIndex
from pandas.core.indexes.datetimes import Timestamp
from pandas.core.indexes.timedeltas import Timedelta
Expand Down Expand Up @@ -1364,17 +1365,19 @@ def test_numpy_repeat(self):
def test_searchsorted(self):
s = Series([1, 2, 3])

idx = s.searchsorted(1, side='left')
tm.assert_numpy_array_equal(idx, np.array([0], dtype=np.intp))
result = s.searchsorted(1, side='left')
assert is_scalar(result)
assert result == 0

idx = s.searchsorted(1, side='right')
tm.assert_numpy_array_equal(idx, np.array([1], dtype=np.intp))
result = s.searchsorted(1, side='right')
assert is_scalar(result)
assert result == 1

def test_searchsorted_numeric_dtypes_scalar(self):
s = Series([1, 2, 90, 1000, 3e9])
r = s.searchsorted(30)
e = 2
assert r == e
assert is_scalar(r)
assert r == 2

r = s.searchsorted([30])
e = np.array([2], dtype=np.intp)
Expand All @@ -1390,8 +1393,8 @@ def test_search_sorted_datetime64_scalar(self):
s = Series(pd.date_range('20120101', periods=10, freq='2D'))
v = pd.Timestamp('20120102')
r = s.searchsorted(v)
e = 1
assert r == e
assert is_scalar(r)
assert r == 1

def test_search_sorted_datetime64_list(self):
s = Series(pd.date_range('20120101', periods=10, freq='2D'))
Expand Down