Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 32 additions & 11 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@
is_integer_dtype,
is_list_like,
is_object_dtype,
is_signed_integer_dtype,
needs_i8_conversion,
)
from pandas.core.dtypes.concat import concat_compat
Expand Down Expand Up @@ -518,16 +517,10 @@ def isin(comps: ListLike, values: ListLike) -> npt.NDArray[np.bool_]:
if not isinstance(values, (ABCIndex, ABCSeries, ABCExtensionArray, np.ndarray)):
orig_values = list(values)
values = _ensure_arraylike(orig_values, func_name="isin-targets")

if (
len(values) > 0
and values.dtype.kind in "iufcb"
and not is_signed_integer_dtype(comps)
and not is_dtype_equal(values, comps)
):
# GH#46485 Use object to avoid upcast to float64 later
# TODO: Share with _find_common_type_compat
values = construct_1d_object_array_from_listlike(orig_values)
# Keep values as a numeric ndarray where possible; we handle
# signed/unsigned integer mixes with a fast-path later (after
# comps_array extraction) to avoid object-dtype conversions that
# harm performance for large numeric arrays.

elif isinstance(values, ABCMultiIndex):
# Avoid raising in extract_array
Expand Down Expand Up @@ -579,6 +572,34 @@ def f(c, v):
f = lambda a, b: np.isin(a, b).ravel()

else:
# Fast-path: handle integer-kind mixes without upcasting to float64.
if (
values.dtype.kind in "iu"
and comps_array.dtype.kind in "iu"
# Only apply fast-path for 64-bit integer widths to avoid
# surprising behaviour on platforms or dtypes with different
# itemsize (see GH discussions). Narrowing to 8-byte ints
# keeps the fast-path safe and performant for the common case.
and values.dtype.itemsize == 8
and comps_array.dtype.itemsize == 8
and not is_dtype_equal(values.dtype, comps_array.dtype)
):
try:
if values.size > 0 and comps_array.size > 0:
signed_negative = False
if values.dtype.kind == "i":
signed_negative = values.min() < 0
if comps_array.dtype.kind == "i":
signed_negative = signed_negative or (comps_array.min() < 0)

if not signed_negative:
values_u = values.astype("uint64", copy=False)
comps_u = comps_array.astype("uint64", copy=False)
return htable.ismember(comps_u, values_u)
except Exception:
# fall back to generic path on error
pass

common = np_find_common_type(values.dtype, comps_array.dtype)
values = values.astype(common, copy=False)
comps_array = comps_array.astype(common, copy=False)
Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/series/methods/test_isin.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,3 +267,16 @@ def test_isin_filtering_on_iterable(data, isin):
expected_result = Series([True, True, False])

tm.assert_series_equal(result, expected_result)


def test_isin_int64_vs_uint64_mismatch():
# Regression test for mixing signed int64 Series with uint64 values
# Ensure we do not implicitly upcast to float64 and return incorrect True
# related to GH# (user report)
ser = Series([1378774140726870442], dtype=np.int64)
vals = [np.uint64(1378774140726870528)]

res = ser.isin(vals)
# different values -> should be False
expected = Series([False])
tm.assert_series_equal(res, expected)
Loading