diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index bbca78459ca75..1e2eaf6be17bc 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -57,7 +57,6 @@ is_integer_dtype, is_list_like, is_object_dtype, - is_signed_integer_dtype, needs_i8_conversion, ) from pandas.core.dtypes.concat import concat_compat @@ -518,16 +517,10 @@ def isin(comps: ListLike, values: ListLike) -> npt.NDArray[np.bool_]: if not isinstance(values, (ABCIndex, ABCSeries, ABCExtensionArray, np.ndarray)): orig_values = list(values) values = _ensure_arraylike(orig_values, func_name="isin-targets") - - if ( - len(values) > 0 - and values.dtype.kind in "iufcb" - and not is_signed_integer_dtype(comps) - and not is_dtype_equal(values, comps) - ): - # GH#46485 Use object to avoid upcast to float64 later - # TODO: Share with _find_common_type_compat - values = construct_1d_object_array_from_listlike(orig_values) + # Keep values as a numeric ndarray where possible; we handle + # signed/unsigned integer mixes with a fast-path later (after + # comps_array extraction) to avoid object-dtype conversions that + # harm performance for large numeric arrays. elif isinstance(values, ABCMultiIndex): # Avoid raising in extract_array @@ -579,6 +572,34 @@ def f(c, v): f = lambda a, b: np.isin(a, b).ravel() else: + # Fast-path: handle integer-kind mixes without upcasting to float64. + if ( + values.dtype.kind in "iu" + and comps_array.dtype.kind in "iu" + # Only apply fast-path for 64-bit integer widths to avoid + # surprising behaviour on platforms or dtypes with different + # itemsize (see GH discussions). Narrowing to 8-byte ints + # keeps the fast-path safe and performant for the common case. + and values.dtype.itemsize == 8 + and comps_array.dtype.itemsize == 8 + and not is_dtype_equal(values.dtype, comps_array.dtype) + ): + try: + if values.size > 0 and comps_array.size > 0: + signed_negative = False + if values.dtype.kind == "i": + signed_negative = values.min() < 0 + if comps_array.dtype.kind == "i": + signed_negative = signed_negative or (comps_array.min() < 0) + + if not signed_negative: + values_u = values.astype("uint64", copy=False) + comps_u = comps_array.astype("uint64", copy=False) + return htable.ismember(comps_u, values_u) + except Exception: + # fall back to generic path on error + pass + common = np_find_common_type(values.dtype, comps_array.dtype) values = values.astype(common, copy=False) comps_array = comps_array.astype(common, copy=False) diff --git a/pandas/tests/series/methods/test_isin.py b/pandas/tests/series/methods/test_isin.py index 4f8484252ba8f..fb5b55ffceced 100644 --- a/pandas/tests/series/methods/test_isin.py +++ b/pandas/tests/series/methods/test_isin.py @@ -267,3 +267,16 @@ def test_isin_filtering_on_iterable(data, isin): expected_result = Series([True, True, False]) tm.assert_series_equal(result, expected_result) + + +def test_isin_int64_vs_uint64_mismatch(): + # Regression test for mixing signed int64 Series with uint64 values + # Ensure we do not implicitly upcast to float64 and return incorrect True + # related to GH# (user report) + ser = Series([1378774140726870442], dtype=np.int64) + vals = [np.uint64(1378774140726870528)] + + res = ser.isin(vals) + # different values -> should be False + expected = Series([False]) + tm.assert_series_equal(res, expected)