From 73504693e306f5ef243d3a196b22c2b0cbde2a0d Mon Sep 17 00:00:00 2001 From: divya1974 Date: Tue, 7 Oct 2025 01:39:11 -0400 Subject: [PATCH 1/3] BUG: avoid float upcast when mixing signed/unsigned ints in isin (tests added) --- pandas/core/algorithms.py | 15 +++++++++++---- pandas/tests/series/methods/test_isin.py | 13 +++++++++++++ 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index bbca78459ca75..c7961d26b1b4b 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -522,11 +522,18 @@ def isin(comps: ListLike, values: ListLike) -> npt.NDArray[np.bool_]: if ( len(values) > 0 and values.dtype.kind in "iufcb" - and not is_signed_integer_dtype(comps) - and not is_dtype_equal(values, comps) + # If the dtypes differ and either side is unsigned integer, + # prefer object dtype to avoid unsafe upcast to float64 that + # can lose precision for large 64-bit integers. + and (not is_dtype_equal(values, comps)) + and ( + (not is_signed_integer_dtype(comps)) + or (not is_signed_integer_dtype(values)) + ) ): - # GH#46485 Use object to avoid upcast to float64 later - # TODO: Share with _find_common_type_compat + # GH#46485: Use object to avoid upcast to float64 later + # Ensure symmetric behavior when mixing signed and unsigned + # integer dtypes. 
values = construct_1d_object_array_from_listlike(orig_values) elif isinstance(values, ABCMultiIndex): diff --git a/pandas/tests/series/methods/test_isin.py b/pandas/tests/series/methods/test_isin.py index 4f8484252ba8f..fb5b55ffceced 100644 --- a/pandas/tests/series/methods/test_isin.py +++ b/pandas/tests/series/methods/test_isin.py @@ -267,3 +267,16 @@ def test_isin_filtering_on_iterable(data, isin): expected_result = Series([True, True, False]) tm.assert_series_equal(result, expected_result) + + +def test_isin_int64_vs_uint64_mismatch(): + # Regression test for mixing signed int64 Series with uint64 values + # Ensure we do not implicitly upcast to float64 and return incorrect True + # related to GH# (user report) + ser = Series([1378774140726870442], dtype=np.int64) + vals = [np.uint64(1378774140726870528)] + + res = ser.isin(vals) + # different values -> should be False + expected = Series([False]) + tm.assert_series_equal(res, expected) From 6b417e78dcd51af778e0bdb1f8d7ff83f2c1a626 Mon Sep 17 00:00:00 2001 From: divya1974 Date: Wed, 8 Oct 2025 13:01:30 -0400 Subject: [PATCH 2/3] PERF: replace object-dtype conversion with uint64 fast path for mixed integer dtypes in isin --- pandas/core/algorithms.py | 44 +++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index c7961d26b1b4b..aeabb0f1b8221 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -57,7 +57,6 @@ is_integer_dtype, is_list_like, is_object_dtype, - is_signed_integer_dtype, needs_i8_conversion, ) from pandas.core.dtypes.concat import concat_compat @@ -518,23 +517,10 @@ def isin(comps: ListLike, values: ListLike) -> npt.NDArray[np.bool_]: if not isinstance(values, (ABCIndex, ABCSeries, ABCExtensionArray, np.ndarray)): orig_values = list(values) values = _ensure_arraylike(orig_values, func_name="isin-targets") - - if ( - len(values) > 0 - and values.dtype.kind in "iufcb" - # If the dtypes differ and either side is unsigned integer, - # prefer 
object dtype to avoid unsafe upcast to float64 that - # can lose precision for large 64-bit integers. - and (not is_dtype_equal(values, comps)) - and ( - (not is_signed_integer_dtype(comps)) - or (not is_signed_integer_dtype(values)) - ) - ): - # GH#46485: Use object to avoid upcast to float64 later - # Ensure symmetric behavior when mixing signed and unsigned - # integer dtypes. - values = construct_1d_object_array_from_listlike(orig_values) + # Keep values as a numeric ndarray where possible; we handle + # signed/unsigned integer mixes with a fast-path later (after + # comps_array extraction) to avoid object-dtype conversions that + # harm performance for large numeric arrays. elif isinstance(values, ABCMultiIndex): # Avoid raising in extract_array @@ -586,6 +572,28 @@ def f(c, v): f = lambda a, b: np.isin(a, b).ravel() else: + # Fast-path: handle integer-kind mixes without upcasting to float64. + if ( + values.dtype.kind in "iu" + and comps_array.dtype.kind in "iu" + and not is_dtype_equal(values.dtype, comps_array.dtype) + ): + try: + if values.size > 0 and comps_array.size > 0: + signed_negative = False + if values.dtype.kind == "i": + signed_negative = values.min() < 0 + if comps_array.dtype.kind == "i": + signed_negative = signed_negative or (comps_array.min() < 0) + + if not signed_negative: + values_u = values.astype("uint64", copy=False) + comps_u = comps_array.astype("uint64", copy=False) + return htable.ismember(comps_u, values_u) + except Exception: + # fall back to generic path on error + pass + common = np_find_common_type(values.dtype, comps_array.dtype) values = values.astype(common, copy=False) comps_array = comps_array.astype(common, copy=False) From b65a9f2e668d41ec5147db8572b9e01cb9202929 Mon Sep 17 00:00:00 2001 From: divya1974 Date: Wed, 8 Oct 2025 18:07:58 -0400 Subject: [PATCH 3/3] BUG: restrict isin integer fast path to 64-bit dtypes --- pandas/core/algorithms.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 
aeabb0f1b8221..1e2eaf6be17bc 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -576,6 +576,12 @@ def f(c, v): if ( values.dtype.kind in "iu" and comps_array.dtype.kind in "iu" + # Only apply fast-path for 64-bit integer widths to avoid + # surprising behaviour on platforms or dtypes with different + # itemsize (see GH discussions). Narrowing to 8-byte ints + # keeps the fast-path safe and performant for the common case. + and values.dtype.itemsize == 8 + and comps_array.dtype.itemsize == 8 and not is_dtype_equal(values.dtype, comps_array.dtype) ): try: