diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 448ceffdaa1eb..df566d2621af9 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -992,6 +992,7 @@ Numeric ^^^^^^^ - Bug in :func:`api.types.infer_dtype` returning "mixed" for complex and ``pd.NA`` mix (:issue:`61976`) - Bug in :func:`api.types.infer_dtype` returning "mixed-integer-float" for float and ``pd.NA`` mix (:issue:`61621`) +- Bug in :meth:`DataFrame.combine_first` where Int64 and UInt64 integers with absolute value greater than ``2**53`` would lose precision after the operation (:issue:`60128`) - Bug in :meth:`DataFrame.corr` where numerical precision errors resulted in correlations above ``1.0`` (:issue:`61120`) - Bug in :meth:`DataFrame.cov` raises a ``TypeError`` instead of returning potentially incorrect results or other errors (:issue:`53115`) - Bug in :meth:`DataFrame.quantile` where the column type was not preserved when ``numeric_only=True`` with a list-like ``q`` produced an empty result (:issue:`59035`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9c41b82bbbc8e..dee66a80111f8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9141,20 +9141,10 @@ def combine_first(self, other: DataFrame) -> DataFrame: 1 0.0 3.0 1.0 2 NaN 3.0 1.0 """ - from pandas.core.computation import expressions def combiner(x: Series, y: Series): - mask = x.isna()._values - - x_values = x._values - y_values = y._values - - # If the column y in other DataFrame is not in first DataFrame, - # just return y_values. - if y.name not in self.columns: - return y_values - - return expressions.where(mask, y_values, x_values) + # GH#60128 The combiner is supposed to preserve EA Dtypes. 
+ return y if y.name not in self.columns else y.where(x.isna(), x) if len(other) == 0: combined = self.reindex( diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py index 1e594043510ea..e93684b4dc90f 100644 --- a/pandas/tests/frame/methods/test_combine_first.py +++ b/pandas/tests/frame/methods/test_combine_first.py @@ -398,6 +398,21 @@ def test_combine_first_string_dtype_only_na(self, nullable_string_dtype): ).set_index(["a", "b"]) tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize( + "wide_val, dtype", + ( + (1666880195890293744, "UInt64"), + (-1666880195890293744, "Int64"), + ), + ) + def test_combine_first_preserve_EA_precision(self, wide_val, dtype): + # GH#60128 + df1 = DataFrame({"A": [wide_val, 5]}, dtype=dtype) + df2 = DataFrame({"A": [6, 7, wide_val]}, dtype=dtype) + result = df1.combine_first(df2) + expected = DataFrame({"A": [wide_val, 5, wide_val]}, dtype=dtype) + tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize( "scalar1, scalar2",