Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -992,6 +992,7 @@ Numeric
^^^^^^^
- Bug in :func:`api.types.infer_dtype` returning "mixed" for complex and ``pd.NA`` mix (:issue:`61976`)
- Bug in :func:`api.types.infer_dtype` returning "mixed-integer-float" for float and ``pd.NA`` mix (:issue:`61621`)
- Bug in :meth:`DataFrame.combine_first` where Int64 and UInt64 integers with absolute value greater than ``2**53`` would lose precision after the operation (:issue:`60128`)
- Bug in :meth:`DataFrame.corr` where numerical precision errors resulted in correlations above ``1.0`` (:issue:`61120`)
- Bug in :meth:`DataFrame.cov` raises a ``TypeError`` instead of returning potentially incorrect results or other errors (:issue:`53115`)
- Bug in :meth:`DataFrame.quantile` where the column type was not preserved when ``numeric_only=True`` with a list-like ``q`` produced an empty result (:issue:`59035`)
Expand Down
14 changes: 2 additions & 12 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -9141,20 +9141,10 @@ def combine_first(self, other: DataFrame) -> DataFrame:
1 0.0 3.0 1.0
2 NaN 3.0 1.0
"""
from pandas.core.computation import expressions

def combiner(x: Series, y: Series):
    """
    Fill the missing entries of ``x`` with the matching entries of ``y``.

    ``self`` is the enclosing DataFrame, captured by closure.

    Parameters
    ----------
    x : Series
        Column taken from the calling DataFrame.
    y : Series
        Column of the same name taken from ``other``.

    Returns
    -------
    Series
        ``y`` unchanged when its column does not exist in ``self``;
        otherwise ``x`` with its missing entries replaced by ``y``.
    """
    # GH#60128 The combiner is supposed to preserve EA Dtypes.
    if y.name not in self.columns:
        # The column only exists in ``other``, so there is nothing to
        # merge it with.
        return y

    # Stay at the Series level instead of unwrapping to ``_values``:
    # ``where`` keeps ``y`` where ``x`` is missing and takes ``x``
    # everywhere else, without leaving the extension dtype.
    return y.where(x.isna(), x)

if len(other) == 0:
combined = self.reindex(
Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/frame/methods/test_combine_first.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,21 @@ def test_combine_first_string_dtype_only_na(self, nullable_string_dtype):
).set_index(["a", "b"])
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize(
    "wide_val, dtype",
    [
        (1666880195890293744, "UInt64"),
        (-1666880195890293744, "Int64"),
    ],
)
def test_combine_first_preserve_EA_precision(self, wide_val, dtype):
    """
    Check that ``combine_first`` keeps large nullable integers exact.

    ``wide_val`` appears in both frames; the combined column must hold
    it unchanged and keep the nullable integer ``dtype``.
    """
    # GH#60128
    left = DataFrame({"A": [wide_val, 5]}, dtype=dtype)
    right = DataFrame({"A": [6, 7, wide_val]}, dtype=dtype)

    # Rows 0-1 come from ``left``; row 2 exists only in ``right``.
    expected = DataFrame({"A": [wide_val, 5, wide_val]}, dtype=dtype)

    tm.assert_frame_equal(left.combine_first(right), expected)


@pytest.mark.parametrize(
"scalar1, scalar2",
Expand Down
Loading