From 1f7518b231184066439ced1f5408ef831511659b Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Thu, 22 Jun 2023 12:59:51 +0200 Subject: [PATCH 1/2] BUG: combine_first ignoring others columns if other is empty --- doc/source/whatsnew/v2.1.0.rst | 1 + pandas/core/frame.py | 8 +++++++- pandas/tests/frame/methods/test_combine_first.py | 8 ++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 511e5793608bc..2436d91690ed3 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -491,6 +491,7 @@ Reshaping - Bug in :func:`merge_asof` raising ``KeyError`` for extension dtypes (:issue:`52904`) - Bug in :func:`merge_asof` raising ``ValueError`` for data backed by read-only ndarrays (:issue:`53513`) - Bug in :meth:`DataFrame.agg` and :meth:`Series.agg` on non-unique columns would return incorrect type when dist-like argument passed in (:issue:`51099`) +- Bug in :meth:`DataFrame.combine_first` ignoring other's columns if ``other`` is empty (:issue:`53792`) - Bug in :meth:`DataFrame.idxmin` and :meth:`DataFrame.idxmax`, where the axis dtype would be lost for empty frames (:issue:`53265`) - Bug in :meth:`DataFrame.merge` not merging correctly when having ``MultiIndex`` with single level (:issue:`52331`) - Bug in :meth:`DataFrame.stack` losing extension dtypes when columns is a :class:`MultiIndex` and frame contains mixed dtypes (:issue:`45740`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 352d353ef4dba..44808ce2c66fd 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8344,7 +8344,13 @@ def combiner(x, y): return expressions.where(mask, y_values, x_values) - combined = self.combine(other, combiner, overwrite=False) + if other.empty: + combined = self.reindex( + self.columns.append(other.columns.difference(self.columns)), axis=1 + ) + combined = combined.astype(other.dtypes) + else: + combined = self.combine(other, combiner, overwrite=False) dtypes = { col: find_common_type([self.dtypes[col], other.dtypes[col]]) diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py index 9b9c3f88277e8..156e50d50a9ef 100644 --- a/pandas/tests/frame/methods/test_combine_first.py +++ b/pandas/tests/frame/methods/test_combine_first.py @@ -538,3 +538,11 @@ def test_midx_losing_dtype(): ) expected = DataFrame({"a": [np.nan, 4, 3, 3]}, index=expected_midx) tm.assert_frame_equal(result, expected) + + +def test_combine_first_empty_columns(): + left = DataFrame(columns=["a", "b"]) + right = DataFrame(columns=["a", "c"]) + result = left.combine_first(right) + expected = DataFrame(columns=["a", "b", "c"]) + tm.assert_frame_equal(result, expected) From d176ed46d179a5923b14bc942aeebc061c317fac Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Thu, 22 Jun 2023 14:14:27 +0200 Subject: [PATCH 2/2] Fix --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 44808ce2c66fd..cd554afc54e71 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8344,7 +8344,7 @@ def combiner(x, y): return expressions.where(mask, y_values, x_values) - if other.empty: + if len(other) == 0: combined = self.reindex( self.columns.append(other.columns.difference(self.columns)), axis=1 )