Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1184,6 +1184,8 @@ Reshaping
- Bug in :func:`concat` with mixed integer and bool dtypes incorrectly casting the bools to integers (:issue:`45101`)
- Bug in :func:`qcut` where values at the quantile boundaries could be incorrectly assigned (:issue:`59355`)
- Bug in :meth:`DataFrame.combine_first` not preserving the column order (:issue:`60427`)
- Bug in :meth:`DataFrame.combine_first` with non-unique columns incorrectly raising (:issue:`29135`)
- Bug in :meth:`DataFrame.combine` with non-unique columns incorrectly raising (:issue:`51340`)
- Bug in :meth:`DataFrame.explode` producing incorrect result for :class:`pyarrow.large_list` type (:issue:`61091`)
- Bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`)
- Bug in :meth:`DataFrame.join` raising an ``AssertionError`` when a :class:`DataFrame` had a :class:`MultiIndex` whose :attr:`MultiIndex.names` contained ``None`` (:issue:`58721`)
Expand Down
26 changes: 17 additions & 9 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -9096,11 +9096,14 @@ def combine(

# preserve column order
new_columns = self.columns.union(other_columns, sort=False)
this = this.reindex(new_columns, axis=1)
other = other.reindex(new_columns, axis=1)

do_fill = fill_value is not None
result = {}
for col in new_columns:
series = this[col]
other_series = other[col]
for i in range(this.shape[1]):
series = this.iloc[:, i]
other_series = other.iloc[:, i]

this_dtype = series.dtype
other_dtype = other_series.dtype
Expand All @@ -9111,7 +9114,7 @@ def combine(
# don't overwrite columns unnecessarily
# DO propagate if this column is not in the intersection
if not overwrite and other_mask.all():
result[col] = this[col].copy()
result[i] = series.copy()
continue

if do_fill:
Expand All @@ -9120,7 +9123,7 @@ def combine(
series[this_mask] = fill_value
other_series[other_mask] = fill_value

if col not in self.columns:
if new_columns[i] not in self.columns:
# If self DataFrame does not have col in other DataFrame,
# try to promote series, which is all NaN, as other_dtype.
new_dtype = other_dtype
Expand All @@ -9145,10 +9148,10 @@ def combine(
arr, new_dtype
)

result[col] = arr
result[i] = arr

# convert_objects just in case
frame_result = self._constructor(result, index=new_index, columns=new_columns)
frame_result = self._constructor(result, index=new_index)
frame_result.columns = new_columns
return frame_result.__finalize__(self, method="combine")

def combine_first(self, other: DataFrame) -> DataFrame:
Expand Down Expand Up @@ -9212,9 +9215,14 @@ def combiner(x: Series, y: Series):
combined = self.combine(other, combiner, overwrite=False)

dtypes = {
# Require isinstance(..., (np.dtype, ExtensionDtype)) before comparing
# dtypes so that non-unique columns do not raise; see GH#29135.
# Note: columns that fail this check are simply left un-cast.
col: find_common_type([self.dtypes[col], other.dtypes[col]])
for col in self.columns.intersection(other.columns)
if combined.dtypes[col] != self.dtypes[col]
if isinstance(combined.dtypes[col], (np.dtype, ExtensionDtype))
and isinstance(self.dtypes[col], (np.dtype, ExtensionDtype))
and combined.dtypes[col] != self.dtypes[col]
}

if dtypes:
Expand Down
16 changes: 16 additions & 0 deletions pandas/tests/frame/methods/test_combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,19 @@ def test_combine_generic(self, float_frame):
)
tm.assert_frame_equal(chunk, exp)
tm.assert_frame_equal(chunk2, exp)

def test_combine_nonunique_columns(self):
# GH#51340

df = pd.DataFrame({"A": range(5), "B": range(5)})
df.columns = ["A", "A"]

other = df.copy()
df.iloc[1, :] = None

def combiner(a, b):
return b

result = df.combine(other, combiner)
expected = other.astype("float64")
tm.assert_frame_equal(result, expected)
12 changes: 12 additions & 0 deletions pandas/tests/frame/methods/test_combine_first.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,18 @@ def test_combine_first_preserve_EA_precision(self, wide_val, dtype):
expected = DataFrame({"A": [wide_val, 5, wide_val]}, dtype=dtype)
tm.assert_frame_equal(result, expected)

def test_combine_first_non_unique_columns(self):
# GH#29135
df1 = DataFrame([[1, np.nan], [3, 4]], columns=["P", "Q"], index=["A", "B"])
df2 = DataFrame(
[[5, 6, 7], [8, 9, np.nan]], columns=["P", "Q", "Q"], index=["A", "B"]
)
result = df1.combine_first(df2)
expected = DataFrame(
[[1, 6.0, 7.0], [3, 4.0, 4.0]], index=["A", "B"], columns=["P", "Q", "Q"]
)
tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize(
"scalar1, scalar2",
Expand Down
Loading