diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 12f522301e121..39e5f4af57ec1 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1190,6 +1190,7 @@ Reshaping - Bug in :meth:`DataFrame.combine` with non-unique columns incorrectly raising (:issue:`51340`) - Bug in :meth:`DataFrame.explode` producing incorrect result for :class:`pyarrow.large_list` type (:issue:`61091`) - Bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`) +- Bug in :meth:`DataFrame.join` not producing the correct row order when joining with a list of Series/DataFrames (:issue:`62954`) - Bug in :meth:`DataFrame.join` when a :class:`DataFrame` with a :class:`MultiIndex` would raise an ``AssertionError`` when :attr:`MultiIndex.names` contained ``None``. (:issue:`58721`) - Bug in :meth:`DataFrame.merge` where merging on a column containing only ``NaN`` values resulted in an out-of-bounds array access (:issue:`59421`) - Bug in :meth:`Series.combine_first` incorrectly replacing ``None`` entries with ``NaN`` (:issue:`58977`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5f62b5c07a5cf..c9c588059f788 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -11393,12 +11393,18 @@ def join( # join indexes only using concat if can_concat: - if how == "left": + if how == "left" or how == "right": res = concat( frames, axis=1, join="outer", verify_integrity=True, sort=sort ) - return res.reindex(self.index) + index = self.index if how == "left" else frames[-1].index + if sort: + index = index.sort_values() + result = res.reindex(index) + return result else: + if how == "outer": + sort = True return concat( frames, axis=1, join=how, verify_integrity=True, sort=sort ) @@ -11409,6 +11415,7 @@ def join( joined = merge( joined, frame, + sort=sort, how=how, left_index=True, right_index=True, diff --git a/pandas/tests/frame/methods/test_join.py b/pandas/tests/frame/methods/test_join.py index aaa9485cab580..b2579bbcc0e44 100644 --- a/pandas/tests/frame/methods/test_join.py +++ b/pandas/tests/frame/methods/test_join.py @@ -395,29 +395,6 @@ def test_join_list_series(float_frame): tm.assert_frame_equal(result, float_frame) -def test_suppress_future_warning_with_sort_kw(sort): - sort_kw = sort - a = DataFrame({"col1": [1, 2]}, index=["c", "a"]) - - b = DataFrame({"col2": [4, 5]}, index=["b", "a"]) - - c = DataFrame({"col3": [7, 8]}, index=["a", "b"]) - - expected = DataFrame( - { - "col1": {"a": 2.0, "b": float("nan"), "c": 1.0}, - "col2": {"a": 5.0, "b": 4.0, "c": float("nan")}, - "col3": {"a": 7.0, "b": 8.0, "c": float("nan")}, - } - ) - if sort_kw is False: - expected = expected.reindex(index=["c", "a", "b"]) - - with tm.assert_produces_warning(None): - result = a.join([b, c], how="outer", sort=sort_kw) - tm.assert_frame_equal(result, expected) - - class TestDataFrameJoin: def test_join(self, multiindex_dataframe_random_data): frame = multiindex_dataframe_random_data diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index 65bfea0b9beea..31df52645f3f9 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -671,6 +671,49 @@ def _check_diff_index(df_list, result, exp_index): with pytest.raises(ValueError, match=msg): df_list[0].join(df_list[1:], on="a") + @pytest.mark.parametrize("how", ["left", "right", "inner", "outer"]) + def test_join_many_sort_unique(self, how, sort): + # https://github.com/pandas-dev/pandas/pull/62954 + df = DataFrame({"a": [1, 2, 3]}, index=[1, 0, 2]) + df2 = DataFrame({"b": [4, 5, 6]}, index=[2, 0, 1]) + if how == "right": + expected = DataFrame({"a": [3, 2, 1], "b": [4, 5, 6]}, index=[2, 0, 1]) + else: + expected = DataFrame({"a": [1, 2, 3], "b": [6, 5, 4]}, index=[1, 0, 2]) + if how == "outer" or sort: + # outer always sorts. + expected = expected.sort_index() + result = df.join([df2], how=how, sort=sort) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("how", ["left", "right", "inner", "outer"]) + def test_join_many_sort_nonunique(self, how, sort): + # https://github.com/pandas-dev/pandas/pull/62954 + df = DataFrame({"a": [1, 2, 3]}, index=[3, 0, 0]) + df2 = DataFrame({"b": [4, 5, 6]}, index=[2, 0, 1]) + if how == "inner": + expected = DataFrame({"a": [2, 3], "b": [5, 5]}, index=[0, 0]) + elif how == "left": + expected = DataFrame( + {"a": [1, 2, 3], "b": [np.nan, 5.0, 5.0]}, index=[3, 0, 0] + ) + elif how == "right": + expected = DataFrame( + {"a": [np.nan, 2.0, 3.0, np.nan], "b": [4, 5, 5, 6]}, index=[2, 0, 0, 1] + ) + else: + expected = DataFrame( + { + "a": [2.0, 3.0, np.nan, np.nan, 1.0], + "b": [5.0, 5.0, 6.0, 4.0, np.nan], + }, + index=[0, 0, 1, 2, 3], + ) + if sort: + expected = expected.sort_index() + result = df.join([df2], how=how, sort=sort) + tm.assert_frame_equal(result, expected) + def test_join_many_mixed(self): df = DataFrame( np.random.default_rng(2).standard_normal((8, 4)),