From 9b2c476ce759083d2246707163b6f050b6c43f4a Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sun, 19 Oct 2025 08:37:53 -0400 Subject: [PATCH 1/5] BUG: Do not ignore sort in concat for DatetimeIndex --- doc/source/whatsnew/v3.0.0.rst | 39 +++++++++++++++++++ pandas/core/indexes/api.py | 1 - pandas/tests/reshape/concat/test_datetimes.py | 12 +++--- 3 files changed, 45 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 470129d6d860b..0bf85f3e6222c 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -370,6 +370,45 @@ In cases with mixed-resolution inputs, the highest resolution is used: In [2]: pd.to_datetime([pd.Timestamp("2024-03-22 11:43:01"), "2024-03-22 11:43:01.002"]).dtype Out[2]: dtype(' Index: raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex") if num_dtis == len(indexes): - sort = True result = indexes[0] elif num_dtis > 1: diff --git a/pandas/tests/reshape/concat/test_datetimes.py b/pandas/tests/reshape/concat/test_datetimes.py index 0cf3192ea3a74..9a8789926a37e 100644 --- a/pandas/tests/reshape/concat/test_datetimes.py +++ b/pandas/tests/reshape/concat/test_datetimes.py @@ -73,23 +73,23 @@ def test_concat_datetime_timezone(self): exp_idx = DatetimeIndex( [ - "2010-12-31 15:00:00+00:00", - "2010-12-31 16:00:00+00:00", - "2010-12-31 17:00:00+00:00", "2010-12-31 23:00:00+00:00", "2011-01-01 00:00:00+00:00", "2011-01-01 01:00:00+00:00", + "2010-12-31 15:00:00+00:00", + "2010-12-31 16:00:00+00:00", + "2010-12-31 17:00:00+00:00", ] ).as_unit("ns") expected = DataFrame( [ - [np.nan, 1], - [np.nan, 2], - [np.nan, 3], [1, np.nan], [2, np.nan], [3, np.nan], + [np.nan, 1], + [np.nan, 2], + [np.nan, 3], ], index=exp_idx, columns=["a", "b"], From 32128ebeaf2df6d1c7cd611fb7c6d927ee32754b Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sun, 19 Oct 2025 14:04:00 -0400 Subject: [PATCH 2/5] Deprecate with union_indexes --- pandas/core/apply.py | 1 + pandas/core/indexes/api.py | 1 + pandas/core/reshape/concat.py | 40 +++++++++++++++---- pandas/core/reshape/merge.py | 2 +- pandas/tests/reshape/concat/test_concat.py | 9 ++++- pandas/tests/reshape/concat/test_datetimes.py | 18 +++++---- 6 files changed, 54 insertions(+), 17 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index b305cbfaa3a1e..7d2f173ba7687 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -637,6 +637,7 @@ def wrap_results_dict_like( results, axis=axis, keys=keys_to_use, + sort=False, ) elif any(is_ndframe): # There is a mix of NDFrames and scalars diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index b8b224d83d0dd..beca84357c4a3 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -227,6 +227,7 @@ def union_indexes(indexes, sort: bool | None = True) -> Index: raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex") if num_dtis == len(indexes): + # sort = True result = indexes[0] elif num_dtis > 1: diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index e39c716784455..0d3285cc10689 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -46,6 +46,7 @@ get_objs_combined_axis, get_unanimous_names, ) +from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.internals import concatenate_managers if TYPE_CHECKING: @@ -162,7 +163,7 @@ def concat( levels=None, names: list[HashableT] | None = None, verify_integrity: bool = False, - sort: bool = False, + sort: bool | lib.NoDefault = lib.no_default, copy: bool | lib.NoDefault = lib.no_default, ) -> DataFrame | Series: """ @@ -405,14 +406,39 @@ def concat( "Only can inner (intersect) or outer (union) join the other axis" ) - if not is_bool(sort): - raise ValueError( - f"The 'sort' keyword only accepts boolean values; {sort} was passed." - ) - sort = bool(sort) - objs, keys, ndims = _clean_keys_and_objs(objs, keys) + if sort is lib.no_default: + if axis == 0: + non_concat_axis = [ + obj.columns if isinstance(obj, ABCDataFrame) else Index([obj.name]) + for obj in objs + ] + else: + non_concat_axis = [obj.index for obj in objs] + + if all(isinstance(index, DatetimeIndex) for index in non_concat_axis): + from pandas.core.indexes.api import union_indexes + + no_sort_result_index = union_indexes(non_concat_axis, sort=False) + if not no_sort_result_index.is_monotonic_increasing: + msg = ( + "Sorting by default when concatenating all DatetimeIndex is " + "deprecated. In the future, pandas will respect the default " + "of `sort=False`. Specify `sort=True` or `sort=False` to " + "silence this message." + ) + warnings.warn(msg, Pandas4Warning, stacklevel=find_stack_level()) + sort = True + else: + sort = False + else: + if not is_bool(sort): + raise ValueError( + f"The 'sort' keyword only accepts boolean values; {sort} was passed." + ) + sort = bool(sort) + # select an object to be our result reference sample, objs = _get_sample_object(objs, ndims, keys, names, levels, intersect) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 604181214ad44..94b913b44413b 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1125,7 +1125,7 @@ def _reindex_and_concat( left.columns = llabels right.columns = rlabels - result = concat([left, right], axis=1) + result = concat([left, right], axis=1, sort=False) return result def get_result(self) -> DataFrame: diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 2d0eb5d14a1d9..551314ae92faa 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -10,7 +10,10 @@ import numpy as np import pytest -from pandas.errors import InvalidIndexError +from pandas.errors import ( + InvalidIndexError, + Pandas4Warning, +) import pandas as pd from pandas import ( @@ -434,7 +437,9 @@ def test_concat_bug_1719(self): # to join with union # these two are of different length! left = concat([ts1, ts2], join="outer", axis=1) - right = concat([ts2, ts1], join="outer", axis=1) + msg = "Sorting by default when concatenating all DatetimeIndex" + with tm.assert_produces_warning(Pandas4Warning, match=msg): + right = concat([ts2, ts1], join="outer", axis=1) assert len(left) == len(right) diff --git a/pandas/tests/reshape/concat/test_datetimes.py b/pandas/tests/reshape/concat/test_datetimes.py index 9a8789926a37e..b1cba7ee31eac 100644 --- a/pandas/tests/reshape/concat/test_datetimes.py +++ b/pandas/tests/reshape/concat/test_datetimes.py @@ -5,6 +5,8 @@ import numpy as np import pytest +from pandas.errors import Pandas4Warning + import pandas as pd from pandas import ( DataFrame, @@ -69,27 +71,29 @@ def test_concat_datetime_timezone(self): idx3 = date_range("2011-01-01", periods=3, freq="h", tz="Asia/Tokyo") df3 = DataFrame({"b": [1, 2, 3]}, index=idx3) - result = concat([df1, df3], axis=1) + msg = "Sorting by default when concatenating all DatetimeIndex" + with tm.assert_produces_warning(Pandas4Warning, match=msg): + result = concat([df1, df3], axis=1) exp_idx = DatetimeIndex( [ - "2010-12-31 23:00:00+00:00", - "2011-01-01 00:00:00+00:00", - "2011-01-01 01:00:00+00:00", "2010-12-31 15:00:00+00:00", "2010-12-31 16:00:00+00:00", "2010-12-31 17:00:00+00:00", + "2010-12-31 23:00:00+00:00", + "2011-01-01 00:00:00+00:00", + "2011-01-01 01:00:00+00:00", ] ).as_unit("ns") expected = DataFrame( [ - [1, np.nan], - [2, np.nan], - [3, np.nan], [np.nan, 1], [np.nan, 2], [np.nan, 3], + [1, np.nan], + [2, np.nan], + [3, np.nan], ], index=exp_idx, columns=["a", "b"], From 3b3599304e758cdca760a8171d6b61ebb83964c6 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 25 Oct 2025 10:00:57 -0400 Subject: [PATCH 3/5] More performant, more warnings --- pandas/core/frame.py | 1 + pandas/core/generic.py | 2 +- pandas/core/groupby/groupby.py | 2 +- pandas/core/reshape/concat.py | 13 ++++++--- pandas/core/reshape/pivot.py | 2 +- pandas/io/pytables.py | 6 ++-- pandas/tests/io/pytables/test_append.py | 13 +++++++-- pandas/tests/io/pytables/test_select.py | 13 +++++++-- pandas/tests/io/pytables/test_store.py | 9 ++++-- pandas/tests/resample/test_datetime_index.py | 12 ++++---- pandas/tests/resample/test_resample_api.py | 28 ++++++++++++++----- pandas/tests/reshape/concat/test_concat.py | 5 ++-- pandas/tests/reshape/concat/test_datetimes.py | 4 ++- pandas/tests/reshape/concat/test_series.py | 10 +++++-- 14 files changed, 86 insertions(+), 34 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9c41b82bbbc8e..50b7afc3c4411 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -11114,6 +11114,7 @@ def _append_internal( result = concat( [self, row_df], ignore_index=ignore_index, + sort=False, ) return result.__finalize__(self, method="append") diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 43078ef3a263c..7c23c6d916b2c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6521,7 +6521,7 @@ def astype( return self.copy(deep=False) # GH 19920: retain column metadata after concat - result = concat(results, axis=1) + result = concat(results, axis=1, sort=False) # GH#40810 retain subclass # error: Incompatible types in assignment # (expression has type "Self", variable has type "DataFrame") diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index fe7bf5bbc4c2c..8a1cdf1d823fe 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -5236,7 +5236,7 @@ def shift( return ( shifted_dataframes[0] if len(shifted_dataframes) == 1 - else concat(shifted_dataframes, axis=1) + else concat(shifted_dataframes, axis=1, sort=False) ) @final diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 0d3285cc10689..487cf7608c908 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -418,10 +418,15 @@ def concat( non_concat_axis = [obj.index for obj in objs] if all(isinstance(index, DatetimeIndex) for index in non_concat_axis): - from pandas.core.indexes.api import union_indexes - - no_sort_result_index = union_indexes(non_concat_axis, sort=False) - if not no_sort_result_index.is_monotonic_increasing: + warn = any( + id(prev) != id(curr) + for prev, curr in zip(non_concat_axis, non_concat_axis[1:]) + ) and any( + prev[-1] > curr[0] + for prev, curr in zip(non_concat_axis, non_concat_axis[1:]) + if not prev.empty and not curr.empty + ) + if warn: msg = ( "Sorting by default when concatenating all DatetimeIndex is " "deprecated. In the future, pandas will respect the default " diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 04c584c226aed..78b9676443042 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -263,7 +263,7 @@ def pivot_table( pieces.append(_table) keys.append(getattr(func, "__name__", func)) - table = concat(pieces, keys=keys, axis=1) + table = concat(pieces, keys=keys, axis=1, sort=False) return table.__finalize__(data, method="pivot_table") table = __internal_pivot_table( diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 3616a93321358..71485fa7d3169 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1109,7 +1109,9 @@ def func(_start, _stop, _where): ] # concat and return - return concat(objs, axis=axis, verify_integrity=False)._consolidate() + return concat( + objs, axis=axis, verify_integrity=False, sort=False + )._consolidate() # create the iterator it = TableIterator( @@ -4860,7 +4862,7 @@ def read( if len(frames) == 1: df = frames[0] else: - df = concat(frames, axis=1) + df = concat(frames, axis=1, sort=False) selection = Selection(self, where=where, start=start, stop=stop) # apply the selection filters & axis orderings diff --git a/pandas/tests/io/pytables/test_append.py b/pandas/tests/io/pytables/test_append.py index 479f2468a86ab..8a933f3afc40f 100644 --- a/pandas/tests/io/pytables/test_append.py +++ b/pandas/tests/io/pytables/test_append.py @@ -7,6 +7,7 @@ from pandas._libs.tslibs import Timestamp from pandas.compat import PY312 +from pandas.errors import Pandas4Warning import pandas as pd from pandas import ( @@ -887,7 +888,9 @@ def test_append_to_multiple(setup_path): ) df2 = df1.copy().rename(columns="{}_2".format) df2["foo"] = "bar" - df = concat([df1, df2], axis=1) + msg = "Sorting by default when concatenating all DatetimeIndex is deprecated" + with tm.assert_produces_warning(Pandas4Warning, match=msg): + df = concat([df1, df2], axis=1) with ensure_clean_store(setup_path) as store: # exceptions @@ -928,7 +931,9 @@ def test_append_to_multiple_dropna(setup_path): index=date_range("2000-01-01", periods=10, freq="B"), ).rename(columns="{}_2".format) df1.iloc[1, df1.columns.get_indexer(["A", "B"])] = np.nan - df = concat([df1, df2], axis=1) + msg = "Sorting by default when concatenating all DatetimeIndex is deprecated" + with tm.assert_produces_warning(Pandas4Warning, match=msg): + df = concat([df1, df2], axis=1) with ensure_clean_store(setup_path) as store: # dropna=True should guarantee rows are synchronized @@ -949,7 +954,9 @@ def test_append_to_multiple_dropna_false(setup_path): ) df2 = df1.copy().rename(columns="{}_2".format) df1.iloc[1, df1.columns.get_indexer(["A", "B"])] = np.nan - df = concat([df1, df2], axis=1) + msg = "Sorting by default when concatenating all DatetimeIndex is deprecated" + with tm.assert_produces_warning(Pandas4Warning, match=msg): + df = concat([df1, df2], axis=1) with ( ensure_clean_store(setup_path) as store, diff --git a/pandas/tests/io/pytables/test_select.py b/pandas/tests/io/pytables/test_select.py index 0dffb284fa6d2..e4aebadb1ce67 100644 --- a/pandas/tests/io/pytables/test_select.py +++ b/pandas/tests/io/pytables/test_select.py @@ -3,6 +3,7 @@ from pandas._libs.tslibs import Timestamp from pandas.compat import PY312 +from pandas.errors import Pandas4Warning import pandas as pd from pandas import ( @@ -411,7 +412,9 @@ def test_select_iterator(tmp_path, setup_path): df2["foo"] = "bar" store.append("df2", df2) - df = concat([df1, df2], axis=1) + msg = "Sorting by default when concatenating all DatetimeIndex is deprecated" + with tm.assert_produces_warning(Pandas4Warning, match=msg): + df = concat([df1, df2], axis=1) # full selection expected = store.select_as_multiple(["df1", "df2"], selector="df1") @@ -901,7 +904,9 @@ def test_select_as_multiple(setup_path): result = store.select_as_multiple( ["df1", "df2"], where=["A>0", "B>0"], selector="df1" ) - expected = concat([df1, df2], axis=1) + msg = "Sorting by default when concatenating all DatetimeIndex is deprecated" + with tm.assert_produces_warning(Pandas4Warning, match=msg): + expected = concat([df1, df2], axis=1) expected = expected[(expected.A > 0) & (expected.B > 0)] tm.assert_frame_equal(result, expected, check_freq=False) # FIXME: 2021-01-20 this is failing with freq None vs 4B on some builds @@ -910,7 +915,9 @@ def test_select_as_multiple(setup_path): result = store.select_as_multiple( ["df1", "df2"], where="index>df2.index[4]", selector="df2" ) - expected = concat([df1, df2], axis=1) + msg = "Sorting by default when concatenating all DatetimeIndex is deprecated" + with tm.assert_produces_warning(Pandas4Warning, match=msg): + expected = concat([df1, df2], axis=1) expected = expected[5:] tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index 5cfefeb469e8a..5792dbb6a2432 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -8,6 +8,7 @@ import pytest from pandas.compat import PY312 +from pandas.errors import Pandas4Warning import pandas as pd from pandas import ( @@ -732,9 +733,13 @@ def test_coordinates(setup_path): c = store.select_as_coordinates("df1", ["A>0", "B>0"]) df1_result = store.select("df1", c) df2_result = store.select("df2", c) - result = concat([df1_result, df2_result], axis=1) + msg = "Sorting by default when concatenating all DatetimeIndex is deprecated" + with tm.assert_produces_warning(Pandas4Warning, match=msg): + result = concat([df1_result, df2_result], axis=1) - expected = concat([df1, df2], axis=1) + msg = "Sorting by default when concatenating all DatetimeIndex is deprecated" + with tm.assert_produces_warning(Pandas4Warning, match=msg): + expected = concat([df1, df2], axis=1) expected = expected[(expected.A > 0) & (expected.B > 0)] tm.assert_frame_equal(result, expected, check_freq=False) # FIXME: 2021-01-18 on some (mostly windows) builds we get freq=None diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index ab88d221864c0..7f7a48707bb33 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -602,11 +602,13 @@ def test_resample_ohlc_dataframe(unit): df.index = df.index.as_unit(unit) df.columns.name = "Cols" res = df.resample("h").ohlc() - exp = pd.concat( - [df["VOLUME"].resample("h").ohlc(), df["PRICE"].resample("h").ohlc()], - axis=1, - keys=df.columns, - ) + msg = "Sorting by default when concatenating all DatetimeIndex is deprecated" + with tm.assert_produces_warning(Pandas4Warning, match=msg): + exp = pd.concat( + [df["VOLUME"].resample("h").ohlc(), df["PRICE"].resample("h").ohlc()], + axis=1, + keys=df.columns, + ) assert exp.columns.names[0] == "Cols" tm.assert_frame_equal(exp, res) diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index 845b5ad7acc00..de3ea0fb36b78 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -6,6 +6,7 @@ from pandas._libs import lib from pandas._libs.tslibs import Day +from pandas.errors import Pandas4Warning import pandas as pd from pandas import ( @@ -440,13 +441,16 @@ def cases(request): def test_agg_mixed_column_aggregation(cases, a_mean, a_std, b_mean, b_std, request): - expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1) + msg = "Sorting by default when concatenating all DatetimeIndex is deprecated" + with tm.assert_produces_warning(Pandas4Warning, match=msg): + expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1) expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", ""]]) # "date" is an index and a column, so get included in the agg if "df_mult" in request.node.callspec.id: date_mean = cases["date"].mean() date_std = cases["date"].std() - expected = pd.concat([date_mean, date_std, expected], axis=1) + with tm.assert_produces_warning(Pandas4Warning, match=msg): + expected = pd.concat([date_mean, date_std, expected], axis=1) expected.columns = pd.MultiIndex.from_product( [["date", "A", "B"], ["mean", ""]] ) @@ -463,13 +467,17 @@ def test_agg_mixed_column_aggregation(cases, a_mean, a_std, b_mean, b_std, reque ], ) def test_agg_both_mean_std_named_result(cases, a_mean, b_std, agg): - expected = pd.concat([a_mean, b_std], axis=1) + msg = "Sorting by default when concatenating all DatetimeIndex is deprecated" + with tm.assert_produces_warning(Pandas4Warning, match=msg): + expected = pd.concat([a_mean, b_std], axis=1) result = cases.aggregate(**agg) tm.assert_frame_equal(result, expected, check_like=True) def test_agg_both_mean_std_dict_of_list(cases, a_mean, a_std): - expected = pd.concat([a_mean, a_std], axis=1) + msg = "Sorting by default when concatenating all DatetimeIndex is deprecated" + with tm.assert_produces_warning(Pandas4Warning, match=msg): + expected = pd.concat([a_mean, a_std], axis=1) expected.columns = pd.MultiIndex.from_tuples([("A", "mean"), ("A", "std")]) result = cases.aggregate({"A": ["mean", "std"]}) tm.assert_frame_equal(result, expected) @@ -479,7 +487,9 @@ def test_agg_both_mean_std_dict_of_list(cases, a_mean, a_std): "agg", [{"func": ["mean", "sum"]}, {"mean": "mean", "sum": "sum"}] ) def test_agg_both_mean_sum(cases, a_mean, a_sum, agg): - expected = pd.concat([a_mean, a_sum], axis=1) + msg = "Sorting by default when concatenating all DatetimeIndex is deprecated" + with tm.assert_produces_warning(Pandas4Warning, match=msg): + expected = pd.concat([a_mean, a_sum], axis=1) expected.columns = ["mean", "sum"] result = cases["A"].aggregate(**agg) tm.assert_frame_equal(result, expected) @@ -502,7 +512,9 @@ def test_agg_dict_of_dict_specificationerror(cases, agg): def test_agg_dict_of_lists(cases, a_mean, a_std, b_mean, b_std): - expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1) + msg = "Sorting by default when concatenating all DatetimeIndex is deprecated" + with tm.assert_produces_warning(Pandas4Warning, match=msg): + expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1) expected.columns = pd.MultiIndex.from_tuples( [("A", "mean"), ("A", "std"), ("B", "mean"), ("B", "std")] ) @@ -556,7 +568,9 @@ def test_agg_no_column(cases, agg): def test_agg_specificationerror_nested(cases, cols, agg, a_sum, a_std, b_mean, b_std): # agg with different hows # equivalent of using a selection list / or not - expected = pd.concat([a_sum, a_std, b_mean, b_std], axis=1) + msg = "Sorting by default when concatenating all DatetimeIndex is deprecated" + with tm.assert_produces_warning(Pandas4Warning, match=msg): + expected = pd.concat([a_sum, a_std, b_mean, b_std], axis=1) expected.columns = pd.MultiIndex.from_tuples( [("A", "sum"), ("A", "std"), ("B", "mean"), ("B", "std")] ) diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 551314ae92faa..d4ad0e3b075e9 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -436,8 +436,9 @@ def test_concat_bug_1719(self): # to join with union # these two are of different length! - left = concat([ts1, ts2], join="outer", axis=1) - msg = "Sorting by default when concatenating all DatetimeIndex" + msg = "Sorting by default when concatenating all DatetimeIndex is deprecated" + with tm.assert_produces_warning(Pandas4Warning, match=msg): + left = concat([ts1, ts2], join="outer", axis=1) with tm.assert_produces_warning(Pandas4Warning, match=msg): right = concat([ts2, ts1], join="outer", axis=1) diff --git a/pandas/tests/reshape/concat/test_datetimes.py b/pandas/tests/reshape/concat/test_datetimes.py index b1cba7ee31eac..12ef453f46b60 100644 --- a/pandas/tests/reshape/concat/test_datetimes.py +++ b/pandas/tests/reshape/concat/test_datetimes.py @@ -52,7 +52,9 @@ def test_concat_datetime_timezone(self): idx2 = date_range(start=idx1[0], end=idx1[-1], freq="h") df1 = DataFrame({"a": [1, 2, 3]}, index=idx1) df2 = DataFrame({"b": [1, 2, 3]}, index=idx2) - result = concat([df1, df2], axis=1) + msg = "Sorting by default when concatenating all DatetimeIndex is deprecated" + with tm.assert_produces_warning(Pandas4Warning, match=msg): + result = concat([df1, df2], axis=1) exp_idx = DatetimeIndex( [ diff --git a/pandas/tests/reshape/concat/test_series.py b/pandas/tests/reshape/concat/test_series.py index 3523340bb2858..85aa243000434 100644 --- a/pandas/tests/reshape/concat/test_series.py +++ b/pandas/tests/reshape/concat/test_series.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas.errors import Pandas4Warning + from pandas import ( DataFrame, DatetimeIndex, @@ -54,11 +56,15 @@ def test_concat_series_axis1(self): pieces = [ts[:-2], ts[2:], ts[2:-2]] - result = concat(pieces, axis=1) + msg = "Sorting by default when concatenating all DatetimeIndex is deprecated" + with tm.assert_produces_warning(Pandas4Warning, match=msg): + result = concat(pieces, axis=1) expected = DataFrame(pieces).T tm.assert_frame_equal(result, expected) - result = concat(pieces, keys=["A", "B", "C"], axis=1) + msg = "Sorting by default when concatenating all DatetimeIndex is deprecated" + with tm.assert_produces_warning(Pandas4Warning, match=msg): + result = concat(pieces, keys=["A", "B", "C"], axis=1) expected = DataFrame(pieces, index=["A", "B", "C"]).T tm.assert_frame_equal(result, expected) From ac0dc973a06e537b16dbfd5c8d7b4f173f0ef22a Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 25 Oct 2025 11:15:41 -0400 Subject: [PATCH 4/5] Fixup --- pandas/core/indexes/api.py | 22 +++++++++++++--------- pandas/core/reshape/concat.py | 3 --- pandas/core/reshape/merge.py | 2 +- pandas/tests/reshape/merge/test_merge.py | 9 ++++++--- 4 files changed, 20 insertions(+), 16 deletions(-) diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index beca84357c4a3..b42d12b85399a 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -64,7 +64,7 @@ def get_objs_combined_axis( objs, intersect: bool = False, axis: Axis = 0, - sort: bool = True, + sort: bool | lib.NoDefault = True, ) -> Index: """ Extract combined index: return intersection or union (depending on the @@ -81,7 +81,8 @@ def get_objs_combined_axis( axis : {0 or 'index', 1 or 'outer'}, default 0 The axis to extract indexes from. sort : bool, default True - Whether the result index should come out sorted or not. + Whether the result index should come out sorted or not. NoDefault + use for deprecation in GH#57335. Returns ------- @@ -108,7 +109,7 @@ def _get_distinct_objs(objs: list[Index]) -> list[Index]: def _get_combined_index( indexes: list[Index], intersect: bool = False, - sort: bool = False, + sort: bool | lib.NoDefault = False, ) -> Index: """ Return the union or intersection of indexes. @@ -121,7 +122,8 @@ def _get_combined_index( If True, calculate the intersection between indexes. Otherwise, calculate the union. sort : bool, default False - Whether the result index should come out sorted or not. + Whether the result index should come out sorted or not. NoDefault + used for deprecation of GH#57335 Returns ------- @@ -138,10 +140,10 @@ def _get_combined_index( for other in indexes[1:]: index = index.intersection(other) else: - index = union_indexes(indexes, sort=False) + index = union_indexes(indexes, sort=sort if sort is lib.no_default else False) index = ensure_index(index) - if sort: + if sort and sort is not lib.no_default: index = safe_sort_index(index) return index @@ -180,7 +182,7 @@ def safe_sort_index(index: Index) -> Index: return index -def union_indexes(indexes, sort: bool | None = True) -> Index: +def union_indexes(indexes, sort: bool | None | lib.NoDefault = True) -> Index: """ Return the union of indexes. @@ -190,7 +192,8 @@ def union_indexes(indexes, sort: bool | None = True) -> Index: ---------- indexes : list of Index or list objects sort : bool, default True - Whether the result index should come out sorted or not. + Whether the result index should come out sorted or not. NoDefault + used for deprecation of GH#57335. Returns ------- @@ -227,7 +230,8 @@ def union_indexes(indexes, sort: bool | None = True) -> Index: raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex") if num_dtis == len(indexes): - # sort = True + if sort is lib.no_default: + sort = True result = indexes[0] elif num_dtis > 1: diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 487cf7608c908..3c4408a7cd480 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -434,9 +434,6 @@ def concat( "silence this message." ) warnings.warn(msg, Pandas4Warning, stacklevel=find_stack_level()) - sort = True - else: - sort = False else: if not is_bool(sort): raise ValueError( diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 94b913b44413b..604181214ad44 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1125,7 +1125,7 @@ def _reindex_and_concat( left.columns = llabels right.columns = rlabels - result = concat([left, right], axis=1, sort=False) + result = concat([left, right], axis=1) return result def get_result(self) -> DataFrame: diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index c38ee32cb7226..c7a1e83ad1047 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -375,9 +375,9 @@ def test_merge_non_unique_indexes(self): dt3 = datetime(2012, 5, 3) dt4 = datetime(2012, 5, 4) - df1 = DataFrame({"x": ["a"]}, index=[dt]) - df2 = DataFrame({"y": ["b", "c"]}, index=[dt, dt]) - _check_merge(df1, df2) + # df1 = DataFrame({"x": ["a"]}, index=[dt]) + # df2 = DataFrame({"y": ["b", "c"]}, index=[dt, dt]) + # _check_merge(df1, df2) # Not monotonic df1 = DataFrame({"x": ["a", "b", "q"]}, index=[dt2, dt, dt4]) @@ -1480,6 +1480,8 @@ def test_merge_how_validation(self): def _check_merge(x, y): for how in ["inner", "left", "outer"]: for sort in [True, False]: + how = "inner" + sort = False result = x.join(y, how=how, sort=sort) expected = merge(x.reset_index(), y.reset_index(), how=how, sort=sort) @@ -1487,6 +1489,7 @@ def _check_merge(x, y): # TODO check_names on merge? tm.assert_frame_equal(result, expected, check_names=False) + break class TestMergeDtypes: From 52dbb3d4019b8e0540c9f847dcfcd211e8ee4389 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 25 Oct 2025 12:10:38 -0400 Subject: [PATCH 5/5] Cleanups --- pandas/core/apply.py | 1 - pandas/core/indexes/api.py | 2 +- pandas/core/reshape/concat.py | 2 ++ pandas/tests/reshape/merge/test_merge.py | 9 +++------ 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 7d2f173ba7687..b305cbfaa3a1e 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -637,7 +637,6 @@ def wrap_results_dict_like( results, axis=axis, keys=keys_to_use, - sort=False, ) elif any(is_ndframe): # There is a mix of NDFrames and scalars diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index b42d12b85399a..f1ffa6fe08bdb 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -204,7 +204,7 @@ def union_indexes(indexes, sort: bool | None | lib.NoDefault = True) -> Index: if len(indexes) == 1: result = indexes[0] if isinstance(result, list): - if not sort: + if not sort or sort is lib.no_default: result = Index(result) else: result = Index(sorted(result)) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 3c4408a7cd480..e38d3c7deb8df 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -434,6 +434,8 @@ def concat( "silence this message." ) warnings.warn(msg, Pandas4Warning, stacklevel=find_stack_level()) + else: + sort = False else: if not is_bool(sort): raise ValueError( diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index c7a1e83ad1047..c38ee32cb7226 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -375,9 +375,9 @@ def test_merge_non_unique_indexes(self): dt3 = datetime(2012, 5, 3) dt4 = datetime(2012, 5, 4) - # df1 = DataFrame({"x": ["a"]}, index=[dt]) - # df2 = DataFrame({"y": ["b", "c"]}, index=[dt, dt]) - # _check_merge(df1, df2) + df1 = DataFrame({"x": ["a"]}, index=[dt]) + df2 = DataFrame({"y": ["b", "c"]}, index=[dt, dt]) + _check_merge(df1, df2) # Not monotonic df1 = DataFrame({"x": ["a", "b", "q"]}, index=[dt2, dt, dt4]) @@ -1480,8 +1480,6 @@ def test_merge_how_validation(self): def _check_merge(x, y): for how in ["inner", "left", "outer"]: for sort in [True, False]: - how = "inner" - sort = False result = x.join(y, how=how, sort=sort) expected = merge(x.reset_index(), y.reset_index(), how=how, sort=sort) @@ -1489,7 +1487,6 @@ def _check_merge(x, y): # TODO check_names on merge? tm.assert_frame_equal(result, expected, check_names=False) - break class TestMergeDtypes: