From 603120ba9cb80b241864e728125dadbd40fecdc9 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sun, 19 Oct 2025 08:37:53 -0400 Subject: [PATCH 1/8] DEPR/BUG: Do not ignore sort in concat for DatetimeIndex --- doc/source/whatsnew/v3.0.0.rst | 57 ++++++++++++++++ pandas/core/indexes/api.py | 24 ++++--- pandas/core/reshape/concat.py | 66 +++++++++++++++++-- pandas/tests/io/pytables/test_select.py | 9 ++- pandas/tests/reshape/concat/test_concat.py | 9 ++- pandas/tests/reshape/concat/test_datetimes.py | 6 +- 6 files changed, 149 insertions(+), 22 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 75b4c5c0fe14d..c9146d3893994 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -371,6 +371,63 @@ In cases with mixed-resolution inputs, the highest resolution is used: In [2]: pd.to_datetime([pd.Timestamp("2024-03-22 11:43:01"), "2024-03-22 11:43:01.002"]).dtype Out[2]: dtype(' Index: """ Extract combined index: return intersection or union (depending on the @@ -81,7 +81,8 @@ def get_objs_combined_axis( axis : {0 or 'index', 1 or 'outer'}, default 0 The axis to extract indexes from. sort : bool, default True - Whether the result index should come out sorted or not. + Whether the result index should come out sorted or not. NoDefault + use for deprecation in GH#57335. Returns ------- @@ -108,7 +109,7 @@ def _get_distinct_objs(objs: list[Index]) -> list[Index]: def _get_combined_index( indexes: list[Index], intersect: bool = False, - sort: bool = False, + sort: bool | lib.NoDefault = False, ) -> Index: """ Return the union or intersection of indexes. @@ -121,7 +122,8 @@ def _get_combined_index( If True, calculate the intersection between indexes. Otherwise, calculate the union. sort : bool, default False - Whether the result index should come out sorted or not. + Whether the result index should come out sorted or not. NoDefault + used for deprecation of GH#57335 Returns ------- @@ -138,10 +140,10 @@ def _get_combined_index( for other in indexes[1:]: index = index.intersection(other) else: - index = union_indexes(indexes, sort=False) + index = union_indexes(indexes, sort=sort if sort is lib.no_default else False) index = ensure_index(index) - if sort: + if sort and sort is not lib.no_default: index = safe_sort_index(index) return index @@ -180,7 +182,7 @@ def safe_sort_index(index: Index) -> Index: return index -def union_indexes(indexes, sort: bool | None = True) -> Index: +def union_indexes(indexes, sort: bool | None | lib.NoDefault = True) -> Index: """ Return the union of indexes. @@ -190,7 +192,8 @@ def union_indexes(indexes, sort: bool | None = True) -> Index: ---------- indexes : list of Index or list objects sort : bool, default True - Whether the result index should come out sorted or not. + Whether the result index should come out sorted or not. NoDefault + used for deprecation of GH#57335. Returns ------- @@ -201,7 +204,7 @@ def union_indexes(indexes, sort: bool | None = True) -> Index: if len(indexes) == 1: result = indexes[0] if isinstance(result, list): - if not sort: + if not sort or sort is lib.no_default: result = Index(result) else: result = Index(sorted(result)) @@ -227,7 +230,8 @@ def union_indexes(indexes, sort: bool | None = True) -> Index: raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex") if num_dtis == len(indexes): - sort = True + if sort is lib.no_default: + sort = True result = indexes[0] elif num_dtis > 1: diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index e39c716784455..c7dc031e1f1e5 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -45,7 +45,9 @@ ensure_index, get_objs_combined_axis, get_unanimous_names, + union_indexes, ) +from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.internals import concatenate_managers if TYPE_CHECKING: @@ -162,7 +164,7 @@ def concat( levels=None, names: list[HashableT] | None = None, verify_integrity: bool = False, - sort: bool = False, + sort: bool | lib.NoDefault = lib.no_default, copy: bool | lib.NoDefault = lib.no_default, ) -> DataFrame | Series: """ @@ -405,13 +407,40 @@ def concat( "Only can inner (intersect) or outer (union) join the other axis" ) - if not is_bool(sort): + objs, keys, ndims = _clean_keys_and_objs(objs, keys) + + if sort is lib.no_default: + if axis == 0: + non_concat_axis = [ + obj.columns if isinstance(obj, ABCDataFrame) else Index([obj.name]) + for obj in objs + ] + else: + non_concat_axis = [obj.index for obj in objs] + + if ( + any(not isinstance(index, DatetimeIndex) for index in non_concat_axis) + or all( + id(prev) == id(curr) + for prev, curr in zip(non_concat_axis, non_concat_axis[1:]) + ) + or ( + all( + prev[-1] <= curr[0] and prev.is_monotonic_increasing + for prev, curr in zip(non_concat_axis, non_concat_axis[1:]) + if not prev.empty and not curr.empty + ) + and non_concat_axis[-1].is_monotonic_increasing + ) + ): + # Sorting or not will not impact the result. + sort = False + elif not is_bool(sort): raise ValueError( f"The 'sort' keyword only accepts boolean values; {sort} was passed." ) - sort = bool(sort) - - objs, keys, ndims = _clean_keys_and_objs(objs, keys) + else: + sort = bool(sort) # select an object to be our result reference sample, objs = _get_sample_object(objs, ndims, keys, names, levels, intersect) @@ -436,9 +465,10 @@ def concat( if len(ndims) > 1: objs = _sanitize_mixed_ndim(objs, sample, ignore_index, bm_axis) + orig_axis = axis axis = 1 - bm_axis if is_frame else 0 names = names or getattr(keys, "names", None) - return _get_result( + result = _get_result( objs, is_series, bm_axis, @@ -452,6 +482,28 @@ def concat( axis, ) + if sort is lib.no_default: + if orig_axis == 0: + non_concat_axis = [ + obj.columns if isinstance(obj, ABCDataFrame) else Index([obj.name]) + for obj in objs + ] + else: + non_concat_axis = [obj.index for obj in objs] + no_sort_result_index = union_indexes(non_concat_axis, sort=False) + orig = result.index if orig_axis == 1 else result.columns + if not no_sort_result_index.equals(orig): + msg = ( + "Sorting by default when concatenating all DatetimeIndex is " + "deprecated. In the future, pandas will respect the default " + "of `sort=False`. Specify `sort=True` or `sort=False` to " + "silence this message. If you see this warnings when not " + "directly calling concat, report a bug to pandas." + ) + warnings.warn(msg, Pandas4Warning, stacklevel=find_stack_level()) + + return result + def _sanitize_mixed_ndim( objs: list[Series | DataFrame], @@ -510,7 +562,7 @@ def _get_result( bm_axis: AxisInt, ignore_index: bool, intersect: bool, - sort: bool, + sort: bool | lib.NoDefault, keys: Iterable[Hashable] | None, levels, verify_integrity: bool, diff --git a/pandas/tests/io/pytables/test_select.py b/pandas/tests/io/pytables/test_select.py index 0dffb284fa6d2..c4967d7e4aa81 100644 --- a/pandas/tests/io/pytables/test_select.py +++ b/pandas/tests/io/pytables/test_select.py @@ -3,6 +3,7 @@ from pandas._libs.tslibs import Timestamp from pandas.compat import PY312 +from pandas.errors import Pandas4Warning import pandas as pd from pandas import ( @@ -901,7 +902,9 @@ def test_select_as_multiple(setup_path): result = store.select_as_multiple( ["df1", "df2"], where=["A>0", "B>0"], selector="df1" ) - expected = concat([df1, df2], axis=1) + msg = "Sorting by default when concatenating all DatetimeIndex is deprecated" + with tm.assert_produces_warning(Pandas4Warning, match=msg): + expected = concat([df1, df2], axis=1) expected = expected[(expected.A > 0) & (expected.B > 0)] tm.assert_frame_equal(result, expected, check_freq=False) # FIXME: 2021-01-20 this is failing with freq None vs 4B on some builds @@ -910,7 +913,9 @@ def test_select_as_multiple(setup_path): result = store.select_as_multiple( ["df1", "df2"], where="index>df2.index[4]", selector="df2" ) - expected = concat([df1, df2], axis=1) + msg = "Sorting by default when concatenating all DatetimeIndex is deprecated" + with tm.assert_produces_warning(Pandas4Warning, match=msg): + expected = concat([df1, df2], axis=1) expected = expected[5:] tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 2d0eb5d14a1d9..7d0e534cb7689 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -10,7 +10,10 @@ import numpy as np import pytest -from pandas.errors import InvalidIndexError +from pandas.errors import ( + InvalidIndexError, + Pandas4Warning, +) import pandas as pd from pandas import ( @@ -434,7 +437,9 @@ def test_concat_bug_1719(self): # to join with union # these two are of different length! left = concat([ts1, ts2], join="outer", axis=1) - right = concat([ts2, ts1], join="outer", axis=1) + msg = "Sorting by default when concatenating all DatetimeIndex is deprecated" + with tm.assert_produces_warning(Pandas4Warning, match=msg): + right = concat([ts2, ts1], join="outer", axis=1) assert len(left) == len(right) diff --git a/pandas/tests/reshape/concat/test_datetimes.py b/pandas/tests/reshape/concat/test_datetimes.py index 0cf3192ea3a74..b1cba7ee31eac 100644 --- a/pandas/tests/reshape/concat/test_datetimes.py +++ b/pandas/tests/reshape/concat/test_datetimes.py @@ -5,6 +5,8 @@ import numpy as np import pytest +from pandas.errors import Pandas4Warning + import pandas as pd from pandas import ( DataFrame, @@ -69,7 +71,9 @@ def test_concat_datetime_timezone(self): idx3 = date_range("2011-01-01", periods=3, freq="h", tz="Asia/Tokyo") df3 = DataFrame({"b": [1, 2, 3]}, index=idx3) - result = concat([df1, df3], axis=1) + msg = "Sorting by default when concatenating all DatetimeIndex" + with tm.assert_produces_warning(Pandas4Warning, match=msg): + result = concat([df1, df3], axis=1) exp_idx = DatetimeIndex( [ From 91431e9c6a9ffa6387f9ed64ceaae8062497fc9f Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 25 Oct 2025 16:48:12 -0400 Subject: [PATCH 2/8] Fix groupby.shift bug --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/groupby/groupby.py | 2 +- pandas/core/reshape/concat.py | 3 ++- .../groupby/methods/test_groupby_shift_diff.py | 18 ++++++++++++++++++ 4 files changed, 22 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index c9146d3893994..671e3f9b86fbf 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1222,6 +1222,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrameGroupBy.apply` with ``as_index=False`` that was returning :class:`MultiIndex` instead of returning :class:`Index`. (:issue:`58291`) - Bug in :meth:`DataFrameGroupBy.cumsum` and :meth:`DataFrameGroupBy.cumprod` where ``numeric_only`` parameter was passed indirectly through kwargs instead of passing directly. (:issue:`58811`) - Bug in :meth:`DataFrameGroupBy.cumsum` where it did not return the correct dtype when the label contained ``None``. (:issue:`58811`) +- Bug in :meth:`DataFrameGroupBy.shift` where the resulting index would be sorted if the input is a :class:`DatetimeIndex` and multiple periods are specified (:issue:`62843`) - Bug in :meth:`DataFrameGroupby.transform` and :meth:`SeriesGroupby.transform` with a reducer and ``observed=False`` that coerces dtype to float when there are unobserved categories. (:issue:`55326`) - Bug in :meth:`Rolling.apply` for ``method="table"`` where column order was not being respected due to the columns getting sorted by default. (:issue:`59666`) - Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_period`` periods if ``method="table"``. (:issue:`58868`) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index fe7bf5bbc4c2c..8a1cdf1d823fe 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -5236,7 +5236,7 @@ def shift( return ( shifted_dataframes[0] if len(shifted_dataframes) == 1 - else concat(shifted_dataframes, axis=1) + else concat(shifted_dataframes, axis=1, sort=False) ) @final diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index c7dc031e1f1e5..948ecbdb4b0a3 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -419,7 +419,8 @@ def concat( non_concat_axis = [obj.index for obj in objs] if ( - any(not isinstance(index, DatetimeIndex) for index in non_concat_axis) + intersect + or any(not isinstance(index, DatetimeIndex) for index in non_concat_axis) or all( id(prev) == id(curr) for prev, curr in zip(non_concat_axis, non_concat_axis[1:]) diff --git a/pandas/tests/groupby/methods/test_groupby_shift_diff.py b/pandas/tests/groupby/methods/test_groupby_shift_diff.py index 1256046d81949..60e5fd9fa1863 100644 --- a/pandas/tests/groupby/methods/test_groupby_shift_diff.py +++ b/pandas/tests/groupby/methods/test_groupby_shift_diff.py @@ -248,3 +248,21 @@ def test_group_shift_with_multiple_periods_and_both_fill_and_freq_deprecated(): msg = "Passing a 'freq' together with a 'fill_value'" with pytest.raises(ValueError, match=msg): df.groupby("b")[["a"]].shift([1, 2], fill_value=1, freq="h") + + +def test_groupby_shift_multiple_periods_unsorted_index(): + # https://github.com/pandas-dev/pandas/pull/62843 + idx = date_range("1/1/2000", periods=4, freq="h") + df = DataFrame( + {"a": [1, 2, 3], "b": [True, True, False]}, + index=[idx[2], idx[0], idx[1]], + ) + result = df.groupby("b")[["a"]].shift([0, 1], freq="h") + expected = DataFrame( + { + "a_0": [1.0, 2.0, 3.0, np.nan], + "a_1": [3.0, np.nan, 2.0, 1.0], + }, + index=[idx[2], idx[0], idx[1], idx[3]], + ) + tm.assert_frame_equal(result, expected) From f1f23544caf8ba9f46fc5d582fa252cb591b285c Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 25 Oct 2025 18:09:59 -0400 Subject: [PATCH 3/8] Fixups --- pandas/core/reshape/concat.py | 2 +- pandas/tests/io/pytables/test_select.py | 9 ++------- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 948ecbdb4b0a3..4124ea3651e62 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -673,7 +673,7 @@ def new_axes( objs: list[Series | DataFrame], bm_axis: AxisInt, intersect: bool, - sort: bool, + sort: bool | lib.NoDefault, keys: Iterable[Hashable] | None, names: list[HashableT] | None, axis: AxisInt, diff --git a/pandas/tests/io/pytables/test_select.py b/pandas/tests/io/pytables/test_select.py index c4967d7e4aa81..0dffb284fa6d2 100644 --- a/pandas/tests/io/pytables/test_select.py +++ b/pandas/tests/io/pytables/test_select.py @@ -3,7 +3,6 @@ from pandas._libs.tslibs import Timestamp from pandas.compat import PY312 -from pandas.errors import Pandas4Warning import pandas as pd from pandas import ( @@ -902,9 +901,7 @@ def test_select_as_multiple(setup_path): result = store.select_as_multiple( ["df1", "df2"], where=["A>0", "B>0"], selector="df1" ) - msg = "Sorting by default when concatenating all DatetimeIndex is deprecated" - with tm.assert_produces_warning(Pandas4Warning, match=msg): - expected = concat([df1, df2], axis=1) + expected = concat([df1, df2], axis=1) expected = expected[(expected.A > 0) & (expected.B > 0)] tm.assert_frame_equal(result, expected, check_freq=False) # FIXME: 2021-01-20 this is failing with freq None vs 4B on some builds @@ -913,9 +910,7 @@ def test_select_as_multiple(setup_path): result = store.select_as_multiple( ["df1", "df2"], where="index>df2.index[4]", selector="df2" ) - msg = "Sorting by default when concatenating all DatetimeIndex is deprecated" - with tm.assert_produces_warning(Pandas4Warning, match=msg): - expected = concat([df1, df2], axis=1) + expected = concat([df1, df2], axis=1) expected = expected[5:] tm.assert_frame_equal(result, expected) From 9131eaf6d9c90364d247a977cca43bfbd40f8392 Mon Sep 17 00:00:00 2001 From: richard Date: Thu, 30 Oct 2025 22:14:32 -0400 Subject: [PATCH 4/8] Refinements --- doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/core/indexes/api.py | 2 +- pandas/core/reshape/concat.py | 3 +-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 4e957009b9a26..83c438c9add03 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -380,7 +380,7 @@ In cases with mixed-resolution inputs, the highest resolution is used: When all objects passed to :func:`concat` have a :class:`DatetimeIndex`, passing ``sort=False`` will now result in the non-concatenation axis not being sorted. Previously, the result would always be sorted along -the non-concatenation axis even when ``sort=False`` is passed. +the non-concatenation axis even when ``sort=False`` is passed. :issue:`57335` If you do not specify the ``sort`` argument, pandas will continue to return a sorted result but this behavior is deprecated and you will receive a warning. diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index f1ffa6fe08bdb..70e7bd3b38527 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -182,7 +182,7 @@ def safe_sort_index(index: Index) -> Index: return index -def union_indexes(indexes, sort: bool | None | lib.NoDefault = True) -> Index: +def union_indexes(indexes, sort: bool | lib.NoDefault = True) -> Index: """ Return the union of indexes. diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 4124ea3651e62..c784121d8d271 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -422,8 +422,7 @@ def concat( intersect or any(not isinstance(index, DatetimeIndex) for index in non_concat_axis) or all( - id(prev) == id(curr) - for prev, curr in zip(non_concat_axis, non_concat_axis[1:]) + prev is curr for prev, curr in zip(non_concat_axis, non_concat_axis[1:]) ) or ( all( From d49552d358b0f545dbf6bd4074df90f2375e7658 Mon Sep 17 00:00:00 2001 From: richard Date: Sat, 1 Nov 2025 07:11:25 -0400 Subject: [PATCH 5/8] concat notes --- pandas/core/apply.py | 6 +++--- pandas/core/arrays/arrow/accessors.py | 2 +- pandas/core/arrays/categorical.py | 2 +- pandas/core/frame.py | 14 ++++++++------ pandas/core/generic.py | 4 ++-- pandas/core/groupby/generic.py | 12 +++++++----- pandas/core/groupby/groupby.py | 10 +++++----- pandas/core/indexes/base.py | 4 ++-- pandas/core/indexes/range.py | 4 ++-- pandas/core/interchange/from_dataframe.py | 2 +- pandas/core/methods/describe.py | 2 +- pandas/core/methods/selectn.py | 2 +- pandas/core/resample.py | 2 +- pandas/core/reshape/encoding.py | 6 +++--- pandas/core/reshape/melt.py | 8 +++++--- pandas/core/reshape/merge.py | 4 ++-- pandas/core/reshape/pivot.py | 10 +++++----- pandas/core/reshape/reshape.py | 2 +- pandas/core/series.py | 4 ++-- pandas/core/strings/accessor.py | 2 +- pandas/core/window/common.py | 4 ++-- pandas/core/window/rolling.py | 2 +- 22 files changed, 57 insertions(+), 51 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 468f24a07cb4a..52c401d13b930 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -382,7 +382,7 @@ def transform_dict_like(self, func) -> DataFrame: for name, how in func.items(): colg = obj._gotitem(name, ndim=1) results[name] = colg.transform(how, 0, *args, **kwargs) - return concat(results, axis=1) + return concat(results, axis=1) # nobug def transform_str_or_callable(self, func) -> DataFrame | Series: """ @@ -485,7 +485,7 @@ def wrap_results_list_like( obj = self.obj try: - return concat(results, keys=keys, axis=1, sort=False) + return concat(results, keys=keys, axis=1, sort=False) # maybebug except TypeError as err: # we are concatting non-NDFrame objects, # e.g. a list of scalars @@ -635,7 +635,7 @@ def wrap_results_dict_like( keys_to_use = ktu axis: AxisInt = 0 if isinstance(obj, ABCSeries) else 1 - result = concat( + result = concat( # maybebug results, axis=axis, keys=keys_to_use, diff --git a/pandas/core/arrays/arrow/accessors.py b/pandas/core/arrays/arrow/accessors.py index 7f3da9be0c03d..aea2bb2de2189 100644 --- a/pandas/core/arrays/arrow/accessors.py +++ b/pandas/core/arrays/arrow/accessors.py @@ -496,6 +496,6 @@ def explode(self) -> DataFrame: from pandas import concat pa_type = self._pa_array.type - return concat( + return concat( # nobug [self.field(i) for i in range(pa_type.num_fields)], axis="columns" ) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 41e5c6f65dbb9..dcd87bb075195 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2690,7 +2690,7 @@ def describe(self) -> DataFrame: from pandas import Index from pandas.core.reshape.concat import concat - result = concat([counts, freqs], ignore_index=True, axis=1) + result = concat([counts, freqs], ignore_index=True, axis=1) # nobug result.columns = Index(["counts", "freqs"]) result.index.name = "categories" diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 68ea6795d47dd..32c9811de8b87 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6130,7 +6130,7 @@ def shift( .shift(periods=period, freq=freq, axis=axis, fill_value=fill_value) .add_suffix(f"{suffix}_{period}" if suffix else f"_{period}") ) - return concat(shifted_dataframes, axis=1) + return concat(shifted_dataframes, axis=1) # bug elif suffix: raise ValueError("Cannot specify `suffix` if `periods` is an int.") periods = cast(int, periods) @@ -11168,7 +11168,7 @@ def _append_internal( from pandas.core.reshape.concat import concat - result = concat( + result = concat( # possible bug [self, row_df], ignore_index=ignore_index, ) @@ -11396,12 +11396,12 @@ def join( # join indexes only using concat if can_concat: if how == "left": - res = concat( + res = concat( # nobug frames, axis=1, join="outer", verify_integrity=True, sort=sort ) return res.reindex(self.index) else: - return concat( + return concat( # bug frames, axis=1, join=how, verify_integrity=True, sort=sort ) @@ -11590,7 +11590,9 @@ def _series_round(ser: Series, decimals: int) -> Series: if new_cols is not None and len(new_cols) > 0: return self._constructor( - concat(new_cols, axis=1), index=self.index, columns=self.columns + concat(new_cols, axis=1), + index=self.index, + columns=self.columns, # nobug ).__finalize__(self, method="round") else: return self.copy(deep=False) @@ -14173,7 +14175,7 @@ def isin(self, values: Series | DataFrame | Sequence | Mapping) -> DataFrame: from pandas.core.reshape.concat import concat values = collections.defaultdict(list, values) - result = concat( + result = concat( # nobug ( self.iloc[:, [i]].isin(values[col]) for i, col in enumerate(self.columns) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b542ca1f431c3..34b5476002cfa 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6525,7 +6525,7 @@ def astype( return self.copy(deep=False) # GH 19920: retain column metadata after concat - result = concat(results, axis=1) + result = concat(results, axis=1) # nobug # GH#40810 retain subclass # error: Incompatible types in assignment # (expression has type "Self", variable has type "DataFrame") @@ -9507,7 +9507,7 @@ def compare( # error: List item 0 has incompatible type "NDFrame"; expected # "Union[Series, DataFrame]" - diff = concat( + diff = concat( # bug [self, other], # type: ignore[list-item] axis=axis, keys=result_names, diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index d279594617235..20be980ad1a12 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -549,7 +549,7 @@ def _aggregate_multiple_funcs(self, arg, *args, **kwargs) -> DataFrame: if any(isinstance(x, DataFrame) for x in results.values()): from pandas import concat - res_df = concat( + res_df = concat( # nobug results.values(), axis=1, keys=[key.label for key in results] ) return res_df @@ -722,7 +722,7 @@ def _transform_general( if results: from pandas.core.reshape.concat import concat - concatenated = concat(results, ignore_index=True) + concatenated = concat(results, ignore_index=True) # nobug result = self._set_result_index_ordered(concatenated) else: result = self.obj._constructor(dtype=np.float64) @@ -2238,7 +2238,7 @@ def _transform_general(self, func, engine, engine_kwargs, *args, **kwargs): applied.append(res) concat_index = obj.columns - concatenated = concat( + concatenated = concat( # nobug applied, axis=0, verify_integrity=False, ignore_index=True ) concatenated = concatenated.reindex(concat_index, axis=1) @@ -2530,7 +2530,7 @@ def _apply_to_column_groupbys(self, func) -> DataFrame: # concat would raise res_df = DataFrame([], columns=columns, index=self._grouper.result_index) else: - res_df = concat(results, keys=columns, axis=1) + res_df = concat(results, keys=columns, axis=1) # nobug if not self.as_index: res_df.index = default_index(len(res_df)) @@ -3390,7 +3390,9 @@ def _wrap_transform_general_frame( # other dimension; this will preserve dtypes # GH14457 if res.index.is_(obj.index): - res_frame = concat([res] * len(group.columns), axis=1, ignore_index=True) + res_frame = concat( + [res] * len(group.columns), axis=1, ignore_index=True + ) # nobug res_frame.columns = group.columns res_frame.index = group.index else: diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index ca0c43aed4864..22a8826ae35e3 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1143,7 +1143,7 @@ def _concat_objects( group_levels = self._grouper.levels group_names = self._grouper.names - result = concat( + result = concat( # maybebug values, axis=0, keys=group_keys, @@ -1152,10 +1152,10 @@ def _concat_objects( sort=False, ) else: - result = concat(values, axis=0) + result = concat(values, axis=0) # maybebug elif not not_indexed_same: - result = concat(values, axis=0) + result = concat(values, axis=0) # maybebug ax = self._selected_obj.index if self.dropna: @@ -1178,7 +1178,7 @@ def _concat_objects( result = result.reindex(ax, axis=0) else: - result = concat(values, axis=0) + result = concat(values, axis=0) # maybebug if self.obj.ndim == 1: name = self.obj.name @@ -5238,7 +5238,7 @@ def shift( return ( shifted_dataframes[0] if len(shifted_dataframes) == 1 - else concat(shifted_dataframes, axis=1, sort=False) + else concat(shifted_dataframes, axis=1, sort=False) # nobug ) @final diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 6fafbd9590143..622153c8f3484 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5379,9 +5379,9 @@ def append(self, other: Index | Sequence[Index]) -> Index: names = {obj.name for obj in to_concat} name = None if len(names) > 1 else self.name - return self._concat(to_concat, name) + return self._concat(to_concat, name) # nobug - def _concat(self, to_concat: list[Index], name: Hashable) -> Index: + def _concat(self, to_concat: list[Index], name: Hashable) -> Index: # nobug """ Concatenate multiple Index objects. """ diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 7a025cdd5fb68..34b8ba0a01246 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -1181,7 +1181,7 @@ def insert(self, loc: int, item) -> Index: return super().insert(loc, item) - def _concat(self, indexes: list[Index], name: Hashable) -> Index: + def _concat(self, indexes: list[Index], name: Hashable) -> Index: # nobug """ Overriding parent method for the case of all RangeIndex instances. @@ -1191,7 +1191,7 @@ def _concat(self, indexes: list[Index], name: Hashable) -> Index: indexes = [RangeIndex(3), RangeIndex(4, 6)] -> Index([0,1,2,4,5], dtype='int64') """ if not all(isinstance(x, RangeIndex) for x in indexes): - result = super()._concat(indexes, name) + result = super()._concat(indexes, name) # nobug if result.dtype.kind == "i": return self._shallow_copy(result._values) return result diff --git a/pandas/core/interchange/from_dataframe.py b/pandas/core/interchange/from_dataframe.py index bcbeb546f845c..e21f39501e42b 100644 --- a/pandas/core/interchange/from_dataframe.py +++ b/pandas/core/interchange/from_dataframe.py @@ -144,7 +144,7 @@ def _from_dataframe(df: DataFrameXchg, allow_copy: bool = True) -> pd.DataFrame: elif len(pandas_dfs) == 1: pandas_df = pandas_dfs[0] else: - pandas_df = pd.concat(pandas_dfs, axis=0, ignore_index=True, copy=False) + pandas_df = pd.concat(pandas_dfs, axis=0, ignore_index=True, copy=False) # bug index_obj = df.metadata.get("pandas.index", None) if index_obj is not None: diff --git a/pandas/core/methods/describe.py b/pandas/core/methods/describe.py index d92540af168c0..3eb9675c42904 100644 --- a/pandas/core/methods/describe.py +++ b/pandas/core/methods/describe.py @@ -173,7 +173,7 @@ def describe(self, percentiles: Sequence[float] | np.ndarray) -> DataFrame: col_names = reorder_columns(ldesc) d = concat( - [x.reindex(col_names) for x in ldesc], + [x.reindex(col_names) for x in ldesc], # nobug axis=1, ignore_index=True, sort=False, diff --git a/pandas/core/methods/selectn.py b/pandas/core/methods/selectn.py index 59516b16905dc..e7bb48cca70c5 100644 --- a/pandas/core/methods/selectn.py +++ b/pandas/core/methods/selectn.py @@ -186,7 +186,7 @@ def compute(self, method: str) -> Series: # reverse indices inds = narr - 1 - inds - result = concat([dropped.iloc[inds], nan_index]).iloc[:findex] + result = concat([dropped.iloc[inds], nan_index]).iloc[:findex] # nobug result.index = original_index.take(result.index) return result diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 2694b581a6707..328e361951d3c 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1025,7 +1025,7 @@ def interpolate( missing_data_points_index = obj.index.difference(final_index) if len(missing_data_points_index) > 0: - result = concat( + result = concat( # nobug [result, obj.loc[missing_data_points_index]] ).sort_index() diff --git a/pandas/core/reshape/encoding.py b/pandas/core/reshape/encoding.py index 5c5fed272b925..0210118664fb3 100644 --- a/pandas/core/reshape/encoding.py +++ b/pandas/core/reshape/encoding.py @@ -223,7 +223,7 @@ def check_len(item, name: str) -> None: dtype=dtype, ) with_dummies.append(dummy) - result = concat(with_dummies, axis=1) + result = concat(with_dummies, axis=1) # nobug else: result = _get_dummies_1d( data, @@ -342,7 +342,7 @@ def get_empty_frame(data) -> DataFrame: ) sparse_series.append(Series(data=sarr, index=index, name=col, copy=False)) - return concat(sparse_series, axis=1) + return concat(sparse_series, axis=1) # nobug else: # ensure ndarray layout is column-major @@ -568,7 +568,7 @@ def from_dummies( "Dummy DataFrame contains unassigned value(s); " f"First instance in row: {assigned.idxmin()}" ) - data_slice = concat( + data_slice = concat( # nobug (data_to_decode.loc[:, prefix_slice], assigned == 0), axis=1 ) else: diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index 5d4b15c9a0ca3..b8fed8ae4ef04 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -249,7 +249,9 @@ def melt( if not isinstance(id_data.dtype, np.dtype): # i.e. ExtensionDtype if num_cols_adjusted > 0: - mdata[col] = concat([id_data] * num_cols_adjusted, ignore_index=True) + mdata[col] = concat( + [id_data] * num_cols_adjusted, ignore_index=True + ) # nobug else: # We can't concat empty list. (GH 46044) mdata[col] = type(id_data)([], name=id_data.name, dtype=id_data.dtype) @@ -261,7 +263,7 @@ def melt( if frame.shape[1] > 0 and not any( not isinstance(dt, np.dtype) and dt._supports_2d for dt in frame.dtypes ): - mdata[value_name] = concat( + mdata[value_name] = concat( # nobug [frame.iloc[:, i] for i in range(frame.shape[1])], ignore_index=True ).values else: @@ -666,7 +668,7 @@ def melt_stub(df, stub: str, i, j, value_vars, sep: str): value_vars_flattened.extend(value_var) _melted.append(melt_stub(df, stub, i, j, value_var, sep)) - melted = concat(_melted, axis=1) + melted = concat(_melted, axis=1) # maybebug id_vars = df.columns.difference(value_vars_flattened) new = df[id_vars] diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 571f708ccf108..0917c2eb94a3c 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -507,7 +507,7 @@ def _groupby_and_merge( # if we have a missing piece this can be reset from pandas.core.reshape.concat import concat - result = concat(pieces, ignore_index=True) + result = concat(pieces, ignore_index=True) # nobug result = result.reindex(columns=pieces[0].columns) return result, lby @@ -1131,7 +1131,7 @@ def _reindex_and_concat( left.columns = llabels right.columns = rlabels - result = concat([left, right], axis=1) + result = concat([left, right], axis=1) # nobug return result def get_result(self) -> DataFrame: diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 04c584c226aed..fbf83806fa1d8 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -263,7 +263,7 @@ def pivot_table( pieces.append(_table) keys.append(getattr(func, "__name__", func)) - table = concat(pieces, keys=keys, axis=1) + table = concat(pieces, keys=keys, axis=1) # nobug return table.__finalize__(data, method="pivot_table") table = __internal_pivot_table( @@ -506,7 +506,7 @@ def _add_margins( margin_dummy[cols] = margin_dummy[cols].apply( maybe_downcast_to_dtype, args=(dtype,) ) - result = concat([result, margin_dummy]) + result = concat([result, margin_dummy]) # nobug result.index.names = row_names return result @@ -608,7 +608,7 @@ def _all_key(key): # GH 49240 return table else: - result = concat(table_pieces, axis=cat_axis) + result = concat(table_pieces, axis=cat_axis) # nobug if len(rows) == 0: return result @@ -1185,7 +1185,7 @@ def _normalize( # Fix Margins if normalize == "columns": column_margin = column_margin / column_margin.sum() - table = concat([table, column_margin], axis=1) + table = concat([table, column_margin], axis=1) # nobug table = table.fillna(0) table.columns = table_columns @@ -1199,7 +1199,7 @@ def _normalize( column_margin = column_margin / column_margin.sum() index_margin = index_margin / index_margin.sum() index_margin.loc[margins_name] = 1 - table = concat([table, column_margin], axis=1) + table = concat([table, column_margin], axis=1) # nobug table = table._append_internal(index_margin, ignore_index=True) table = table.fillna(0) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index c14389d753aac..6056efde0092a 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -1103,7 +1103,7 @@ def stack_reshape( buf.append(data) if len(buf) > 0 and not frame.empty: - result = concat(buf, ignore_index=True) + result = concat(buf, ignore_index=True) # nobug else: # input is empty if len(level) < frame.columns.nlevels: diff --git a/pandas/core/series.py b/pandas/core/series.py index fe71a3ab91933..15ee61de06c94 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2994,7 +2994,7 @@ def searchsorted( # type: ignore[override] def _append_internal(self, to_append: Series, ignore_index: bool = False) -> Series: from pandas.core.reshape.concat import concat - return concat([self, to_append], ignore_index=ignore_index) + return concat([self, to_append], ignore_index=ignore_index) # maybebug def compare( self, @@ -3271,7 +3271,7 @@ def combine_first(self, other) -> Series: if this.dtype.kind == "M" and other.dtype.kind != "M": # TODO: try to match resos? other = to_datetime(other) - combined = concat([this, other]) + combined = concat([this, other]) # bug combined = combined.reindex(new_index) return combined.__finalize__(self, method="combine_first") diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index ff3a17e4d2d5b..76cf5e5767a30 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -659,7 +659,7 @@ def cat( # align if required if any(not data.index.equals(x.index) for x in others): # Need to add keys for uniqueness in case of duplicate columns - others = concat( + others = concat( # bug others, axis=1, join=(join if join == "inner" else "outer"), diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index 32af6e8b96584..cfbcf79134447 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -68,9 +68,9 @@ def dataframe_from_int_dict(data, frame_template) -> DataFrame: result_index = arg1.index.union(arg2.index) if len(result_index): # construct result frame - result = concat( + result = concat( # nobug [ - concat( + concat( # nobug [results[i][j] for j in range(len(arg2.columns))], ignore_index=True, ) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index e6f84941f6b1a..80660b09e7851 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -770,7 +770,7 @@ def _apply_pairwise( # from flex_binary_moment to a "transform"-like result # per groupby combination old_result_len = len(result) - result = concat( + result = concat( # nobug [ result.take(gb_indices).reindex(result.index) for gb_indices in self._grouper.indices.values() From f713c4fe28b6d17f6f32f6a1c3189a682f2117fd Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 1 Nov 2025 12:47:59 -0400 Subject: [PATCH 6/8] Fix bugs --- pandas/core/apply.py | 5 ++-- pandas/core/frame.py | 6 ++--- pandas/core/generic.py | 2 +- pandas/core/groupby/groupby.py | 8 +++---- pandas/core/interchange/from_dataframe.py | 4 +++- pandas/core/reshape/melt.py | 2 +- pandas/core/series.py | 4 ++-- pandas/core/strings/accessor.py | 2 +- pandas/tests/apply/test_frame_apply.py | 28 +++++++++++++++++++++++ pandas/tests/frame/methods/test_shift.py | 13 +++++++++++ pandas/tests/reshape/merge/test_join.py | 9 ++++++++ pandas/tests/strings/test_cat.py | 13 +++++++++++ 12 files changed, 81 insertions(+), 15 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 52c401d13b930..a62dc21ff85c5 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -485,7 +485,7 @@ def wrap_results_list_like( obj = self.obj try: - return concat(results, keys=keys, axis=1, sort=False) # maybebug + return concat(results, keys=keys, axis=1, sort=False) # nobug except TypeError as err: # we are concatting non-NDFrame objects, # e.g. a list of scalars @@ -635,10 +635,11 @@ def wrap_results_dict_like( keys_to_use = ktu axis: AxisInt = 0 if isinstance(obj, ABCSeries) else 1 - result = concat( # maybebug + result = concat( # nobug results, axis=axis, keys=keys_to_use, + sort=False, ) elif any(is_ndframe): # There is a mix of NDFrames and scalars diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 32c9811de8b87..c166545d16f11 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6130,7 +6130,7 @@ def shift( .shift(periods=period, freq=freq, axis=axis, fill_value=fill_value) .add_suffix(f"{suffix}_{period}" if suffix else f"_{period}") ) - return concat(shifted_dataframes, axis=1) # bug + return concat(shifted_dataframes, axis=1, sort=False) # nobug elif suffix: raise ValueError("Cannot specify `suffix` if `periods` is an int.") periods = cast(int, periods) @@ -11168,7 +11168,7 @@ def _append_internal( from pandas.core.reshape.concat import concat - result = concat( # possible bug + result = concat( # nobug [self, row_df], ignore_index=ignore_index, ) @@ -11401,7 +11401,7 @@ def join( ) return res.reindex(self.index) else: - return concat( # bug + return concat( # nobug frames, axis=1, join=how, verify_integrity=True, sort=sort ) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 34b5476002cfa..0cfde3e00d2e9 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9507,7 +9507,7 @@ def compare( # error: List item 0 has incompatible type "NDFrame"; expected # "Union[Series, DataFrame]" - diff = concat( # bug + diff = concat( # nobug - self and other must have same index/coluns [self, other], # type: ignore[list-item] axis=axis, keys=result_names, diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 22a8826ae35e3..965b53ebfda30 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1143,7 +1143,7 @@ def _concat_objects( group_levels = self._grouper.levels group_names = self._grouper.names - result = concat( # maybebug + result = concat( values, axis=0, keys=group_keys, @@ -1152,10 +1152,10 @@ def _concat_objects( sort=False, ) else: - result = concat(values, axis=0) # maybebug + result = concat(values, axis=0) elif not not_indexed_same: - result = concat(values, axis=0) # maybebug + result = concat(values, axis=0) ax = self._selected_obj.index if self.dropna: @@ -1178,7 +1178,7 @@ def _concat_objects( result = result.reindex(ax, axis=0) else: - result = concat(values, axis=0) # maybebug + result = concat(values, axis=0) if self.obj.ndim == 1: name = self.obj.name diff --git a/pandas/core/interchange/from_dataframe.py b/pandas/core/interchange/from_dataframe.py index e21f39501e42b..b3cfd08813694 100644 --- a/pandas/core/interchange/from_dataframe.py +++ b/pandas/core/interchange/from_dataframe.py @@ -144,7 +144,9 @@ def _from_dataframe(df: DataFrameXchg, allow_copy: bool = True) -> pd.DataFrame: elif len(pandas_dfs) == 1: pandas_df = pandas_dfs[0] else: - pandas_df = pd.concat(pandas_dfs, axis=0, ignore_index=True, copy=False) # bug + pandas_df = pd.concat( + pandas_dfs, axis=0, ignore_index=True, copy=False + ) # nobug index_obj = df.metadata.get("pandas.index", None) if index_obj is not None: diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index b8fed8ae4ef04..0a330e3551448 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -668,7 +668,7 @@ def melt_stub(df, stub: str, i, j, value_vars, sep: str): value_vars_flattened.extend(value_var) _melted.append(melt_stub(df, stub, i, j, value_var, sep)) - melted = concat(_melted, axis=1) # maybebug + melted = concat(_melted, axis=1) # nobug id_vars = df.columns.difference(value_vars_flattened) new = df[id_vars] diff --git a/pandas/core/series.py b/pandas/core/series.py index 15ee61de06c94..b19aaa93655bf 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2994,7 +2994,7 @@ def searchsorted( # type: ignore[override] def _append_internal(self, to_append: Series, ignore_index: bool = False) -> Series: from pandas.core.reshape.concat import concat - return concat([self, to_append], ignore_index=ignore_index) # maybebug + return concat([self, to_append], ignore_index=ignore_index) # nobug def compare( self, @@ -3271,7 +3271,7 @@ def combine_first(self, other) -> Series: if this.dtype.kind == "M" and other.dtype.kind != "M": # TODO: try to match resos? other = to_datetime(other) - combined = concat([this, other]) # bug + combined = concat([this, other]) # nobug combined = combined.reindex(new_index) return combined.__finalize__(self, method="combine_first") diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 76cf5e5767a30..c81354dc7bb40 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -659,7 +659,7 @@ def cat( # align if required if any(not data.index.equals(x.index) for x in others): # Need to add keys for uniqueness in case of duplicate columns - others = concat( # bug + others = concat( # nobug others, axis=1, join=(join if join == "inner" else "outer"), diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index a9afb5dbd11d7..0fd962cc6834a 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -915,6 +915,34 @@ def test_listlike_lambda(ops, by_row, expected): tm.assert_equal(result, expected) +def test_listlike_datetime_index_unsorted(): + values = [datetime(2024, 1, 1), datetime(2024, 1, 2)] + df = DataFrame({"a": [1, 2]}, index=[values[1], values[0]]) + result = df.apply([lambda x: x], by_row=False) + expected = DataFrame( + [[1], [2]], + index=[values[1], values[0]], + columns=MultiIndex([["a"], [""]], codes=[[0], [0]]), + ) + tm.assert_frame_equal(result, expected) + + +def test_dictlike_datetime_index_unsorted(): + values = [datetime(2024, 1, 1), datetime(2024, 1, 2), datetime(2024, 1, 3)] + df = DataFrame({"a": [1, 2], "b": [3, 4]}, index=[values[1], values[0]]) + result = df.apply( + {"a": lambda x: x, "b": lambda x: x.shift(freq="D")}, by_row=False + ) + expected = DataFrame( + { + "a": [1.0, 2.0, np.nan], + "b": [4.0, np.nan, 3.0], + }, + index=[values[1], values[0], values[2]], + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( "ops", [ diff --git a/pandas/tests/frame/methods/test_shift.py b/pandas/tests/frame/methods/test_shift.py index 37a5b0dec9f82..a5e7b70cb86b8 100644 --- a/pandas/tests/frame/methods/test_shift.py +++ b/pandas/tests/frame/methods/test_shift.py @@ -794,3 +794,16 @@ def test_shift_invalid_fill_value_deprecation(self): df["a"].shift(1, fill_value=NaT) with tm.assert_produces_warning(Pandas4Warning, match=msg): df["b"].shift(1, fill_value=NaT) + + def test_shift_dt_index_multiple_periods_unsorted(self): + values = date_range("1/1/2000", periods=4, freq="D") + df = DataFrame({"a": [1, 2]}, index=[values[1], values[0]]) + result = df.shift(periods=[1, 2], freq="D") + expected = DataFrame( + { + "a_1": [1.0, 2.0, np.nan], + "a_2": [2.0, np.nan, 1.0], + }, + index=[values[2], values[1], values[3]], + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index 65bfea0b9beea..48383af288f4c 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -1,3 +1,4 @@ +from datetime import datetime import re import numpy as np @@ -671,6 +672,14 @@ def _check_diff_index(df_list, result, exp_index): with pytest.raises(ValueError, match=msg): df_list[0].join(df_list[1:], on="a") + def test_join_many_datetime_unsorted(self): + index = Index([datetime(2024, 1, 2), datetime(2024, 1, 1)]) + df = DataFrame({"a": [1, 2]}, index=index) + df2 = DataFrame({"b": [1, 2]}, index=index) + result = df.join([df2], how="outer") + expected = DataFrame({"a": [1, 2], "b": [1, 2]}, index=index) + tm.assert_frame_equal(result, expected) + def test_join_many_mixed(self): df = DataFrame( np.random.default_rng(2).standard_normal((8, 4)), diff --git a/pandas/tests/strings/test_cat.py b/pandas/tests/strings/test_cat.py index 68ca807bde145..3e33546ce8d00 100644 --- a/pandas/tests/strings/test_cat.py +++ b/pandas/tests/strings/test_cat.py @@ -1,3 +1,4 @@ +from datetime import datetime import re import numpy as np @@ -343,6 +344,18 @@ def test_str_cat_align_mixed_inputs(join_type): s.str.cat([t, z], join=join_type) +def test_str_cat_datetime_index_unsorted(join_type): + values = [datetime(2024, 1, 1), datetime(2024, 1, 2)] + s = Series(["a", "b"], index=[values[0], values[1]]) + others = Series(["c", "d"], index=[values[1], values[0]]) + result = s.str.cat(others, join=join_type) + if join_type == "right": + expected = Series(["bc", "ad"], index=[values[1], values[0]]) + else: + expected = Series(["ad", "bc"], index=[values[0], values[1]]) + tm.assert_series_equal(result, expected) + + def test_str_cat_all_na(index_or_series, index_or_series2): # GH 24044 box = index_or_series From 2c056262871916702ad258600088e5abe4a565dc Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 1 Nov 2025 13:09:27 -0400 Subject: [PATCH 7/8] Cleanup --- doc/source/whatsnew/v3.0.0.rst | 15 +++++++++---- pandas/core/apply.py | 6 +++--- pandas/core/arrays/arrow/accessors.py | 2 +- pandas/core/arrays/categorical.py | 2 +- pandas/core/frame.py | 12 +++++------ pandas/core/generic.py | 4 ++-- pandas/core/groupby/generic.py | 12 +++++------ pandas/core/groupby/groupby.py | 2 +- pandas/core/indexes/base.py | 4 ++-- pandas/core/indexes/range.py | 4 ++-- pandas/core/interchange/from_dataframe.py | 4 +--- pandas/core/methods/describe.py | 2 +- pandas/core/methods/selectn.py | 2 +- pandas/core/resample.py | 2 +- pandas/core/reshape/encoding.py | 6 +++--- pandas/core/reshape/melt.py | 8 +++---- pandas/core/reshape/merge.py | 4 ++-- pandas/core/reshape/pivot.py | 10 ++++----- pandas/core/reshape/reshape.py | 2 +- pandas/core/series.py | 4 ++-- pandas/core/strings/accessor.py | 2 +- pandas/core/window/common.py | 4 ++-- pandas/core/window/rolling.py | 2 +- pandas/tests/apply/test_frame_apply.py | 2 ++ pandas/tests/frame/methods/test_shift.py | 1 + pandas/tests/reshape/merge/test_join.py | 26 ++++++++++++++++++++--- pandas/tests/strings/test_cat.py | 1 + 27 files changed, 85 insertions(+), 60 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 85f7725d6309e..c9907539d89e1 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -390,9 +390,9 @@ and users can skip the check by explicitly specifying ``sort=True`` or ``sort=False``. This deprecation can also impact pandas' internal usage of :func:`concat`. -While we have investigated uses of :func:`concat` to determine if this could lead -to a change in behavior of other functions and methods in the API, it is -possible some have been missed. In order to be cautious here, pandas has *not* +Here cases where :func:`concat` was sorting a :class:`DatetimeIndex` but not +other indexes are considered bugs and have been fixed as noted below. However +it is possible some have been missed. In order to be cautious here, pandas has *not* added ``sort=False`` to any internal calls where we believe behavior should not change. If we have missed something, users will not experience a behavior change but they will receive a warning about :func:`concat` even though they are not directly @@ -429,6 +429,14 @@ we may address any potential behavior changes. pd.concat([df1, df2], axis=1, sort=False) +Cases where pandas' internal usage of :func:`concat` resulted in inconsistent sorting +that are now fixed in this release are as follows. + +- :meth:`Series.apply` and :meth:`DataFrame.apply` with a list-like or dict-like ``func`` argument. +- :meth:`Series.shift`, :meth:`DataFrame.shift`, :meth:`.SeriesGroupBy.shift`, :meth:`.DataFrameGroupBy.shift` with the ``periods`` argument a list of length greater than 1. +- :meth:`DataFrame.join` with ``other`` a list of one or more Series or DataFrames and ``how="inner"``, ``how="left"``, or ``how="right"``. +- :meth:`Series.str.cat` with ``others`` a Series or DataFrame. + .. _whatsnew_300.api_breaking.value_counts_sorting: Changed behavior in :meth:`DataFrame.value_counts` and :meth:`DataFrameGroupBy.value_counts` when ``sort=False`` @@ -1233,7 +1241,6 @@ Groupby/resample/rolling - Bug in :meth:`DataFrameGroupBy.apply` with ``as_index=False`` that was returning :class:`MultiIndex` instead of returning :class:`Index`. (:issue:`58291`) - Bug in :meth:`DataFrameGroupBy.cumsum` and :meth:`DataFrameGroupBy.cumprod` where ``numeric_only`` parameter was passed indirectly through kwargs instead of passing directly. (:issue:`58811`) - Bug in :meth:`DataFrameGroupBy.cumsum` where it did not return the correct dtype when the label contained ``None``. (:issue:`58811`) -- Bug in :meth:`DataFrameGroupBy.shift` where the resulting index would be sorted if the input is a :class:`DatetimeIndex` and multiple periods are specified (:issue:`62843`) - Bug in :meth:`DataFrameGroupby.transform` and :meth:`SeriesGroupby.transform` with a reducer and ``observed=False`` that coerces dtype to float when there are unobserved categories. (:issue:`55326`) - Bug in :meth:`Rolling.apply` for ``method="table"`` where column order was not being respected due to the columns getting sorted by default. (:issue:`59666`) - Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_period`` periods if ``method="table"``. (:issue:`58868`) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index a62dc21ff85c5..7503995c71e43 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -382,7 +382,7 @@ def transform_dict_like(self, func) -> DataFrame: for name, how in func.items(): colg = obj._gotitem(name, ndim=1) results[name] = colg.transform(how, 0, *args, **kwargs) - return concat(results, axis=1) # nobug + return concat(results, axis=1) def transform_str_or_callable(self, func) -> DataFrame | Series: """ @@ -485,7 +485,7 @@ def wrap_results_list_like( obj = self.obj try: - return concat(results, keys=keys, axis=1, sort=False) # nobug + return concat(results, keys=keys, axis=1, sort=False) except TypeError as err: # we are concatting non-NDFrame objects, # e.g. a list of scalars @@ -635,7 +635,7 @@ def wrap_results_dict_like( keys_to_use = ktu axis: AxisInt = 0 if isinstance(obj, ABCSeries) else 1 - result = concat( # nobug + result = concat( results, axis=axis, keys=keys_to_use, diff --git a/pandas/core/arrays/arrow/accessors.py b/pandas/core/arrays/arrow/accessors.py index aea2bb2de2189..7f3da9be0c03d 100644 --- a/pandas/core/arrays/arrow/accessors.py +++ b/pandas/core/arrays/arrow/accessors.py @@ -496,6 +496,6 @@ def explode(self) -> DataFrame: from pandas import concat pa_type = self._pa_array.type - return concat( # nobug + return concat( [self.field(i) for i in range(pa_type.num_fields)], axis="columns" ) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index dcd87bb075195..41e5c6f65dbb9 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2690,7 +2690,7 @@ def describe(self) -> DataFrame: from pandas import Index from pandas.core.reshape.concat import concat - result = concat([counts, freqs], ignore_index=True, axis=1) # nobug + result = concat([counts, freqs], ignore_index=True, axis=1) result.columns = Index(["counts", "freqs"]) result.index.name = "categories" diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b2b44294ae3a1..4cd2ea1385015 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6130,7 +6130,7 @@ def shift( .shift(periods=period, freq=freq, axis=axis, fill_value=fill_value) .add_suffix(f"{suffix}_{period}" if suffix else f"_{period}") ) - return concat(shifted_dataframes, axis=1, sort=False) # nobug + return concat(shifted_dataframes, axis=1, sort=False) elif suffix: raise ValueError("Cannot specify `suffix` if `periods` is an int.") periods = cast(int, periods) @@ -11166,7 +11166,7 @@ def _append_internal( from pandas.core.reshape.concat import concat - result = concat( # nobug + result = concat( [self, row_df], ignore_index=ignore_index, ) @@ -11394,12 +11394,12 @@ def join( # join indexes only using concat if can_concat: if how == "left": - res = concat( # nobug + res = concat( frames, axis=1, join="outer", verify_integrity=True, sort=sort ) return res.reindex(self.index) else: - return concat( # nobug + return concat( frames, axis=1, join=how, verify_integrity=True, sort=sort ) @@ -11590,7 +11590,7 @@ def _series_round(ser: Series, decimals: int) -> Series: return self._constructor( concat(new_cols, axis=1), index=self.index, - columns=self.columns, # nobug + columns=self.columns, ).__finalize__(self, method="round") else: return self.copy(deep=False) @@ -14173,7 +14173,7 @@ def isin(self, values: Series | DataFrame | Sequence | Mapping) -> DataFrame: from pandas.core.reshape.concat import concat values = collections.defaultdict(list, values) - result = concat( # nobug + result = concat( ( self.iloc[:, [i]].isin(values[col]) for i, col in enumerate(self.columns) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0cfde3e00d2e9..b542ca1f431c3 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6525,7 +6525,7 @@ def astype( return self.copy(deep=False) # GH 19920: retain column metadata after concat - result = concat(results, axis=1) # nobug + result = concat(results, axis=1) # GH#40810 retain subclass # error: Incompatible types in assignment # (expression has type "Self", variable has type "DataFrame") @@ -9507,7 +9507,7 @@ def compare( # error: List item 0 has incompatible type "NDFrame"; expected # "Union[Series, DataFrame]" - diff = concat( # nobug - self and other must have same index/coluns + diff = concat( [self, other], # type: ignore[list-item] axis=axis, keys=result_names, diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 20be980ad1a12..d279594617235 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -549,7 +549,7 @@ def _aggregate_multiple_funcs(self, arg, *args, **kwargs) -> DataFrame: if any(isinstance(x, DataFrame) for x in results.values()): from pandas import concat - res_df = concat( # nobug + res_df = concat( results.values(), axis=1, keys=[key.label for key in results] ) return res_df @@ -722,7 +722,7 @@ def _transform_general( if results: from pandas.core.reshape.concat import concat - concatenated = concat(results, ignore_index=True) # nobug + concatenated = concat(results, ignore_index=True) result = self._set_result_index_ordered(concatenated) else: result = self.obj._constructor(dtype=np.float64) @@ -2238,7 +2238,7 @@ def _transform_general(self, func, engine, engine_kwargs, *args, **kwargs): applied.append(res) concat_index = obj.columns - concatenated = concat( # nobug + concatenated = concat( applied, axis=0, verify_integrity=False, ignore_index=True ) concatenated = concatenated.reindex(concat_index, axis=1) @@ -2530,7 +2530,7 @@ def _apply_to_column_groupbys(self, func) -> DataFrame: # concat would raise res_df = DataFrame([], columns=columns, index=self._grouper.result_index) else: - res_df = concat(results, keys=columns, axis=1) # nobug + res_df = concat(results, keys=columns, axis=1) if not self.as_index: res_df.index = default_index(len(res_df)) @@ -3390,9 +3390,7 @@ def _wrap_transform_general_frame( # other dimension; this will preserve dtypes # GH14457 if res.index.is_(obj.index): - res_frame = concat( - [res] * len(group.columns), axis=1, ignore_index=True - ) # nobug + res_frame = concat([res] * len(group.columns), axis=1, ignore_index=True) res_frame.columns = group.columns res_frame.index = group.index else: diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 965b53ebfda30..ca0c43aed4864 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -5238,7 +5238,7 @@ def shift( return ( shifted_dataframes[0] if len(shifted_dataframes) == 1 - else concat(shifted_dataframes, axis=1, sort=False) # nobug + else concat(shifted_dataframes, axis=1, sort=False) ) @final diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 7119dd58dd19d..72f7a1e086b60 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5379,9 +5379,9 @@ def append(self, other: Index | Sequence[Index]) -> Index: names = {obj.name for obj in to_concat} name = None if len(names) > 1 else self.name - return self._concat(to_concat, name) # nobug + return self._concat(to_concat, name) - def _concat(self, to_concat: list[Index], name: Hashable) -> Index: # nobug + def _concat(self, to_concat: list[Index], name: Hashable) -> Index: """ Concatenate multiple Index objects. """ diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 34b8ba0a01246..7a025cdd5fb68 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -1181,7 +1181,7 @@ def insert(self, loc: int, item) -> Index: return super().insert(loc, item) - def _concat(self, indexes: list[Index], name: Hashable) -> Index: # nobug + def _concat(self, indexes: list[Index], name: Hashable) -> Index: """ Overriding parent method for the case of all RangeIndex instances. @@ -1191,7 +1191,7 @@ def _concat(self, indexes: list[Index], name: Hashable) -> Index: # nobug indexes = [RangeIndex(3), RangeIndex(4, 6)] -> Index([0,1,2,4,5], dtype='int64') """ if not all(isinstance(x, RangeIndex) for x in indexes): - result = super()._concat(indexes, name) # nobug + result = super()._concat(indexes, name) if result.dtype.kind == "i": return self._shallow_copy(result._values) return result diff --git a/pandas/core/interchange/from_dataframe.py b/pandas/core/interchange/from_dataframe.py index b3cfd08813694..bcbeb546f845c 100644 --- a/pandas/core/interchange/from_dataframe.py +++ b/pandas/core/interchange/from_dataframe.py @@ -144,9 +144,7 @@ def _from_dataframe(df: DataFrameXchg, allow_copy: bool = True) -> pd.DataFrame: elif len(pandas_dfs) == 1: pandas_df = pandas_dfs[0] else: - pandas_df = pd.concat( - pandas_dfs, axis=0, ignore_index=True, copy=False - ) # nobug + pandas_df = pd.concat(pandas_dfs, axis=0, ignore_index=True, copy=False) index_obj = df.metadata.get("pandas.index", None) if index_obj is not None: diff --git a/pandas/core/methods/describe.py b/pandas/core/methods/describe.py index 3eb9675c42904..d92540af168c0 100644 --- a/pandas/core/methods/describe.py +++ b/pandas/core/methods/describe.py @@ -173,7 +173,7 @@ def describe(self, percentiles: Sequence[float] | np.ndarray) -> DataFrame: col_names = reorder_columns(ldesc) d = concat( - [x.reindex(col_names) for x in ldesc], # nobug + [x.reindex(col_names) for x in ldesc], axis=1, ignore_index=True, sort=False, diff --git a/pandas/core/methods/selectn.py b/pandas/core/methods/selectn.py index e7bb48cca70c5..59516b16905dc 100644 --- a/pandas/core/methods/selectn.py +++ b/pandas/core/methods/selectn.py @@ -186,7 +186,7 @@ def compute(self, method: str) -> Series: # reverse indices inds = narr - 1 - inds - result = concat([dropped.iloc[inds], nan_index]).iloc[:findex] # nobug + result = concat([dropped.iloc[inds], nan_index]).iloc[:findex] result.index = original_index.take(result.index) return result diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 328e361951d3c..2694b581a6707 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1025,7 +1025,7 @@ def interpolate( missing_data_points_index = obj.index.difference(final_index) if len(missing_data_points_index) > 0: - result = concat( # nobug + result = concat( [result, obj.loc[missing_data_points_index]] ).sort_index() diff --git a/pandas/core/reshape/encoding.py b/pandas/core/reshape/encoding.py index 0210118664fb3..5c5fed272b925 100644 --- a/pandas/core/reshape/encoding.py +++ b/pandas/core/reshape/encoding.py @@ -223,7 +223,7 @@ def check_len(item, name: str) -> None: dtype=dtype, ) with_dummies.append(dummy) - result = concat(with_dummies, axis=1) # nobug + result = concat(with_dummies, axis=1) else: result = _get_dummies_1d( data, @@ -342,7 +342,7 @@ def get_empty_frame(data) -> DataFrame: ) sparse_series.append(Series(data=sarr, index=index, name=col, copy=False)) - return concat(sparse_series, axis=1) # nobug + return concat(sparse_series, axis=1) else: # ensure ndarray layout is column-major @@ -568,7 +568,7 @@ def from_dummies( "Dummy DataFrame contains unassigned value(s); " f"First instance in row: {assigned.idxmin()}" ) - data_slice = concat( # nobug + data_slice = concat( (data_to_decode.loc[:, prefix_slice], assigned == 0), axis=1 ) else: diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index 0a330e3551448..5d4b15c9a0ca3 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -249,9 +249,7 @@ def melt( if not isinstance(id_data.dtype, np.dtype): # i.e. ExtensionDtype if num_cols_adjusted > 0: - mdata[col] = concat( - [id_data] * num_cols_adjusted, ignore_index=True - ) # nobug + mdata[col] = concat([id_data] * num_cols_adjusted, ignore_index=True) else: # We can't concat empty list. (GH 46044) mdata[col] = type(id_data)([], name=id_data.name, dtype=id_data.dtype) @@ -263,7 +261,7 @@ def melt( if frame.shape[1] > 0 and not any( not isinstance(dt, np.dtype) and dt._supports_2d for dt in frame.dtypes ): - mdata[value_name] = concat( # nobug + mdata[value_name] = concat( [frame.iloc[:, i] for i in range(frame.shape[1])], ignore_index=True ).values else: @@ -668,7 +666,7 @@ def melt_stub(df, stub: str, i, j, value_vars, sep: str): value_vars_flattened.extend(value_var) _melted.append(melt_stub(df, stub, i, j, value_var, sep)) - melted = concat(_melted, axis=1) # nobug + melted = concat(_melted, axis=1) id_vars = df.columns.difference(value_vars_flattened) new = df[id_vars] diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 0917c2eb94a3c..571f708ccf108 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -507,7 +507,7 @@ def _groupby_and_merge( # if we have a missing piece this can be reset from pandas.core.reshape.concat import concat - result = concat(pieces, ignore_index=True) # nobug + result = concat(pieces, ignore_index=True) result = result.reindex(columns=pieces[0].columns) return result, lby @@ -1131,7 +1131,7 @@ def _reindex_and_concat( left.columns = llabels right.columns = rlabels - result = concat([left, right], axis=1) # nobug + result = concat([left, right], axis=1) return result def get_result(self) -> DataFrame: diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index fbf83806fa1d8..04c584c226aed 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -263,7 +263,7 @@ def pivot_table( pieces.append(_table) keys.append(getattr(func, "__name__", func)) - table = concat(pieces, keys=keys, axis=1) # nobug + table = concat(pieces, keys=keys, axis=1) return table.__finalize__(data, method="pivot_table") table = __internal_pivot_table( @@ -506,7 +506,7 @@ def _add_margins( margin_dummy[cols] = margin_dummy[cols].apply( maybe_downcast_to_dtype, args=(dtype,) ) - result = concat([result, margin_dummy]) # nobug + result = concat([result, margin_dummy]) result.index.names = row_names return result @@ -608,7 +608,7 @@ def _all_key(key): # GH 49240 return table else: - result = concat(table_pieces, axis=cat_axis) # nobug + result = concat(table_pieces, axis=cat_axis) if len(rows) == 0: return result @@ -1185,7 +1185,7 @@ def _normalize( # Fix Margins if normalize == "columns": column_margin = column_margin / column_margin.sum() - table = concat([table, column_margin], axis=1) # nobug + table = concat([table, column_margin], axis=1) table = table.fillna(0) table.columns = table_columns @@ -1199,7 +1199,7 @@ def _normalize( column_margin = column_margin / column_margin.sum() index_margin = index_margin / index_margin.sum() index_margin.loc[margins_name] = 1 - table = concat([table, column_margin], axis=1) # nobug + table = concat([table, column_margin], axis=1) table = table._append_internal(index_margin, ignore_index=True) table = table.fillna(0) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 6056efde0092a..c14389d753aac 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -1103,7 +1103,7 @@ def stack_reshape( buf.append(data) if len(buf) > 0 and not frame.empty: - result = concat(buf, ignore_index=True) # nobug + result = concat(buf, ignore_index=True) else: # input is empty if len(level) < frame.columns.nlevels: diff --git a/pandas/core/series.py b/pandas/core/series.py index f793b64bfddd2..1a8645cf1815d 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2993,7 +2993,7 @@ def searchsorted( # type: ignore[override] def _append_internal(self, to_append: Series, ignore_index: bool = False) -> Series: from pandas.core.reshape.concat import concat - return concat([self, to_append], ignore_index=ignore_index) # nobug + return concat([self, to_append], ignore_index=ignore_index) def compare( self, @@ -3277,7 +3277,7 @@ def combine_first(self, other) -> Series: stacklevel=find_stack_level(), ) - combined = concat([this, other]) # nobug + combined = concat([this, other]) combined = combined.reindex(new_index) return combined.__finalize__(self, method="combine_first") diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index c81354dc7bb40..ff3a17e4d2d5b 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -659,7 +659,7 @@ def cat( # align if required if any(not data.index.equals(x.index) for x in others): # Need to add keys for uniqueness in case of duplicate columns - others = concat( # nobug + others = concat( others, axis=1, join=(join if join == "inner" else "outer"), diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index cfbcf79134447..32af6e8b96584 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -68,9 +68,9 @@ def dataframe_from_int_dict(data, frame_template) -> DataFrame: result_index = arg1.index.union(arg2.index) if len(result_index): # construct result frame - result = concat( # nobug + result = concat( [ - concat( # nobug + concat( [results[i][j] for j in range(len(arg2.columns))], ignore_index=True, ) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 80660b09e7851..e6f84941f6b1a 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -770,7 +770,7 @@ def _apply_pairwise( # from flex_binary_moment to a "transform"-like result # per groupby combination old_result_len = len(result) - result = concat( # nobug + result = concat( [ result.take(gb_indices).reindex(result.index) for gb_indices in self._grouper.indices.values() diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 0fd962cc6834a..c586e4cee71eb 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -916,6 +916,7 @@ def test_listlike_lambda(ops, by_row, expected): def test_listlike_datetime_index_unsorted(): + # https://github.com/pandas-dev/pandas/pull/62843 values = [datetime(2024, 1, 1), datetime(2024, 1, 2)] df = DataFrame({"a": [1, 2]}, index=[values[1], values[0]]) result = df.apply([lambda x: x], by_row=False) @@ -928,6 +929,7 @@ def test_listlike_datetime_index_unsorted(): def test_dictlike_datetime_index_unsorted(): + # https://github.com/pandas-dev/pandas/pull/62843 values = [datetime(2024, 1, 1), datetime(2024, 1, 2), datetime(2024, 1, 3)] df = DataFrame({"a": [1, 2], "b": [3, 4]}, index=[values[1], values[0]]) result = df.apply( diff --git a/pandas/tests/frame/methods/test_shift.py b/pandas/tests/frame/methods/test_shift.py index a5e7b70cb86b8..9ad98aa6b3090 100644 --- a/pandas/tests/frame/methods/test_shift.py +++ b/pandas/tests/frame/methods/test_shift.py @@ -796,6 +796,7 @@ def test_shift_invalid_fill_value_deprecation(self): df["b"].shift(1, fill_value=NaT) def test_shift_dt_index_multiple_periods_unsorted(self): + # https://github.com/pandas-dev/pandas/pull/62843 values = date_range("1/1/2000", periods=4, freq="D") df = DataFrame({"a": [1, 2]}, index=[values[1], values[0]]) result = df.shift(periods=[1, 2], freq="D") diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index 48383af288f4c..fb1f8ed0f7633 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -672,12 +672,32 @@ def _check_diff_index(df_list, result, exp_index): with pytest.raises(ValueError, match=msg): df_list[0].join(df_list[1:], on="a") - def test_join_many_datetime_unsorted(self): + @pytest.mark.parametrize( + "how", + [ + "inner", + pytest.param( + "outer", + marks=pytest.mark.xfail(reason="sort=False not handled correctly"), + ), + "left", + pytest.param( + "right", + marks=pytest.mark.xfail(reason="sort=False not handled correctly"), + ), + ], + ) + def test_join_many_datetime_unsorted(self, how): + # https://github.com/pandas-dev/pandas/pull/62843 index = Index([datetime(2024, 1, 2), datetime(2024, 1, 1)]) df = DataFrame({"a": [1, 2]}, index=index) df2 = DataFrame({"b": [1, 2]}, index=index) - result = df.join([df2], how="outer") - expected = DataFrame({"a": [1, 2], "b": [1, 2]}, index=index) + result = df.join([df2], how=how) + if how == "outer": + # Outer always sorts the index. + expected = DataFrame({"a": [2, 1], "b": [2, 1]}, index=[index[1], index[0]]) + else: + expected = DataFrame({"a": [1, 2], "b": [1, 2]}, index=index) tm.assert_frame_equal(result, expected) def test_join_many_mixed(self): diff --git a/pandas/tests/strings/test_cat.py b/pandas/tests/strings/test_cat.py index 3e33546ce8d00..48b255ac7bf69 100644 --- a/pandas/tests/strings/test_cat.py +++ b/pandas/tests/strings/test_cat.py @@ -345,6 +345,7 @@ def test_str_cat_align_mixed_inputs(join_type): def test_str_cat_datetime_index_unsorted(join_type): + # https://github.com/pandas-dev/pandas/pull/62843 values = [datetime(2024, 1, 1), datetime(2024, 1, 2)] s = Series(["a", "b"], index=[values[0], values[1]]) others = Series(["c", "d"], index=[values[1], values[0]]) From d93ac9095976637f140fa4e27c083a3cfa27e408 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 1 Nov 2025 13:11:43 -0400 Subject: [PATCH 8/8] Cleanup --- pandas/core/frame.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4cd2ea1385015..7f34cf98f4bc3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -11588,9 +11588,7 @@ def _series_round(ser: Series, decimals: int) -> Series: if new_cols is not None and len(new_cols) > 0: return self._constructor( - concat(new_cols, axis=1), - index=self.index, - columns=self.columns, + concat(new_cols, axis=1), index=self.index, columns=self.columns ).__finalize__(self, method="round") else: return self.copy(deep=False)