Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,45 @@ In cases with mixed-resolution inputs, the highest resolution is used:
In [2]: pd.to_datetime([pd.Timestamp("2024-03-22 11:43:01"), "2024-03-22 11:43:01.002"]).dtype
Out[2]: dtype('<M8[ns]')

.. _whatsnew_300.api_breaking.concat_datetime_sorting:

:func:`concat` no longer ignores ``sort`` when all objects have a :class:`DatetimeIndex`
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

When all objects passed to :func:`concat` have a :class:`DatetimeIndex`, the ``sort``
argument is no longer ignored. Previously, the result would always be sorted along
the non-concatenation axis even when ``sort=False`` (the default).

.. ipython:: python

idx1 = pd.date_range("2025-01-02", periods=3, freq="h")
df1 = pd.DataFrame({"a": [1, 2, 3]}, index=idx1)
df1

idx2 = pd.date_range("2025-01-01", periods=3, freq="h")
df2 = pd.DataFrame({"b": [1, 2, 3]}, index=idx2)
df2

*Old behavior*

.. code-block:: ipython

In [3]: pd.concat([df1, df2], axis=1, sort=False)
Out[3]:
a b
2025-01-01 00:00:00 NaN 1.0
2025-01-01 01:00:00 NaN 2.0
2025-01-01 02:00:00 NaN 3.0
2025-01-02 00:00:00 1.0 NaN
2025-01-02 01:00:00 2.0 NaN
2025-01-02 02:00:00 3.0 NaN

*New behavior*

.. ipython:: python

pd.concat([df1, df2], axis=1, sort=False)

.. _whatsnew_300.api_breaking.value_counts_sorting:

Changed behavior in :meth:`DataFrame.value_counts` and :meth:`DataFrameGroupBy.value_counts` when ``sort=False``
Expand Down
1 change: 1 addition & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -11114,6 +11114,7 @@ def _append_internal(
result = concat(
[self, row_df],
ignore_index=ignore_index,
sort=False,
)
return result.__finalize__(self, method="append")

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -6521,7 +6521,7 @@ def astype(
return self.copy(deep=False)

# GH 19920: retain column metadata after concat
result = concat(results, axis=1)
result = concat(results, axis=1, sort=False)
# GH#40810 retain subclass
# error: Incompatible types in assignment
# (expression has type "Self", variable has type "DataFrame")
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -5236,7 +5236,7 @@ def shift(
return (
shifted_dataframes[0]
if len(shifted_dataframes) == 1
else concat(shifted_dataframes, axis=1)
else concat(shifted_dataframes, axis=1, sort=False)
)

@final
Expand Down
24 changes: 14 additions & 10 deletions pandas/core/indexes/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def get_objs_combined_axis(
objs,
intersect: bool = False,
axis: Axis = 0,
sort: bool = True,
sort: bool | lib.NoDefault = True,
) -> Index:
"""
Extract combined index: return intersection or union (depending on the
Expand All @@ -81,7 +81,8 @@ def get_objs_combined_axis(
axis : {0 or 'index', 1 or 'outer'}, default 0
The axis to extract indexes from.
sort : bool, default True
Whether the result index should come out sorted or not.
Whether the result index should come out sorted or not. NoDefault
is used for the deprecation in GH#57335.

Returns
-------
Expand All @@ -108,7 +109,7 @@ def _get_distinct_objs(objs: list[Index]) -> list[Index]:
def _get_combined_index(
indexes: list[Index],
intersect: bool = False,
sort: bool = False,
sort: bool | lib.NoDefault = False,
) -> Index:
"""
Return the union or intersection of indexes.
Expand All @@ -121,7 +122,8 @@ def _get_combined_index(
If True, calculate the intersection between indexes. Otherwise,
calculate the union.
sort : bool, default False
Whether the result index should come out sorted or not.
Whether the result index should come out sorted or not. NoDefault
is used for the deprecation in GH#57335.

Returns
-------
Expand All @@ -138,10 +140,10 @@ def _get_combined_index(
for other in indexes[1:]:
index = index.intersection(other)
else:
index = union_indexes(indexes, sort=False)
index = union_indexes(indexes, sort=sort if sort is lib.no_default else False)
index = ensure_index(index)

if sort:
if sort and sort is not lib.no_default:
index = safe_sort_index(index)
return index

Expand Down Expand Up @@ -180,7 +182,7 @@ def safe_sort_index(index: Index) -> Index:
return index


def union_indexes(indexes, sort: bool | None = True) -> Index:
def union_indexes(indexes, sort: bool | None | lib.NoDefault = True) -> Index:
"""
Return the union of indexes.

Expand All @@ -190,7 +192,8 @@ def union_indexes(indexes, sort: bool | None = True) -> Index:
----------
indexes : list of Index or list objects
sort : bool, default True
Whether the result index should come out sorted or not.
Whether the result index should come out sorted or not. NoDefault
is used for the deprecation in GH#57335.

Returns
-------
Expand All @@ -201,7 +204,7 @@ def union_indexes(indexes, sort: bool | None = True) -> Index:
if len(indexes) == 1:
result = indexes[0]
if isinstance(result, list):
if not sort:
if not sort or sort is lib.no_default:
result = Index(result)
else:
result = Index(sorted(result))
Expand All @@ -227,7 +230,8 @@ def union_indexes(indexes, sort: bool | None = True) -> Index:
raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex")

if num_dtis == len(indexes):
sort = True
if sort is lib.no_default:
sort = True
result = indexes[0]

elif num_dtis > 1:
Expand Down
44 changes: 37 additions & 7 deletions pandas/core/reshape/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
get_objs_combined_axis,
get_unanimous_names,
)
from pandas.core.indexes.datetimes import DatetimeIndex
from pandas.core.internals import concatenate_managers

if TYPE_CHECKING:
Expand Down Expand Up @@ -162,7 +163,7 @@ def concat(
levels=None,
names: list[HashableT] | None = None,
verify_integrity: bool = False,
sort: bool = False,
sort: bool | lib.NoDefault = lib.no_default,
copy: bool | lib.NoDefault = lib.no_default,
) -> DataFrame | Series:
"""
Expand Down Expand Up @@ -405,14 +406,43 @@ def concat(
"Only can inner (intersect) or outer (union) join the other axis"
)

if not is_bool(sort):
raise ValueError(
f"The 'sort' keyword only accepts boolean values; {sort} was passed."
)
sort = bool(sort)

objs, keys, ndims = _clean_keys_and_objs(objs, keys)

if sort is lib.no_default:
if axis == 0:
non_concat_axis = [
obj.columns if isinstance(obj, ABCDataFrame) else Index([obj.name])
for obj in objs
]
else:
non_concat_axis = [obj.index for obj in objs]

if all(isinstance(index, DatetimeIndex) for index in non_concat_axis):
warn = any(
id(prev) != id(curr)
for prev, curr in zip(non_concat_axis, non_concat_axis[1:])
) and any(
prev[-1] > curr[0]
for prev, curr in zip(non_concat_axis, non_concat_axis[1:])
if not prev.empty and not curr.empty
)
if warn:
msg = (
"Sorting by default when concatenating all DatetimeIndex is "
"deprecated. In the future, pandas will respect the default "
"of `sort=False`. Specify `sort=True` or `sort=False` to "
"silence this message."
)
warnings.warn(msg, Pandas4Warning, stacklevel=find_stack_level())
else:
sort = False
else:
if not is_bool(sort):
raise ValueError(
f"The 'sort' keyword only accepts boolean values; {sort} was passed."
)
sort = bool(sort)

# select an object to be our result reference
sample, objs = _get_sample_object(objs, ndims, keys, names, levels, intersect)

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/reshape/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ def pivot_table(
pieces.append(_table)
keys.append(getattr(func, "__name__", func))

table = concat(pieces, keys=keys, axis=1)
table = concat(pieces, keys=keys, axis=1, sort=False)
return table.__finalize__(data, method="pivot_table")

table = __internal_pivot_table(
Expand Down
6 changes: 4 additions & 2 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -1109,7 +1109,9 @@ def func(_start, _stop, _where):
]

# concat and return
return concat(objs, axis=axis, verify_integrity=False)._consolidate()
return concat(
objs, axis=axis, verify_integrity=False, sort=False
)._consolidate()

# create the iterator
it = TableIterator(
Expand Down Expand Up @@ -4860,7 +4862,7 @@ def read(
if len(frames) == 1:
df = frames[0]
else:
df = concat(frames, axis=1)
df = concat(frames, axis=1, sort=False)

selection = Selection(self, where=where, start=start, stop=stop)
# apply the selection filters & axis orderings
Expand Down
13 changes: 10 additions & 3 deletions pandas/tests/io/pytables/test_append.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from pandas._libs.tslibs import Timestamp
from pandas.compat import PY312
from pandas.errors import Pandas4Warning

import pandas as pd
from pandas import (
Expand Down Expand Up @@ -887,7 +888,9 @@ def test_append_to_multiple(setup_path):
)
df2 = df1.copy().rename(columns="{}_2".format)
df2["foo"] = "bar"
df = concat([df1, df2], axis=1)
msg = "Sorting by default when concatenating all DatetimeIndex is deprecated"
with tm.assert_produces_warning(Pandas4Warning, match=msg):
df = concat([df1, df2], axis=1)

with ensure_clean_store(setup_path) as store:
# exceptions
Expand Down Expand Up @@ -928,7 +931,9 @@ def test_append_to_multiple_dropna(setup_path):
index=date_range("2000-01-01", periods=10, freq="B"),
).rename(columns="{}_2".format)
df1.iloc[1, df1.columns.get_indexer(["A", "B"])] = np.nan
df = concat([df1, df2], axis=1)
msg = "Sorting by default when concatenating all DatetimeIndex is deprecated"
with tm.assert_produces_warning(Pandas4Warning, match=msg):
df = concat([df1, df2], axis=1)

with ensure_clean_store(setup_path) as store:
# dropna=True should guarantee rows are synchronized
Expand All @@ -949,7 +954,9 @@ def test_append_to_multiple_dropna_false(setup_path):
)
df2 = df1.copy().rename(columns="{}_2".format)
df1.iloc[1, df1.columns.get_indexer(["A", "B"])] = np.nan
df = concat([df1, df2], axis=1)
msg = "Sorting by default when concatenating all DatetimeIndex is deprecated"
with tm.assert_produces_warning(Pandas4Warning, match=msg):
df = concat([df1, df2], axis=1)

with (
ensure_clean_store(setup_path) as store,
Expand Down
13 changes: 10 additions & 3 deletions pandas/tests/io/pytables/test_select.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from pandas._libs.tslibs import Timestamp
from pandas.compat import PY312
from pandas.errors import Pandas4Warning

import pandas as pd
from pandas import (
Expand Down Expand Up @@ -411,7 +412,9 @@ def test_select_iterator(tmp_path, setup_path):
df2["foo"] = "bar"
store.append("df2", df2)

df = concat([df1, df2], axis=1)
msg = "Sorting by default when concatenating all DatetimeIndex is deprecated"
with tm.assert_produces_warning(Pandas4Warning, match=msg):
df = concat([df1, df2], axis=1)

# full selection
expected = store.select_as_multiple(["df1", "df2"], selector="df1")
Expand Down Expand Up @@ -901,7 +904,9 @@ def test_select_as_multiple(setup_path):
result = store.select_as_multiple(
["df1", "df2"], where=["A>0", "B>0"], selector="df1"
)
expected = concat([df1, df2], axis=1)
msg = "Sorting by default when concatenating all DatetimeIndex is deprecated"
with tm.assert_produces_warning(Pandas4Warning, match=msg):
expected = concat([df1, df2], axis=1)
expected = expected[(expected.A > 0) & (expected.B > 0)]
tm.assert_frame_equal(result, expected, check_freq=False)
# FIXME: 2021-01-20 this is failing with freq None vs 4B on some builds
Expand All @@ -910,7 +915,9 @@ def test_select_as_multiple(setup_path):
result = store.select_as_multiple(
["df1", "df2"], where="index>df2.index[4]", selector="df2"
)
expected = concat([df1, df2], axis=1)
msg = "Sorting by default when concatenating all DatetimeIndex is deprecated"
with tm.assert_produces_warning(Pandas4Warning, match=msg):
expected = concat([df1, df2], axis=1)
expected = expected[5:]
tm.assert_frame_equal(result, expected)

Expand Down
9 changes: 7 additions & 2 deletions pandas/tests/io/pytables/test_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import pytest

from pandas.compat import PY312
from pandas.errors import Pandas4Warning

import pandas as pd
from pandas import (
Expand Down Expand Up @@ -732,9 +733,13 @@ def test_coordinates(setup_path):
c = store.select_as_coordinates("df1", ["A>0", "B>0"])
df1_result = store.select("df1", c)
df2_result = store.select("df2", c)
result = concat([df1_result, df2_result], axis=1)
msg = "Sorting by default when concatenating all DatetimeIndex is deprecated"
with tm.assert_produces_warning(Pandas4Warning, match=msg):
result = concat([df1_result, df2_result], axis=1)

expected = concat([df1, df2], axis=1)
msg = "Sorting by default when concatenating all DatetimeIndex is deprecated"
with tm.assert_produces_warning(Pandas4Warning, match=msg):
expected = concat([df1, df2], axis=1)
expected = expected[(expected.A > 0) & (expected.B > 0)]
tm.assert_frame_equal(result, expected, check_freq=False)
# FIXME: 2021-01-18 on some (mostly windows) builds we get freq=None
Expand Down
12 changes: 7 additions & 5 deletions pandas/tests/resample/test_datetime_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -602,11 +602,13 @@ def test_resample_ohlc_dataframe(unit):
df.index = df.index.as_unit(unit)
df.columns.name = "Cols"
res = df.resample("h").ohlc()
exp = pd.concat(
[df["VOLUME"].resample("h").ohlc(), df["PRICE"].resample("h").ohlc()],
axis=1,
keys=df.columns,
)
msg = "Sorting by default when concatenating all DatetimeIndex is deprecated"
with tm.assert_produces_warning(Pandas4Warning, match=msg):
exp = pd.concat(
[df["VOLUME"].resample("h").ohlc(), df["PRICE"].resample("h").ohlc()],
axis=1,
keys=df.columns,
)
assert exp.columns.names[0] == "Cols"
tm.assert_frame_equal(exp, res)

Expand Down
Loading
Loading