update fix ignored sort in api.py and add test (#43833)

pandas-dev · Oct 10, 2021 · 01b8d2a · 01b8d2a
1 parent b49ff3e
commit 01b8d2a
Show file tree

Hide file tree

Showing 8 changed files with 37 additions and 23 deletions.
diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
@@ -513,6 +513,7 @@ Reshaping
 - Bug in :func:`concat` of ``bool`` and ``boolean`` dtypes resulting in ``object`` dtype instead of ``boolean`` dtype (:issue:`42800`)
 - Bug in :func:`crosstab` when inputs are categorical Series, there are categories that are not present in one or both of the Series, and ``margins=True``. Previously the margin value for missing categories was ``NaN``. It is now correctly reported as 0 (:issue:`43505`)
 - Bug in :func:`concat` would fail when the ``objs`` argument all had the same index and the ``keys`` argument contained duplicates (:issue:`43595`)
+- Bug in :func:`concat` which ignored the ``sort`` parameter (:issue:`43375`)
 
 Sparse
 ^^^^^^

diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi
@@ -55,7 +55,7 @@ def is_integer_array(values: np.ndarray, skipna: bool = ...): ...
 def is_bool_array(values: np.ndarray, skipna: bool = ...): ...
 def fast_multiget(mapping: dict, keys: np.ndarray, default=...) -> np.ndarray: ...
 def fast_unique_multiple_list_gen(gen: Generator, sort: bool = ...) -> list: ...
-def fast_unique_multiple_list(lists: list, sort: bool = ...) -> list: ...
+def fast_unique_multiple_list(lists: list, sort: bool | None = ...) -> list: ...
 def fast_unique_multiple(arrays: list, sort: bool = ...) -> list: ...
 def map_infer(
     arr: np.ndarray,

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
@@ -350,7 +350,7 @@ def fast_unique_multiple(list arrays, sort: bool = True):
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def fast_unique_multiple_list(lists: list, sort: bool = True) -> list:
+def fast_unique_multiple_list(lists: list, sort: bool | None = True) -> list:
     cdef:
         list buf
         Py_ssize_t k = len(lists)

diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py
@@ -147,7 +147,7 @@ def _get_combined_index(
         for other in indexes[1:]:
             index = index.intersection(other)
     else:
-        index = union_indexes(indexes, sort=sort)
+        index = union_indexes(indexes, sort=False)
         index = ensure_index(index)
 
     if sort:
@@ -163,7 +163,7 @@ def _get_combined_index(
     return index
 
 
-def union_indexes(indexes, sort: bool = True) -> Index:
+def union_indexes(indexes, sort: bool | None = True) -> Index:
     """
     Return the union of indexes.
 
@@ -219,7 +219,7 @@ def conv(i):
             return result.union_many(indexes[1:])
         else:
             for other in indexes[1:]:
-                result = result.union(other)
+                result = result.union(other, sort=None if sort else False)
             return result
     elif kind == "array":
         index = indexes[0]

diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
@@ -2354,10 +2354,8 @@ def test_construct_with_two_categoricalindex_series(self):
         )
         result = DataFrame([s1, s2])
         expected = DataFrame(
-            np.array(
-                [[np.nan, 39.0, np.nan, 6.0, 4.0], [2.0, 152.0, 2.0, 242.0, 150.0]]
-            ),
-            columns=["f", "female", "m", "male", "unknown"],
+            np.array([[39, 6, 4, np.nan, np.nan], [152.0, 242.0, 150.0, 2.0, 2.0]]),
+            columns=["female", "male", "unknown", "f", "m"],
         )
         tm.assert_frame_equal(result, expected)
 

diff --git a/pandas/tests/reshape/concat/test_sort.py b/pandas/tests/reshape/concat/test_sort.py
@@ -1,3 +1,5 @@
+import numpy as np
+
 import pandas as pd
 from pandas import DataFrame
 import pandas._testing as tm
@@ -81,3 +83,12 @@ def test_concat_aligned_sort_does_not_raise(self):
         expected = DataFrame({1: [1, 2, 1, 2], "a": [3, 4, 3, 4]}, columns=[1, "a"])
         result = pd.concat([df, df], ignore_index=True, sort=True)
         tm.assert_frame_equal(result, expected)
+
+    def test_concat_frame_with_sort_false(self):
+        # GH 43375
+        result = pd.concat(
+            [DataFrame({i: i}, index=[i]) for i in range(2, 0, -1)], sort=False
+        )
+        expected = DataFrame([[2, np.nan], [np.nan, 1]], index=[2, 1], columns=[2, 1])
+
+        tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py
@@ -746,11 +746,11 @@ def test_unbalanced(self):
         )
         df["id"] = df.index
         exp_data = {
-            "X": ["X1", "X1", "X2", "X2"],
-            "A": [1.0, 3.0, 2.0, 4.0],
-            "B": [5.0, np.nan, 6.0, np.nan],
-            "id": [0, 0, 1, 1],
-            "year": [2010, 2011, 2010, 2011],
+            "X": ["X1", "X2", "X1", "X2"],
+            "A": [1.0, 2.0, 3.0, 4.0],
+            "B": [5.0, 6.0, np.nan, np.nan],
+            "id": [0, 1, 0, 1],
+            "year": [2010, 2010, 2011, 2011],
         }
         expected = DataFrame(exp_data)
         expected = expected.set_index(["id", "year"])[["X", "A", "B"]]
@@ -993,10 +993,10 @@ def test_nonnumeric_suffix(self):
         )
         expected = DataFrame(
             {
-                "A": ["X1", "X1", "X2", "X2"],
-                "colname": ["placebo", "test", "placebo", "test"],
-                "result": [5.0, np.nan, 6.0, np.nan],
-                "treatment": [1.0, 3.0, 2.0, 4.0],
+                "A": ["X1", "X2", "X1", "X2"],
+                "colname": ["placebo", "placebo", "test", "test"],
+                "result": [5.0, 6.0, np.nan, np.nan],
+                "treatment": [1.0, 2.0, 3.0, 4.0],
             }
         )
         expected = expected.set_index(["A", "colname"])
@@ -1040,10 +1040,10 @@ def test_float_suffix(self):
         )
         expected = DataFrame(
             {
-                "A": ["X1", "X1", "X1", "X1", "X2", "X2", "X2", "X2"],
-                "colname": [1, 1.1, 1.2, 2.1, 1, 1.1, 1.2, 2.1],
-                "result": [0.0, np.nan, 5.0, np.nan, 9.0, np.nan, 6.0, np.nan],
-                "treatment": [np.nan, 1.0, np.nan, 3.0, np.nan, 2.0, np.nan, 4.0],
+                "A": ["X1", "X2", "X1", "X2", "X1", "X2", "X1", "X2"],
+                "colname": [1.2, 1.2, 1.0, 1.0, 1.1, 1.1, 2.1, 2.1],
+                "result": [5.0, 6.0, 0.0, 9.0, np.nan, np.nan, np.nan, np.nan],
+                "treatment": [np.nan, np.nan, np.nan, np.nan, 1.0, 2.0, 3.0, 4.0],
             }
         )
         expected = expected.set_index(["A", "colname"])

diff --git a/pandas/tests/strings/test_cat.py b/pandas/tests/strings/test_cat.py
@@ -278,7 +278,11 @@ def test_str_cat_align_mixed_inputs(join):
     expected_outer = Series(["aaA", "bbB", "c-C", "ddD", "-e-"])
     # joint index of rhs [t, u]; u will be forced to have the index of s
     rhs_idx = (
-        t.index.intersection(s.index) if join == "inner" else t.index.union(s.index)
+        t.index.intersection(s.index)
+        if join == "inner"
+        else t.index.union(s.index)
+        if join == "outer"
+        else t.index.append(s.index.difference(t.index))
     )
 
     expected = expected_outer.loc[s.index.join(rhs_idx, how=join)]