diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index a778e7eed3843..380915b3494a3 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1176,6 +1176,7 @@ Groupby/resample/rolling Reshaping ^^^^^^^^^ +- Bug in :func:`concat` with mixed integer and bool dtypes incorrectly casting the bools to integers (:issue:`45101`) - Bug in :func:`qcut` where values at the quantile boundaries could be incorrectly assigned (:issue:`59355`) - Bug in :meth:`DataFrame.combine_first` not preserving the column order (:issue:`60427`) - Bug in :meth:`DataFrame.explode` producing incorrect result for :class:`pyarrow.large_list` type (:issue:`61091`) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 51c5a4afe14c5..9298d930f266b 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -161,6 +161,10 @@ def _get_result_dtype( # coerce to object target_dtype = np.dtype(object) kinds = {"o"} + elif "b" in kinds and len(kinds) > 1: + # GH#21108, GH#45101 + target_dtype = np.dtype(object) + kinds = {"o"} else: # error: Argument 1 to "np_find_common_type" has incompatible type # "*Set[Union[ExtensionDtype, Any]]"; expected "dtype[Any]" diff --git a/pandas/tests/reshape/concat/test_dataframe.py b/pandas/tests/reshape/concat/test_dataframe.py index 2ad46ac009928..b6da5ed76816b 100644 --- a/pandas/tests/reshape/concat/test_dataframe.py +++ b/pandas/tests/reshape/concat/test_dataframe.py @@ -12,6 +12,28 @@ class TestDataFrameConcat: + @pytest.mark.xfail(reason="GH#62888 the `mi[2][1] is 1` check fails") + def test_concat_multiindex_level_bool_and_numeric(self): + # GH#21108, GH#45101 + left = DataFrame([123, 456], columns=["data"], index=[True, False]) + right = DataFrame( + [55, 983, 69, 112, 0], columns=["data"], index=[1, 2, 3, 4, 99] + ) + result = concat({"One": left, "Two": right}) + + # in particular, the first two entries should not be cast to ints, the + # other 1 should not be cast 
to True + mi = pd.MultiIndex.from_arrays( + [ + ["One"] * 2 + ["Two"] * 5, + np.array([True, False, 1, 2, 3, 4, 99], dtype=object), + ], + ) + assert mi[0][1] is True + assert type(mi[2][1]) is int + expected = DataFrame({"data": [123, 456, 55, 983, 69, 112, 0]}, index=mi) + tm.assert_frame_equal(result, expected) + def test_concat_multiple_frames_dtypes(self): # GH#2759 df1 = DataFrame(data=np.ones((10, 2)), columns=["foo", "bar"], dtype=np.float64) diff --git a/pandas/tests/reshape/concat/test_series.py b/pandas/tests/reshape/concat/test_series.py index 3523340bb2858..05a0405473cc1 100644 --- a/pandas/tests/reshape/concat/test_series.py +++ b/pandas/tests/reshape/concat/test_series.py @@ -14,6 +14,18 @@ class TestSeriesConcat: + @pytest.mark.parametrize("bool_dtype", [bool, "boolean"]) + @pytest.mark.parametrize("dtype", [np.int64, np.float64, "Int64", "Float64"]) + def test_concat_bool_and_numeric(self, bool_dtype, dtype): + # GH#21108, GH#45101 + left = Series([True, False], dtype=bool_dtype) + right = Series([1, 2], dtype=dtype) + result = concat([left, right], ignore_index=True) + expected = Series([True, False, 1, 2], dtype=object) + assert result.iloc[0] is True + assert type(result.iloc[2]) in [int, float] # i.e. not bool + tm.assert_series_equal(result, expected) + def test_concat_series(self): ts = Series( np.arange(20, dtype=np.float64),