Restrict the early return in DataFrame._set_item_frame_value to column labels
whose dtype passes is_string_dtype, and add a regression test that assigns
under a key of MultiIndex columns whose second level is Categorical (GH#62518;
the guarded branch's own comment also points at GH#61841).

NOTE(review): line breaks and indentation below were restored from a
whitespace-collapsed copy of this patch -- confirm them against the upstream
files before applying.

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 91f5cd1679a61..694ff60166d43 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -107,6 +107,7 @@
     is_list_like,
     is_scalar,
     is_sequence,
+    is_string_dtype,
     needs_i8_conversion,
     pandas_dtype,
 )
@@ -4454,8 +4455,12 @@ def _set_item_frame_value(self, key, value: DataFrame) -> None:
                 cols_droplevel = maybe_droplevels(cols, key)
                 if (
                     not isinstance(cols_droplevel, MultiIndex)
+                    and is_string_dtype(cols_droplevel.dtype)
                     and not cols_droplevel.any()
                 ):
+                    # if cols_droplevel contains only empty strings,
+                    # value.reindex(cols_droplevel, axis=1) would be full of NaNs
+                    # see GH#62518 and GH#61841
                     return
                 if len(cols_droplevel) and not cols_droplevel.equals(value.columns):
                     value = value.reindex(cols_droplevel, axis=1)
diff --git a/pandas/tests/indexing/multiindex/test_multiindex.py b/pandas/tests/indexing/multiindex/test_multiindex.py
index 2fe8f9affba2d..35ef7625b6e76 100644
--- a/pandas/tests/indexing/multiindex/test_multiindex.py
+++ b/pandas/tests/indexing/multiindex/test_multiindex.py
@@ -271,3 +271,24 @@ def test_multiindex_assign_aligns_as_implicit_tuple(self):
         df1["C"] = s1
         tm.assert_frame_equal(df1, df2)
         tm.assert_frame_equal(df1, df3)
+
+    def test_multiindex_assign_alignment_with_non_string_dtype(self):
+        # GH 62518
+        columns = MultiIndex.from_arrays(
+            [["a", "a", "z", "z"], pd.Categorical([1, 2, 1, 2])]
+        )
+
+        meta = DataFrame(columns=columns, dtype=object)
+        meta["z"] = meta["z"].astype("int64")
+
+        result = DataFrame(
+            data={
+                ("a", 1): Series([], dtype=object),
+                ("a", 2): Series([], dtype=object),
+                ("z", 1): Series([], dtype="int64"),
+                ("z", 2): Series([], dtype="int64"),
+            },
+            columns=columns,
+        )
+
+        tm.assert_frame_equal(meta, result)