From f75274a26e2d6d96ac4aa98050ab391ab1510af3 Mon Sep 17 00:00:00 2001 From: zacharym-collins Date: Thu, 18 Dec 2025 08:19:52 -0600 Subject: [PATCH 1/6] TST: Add regression test for GH#63412 --- pandas/tests/io/pytables/test_store.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index d11495902f76c..d77fe3fd8d0d3 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -1129,3 +1129,16 @@ def test_select_categorical_string_columns(tmp_path, model): result = store.select("df", "modelId == model") expected = df[df["modelId"] == model] tm.assert_frame_equal(result, expected) + + +def test_to_hdf_multiindex_string_dtype_crash(tmp_path): + # GH#63412 + path = tmp_path / "test.h5" + index = MultiIndex.from_tuples( + [("a", "x"), ("b", "y")], + names=["level1", "level2"] + ) + df = DataFrame({"value": [1, 2]}, index=index) + df.to_hdf(path, key="test") + result = read_hdf(path, key="test") + tm.assert_frame_equal(df, result, check_dtype=False) From c80c8eff89ebafba35b55b9c79b476bafebf141b Mon Sep 17 00:00:00 2001 From: zacharym-collins Date: Thu, 18 Dec 2025 08:22:41 -0600 Subject: [PATCH 2/6] STY: apply ruff formatting --- pandas/tests/io/pytables/test_store.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index d77fe3fd8d0d3..74055582dc89c 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -1134,10 +1134,7 @@ def test_select_categorical_string_columns(tmp_path, model): def test_to_hdf_multiindex_string_dtype_crash(tmp_path): # GH#63412 path = tmp_path / "test.h5" - index = MultiIndex.from_tuples( - [("a", "x"), ("b", "y")], - names=["level1", "level2"] - ) + index = MultiIndex.from_tuples([("a", "x"), ("b", "y")], names=["level1", "level2"]) df = DataFrame({"value": [1, 2]}, index=index) df.to_hdf(path, key="test") result = read_hdf(path, key="test") From d0c02fbcd5feacc7c67eb4a2dd9cfa1c55d0f07f Mon Sep 17 00:00:00 2001 From: zacharym-collins Date: Thu, 18 Dec 2025 08:24:58 -0600 Subject: [PATCH 3/6] BUG: Fix regression in to_hdf with MultiIndex and StringDtype (GH#63412) --- pandas/io/pytables.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 8d84bef91bb03..a30b6c725070d 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -3148,9 +3148,13 @@ def write_multi_index(self, key: str, index: MultiIndex) -> None: ): # write the level if isinstance(lev.dtype, ExtensionDtype): - raise NotImplementedError( - "Saving a MultiIndex with an extension dtype is not supported." - ) + # GH 63412 + if isinstance(lev.dtype, StringDtype): + lev = lev.astype(object) + else: + raise NotImplementedError( + "Saving a MultiIndex with an extension dtype is not supported." + ) level_key = f"{key}_level{i}" conv_level = _convert_index(level_key, lev, self.encoding, self.errors) self.write_array(level_key, conv_level.values) From 2c79460be747cba776e46618531dd296510e2c90 Mon Sep 17 00:00:00 2001 From: zacharym-collins Date: Thu, 18 Dec 2025 08:39:09 -0600 Subject: [PATCH 4/6] TST: Enable HDF5 tests for StringDtype --- pandas/tests/io/pytables/test_put.py | 6 ------ pandas/tests/io/pytables/test_read.py | 6 ------ pandas/tests/io/pytables/test_round_trip.py | 6 ------ 3 files changed, 18 deletions(-) diff --git a/pandas/tests/io/pytables/test_put.py b/pandas/tests/io/pytables/test_put.py index 0cfa24e889e6f..a195b32fa1a15 100644 --- a/pandas/tests/io/pytables/test_put.py +++ b/pandas/tests/io/pytables/test_put.py @@ -314,12 +314,6 @@ def test_column_multiindex(tmp_path, setup_path, using_infer_string): path = tmp_path / setup_path with HDFStore(path) as store: - if using_infer_string: - # TODO(infer_string) make this work for string dtype - msg = "Saving a MultiIndex with an extension dtype is not supported." - with pytest.raises(NotImplementedError, match=msg): - store.put("df", df) - return store.put("df", df) tm.assert_frame_equal( store["df"], expected, check_index_type=True, check_column_type=True diff --git a/pandas/tests/io/pytables/test_read.py b/pandas/tests/io/pytables/test_read.py index 70696a502a111..75a0cb0fe3be5 100644 --- a/pandas/tests/io/pytables/test_read.py +++ b/pandas/tests/io/pytables/test_read.py @@ -181,12 +181,6 @@ def test_read_hdf_open_store(tmp_path, setup_path, using_infer_string): df = df.set_index(keys="E", append=True) path = tmp_path / setup_path - if using_infer_string: - # TODO(infer_string) make this work for string dtype - msg = "Saving a MultiIndex with an extension dtype is not supported." - with pytest.raises(NotImplementedError, match=msg): - df.to_hdf(path, key="df", mode="w") - return df.to_hdf(path, key="df", mode="w") direct = read_hdf(path, "df") with HDFStore(path, mode="r") as store: diff --git a/pandas/tests/io/pytables/test_round_trip.py b/pandas/tests/io/pytables/test_round_trip.py index aacdfb1f82f6d..7d4303722534e 100644 --- a/pandas/tests/io/pytables/test_round_trip.py +++ b/pandas/tests/io/pytables/test_round_trip.py @@ -434,12 +434,6 @@ def test_store_hierarchical( ): frame = multiindex_dataframe_random_data - if using_infer_string: - # TODO(infer_string) make this work for string dtype - msg = "Saving a MultiIndex with an extension dtype is not supported." - with pytest.raises(NotImplementedError, match=msg): - _check_roundtrip(frame, tm.assert_frame_equal, path=temp_file) - return _check_roundtrip(frame, tm.assert_frame_equal, path=temp_file) _check_roundtrip(frame.T, tm.assert_frame_equal, path=temp_file) _check_roundtrip(frame["A"], tm.assert_series_equal, path=temp_file) From 37a3e8e2d2eeb52c966ecba08450ca5b2b255ff4 Mon Sep 17 00:00:00 2001 From: zacharym-collins Date: Thu, 18 Dec 2025 11:17:24 -0600 Subject: [PATCH 5/6] REF: Simplify StringDtype check in write_multi_index (GH#63414) --- pandas/io/pytables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index a30b6c725070d..99005b740a484 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -3150,7 +3150,7 @@ def write_multi_index(self, key: str, index: MultiIndex) -> None: if isinstance(lev.dtype, ExtensionDtype): # GH 63412 if isinstance(lev.dtype, StringDtype): - lev = lev.astype(object) + pass else: raise NotImplementedError( "Saving a MultiIndex with an extension dtype is not supported." From c2b8a26bb89e6f7936c3c55b9db25fe67125aadf Mon Sep 17 00:00:00 2001 From: zacharym-collins Date: Thu, 18 Dec 2025 11:23:31 -0600 Subject: [PATCH 6/6] REF: Simplify StringDtype check to allow fallthrough --- pandas/io/pytables.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 99005b740a484..fa01fd5e4379c 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -3147,14 +3147,12 @@ def write_multi_index(self, key: str, index: MultiIndex) -> None: zip(index.levels, index.codes, index.names, strict=True) ): # write the level - if isinstance(lev.dtype, ExtensionDtype): - # GH 63412 - if isinstance(lev.dtype, StringDtype): - pass - else: - raise NotImplementedError( - "Saving a MultiIndex with an extension dtype is not supported." - ) + if isinstance(lev.dtype, ExtensionDtype) and not isinstance( + lev.dtype, StringDtype + ): + raise NotImplementedError( + "Saving a MultiIndex with an extension dtype is not supported." + ) level_key = f"{key}_level{i}" conv_level = _convert_index(level_key, lev, self.encoding, self.errors) self.write_array(level_key, conv_level.values)