From 8ea183220fc8e78523535fa6e098cdb39f36e455 Mon Sep 17 00:00:00 2001 From: YukunR <845351766@qq.com> Date: Tue, 4 Nov 2025 21:26:45 +0800 Subject: [PATCH 1/2] BUG: validate path type in read_parquet; reject non-path/file-like (gh-62922) --- pandas/io/parquet.py | 9 +++++++++ pandas/tests/io/test_parquet.py | 36 +++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 878f51a2b9eac..d53c64e04e9fd 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -27,6 +27,8 @@ ) from pandas.util._validators import check_dtype_backend +from pandas.core.dtypes.common import is_file_like + from pandas import ( DataFrame, get_option, @@ -656,6 +658,13 @@ def read_parquet( 0 3 8 1 4 9 """ + # gh-62922: validate path type early to match documented API expectations + # and provide a consistent, clear user error immediately. + if not (isinstance(path, (str, os.PathLike)) or is_file_like(path)): + raise TypeError( + f"read_parquet expected str/os.PathLike or file-like object, " + f"got {type(path).__name__} type" + ) impl = get_engine(engine) check_dtype_backend(dtype_backend) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 2927b24624026..eff6763e807e1 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -248,6 +248,42 @@ def check_partition_names(path, expected): assert dataset.partitioning.schema.names == expected +def test_read_parquet_invalid_path_types(tmp_path, engine): + # GH #62922 + df = pd.DataFrame({"a": [1]}) + path = tmp_path / "test_read_parquet.parquet" + df.to_parquet(path, engine=engine) + + bad_path_types = [ + [str(path)], # list + (str(path),), # tuple + b"raw-bytes", # bytes + ] + for bad in bad_path_types: + match = ( + f"read_parquet expected str/os.PathLike or file-like object, " + f"got {type(bad).__name__} type" + ) + with pytest.raises(TypeError, match=match): + read_parquet(bad, engine=engine) + + +def test_read_parquet_valid_path_types(tmp_path, engine): + # GH #62922 + df = pd.DataFrame({"a": [1]}) + path = tmp_path / "test_read_parquet.parquet" + df.to_parquet(path, engine=engine) + # str + read_parquet(str(path), engine=engine) + # os.PathLike + read_parquet(pathlib.Path(path), engine=engine) + # file-like object + buf = BytesIO() + df.to_parquet(buf, engine=engine) + buf.seek(0) + read_parquet(buf, engine=engine) + + def test_invalid_engine(df_compat, temp_file): msg = "engine must be one of 'pyarrow', 'fastparquet'" with pytest.raises(ValueError, match=msg): From 9ff91098382685a69a9a48ddd122d760ffff010c Mon Sep 17 00:00:00 2001 From: YukunR <845351766@qq.com> Date: Wed, 5 Nov 2025 10:59:21 +0800 Subject: [PATCH 2/2] TST: remove redundant invalid-path cases; assert TypeError for list (gh-62922) --- pandas/tests/io/test_parquet.py | 34 ++++++--------------------------- 1 file changed, 6 insertions(+), 28 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index eff6763e807e1..181af93265fe7 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -254,34 +254,12 @@ def test_read_parquet_invalid_path_types(tmp_path, engine): path = tmp_path / "test_read_parquet.parquet" df.to_parquet(path, engine=engine) - bad_path_types = [ - [str(path)], # list - (str(path),), # tuple - b"raw-bytes", # bytes - ] - for bad in bad_path_types: - match = ( - f"read_parquet expected str/os.PathLike or file-like object, " - f"got {type(bad).__name__} type" - ) - with pytest.raises(TypeError, match=match): - read_parquet(bad, engine=engine) - - -def test_read_parquet_valid_path_types(tmp_path, engine): - # GH #62922 - df = pd.DataFrame({"a": [1]}) - path = tmp_path / "test_read_parquet.parquet" - df.to_parquet(path, engine=engine) - # str - read_parquet(str(path), engine=engine) - # os.PathLike - read_parquet(pathlib.Path(path), engine=engine) - # file-like object - buf = BytesIO() - df.to_parquet(buf, engine=engine) - buf.seek(0) - read_parquet(buf, engine=engine) + msg = ( + f"read_parquet expected str/os.PathLike or file-like object, " + f"got list type" + ) + with pytest.raises(TypeError, match=msg): + read_parquet([str(path)], engine=engine) def test_invalid_engine(df_compat, temp_file):