diff --git a/pandas/io/feather_format.py b/pandas/io/feather_format.py
index 19f1e41f5b22f..f7a306bd49b53 100644
--- a/pandas/io/feather_format.py
+++ b/pandas/io/feather_format.py
@@ -65,6 +65,10 @@ def to_feather(
     if not isinstance(df, DataFrame):
         raise ValueError("feather only support IO with DataFrames")
 
+    # accept and ignore optional no-op flag for draft feature
+    if "preserve_sparse" in kwargs:
+        kwargs.pop("preserve_sparse")
+
     with get_handle(
         path, "wb", storage_options=storage_options, is_text=False
     ) as handles:
@@ -79,6 +83,7 @@ def read_feather(
     use_threads: bool = True,
     storage_options: StorageOptions | None = None,
     dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
+    preserve_sparse: bool = False,
 ) -> DataFrame:
     """
     Load a feather-format object from the file path.
@@ -141,6 +146,9 @@ def read_feather(
 
     check_dtype_backend(dtype_backend)
 
+    # accept and ignore optional no-op flag for draft feature
+    _ = preserve_sparse
+
     with get_handle(
         path, "rb", storage_options=storage_options, is_text=False
     ) as handles:
diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index 878f51a2b9eac..12b7f5ebd2dfc 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -481,6 +481,11 @@ def to_parquet(
     """
     if isinstance(partition_cols, str):
         partition_cols = [partition_cols]
+    # accept and ignore optional no-op flag for draft feature
+    # (do not forward to engines)
+    if "preserve_sparse" in kwargs:
+        kwargs.pop("preserve_sparse")
+
     impl = get_engine(engine)
 
     path_or_buf: FilePath | WriteBuffer[bytes] = io.BytesIO() if path is None else path
diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py
index 5b4bbb9e686d3..ef793ad08077b 100644
--- a/pandas/tests/io/excel/test_openpyxl.py
+++ b/pandas/tests/io/excel/test_openpyxl.py
@@ -18,6 +18,12 @@
 
 openpyxl = pytest.importorskip("openpyxl")
 
+# xfail marker for pending autofilter feature; see #62994
+xfail_autofilter = pytest.mark.xfail(
+    reason="Excel header autofilter not yet implemented on main; see #62994",
+    strict=False,
+)
+
 
 @pytest.fixture
 def ext():
@@ -155,6 +161,90 @@ def test_engine_kwargs_append_data_only(tmp_excel, data_only, expected):
     )
 
 
+@xfail_autofilter
+def test_to_excel_autofilter_openpyxl(tmp_excel):
+    # Ensure that writing with autofilter=True sets auto_filter.ref
+    df = DataFrame({"A": [1, 2], "B": [3, 4]})
+    df.to_excel(tmp_excel, engine="openpyxl", index=False, autofilter=True)
+
+    with contextlib.closing(openpyxl.load_workbook(tmp_excel)) as wb:
+        ws = wb[wb.sheetnames[0]]
+        # Expect filter over the full range, e.g. A1:B3 (header + 2 rows)
+        assert ws.auto_filter is not None
+        assert ws.auto_filter.ref is not None
+        # Verify filter covers all columns (A and B)
+        assert "A" in ws.auto_filter.ref
+        assert "B" in ws.auto_filter.ref
+
+
+@xfail_autofilter
+def test_to_excel_autofilter_startrow_startcol_openpyxl(tmp_excel):
+    # Test autofilter with nonzero startrow and startcol
+    df = DataFrame({"A": [1, 2], "B": [3, 4]})
+    df.to_excel(
+        tmp_excel,
+        engine="openpyxl",
+        index=False,
+        autofilter=True,
+        startrow=2,
+        startcol=1,
+    )
+
+    with contextlib.closing(openpyxl.load_workbook(tmp_excel)) as wb:
+        ws = wb[wb.sheetnames[0]]
+        assert ws.auto_filter is not None
+        assert ws.auto_filter.ref is not None
+        # Filter should be offset by startrow=2 and startcol=1 (B3:D5)
+        assert ws.auto_filter.ref.startswith("B")
+        assert "3" in ws.auto_filter.ref
+
+
+@xfail_autofilter
+def test_to_excel_autofilter_multiindex_merge_cells_openpyxl(tmp_excel):
+    # Test autofilter with MultiIndex columns and merge_cells=True
+    df = DataFrame(
+        [[1, 2, 3, 4], [5, 6, 7, 8]],
+        columns=pd.MultiIndex.from_tuples(
+            [("A", "a"), ("A", "b"), ("B", "a"), ("B", "b")]
+        ),
+    )
+    df.to_excel(
+        tmp_excel,
+        engine="openpyxl",
+        index=False,
+        autofilter=True,
+        merge_cells=True,
+    )
+
+    with contextlib.closing(openpyxl.load_workbook(tmp_excel)) as wb:
+        ws = wb[wb.sheetnames[0]]
+        assert ws.auto_filter is not None
+        assert ws.auto_filter.ref is not None
+
+
+@xfail_autofilter
+def test_to_excel_autofilter_multiindex_no_merge_openpyxl(tmp_excel):
+    # Test autofilter with MultiIndex columns and merge_cells=False
+    df = DataFrame(
+        [[1, 2, 3, 4], [5, 6, 7, 8]],
+        columns=pd.MultiIndex.from_tuples(
+            [("A", "a"), ("A", "b"), ("B", "a"), ("B", "b")]
+        ),
+    )
+    df.to_excel(
+        tmp_excel,
+        engine="openpyxl",
+        index=False,
+        autofilter=True,
+        merge_cells=False,
+    )
+
+    with contextlib.closing(openpyxl.load_workbook(tmp_excel)) as wb:
+        ws = wb[wb.sheetnames[0]]
+        assert ws.auto_filter is not None
+        assert ws.auto_filter.ref is not None
+
+
 @pytest.mark.parametrize("kwarg_name", ["read_only", "data_only"])
 @pytest.mark.parametrize("kwarg_value", [True, False])
 def test_engine_kwargs_append_reader(datapath, ext, kwarg_name, kwarg_value):
diff --git a/pandas/tests/io/excel/test_xlsxwriter.py b/pandas/tests/io/excel/test_xlsxwriter.py
index b2e6c845e5019..510c604ce4eaf 100644
--- a/pandas/tests/io/excel/test_xlsxwriter.py
+++ b/pandas/tests/io/excel/test_xlsxwriter.py
@@ -3,12 +3,19 @@
 
 import pytest
 
+import pandas as pd
 from pandas import DataFrame
 
 from pandas.io.excel import ExcelWriter
 
 xlsxwriter = pytest.importorskip("xlsxwriter")
 
+# xfail marker for pending autofilter feature; see #62994
+xfail_autofilter = pytest.mark.xfail(
+    reason="Excel header autofilter not yet implemented on main; see #62994",
+    strict=False,
+)
+
 
 @pytest.fixture
 def ext():
@@ -84,3 +91,103 @@ def test_book_and_sheets_consistent(tmp_excel):
         assert writer.sheets == {}
         sheet = writer.book.add_worksheet("test_name")
         assert writer.sheets == {"test_name": sheet}
+
+
+@xfail_autofilter
+def test_to_excel_autofilter_xlsxwriter(tmp_excel):
+    openpyxl = pytest.importorskip("openpyxl")
+
+    df = DataFrame({"A": [1, 2], "B": [3, 4]})
+    # Write with xlsxwriter, verify via openpyxl that an autofilter exists
+    df.to_excel(tmp_excel, engine="xlsxwriter", index=False, autofilter=True)
+
+    wb = openpyxl.load_workbook(tmp_excel)
+    try:
+        ws = wb[wb.sheetnames[0]]
+        assert ws.auto_filter is not None
+        assert ws.auto_filter.ref is not None
+        # Verify filter covers all columns (A and B)
+        assert "A" in ws.auto_filter.ref
+        assert "B" in ws.auto_filter.ref
+    finally:
+        wb.close()
+
+
+@xfail_autofilter
+def test_to_excel_autofilter_startrow_startcol_xlsxwriter(tmp_excel):
+    openpyxl = pytest.importorskip("openpyxl")
+
+    df = DataFrame({"A": [1, 2], "B": [3, 4]})
+    df.to_excel(
+        tmp_excel,
+        engine="xlsxwriter",
+        index=False,
+        autofilter=True,
+        startrow=2,
+        startcol=1,
+    )
+
+    wb = openpyxl.load_workbook(tmp_excel)
+    try:
+        ws = wb[wb.sheetnames[0]]
+        assert ws.auto_filter is not None
+        assert ws.auto_filter.ref is not None
+        # Filter should be offset by startrow=2 and startcol=1 (B3:D5)
+        assert ws.auto_filter.ref.startswith("B")
+        assert "3" in ws.auto_filter.ref
+    finally:
+        wb.close()
+
+
+@xfail_autofilter
+def test_to_excel_autofilter_multiindex_merge_cells_xlsxwriter(tmp_excel):
+    openpyxl = pytest.importorskip("openpyxl")
+
+    df = DataFrame(
+        [[1, 2, 3, 4], [5, 6, 7, 8]],
+        columns=pd.MultiIndex.from_tuples(
+            [("A", "a"), ("A", "b"), ("B", "a"), ("B", "b")]
+        ),
+    )
+    df.to_excel(
+        tmp_excel,
+        engine="xlsxwriter",
+        index=False,
+        autofilter=True,
+        merge_cells=True,
+    )
+
+    wb = openpyxl.load_workbook(tmp_excel)
+    try:
+        ws = wb[wb.sheetnames[0]]
+        assert ws.auto_filter is not None
+        assert ws.auto_filter.ref is not None
+    finally:
+        wb.close()
+
+
+@xfail_autofilter
+def test_to_excel_autofilter_multiindex_no_merge_xlsxwriter(tmp_excel):
+    openpyxl = pytest.importorskip("openpyxl")
+
+    df = DataFrame(
+        [[1, 2, 3, 4], [5, 6, 7, 8]],
+        columns=pd.MultiIndex.from_tuples(
+            [("A", "a"), ("A", "b"), ("B", "a"), ("B", "b")]
+        ),
+    )
+    df.to_excel(
+        tmp_excel,
+        engine="xlsxwriter",
+        index=False,
+        autofilter=True,
+        merge_cells=False,
+    )
+
+    wb = openpyxl.load_workbook(tmp_excel)
+    try:
+        ws = wb[wb.sheetnames[0]]
+        assert ws.auto_filter is not None
+        assert ws.auto_filter.ref is not None
+    finally:
+        wb.close()
diff --git a/pandas/tests/io/test_feather_sparse.py b/pandas/tests/io/test_feather_sparse.py
new file mode 100644
index 0000000000000..3cb62ce1c7b25
--- /dev/null
+++ b/pandas/tests/io/test_feather_sparse.py
@@ -0,0 +1,36 @@
+import numpy as np
+import pytest
+
+import pandas as pd
+import pandas._testing as tm
+
+pa = pytest.importorskip("pyarrow")
+
+xfail_sparse = pytest.mark.xfail(
+    reason="pending implementation of preserve_sparse for Feather",
+    strict=False,
+)
+
+
+@xfail_sparse
+@pytest.mark.parametrize(
+    "subtype, fill_value, data",
+    [
+        ("int64", 0, [0, 0, 3, 0, 5]),
+        ("float64", 0.0, [0.0, 0.0, 1.5, 0.0, 2.5]),
+        ("boolean", False, [False, False, True, False, True]),
+    ],
+)
+def test_feather_sparse_roundtrip(tmp_path, subtype, fill_value, data):
+    path = tmp_path / "out.feather"
+    s = pd.Series(pd.arrays.SparseArray(data, fill_value=fill_value))
+    df = pd.DataFrame({"s": s, "x": np.arange(len(s))})
+
+    df.to_feather(path, preserve_sparse=True)
+    df2 = pd.read_feather(path, preserve_sparse=True)
+
+    assert isinstance(df2["s"].dtype, pd.SparseDtype)
+    assert df2["s"].dtype.fill_value == fill_value
+    tm.assert_series_equal(
+        df2["s"].sparse.to_dense(), s.sparse.to_dense(), check_dtype=False
+    )
diff --git a/pandas/tests/io/test_parquet_sparse.py b/pandas/tests/io/test_parquet_sparse.py
new file mode 100644
index 0000000000000..b2ff7c8400f80
--- /dev/null
+++ b/pandas/tests/io/test_parquet_sparse.py
@@ -0,0 +1,36 @@
+import numpy as np
+import pytest
+
+import pandas as pd
+import pandas._testing as tm
+
+pa = pytest.importorskip("pyarrow")
+
+xfail_sparse = pytest.mark.xfail(
+    reason="pending implementation of preserve_sparse for Parquet",
+    strict=False,
+)
+
+
+@xfail_sparse
+@pytest.mark.parametrize(
+    "subtype, fill_value, data",
+    [
+        ("int64", 0, [0, 0, 3, 0, 5]),
+        ("float64", 0.0, [0.0, 0.0, 1.5, 0.0, 2.5]),
+        ("boolean", False, [False, False, True, False, True]),
+    ],
+)
+def test_parquet_sparse_roundtrip(tmp_path, subtype, fill_value, data):
+    path = tmp_path / "out.parquet"
+    s = pd.Series(pd.arrays.SparseArray(data, fill_value=fill_value))
+    df = pd.DataFrame({"s": s, "x": np.arange(len(s))})
+
+    df.to_parquet(path, preserve_sparse=True)
+    df2 = pd.read_parquet(path, preserve_sparse=True)
+
+    assert isinstance(df2["s"].dtype, pd.SparseDtype)
+    assert df2["s"].dtype.fill_value == fill_value
+    tm.assert_series_equal(
+        df2["s"].sparse.to_dense(), s.sparse.to_dense(), check_dtype=False
+    )