diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e2ba7c312a213..32976a3cc578e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -189,8 +189,8 @@ from pandas.io.formats.printing import pprint_thing if TYPE_CHECKING: - from collections.abc import Callable from collections.abc import ( + Callable, Hashable, Iterator, Mapping, @@ -2180,139 +2180,128 @@ def to_excel( freeze_panes: tuple[int, int] | None = None, storage_options: StorageOptions | None = None, engine_kwargs: dict[str, Any] | None = None, + autofilter: bool = False, ) -> None: """ - Write {klass} to an Excel sheet. + Write object to an Excel sheet. - To write a single {klass} to an Excel .xlsx file it is only necessary to - specify a target file name. To write to multiple sheets it is necessary to - create an `ExcelWriter` object with a target file name, and specify a sheet - in the file to write to. + To write a single object to an Excel .xlsx file it is only necessary + to specify a target file name. - Multiple sheets may be written to by specifying unique `sheet_name`. - With all data written to the file it is necessary to save the changes. - Note that creating an `ExcelWriter` object with a file name that already - exists will result in the contents of the existing file being erased. + .. code-block:: python + + df.to_excel("path_to_file.xlsx") + + To write to different sheets of the same .xlsx file it is necessary to + create an `ExcelWriter` object with a target file name, + and specify a sheet in the file to write to. + + .. code-block:: python + + with pd.ExcelWriter("path_to_file.xlsx") as writer: + df1.to_excel(writer, sheet_name="Sheet_name_1") + df2.to_excel(writer, sheet_name="Sheet_name_2") + + When using `ExcelWriter`, note that the objects are not written until the + `ExcelWriter` object is closed. Parameters ---------- - excel_writer : path-like, file-like, or ExcelWriter object - File path or existing ExcelWriter. + excel_writer : string, path object or ExcelWriter object + File path or existing ExcelWriter + If a string is passed, a new ExcelWriter object is created. sheet_name : str, default 'Sheet1' Name of sheet which will contain DataFrame. na_rep : str, default '' - Missing data representation. - float_format : str, optional - Format string for floating point numbers. For example - ``float_format="%.2f"`` will format 0.1234 to 0.12. - columns : sequence or list of str, optional - Columns to write. + Missing data representation + float_format : str, default None + Format string for floating point numbers + columns : sequence, optional + Columns to write header : bool or list of str, default True - Write out the column names. If a list of string is given it is - assumed to be aliases for the column names. + Write out the column names. If a list of string is given + it is assumed to be aliases for the column names index : bool, default True - Write row names (index). - index_label : str or sequence, optional - Column label for index column(s) if desired. If not specified, and + Write row names (index) + index_label : str or sequence, default None + Column label for index column(s) if desired. If None is given, and `header` and `index` are True, then the index names are used. A sequence should be given if the DataFrame uses MultiIndex. startrow : int, default 0 Upper left cell row to dump data frame. + Per default (0) header is written, too. startcol : int, default 0 Upper left cell column to dump data frame. engine : str, optional - Write engine to use, 'openpyxl' or 'xlsxwriter'. You can also set this - via the options ``io.excel.xlsx.writer`` or - ``io.excel.xlsm.writer``. - - merge_cells : bool or 'columns', default False - If True, write MultiIndex index and columns as merged cells. - If 'columns', merge MultiIndex column cells only. - {encoding_parameter} + Write engine to use, 'openpyxl' or 'xlsxwriter'. + Defaults to 'xlsxwriter'. + merge_cells : bool, default True + Write MultiIndex and Hierarchical Rows as merged cells. + The indices corresponding to each row will be combined and + presented as a single cell. inf_rep : str, default 'inf' - Representation for infinity (there is no native representation for - infinity in Excel). - {verbose_parameter} - freeze_panes : tuple of int (length 2), optional - Specifies the one-based bottommost row and rightmost column that - is to be frozen. - {storage_options} + Representation for infinity (there is no native Numpy representation + for infinity in integer dtypes) + freeze_panes : tuple of int (length 2), default None + First rows to freeze panes on. Only applicable when `freeze_panes` + is passed as a tuple. + storage_options : dict, optional + Extra options that make sense for a particular storage connection, + e.g. host, port, username, password, etc., if using a URL that + requires authentication. + engine_kwargs : dict, optional + Arbitrary keyword arguments passed to excel engine. + autofilter : bool, default False + Whether to apply autofilter to the header row. - .. versionadded:: {storage_options_versionadded} - {extra_parameters} See Also -------- - to_csv : Write DataFrame to a comma-separated values (csv) file. + read_excel : Read from an Excel file into a DataFrame. + ExcelFile : Class for parsing tabular excel files. ExcelWriter : Class for writing DataFrame objects into excel sheets. - read_excel : Read an Excel file into a pandas DataFrame. - read_csv : Read a comma-separated values (csv) file into DataFrame. - io.formats.style.Styler.to_excel : Add styles to Excel sheet. Notes ----- - For compatibility with :meth:`~DataFrame.to_csv`, - to_excel serializes lists and dicts to strings before writing. - - Once a workbook has been saved it is not possible to write further - data without rewriting the whole workbook. - - pandas will check the number of rows, columns, - and cell character count does not exceed Excel's limitations. - All other limitations must be checked by the user. + The `engine` keyword is not supported when `excel_writer` is an + existing `ExcelWriter`. Examples -------- - - Create, write to and save a workbook: - - >>> df1 = pd.DataFrame( - ... [["a", "b"], ["c", "d"]], - ... index=["row 1", "row 2"], - ... columns=["col 1", "col 2"], - ... ) - >>> df1.to_excel("output.xlsx") # doctest: +SKIP - - To specify the sheet name: - - >>> df1.to_excel("output.xlsx", sheet_name="Sheet_name_1") # doctest: +SKIP - - If you wish to write to more than one sheet in the workbook, it is - necessary to specify an ExcelWriter object: - - >>> df2 = df1.copy() - >>> with pd.ExcelWriter("output.xlsx") as writer: # doctest: +SKIP - ... df1.to_excel(writer, sheet_name="Sheet_name_1") - ... df2.to_excel(writer, sheet_name="Sheet_name_2") - - ExcelWriter can also be used to append to an existing Excel file: - - >>> with pd.ExcelWriter("output.xlsx", mode="a") as writer: # doctest: +SKIP - ... df1.to_excel(writer, sheet_name="Sheet_name_3") - - To set the library that is used to write the Excel file, - you can pass the `engine` keyword (the default engine is - automatically chosen depending on the file extension): - - >>> df1.to_excel("output1.xlsx", engine="xlsxwriter") # doctest: +SKIP + >>> df = pd.DataFrame({{"A": [1, 2, 3], "B": [4, 5, 6]}}) + >>> df.to_excel("pandas_simple.xlsx") + >>> df.to_excel("pandas_simple.xlsx", engine="openpyxl") """ - if engine_kwargs is None: - engine_kwargs = {} + # Import ExcelWriter here to avoid circular import + from pandas import ExcelWriter - df = self if isinstance(self, ABCDataFrame) else self.to_frame() + if isinstance(excel_writer, ExcelWriter): + if engine is not None: + raise ValueError( + "engine should not be specified when passing an ExcelWriter" + ) + engine = excel_writer.engine + else: + excel_writer = ExcelWriter( + excel_writer, + engine=engine, + engine_kwargs=engine_kwargs, + storage_options=storage_options, + ) + # Import ExcelFormatter here to avoid circular import from pandas.io.formats.excel import ExcelFormatter formatter = ExcelFormatter( - df, + self, na_rep=na_rep, - cols=columns, - header=header, float_format=float_format, + columns=columns, + header=header, index=index, index_label=index_label, - merge_cells=merge_cells, inf_rep=inf_rep, ) + formatter.write( excel_writer, sheet_name=sheet_name, @@ -2322,8 +2311,13 @@ def to_excel( engine=engine, storage_options=storage_options, engine_kwargs=engine_kwargs, + autofilter=autofilter, ) + if not isinstance(excel_writer, ExcelWriter): + # we need to close the writer if we created it + excel_writer.close() + @final @doc( storage_options=_shared_docs["storage_options"], @@ -4851,7 +4845,6 @@ def sort_values( ignore_index: bool = ..., key: ValueKeyFunc = ..., ) -> Self: ... - @overload def sort_values( self, @@ -9614,10 +9607,10 @@ def align( 1 1 2 3 4 2 6 7 8 9 >>> other - A B C D - 2 10 20 30 40 - 3 60 70 80 90 - 4 600 700 800 900 + A B C D E + 2 10 20 30 40 NaN + 3 60 70 80 90 NaN + 4 600 700 800 900 NaN Align on columns: @@ -12044,7 +12037,6 @@ def last_valid_index(self) -> Hashable: {see_also}\ {examples} """ - _sum_prod_doc = """ {desc} diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index d1ae59e0e5866..becc9380b9cf6 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -1209,6 +1209,7 @@ def _write_cells( startrow: int = 0, startcol: int = 0, freeze_panes: tuple[int, int] | None = None, + autofilter: bool = False, ) -> None: """ Write given formatted cells into Excel an excel sheet @@ -1223,6 +1224,8 @@ def _write_cells( startcol : upper left cell column to dump data frame freeze_panes: int tuple of length 2 contains the bottom-most row and right-most column to freeze + autofilter : bool, default False + If True, apply an autofilter to the header row over the written data range. """ raise NotImplementedError diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py index e9a06076f3aff..dd5112432ae69 100644 --- a/pandas/io/excel/_odswriter.py +++ b/pandas/io/excel/_odswriter.py @@ -99,10 +99,17 @@ def _write_cells( startrow: int = 0, startcol: int = 0, freeze_panes: tuple[int, int] | None = None, + autofilter: bool = False, ) -> None: """ Write the frame cells using odf """ + if autofilter: + raise NotImplementedError( + "Autofilter is not supported with the 'odf' engine. " + "Please use 'openpyxl' or 'xlsxwriter' engine instead." + ) + from odf.table import ( Table, TableCell, diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index 867d11583dcc0..6376bd3bdf2d8 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -449,6 +449,7 @@ def _write_cells( startrow: int = 0, startcol: int = 0, freeze_panes: tuple[int, int] | None = None, + autofilter: bool = False, ) -> None: # Write the frame cells using openpyxl. sheet_name = self._get_sheet_name(sheet_name) @@ -486,6 +487,11 @@ def _write_cells( row=freeze_panes[0] + 1, column=freeze_panes[1] + 1 ) + min_r = None + min_c = None + max_r = None + max_c = None + for cell in cells: xcell = wks.cell( row=startrow + cell.row + 1, column=startcol + cell.col + 1 @@ -506,10 +512,23 @@ def _write_cells( for k, v in style_kwargs.items(): setattr(xcell, k, v) + abs_row = startrow + cell.row + 1 + abs_col = startcol + cell.col + 1 + + # track bounds (1-based for openpyxl) + if min_r is None or abs_row < min_r: + min_r = abs_row + if min_c is None or abs_col < min_c: + min_c = abs_col + if max_r is None or abs_row > max_r: + max_r = abs_row + if max_c is None or abs_col > max_c: + max_c = abs_col + if cell.mergestart is not None and cell.mergeend is not None: wks.merge_cells( - start_row=startrow + cell.row + 1, - start_column=startcol + cell.col + 1, + start_row=abs_row, + start_column=abs_col, end_column=startcol + cell.mergeend + 1, end_row=startrow + cell.mergestart + 1, ) @@ -532,6 +551,14 @@ def _write_cells( for k, v in style_kwargs.items(): setattr(xcell, k, v) + if autofilter and min_r is not None and min_c is not None and max_r is not None and max_c is not None: + # Convert numeric bounds to Excel-style range e.g. A1:D10 + from openpyxl.utils import get_column_letter + + start_ref = f"{get_column_letter(min_c)}{min_r}" + end_ref = f"{get_column_letter(max_c)}{max_r}" + wks.auto_filter.ref = f"{start_ref}:{end_ref}" + class OpenpyxlReader(BaseExcelReader["Workbook"]): @doc(storage_options=_shared_docs["storage_options"]) diff --git a/pandas/io/excel/_xlsxwriter.py b/pandas/io/excel/_xlsxwriter.py index 4a7b8eee2bfce..d9df2595cb32c 100644 --- a/pandas/io/excel/_xlsxwriter.py +++ b/pandas/io/excel/_xlsxwriter.py @@ -245,6 +245,7 @@ def _write_cells( startrow: int = 0, startcol: int = 0, freeze_panes: tuple[int, int] | None = None, + autofilter: bool = False, ) -> None: # Write the frame cells using xlsxwriter. sheet_name = self._get_sheet_name(sheet_name) @@ -258,6 +259,11 @@ def _write_cells( if validate_freeze_panes(freeze_panes): wks.freeze_panes(*(freeze_panes)) + min_r = None + min_c = None + max_r = None + max_c = None + for cell in cells: val, fmt = self._value_with_fmt(cell.val) @@ -271,14 +277,31 @@ def _write_cells( style = self.book.add_format(_XlsxStyler.convert(cell.style, fmt)) style_dict[stylekey] = style + abs_row = startrow + cell.row + abs_col = startcol + cell.col + + # track bounds + if min_r is None or abs_row < min_r: + min_r = abs_row + if min_c is None or abs_col < min_c: + min_c = abs_col + if max_r is None or abs_row > max_r: + max_r = abs_row + if max_c is None or abs_col > max_c: + max_c = abs_col + if cell.mergestart is not None and cell.mergeend is not None: wks.merge_range( - startrow + cell.row, - startcol + cell.col, + abs_row, + abs_col, startrow + cell.mergestart, startcol + cell.mergeend, val, style, ) else: - wks.write(startrow + cell.row, startcol + cell.col, val, style) + wks.write(abs_row, abs_col, val, style) + + if autofilter and min_r is not None and min_c is not None and max_r is not None and max_c is not None: + # Apply autofilter over the used range. xlsxwriter uses 0-based indices. + wks.autofilter(min_r, min_c, max_r, max_c) diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index d4d47253a5f82..f8978feb4a2a6 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -884,6 +884,7 @@ def write( engine: str | None = None, storage_options: StorageOptions | None = None, engine_kwargs: dict | None = None, + autofilter: bool = False, ) -> None: """ writer : path-like, file-like, or ExcelWriter object @@ -938,6 +939,7 @@ def write( startrow=startrow, startcol=startcol, freeze_panes=freeze_panes, + autofilter=autofilter, ) finally: # make sure to close opened file handles diff --git a/pandas/tests/io/excel/test_odswriter.py b/pandas/tests/io/excel/test_odswriter.py index 7843bb59f97cf..502c04f9781cc 100644 --- a/pandas/tests/io/excel/test_odswriter.py +++ b/pandas/tests/io/excel/test_odswriter.py @@ -104,3 +104,13 @@ def test_cell_value_type( cell = sheet_cells[0] assert cell.attributes.get((OFFICENS, "value-type")) == cell_value_type assert cell.attributes.get((OFFICENS, cell_value_attribute)) == cell_value + + +def test_to_excel_autofilter_odfpy_raises(tmp_excel): + # Test that autofilter=True raises NotImplementedError with odfpy engine + from pandas import DataFrame + + df = DataFrame({"A": [1, 2], "B": [3, 4]}) + msg = "Autofilter is not supported with the 'odf' engine" + with pytest.raises(NotImplementedError, match=msg): + df.to_excel(tmp_excel, engine="odf", autofilter=True) \ No newline at end of file diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py index 5b4bbb9e686d3..1fb6fdae3b5de 100644 --- a/pandas/tests/io/excel/test_openpyxl.py +++ b/pandas/tests/io/excel/test_openpyxl.py @@ -155,6 +155,86 @@ def test_engine_kwargs_append_data_only(tmp_excel, data_only, expected): ) +def test_to_excel_autofilter_openpyxl(tmp_excel): + # Ensure that writing with autofilter=True sets auto_filter.ref + df = DataFrame({"A": [1, 2], "B": [3, 4]}) + df.to_excel(tmp_excel, engine="openpyxl", index=False, autofilter=True) + + with contextlib.closing(openpyxl.load_workbook(tmp_excel)) as wb: + ws = wb[wb.sheetnames[0]] + # Expect filter over the full range, e.g. A1:B3 (header + 2 rows) + assert ws.auto_filter is not None + assert ws.auto_filter.ref is not None + # Verify filter covers all columns (A and B) + assert "A" in ws.auto_filter.ref + assert "B" in ws.auto_filter.ref + + +def test_to_excel_autofilter_startrow_startcol_openpyxl(tmp_excel): + # Test autofilter with nonzero startrow and startcol + df = DataFrame({"A": [1, 2], "B": [3, 4]}) + df.to_excel( + tmp_excel, + engine="openpyxl", + index=False, + autofilter=True, + startrow=2, + startcol=1, + ) + + with contextlib.closing(openpyxl.load_workbook(tmp_excel)) as wb: + ws = wb[wb.sheetnames[0]] + assert ws.auto_filter is not None + assert ws.auto_filter.ref is not None + # Filter should be offset by startrow=2 and startcol=1 (B3:D5) + assert ws.auto_filter.ref.startswith("B") + assert "3" in ws.auto_filter.ref + + +def test_to_excel_autofilter_multiindex_merge_cells_openpyxl(tmp_excel): + # Test autofilter with MultiIndex columns and merge_cells=True + df = DataFrame( + [[1, 2, 3, 4], [5, 6, 7, 8]], + columns=pd.MultiIndex.from_tuples( + [("A", "a"), ("A", "b"), ("B", "a"), ("B", "b")] + ), + ) + df.to_excel( + tmp_excel, + engine="openpyxl", + index=False, + autofilter=True, + merge_cells=True, + ) + + with contextlib.closing(openpyxl.load_workbook(tmp_excel)) as wb: + ws = wb[wb.sheetnames[0]] + assert ws.auto_filter is not None + assert ws.auto_filter.ref is not None + + +def test_to_excel_autofilter_multiindex_no_merge_openpyxl(tmp_excel): + # Test autofilter with MultiIndex columns and merge_cells=False + df = DataFrame( + [[1, 2, 3, 4], [5, 6, 7, 8]], + columns=pd.MultiIndex.from_tuples( + [("A", "a"), ("A", "b"), ("B", "a"), ("B", "b")] + ), + ) + df.to_excel( + tmp_excel, + engine="openpyxl", + index=False, + autofilter=True, + merge_cells=False, + ) + + with contextlib.closing(openpyxl.load_workbook(tmp_excel)) as wb: + ws = wb[wb.sheetnames[0]] + assert ws.auto_filter is not None + assert ws.auto_filter.ref is not None + + @pytest.mark.parametrize("kwarg_name", ["read_only", "data_only"]) @pytest.mark.parametrize("kwarg_value", [True, False]) def test_engine_kwargs_append_reader(datapath, ext, kwarg_name, kwarg_value): diff --git a/pandas/tests/io/excel/test_xlsxwriter.py b/pandas/tests/io/excel/test_xlsxwriter.py index b2e6c845e5019..2637337e3f0c7 100644 --- a/pandas/tests/io/excel/test_xlsxwriter.py +++ b/pandas/tests/io/excel/test_xlsxwriter.py @@ -3,6 +3,7 @@ import pytest +import pandas as pd from pandas import DataFrame from pandas.io.excel import ExcelWriter @@ -84,3 +85,99 @@ def test_book_and_sheets_consistent(tmp_excel): assert writer.sheets == {} sheet = writer.book.add_worksheet("test_name") assert writer.sheets == {"test_name": sheet} + + +def test_to_excel_autofilter_xlsxwriter(tmp_excel): + openpyxl = pytest.importorskip("openpyxl") + + df = DataFrame({"A": [1, 2], "B": [3, 4]}) + # Write with xlsxwriter, verify via openpyxl that an autofilter exists + df.to_excel(tmp_excel, engine="xlsxwriter", index=False, autofilter=True) + + wb = openpyxl.load_workbook(tmp_excel) + try: + ws = wb[wb.sheetnames[0]] + assert ws.auto_filter is not None + assert ws.auto_filter.ref is not None + # Verify filter covers all columns (A and B) + assert "A" in ws.auto_filter.ref + assert "B" in ws.auto_filter.ref + finally: + wb.close() + + +def test_to_excel_autofilter_startrow_startcol_xlsxwriter(tmp_excel): + openpyxl = pytest.importorskip("openpyxl") + + df = DataFrame({"A": [1, 2], "B": [3, 4]}) + df.to_excel( + tmp_excel, + engine="xlsxwriter", + index=False, + autofilter=True, + startrow=2, + startcol=1, + ) + + wb = openpyxl.load_workbook(tmp_excel) + try: + ws = wb[wb.sheetnames[0]] + assert ws.auto_filter is not None + assert ws.auto_filter.ref is not None + # Filter should be offset by startrow=2 and startcol=1 (B3:D5) + assert ws.auto_filter.ref.startswith("B") + assert "3" in ws.auto_filter.ref + finally: + wb.close() + + +def test_to_excel_autofilter_multiindex_merge_cells_xlsxwriter(tmp_excel): + openpyxl = pytest.importorskip("openpyxl") + + df = DataFrame( + [[1, 2, 3, 4], [5, 6, 7, 8]], + columns=pd.MultiIndex.from_tuples( + [("A", "a"), ("A", "b"), ("B", "a"), ("B", "b")] + ), + ) + df.to_excel( + tmp_excel, + engine="xlsxwriter", + index=False, + autofilter=True, + merge_cells=True, + ) + + wb = openpyxl.load_workbook(tmp_excel) + try: + ws = wb[wb.sheetnames[0]] + assert ws.auto_filter is not None + assert ws.auto_filter.ref is not None + finally: + wb.close() + + +def test_to_excel_autofilter_multiindex_no_merge_xlsxwriter(tmp_excel): + openpyxl = pytest.importorskip("openpyxl") + + df = DataFrame( + [[1, 2, 3, 4], [5, 6, 7, 8]], + columns=pd.MultiIndex.from_tuples( + [("A", "a"), ("A", "b"), ("B", "a"), ("B", "b")] + ), + ) + df.to_excel( + tmp_excel, + engine="xlsxwriter", + index=False, + autofilter=True, + merge_cells=False, + ) + + wb = openpyxl.load_workbook(tmp_excel) + try: + ws = wb[wb.sheetnames[0]] + assert ws.auto_filter is not None + assert ws.auto_filter.ref is not None + finally: + wb.close()