From 64f074265f54ccfe4685d57232b693a5abe5593b Mon Sep 17 00:00:00 2001 From: antznette1 Date: Thu, 30 Oct 2025 20:56:13 +0100 Subject: [PATCH 1/5] ENH: to_excel(autofilter=...) apply Excel autofilter over written range for xlsxwriter/openpyxl; keep engine_kwargs semantics intact --- pandas/core/generic.py | 156 +++++++++++++++++++++++++++++---- pandas/io/excel/_base.py | 3 + pandas/io/excel/_openpyxl.py | 34 ++++++- pandas/io/excel/_xlsxwriter.py | 33 ++++++- pandas/io/formats/excel.py | 2 + 5 files changed, 208 insertions(+), 20 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b542ca1f431c3..16588c2184f9a 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -773,7 +773,6 @@ def _set_axis(self, axis: AxisInt, labels: AnyArrayLike | list) -> None: """ labels = ensure_index(labels) self._mgr.set_axis(axis, labels) - @final @doc(klass=_shared_doc_kwargs["klass"]) def droplevel(self, level: IndexLabel, axis: Axis = 0) -> Self: @@ -1515,7 +1514,6 @@ def __bool__(self) -> NoReturn: f"The truth value of a {type(self).__name__} is ambiguous. " "Use a.empty, a.bool(), a.item(), a.any() or a.all()." ) - @final def abs(self) -> Self: """ @@ -2180,6 +2178,141 @@ def to_excel( freeze_panes: tuple[int, int] | None = None, storage_options: StorageOptions | None = None, engine_kwargs: dict[str, Any] | None = None, + autofilter: bool = False, + ) -> None: + """ + Write object to an Excel sheet. + + To write a single object to an Excel .xlsx file it is only necessary + to specify a target file name. + + .. code-block:: python + + df.to_excel("path_to_file.xlsx") + + To write to different sheets of the same .xlsx file it is necessary to + create an `ExcelWriter` object with a target file name, + and specify a sheet in the file to write to. + + .. code-block:: python + + with pd.ExcelWriter("path_to_file.xlsx") as writer: + df1.to_excel(writer, sheet_name="Sheet_name_1") + df2.to_excel(writer, sheet_name="Sheet_name_2") + + When using `ExcelWriter`, note that the objects are not written until the + `ExcelWriter` object is closed. + + Parameters + ---------- + excel_writer : string, path object or ExcelWriter object + File path or existing ExcelWriter + If a string is passed, a new ExcelWriter object is created. + sheet_name : str, default 'Sheet1' + Name of sheet which will contain DataFrame. + na_rep : str, default '' + Missing data representation + float_format : str, default None + Format string for floating point numbers + columns : sequence, optional + Columns to write + header : bool or list of str, default True + Write out the column names. If a list of string is given + it is assumed to be aliases for the column names + index : bool, default True + Write row names (index) + index_label : str or sequence, default None + Column label for index column(s) if desired. If None is given, and + `header` and `index` are True, then the index names are used. A + sequence should be given if the DataFrame uses MultiIndex. + startrow : int, default 0 + Upper left cell row to dump data frame. + Per default (0) header is written, too. + startcol : int, default 0 + Upper left cell column to dump data frame. + engine : str, optional + Write engine to use, 'openpyxl' or 'xlsxwriter'. + Defaults to 'xlsxwriter'. + merge_cells : bool, default True + Write MultiIndex and Hierarchical Rows as merged cells. + The indices corresponding to each row will be combined and + presented as a single cell. + inf_rep : str, default 'inf' + Representation for infinity (there is no native Numpy representation + for infinity in integer dtypes) + freeze_panes : tuple of int (length 2), default None + First rows to freeze panes on. Only applicable when `freeze_panes` + is passed as a tuple. + storage_options : dict, optional + Extra options that make sense for a particular storage connection, + e.g. host, port, username, password, etc., if using a URL that + requires authentication. + engine_kwargs : dict, optional + Arbitrary keyword arguments passed to excel engine. + autofilter : bool, default False + Whether to apply autofilter to the header row. + + See Also + -------- + read_excel : Read from an Excel file into a DataFrame. + ExcelFile : Class for parsing tabular excel files. + ExcelWriter : Class for writing DataFrame objects into excel sheets. + + Notes + ----- + The `engine` keyword is not supported when `excel_writer` is an + existing `ExcelWriter`. + + Examples + -------- + >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + >>> df.to_excel("pandas_simple.xlsx") + >>> df.to_excel("pandas_simple.xlsx", engine="openpyxl") + """ + if isinstance(excel_writer, ExcelWriter): + if engine is not None: + raise ValueError( + "engine should not be specified when passing an ExcelWriter" + ) + engine = excel_writer.engine + else: + excel_writer = ExcelWriter( + excel_writer, + engine=engine, + mode=mode, + if_sheet_exists=if_sheet_exists, + engine_kwargs=engine_kwargs, + date_format=date_format, + datetime_format=datetime_format, + storage_options=storage_options, + ) + + formatter = ExcelFormatter( + self, + na_rep=na_rep, + float_format=float_format, + columns=columns, + header=header, + index=index, + index_label=index_label, + inf_rep=inf_rep, + ) + + formatter.write( + excel_writer, + sheet_name=sheet_name, + startrow=startrow, + startcol=startcol, + freeze_panes=freeze_panes, + engine=engine, + storage_options=storage_options, + engine_kwargs=engine_kwargs, + autofilter=autofilter, + ) + + if not isinstance(excel_writer, ExcelWriter): + # we need to close the writer if we created it + excel_writer.close() ) -> None: """ Write {klass} to an Excel sheet. @@ -4859,7 +4992,6 @@ def sort_values( ignore_index: bool = ..., key: ValueKeyFunc = ..., ) -> Self: ... - @overload def sort_values( self, @@ -5635,7 +5767,6 @@ def f(x) -> bool: return self.loc(axis=axis)[values] else: raise TypeError("Must pass either `items`, `like`, or `regex`") - @final def head(self, n: int = 5) -> Self: """ @@ -6113,8 +6244,7 @@ def __finalize__(self, other, method: str | None = None, **kwargs) -> Self: ---------- other : the object from which to get the attributes that we are going to propagate. If ``other`` has an ``input_objs`` attribute, then - this attribute must contain an iterable of objects, each with an - ``attrs`` attribute. + this attribute must contain an iterable of objects, each with an ``attrs`` attribute. method : str, optional A passed method name providing context on where ``__finalize__`` was called. @@ -9627,10 +9757,10 @@ def align( 1 1 2 3 4 2 6 7 8 9 >>> other - A B C D - 2 10 20 30 40 - 3 60 70 80 90 - 4 600 700 800 900 + A B C D E + 2 10 20 30 40 NaN + 3 60 70 80 90 NaN + 4 600 700 800 900 NaN Align on columns: @@ -9719,7 +9849,6 @@ def align( left = left.__finalize__(self) right = right.__finalize__(other) return left, right - @final def _align_frame( self, @@ -12057,7 +12186,6 @@ def last_valid_index(self) -> Hashable: {see_also}\ {examples} """ - _sum_prod_doc = """ {desc} @@ -12839,8 +12967,6 @@ def last_valid_index(self) -> Hashable: The required number of valid values to perform the operation. If fewer than ``min_count`` non-NA values are present the result will be NA. """ - - def make_doc(name: str, ndim: int) -> str: """ Generate the docstring for a Series/DataFrame reduction. @@ -13207,4 +13333,4 @@ def make_doc(name: str, ndim: int) -> str: examples=examples, **kwargs, ) - return docstr + return docstr \ No newline at end of file diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 48028c54a1773..e853b85836e95 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -1215,6 +1215,7 @@ def _write_cells( startrow: int = 0, startcol: int = 0, freeze_panes: tuple[int, int] | None = None, + autofilter: bool = False, ) -> None: """ Write given formatted cells into Excel an excel sheet @@ -1229,6 +1230,8 @@ def _write_cells( startcol : upper left cell column to dump data frame freeze_panes: int tuple of length 2 contains the bottom-most row and right-most column to freeze + autofilter : bool, default False + If True, apply an autofilter to the header row over the written data range. """ raise NotImplementedError diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index 867d11583dcc0..0293c9d99c679 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -449,6 +449,7 @@ def _write_cells( startrow: int = 0, startcol: int = 0, freeze_panes: tuple[int, int] | None = None, + autofilter: bool = False, ) -> None: # Write the frame cells using openpyxl. sheet_name = self._get_sheet_name(sheet_name) @@ -486,6 +487,11 @@ def _write_cells( row=freeze_panes[0] + 1, column=freeze_panes[1] + 1 ) + min_r = None + min_c = None + max_r = None + max_c = None + for cell in cells: xcell = wks.cell( row=startrow + cell.row + 1, column=startcol + cell.col + 1 @@ -506,10 +512,23 @@ def _write_cells( for k, v in style_kwargs.items(): setattr(xcell, k, v) + abs_row = startrow + cell.row + 1 + abs_col = startcol + cell.col + 1 + + # track bounds (1-based for openpyxl) + if min_r is None or abs_row < min_r: + min_r = abs_row + if min_c is None or abs_col < min_c: + min_c = abs_col + if max_r is None or abs_row > max_r: + max_r = abs_row + if max_c is None or abs_col > max_c: + max_c = abs_col + if cell.mergestart is not None and cell.mergeend is not None: wks.merge_cells( - start_row=startrow + cell.row + 1, - start_column=startcol + cell.col + 1, + start_row=abs_row, + start_column=abs_col, end_column=startcol + cell.mergeend + 1, end_row=startrow + cell.mergestart + 1, ) @@ -532,6 +551,17 @@ def _write_cells( for k, v in style_kwargs.items(): setattr(xcell, k, v) + if autofilter and min_r is not None and min_c is not None and max_r is not None and max_c is not None: + try: + # Convert numeric bounds to Excel-style range e.g. A1:D10 + from openpyxl.utils import get_column_letter + + start_ref = f"{get_column_letter(min_c)}{min_r}" + end_ref = f"{get_column_letter(max_c)}{max_r}" + wks.auto_filter.ref = f"{start_ref}:{end_ref}" + except Exception: + pass + class OpenpyxlReader(BaseExcelReader["Workbook"]): @doc(storage_options=_shared_docs["storage_options"]) diff --git a/pandas/io/excel/_xlsxwriter.py b/pandas/io/excel/_xlsxwriter.py index 4a7b8eee2bfce..851ccf0a8fd32 100644 --- a/pandas/io/excel/_xlsxwriter.py +++ b/pandas/io/excel/_xlsxwriter.py @@ -245,6 +245,7 @@ def _write_cells( startrow: int = 0, startcol: int = 0, freeze_panes: tuple[int, int] | None = None, + autofilter: bool = False, ) -> None: # Write the frame cells using xlsxwriter. sheet_name = self._get_sheet_name(sheet_name) @@ -258,6 +259,11 @@ def _write_cells( if validate_freeze_panes(freeze_panes): wks.freeze_panes(*(freeze_panes)) + min_r = None + min_c = None + max_r = None + max_c = None + for cell in cells: val, fmt = self._value_with_fmt(cell.val) @@ -271,14 +277,35 @@ def _write_cells( style = self.book.add_format(_XlsxStyler.convert(cell.style, fmt)) style_dict[stylekey] = style + abs_row = startrow + cell.row + abs_col = startcol + cell.col + + # track bounds + if min_r is None or abs_row < min_r: + min_r = abs_row + if min_c is None or abs_col < min_c: + min_c = abs_col + if max_r is None or abs_row > max_r: + max_r = abs_row + if max_c is None or abs_col > max_c: + max_c = abs_col + if cell.mergestart is not None and cell.mergeend is not None: wks.merge_range( - startrow + cell.row, - startcol + cell.col, + abs_row, + abs_col, startrow + cell.mergestart, startcol + cell.mergeend, val, style, ) else: - wks.write(startrow + cell.row, startcol + cell.col, val, style) + wks.write(abs_row, abs_col, val, style) + + if autofilter and min_r is not None and min_c is not None and max_r is not None and max_c is not None: + # Apply autofilter over the used range. xlsxwriter uses 0-based indices. + try: + wks.autofilter(min_r, min_c, max_r, max_c) + except Exception: + # Be resilient if engine version doesn't support or range invalid + pass diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index d4d47253a5f82..f8978feb4a2a6 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -884,6 +884,7 @@ def write( engine: str | None = None, storage_options: StorageOptions | None = None, engine_kwargs: dict | None = None, + autofilter: bool = False, ) -> None: """ writer : path-like, file-like, or ExcelWriter object @@ -938,6 +939,7 @@ def write( startrow=startrow, startcol=startcol, freeze_panes=freeze_panes, + autofilter=autofilter, ) finally: # make sure to close opened file handles From b5faff19e0d95bf125ed670f8fa7d121c2e4e71d Mon Sep 17 00:00:00 2001 From: antznette1 Date: Fri, 31 Oct 2025 00:31:54 +0100 Subject: [PATCH 2/5] TST/DOC: add tests for to_excel(autofilter=True) for openpyxl/xlsxwriter and a user guide snippet --- pandas/tests/io/excel/test_openpyxl.py | 12 ++++++++++++ pandas/tests/io/excel/test_xlsxwriter.py | 21 +++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py index 5b4bbb9e686d3..f3dc1c857a4ad 100644 --- a/pandas/tests/io/excel/test_openpyxl.py +++ b/pandas/tests/io/excel/test_openpyxl.py @@ -155,6 +155,18 @@ def test_engine_kwargs_append_data_only(tmp_excel, data_only, expected): ) +def test_to_excel_autofilter_openpyxl(tmp_excel): + # Ensure that writing with autofilter=True sets auto_filter.ref + df = DataFrame({"A": [1, 2], "B": [3, 4]}) + df.to_excel(tmp_excel, engine="openpyxl", index=False, autofilter=True) + + with contextlib.closing(openpyxl.load_workbook(tmp_excel)) as wb: + ws = wb[wb.sheetnames[0]] + # Expect filter over the full range, e.g. A1:B3 (header + 2 rows) + assert ws.auto_filter is not None + assert ws.auto_filter.ref is not None + + @pytest.mark.parametrize("kwarg_name", ["read_only", "data_only"]) @pytest.mark.parametrize("kwarg_value", [True, False]) def test_engine_kwargs_append_reader(datapath, ext, kwarg_name, kwarg_value): diff --git a/pandas/tests/io/excel/test_xlsxwriter.py b/pandas/tests/io/excel/test_xlsxwriter.py index b2e6c845e5019..8e19df46ff1f3 100644 --- a/pandas/tests/io/excel/test_xlsxwriter.py +++ b/pandas/tests/io/excel/test_xlsxwriter.py @@ -84,3 +84,24 @@ def test_book_and_sheets_consistent(tmp_excel): assert writer.sheets == {} sheet = writer.book.add_worksheet("test_name") assert writer.sheets == {"test_name": sheet} + + +def test_to_excel(tmp_excel): + DataFrame([[1, 2]]).to_excel(tmp_excel) + + +def test_to_excel_autofilter_xlsxwriter(tmp_excel): + pytest.importorskip("xlsxwriter") + openpyxl = pytest.importorskip("openpyxl") + + df = DataFrame({"A": [1, 2], "B": [3, 4]}) + # Write with xlsxwriter, verify via openpyxl that an autofilter exists + df.to_excel(tmp_excel, engine="xlsxwriter", index=False, autofilter=True) + + wb = openpyxl.load_workbook(tmp_excel) + try: + ws = wb[wb.sheetnames[0]] + assert ws.auto_filter is not None + assert ws.auto_filter.ref is not None + finally: + wb.close() \ No newline at end of file From 59f2bc46350d8ff5adae13fe9a5efdf7f1c9e933 Mon Sep 17 00:00:00 2001 From: antznette1 Date: Sun, 2 Nov 2025 02:37:57 +0100 Subject: [PATCH 3/5] Address reviewer feedback on autofilter PR - Remove duplicate to_excel function code in generic.py - Add NotImplementedError for odfpy engine when autofilter=True - Remove broad exception handling from autofilter implementations - Add comprehensive tests for nonzero startrow/startcol - Add tests for MultiIndex columns with merge_cells=True and False - Improve tests to verify each column has autofilter - Remove redundant test_to_excel test - Remove redundant pytest.importorskip from test functions --- pandas/core/generic.py | 143 ----------------------- pandas/io/excel/_odswriter.py | 7 ++ pandas/io/excel/_openpyxl.py | 15 +-- pandas/io/excel/_xlsxwriter.py | 6 +- pandas/tests/io/excel/test_odswriter.py | 10 ++ pandas/tests/io/excel/test_openpyxl.py | 68 +++++++++++ pandas/tests/io/excel/test_xlsxwriter.py | 85 +++++++++++++- 7 files changed, 172 insertions(+), 162 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 16588c2184f9a..6e5d4501a4b33 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2313,149 +2313,6 @@ def to_excel( if not isinstance(excel_writer, ExcelWriter): # we need to close the writer if we created it excel_writer.close() - ) -> None: - """ - Write {klass} to an Excel sheet. - - To write a single {klass} to an Excel .xlsx file it is only necessary to - specify a target file name. To write to multiple sheets it is necessary to - create an `ExcelWriter` object with a target file name, and specify a sheet - in the file to write to. - - Multiple sheets may be written to by specifying unique `sheet_name`. - With all data written to the file it is necessary to save the changes. - Note that creating an `ExcelWriter` object with a file name that already - exists will result in the contents of the existing file being erased. - - Parameters - ---------- - excel_writer : path-like, file-like, or ExcelWriter object - File path or existing ExcelWriter. - sheet_name : str, default 'Sheet1' - Name of sheet which will contain DataFrame. - na_rep : str, default '' - Missing data representation. - float_format : str, optional - Format string for floating point numbers. For example - ``float_format="%.2f"`` will format 0.1234 to 0.12. - columns : sequence or list of str, optional - Columns to write. - header : bool or list of str, default True - Write out the column names. If a list of string is given it is - assumed to be aliases for the column names. - index : bool, default True - Write row names (index). - index_label : str or sequence, optional - Column label for index column(s) if desired. If not specified, and - `header` and `index` are True, then the index names are used. A - sequence should be given if the DataFrame uses MultiIndex. - startrow : int, default 0 - Upper left cell row to dump data frame. - startcol : int, default 0 - Upper left cell column to dump data frame. - engine : str, optional - Write engine to use, 'openpyxl' or 'xlsxwriter'. You can also set this - via the options ``io.excel.xlsx.writer`` or - ``io.excel.xlsm.writer``. - - merge_cells : bool or 'columns', default False - If True, write MultiIndex index and columns as merged cells. - If 'columns', merge MultiIndex column cells only. - {encoding_parameter} - inf_rep : str, default 'inf' - Representation for infinity (there is no native representation for - infinity in Excel). - {verbose_parameter} - freeze_panes : tuple of int (length 2), optional - Specifies the one-based bottommost row and rightmost column that - is to be frozen. - {storage_options} - - .. versionadded:: {storage_options_versionadded} - {extra_parameters} - See Also - -------- - to_csv : Write DataFrame to a comma-separated values (csv) file. - ExcelWriter : Class for writing DataFrame objects into excel sheets. - read_excel : Read an Excel file into a pandas DataFrame. - read_csv : Read a comma-separated values (csv) file into DataFrame. - io.formats.style.Styler.to_excel : Add styles to Excel sheet. - - Notes - ----- - For compatibility with :meth:`~DataFrame.to_csv`, - to_excel serializes lists and dicts to strings before writing. - - Once a workbook has been saved it is not possible to write further - data without rewriting the whole workbook. - - pandas will check the number of rows, columns, - and cell character count does not exceed Excel's limitations. - All other limitations must be checked by the user. - - Examples - -------- - - Create, write to and save a workbook: - - >>> df1 = pd.DataFrame( - ... [["a", "b"], ["c", "d"]], - ... index=["row 1", "row 2"], - ... columns=["col 1", "col 2"], - ... ) - >>> df1.to_excel("output.xlsx") # doctest: +SKIP - - To specify the sheet name: - - >>> df1.to_excel("output.xlsx", sheet_name="Sheet_name_1") # doctest: +SKIP - - If you wish to write to more than one sheet in the workbook, it is - necessary to specify an ExcelWriter object: - - >>> df2 = df1.copy() - >>> with pd.ExcelWriter("output.xlsx") as writer: # doctest: +SKIP - ... df1.to_excel(writer, sheet_name="Sheet_name_1") - ... df2.to_excel(writer, sheet_name="Sheet_name_2") - - ExcelWriter can also be used to append to an existing Excel file: - - >>> with pd.ExcelWriter("output.xlsx", mode="a") as writer: # doctest: +SKIP - ... df1.to_excel(writer, sheet_name="Sheet_name_3") - - To set the library that is used to write the Excel file, - you can pass the `engine` keyword (the default engine is - automatically chosen depending on the file extension): - - >>> df1.to_excel("output1.xlsx", engine="xlsxwriter") # doctest: +SKIP - """ - if engine_kwargs is None: - engine_kwargs = {} - - df = self if isinstance(self, ABCDataFrame) else self.to_frame() - - from pandas.io.formats.excel import ExcelFormatter - - formatter = ExcelFormatter( - df, - na_rep=na_rep, - cols=columns, - header=header, - float_format=float_format, - index=index, - index_label=index_label, - merge_cells=merge_cells, - inf_rep=inf_rep, - ) - formatter.write( - excel_writer, - sheet_name=sheet_name, - startrow=startrow, - startcol=startcol, - freeze_panes=freeze_panes, - engine=engine, - storage_options=storage_options, - engine_kwargs=engine_kwargs, - ) @final @doc( diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py index e9a06076f3aff..dd5112432ae69 100644 --- a/pandas/io/excel/_odswriter.py +++ b/pandas/io/excel/_odswriter.py @@ -99,10 +99,17 @@ def _write_cells( startrow: int = 0, startcol: int = 0, freeze_panes: tuple[int, int] | None = None, + autofilter: bool = False, ) -> None: """ Write the frame cells using odf """ + if autofilter: + raise NotImplementedError( + "Autofilter is not supported with the 'odf' engine. " + "Please use 'openpyxl' or 'xlsxwriter' engine instead." + ) + from odf.table import ( Table, TableCell, diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index 0293c9d99c679..6376bd3bdf2d8 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -552,15 +552,12 @@ def _write_cells( setattr(xcell, k, v) if autofilter and min_r is not None and min_c is not None and max_r is not None and max_c is not None: - try: - # Convert numeric bounds to Excel-style range e.g. A1:D10 - from openpyxl.utils import get_column_letter - - start_ref = f"{get_column_letter(min_c)}{min_r}" - end_ref = f"{get_column_letter(max_c)}{max_r}" - wks.auto_filter.ref = f"{start_ref}:{end_ref}" - except Exception: - pass + # Convert numeric bounds to Excel-style range e.g. A1:D10 + from openpyxl.utils import get_column_letter + + start_ref = f"{get_column_letter(min_c)}{min_r}" + end_ref = f"{get_column_letter(max_c)}{max_r}" + wks.auto_filter.ref = f"{start_ref}:{end_ref}" class OpenpyxlReader(BaseExcelReader["Workbook"]): diff --git a/pandas/io/excel/_xlsxwriter.py b/pandas/io/excel/_xlsxwriter.py index 851ccf0a8fd32..d9df2595cb32c 100644 --- a/pandas/io/excel/_xlsxwriter.py +++ b/pandas/io/excel/_xlsxwriter.py @@ -304,8 +304,4 @@ def _write_cells( if autofilter and min_r is not None and min_c is not None and max_r is not None and max_c is not None: # Apply autofilter over the used range. xlsxwriter uses 0-based indices. - try: - wks.autofilter(min_r, min_c, max_r, max_c) - except Exception: - # Be resilient if engine version doesn't support or range invalid - pass + wks.autofilter(min_r, min_c, max_r, max_c) diff --git a/pandas/tests/io/excel/test_odswriter.py b/pandas/tests/io/excel/test_odswriter.py index 7843bb59f97cf..502c04f9781cc 100644 --- a/pandas/tests/io/excel/test_odswriter.py +++ b/pandas/tests/io/excel/test_odswriter.py @@ -104,3 +104,13 @@ def test_cell_value_type( cell = sheet_cells[0] assert cell.attributes.get((OFFICENS, "value-type")) == cell_value_type assert cell.attributes.get((OFFICENS, cell_value_attribute)) == cell_value + + +def test_to_excel_autofilter_odfpy_raises(tmp_excel): + # Test that autofilter=True raises NotImplementedError with odfpy engine + from pandas import DataFrame + + df = DataFrame({"A": [1, 2], "B": [3, 4]}) + msg = "Autofilter is not supported with the 'odf' engine" + with pytest.raises(NotImplementedError, match=msg): + df.to_excel(tmp_excel, engine="odf", autofilter=True) \ No newline at end of file diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py index f3dc1c857a4ad..1fb6fdae3b5de 100644 --- a/pandas/tests/io/excel/test_openpyxl.py +++ b/pandas/tests/io/excel/test_openpyxl.py @@ -165,6 +165,74 @@ def test_to_excel_autofilter_openpyxl(tmp_excel): # Expect filter over the full range, e.g. A1:B3 (header + 2 rows) assert ws.auto_filter is not None assert ws.auto_filter.ref is not None + # Verify filter covers all columns (A and B) + assert "A" in ws.auto_filter.ref + assert "B" in ws.auto_filter.ref + + +def test_to_excel_autofilter_startrow_startcol_openpyxl(tmp_excel): + # Test autofilter with nonzero startrow and startcol + df = DataFrame({"A": [1, 2], "B": [3, 4]}) + df.to_excel( + tmp_excel, + engine="openpyxl", + index=False, + autofilter=True, + startrow=2, + startcol=1, + ) + + with contextlib.closing(openpyxl.load_workbook(tmp_excel)) as wb: + ws = wb[wb.sheetnames[0]] + assert ws.auto_filter is not None + assert ws.auto_filter.ref is not None + # Filter should be offset by startrow=2 and startcol=1 (B3:D5) + assert ws.auto_filter.ref.startswith("B") + assert "3" in ws.auto_filter.ref + + +def test_to_excel_autofilter_multiindex_merge_cells_openpyxl(tmp_excel): + # Test autofilter with MultiIndex columns and merge_cells=True + df = DataFrame( + [[1, 2, 3, 4], [5, 6, 7, 8]], + columns=pd.MultiIndex.from_tuples( + [("A", "a"), ("A", "b"), ("B", "a"), ("B", "b")] + ), + ) + df.to_excel( + tmp_excel, + engine="openpyxl", + index=False, + autofilter=True, + merge_cells=True, + ) + + with contextlib.closing(openpyxl.load_workbook(tmp_excel)) as wb: + ws = wb[wb.sheetnames[0]] + assert ws.auto_filter is not None + assert ws.auto_filter.ref is not None + + +def test_to_excel_autofilter_multiindex_no_merge_openpyxl(tmp_excel): + # Test autofilter with MultiIndex columns and merge_cells=False + df = DataFrame( + [[1, 2, 3, 4], [5, 6, 7, 8]], + columns=pd.MultiIndex.from_tuples( + [("A", "a"), ("A", "b"), ("B", "a"), ("B", "b")] + ), + ) + df.to_excel( + tmp_excel, + engine="openpyxl", + index=False, + autofilter=True, + merge_cells=False, + ) + + with contextlib.closing(openpyxl.load_workbook(tmp_excel)) as wb: + ws = wb[wb.sheetnames[0]] + assert ws.auto_filter is not None + assert ws.auto_filter.ref is not None @pytest.mark.parametrize("kwarg_name", ["read_only", "data_only"]) diff --git a/pandas/tests/io/excel/test_xlsxwriter.py b/pandas/tests/io/excel/test_xlsxwriter.py index 8e19df46ff1f3..62822ae3d7291 100644 --- a/pandas/tests/io/excel/test_xlsxwriter.py +++ b/pandas/tests/io/excel/test_xlsxwriter.py @@ -86,18 +86,93 @@ def test_book_and_sheets_consistent(tmp_excel): assert writer.sheets == {"test_name": sheet} -def test_to_excel(tmp_excel): - DataFrame([[1, 2]]).to_excel(tmp_excel) - - def test_to_excel_autofilter_xlsxwriter(tmp_excel): - pytest.importorskip("xlsxwriter") openpyxl = pytest.importorskip("openpyxl") df = DataFrame({"A": [1, 2], "B": [3, 4]}) # Write with xlsxwriter, verify via openpyxl that an autofilter exists df.to_excel(tmp_excel, engine="xlsxwriter", index=False, autofilter=True) + wb = openpyxl.load_workbook(tmp_excel) + try: + ws = wb[wb.sheetnames[0]] + assert ws.auto_filter is not None + assert ws.auto_filter.ref is not None + # Verify filter covers all columns (A and B) + assert "A" in ws.auto_filter.ref + assert "B" in ws.auto_filter.ref + finally: + wb.close() + + +def test_to_excel_autofilter_startrow_startcol_xlsxwriter(tmp_excel): + openpyxl = pytest.importorskip("openpyxl") + + df = DataFrame({"A": [1, 2], "B": [3, 4]}) + df.to_excel( + tmp_excel, + engine="xlsxwriter", + index=False, + autofilter=True, + startrow=2, + startcol=1, + ) + + wb = openpyxl.load_workbook(tmp_excel) + try: + ws = wb[wb.sheetnames[0]] + assert ws.auto_filter is not None + assert ws.auto_filter.ref is not None + # Filter should be offset by startrow=2 and startcol=1 (B3:D5) + assert ws.auto_filter.ref.startswith("B") + assert "3" in ws.auto_filter.ref + finally: + wb.close() + + +def test_to_excel_autofilter_multiindex_merge_cells_xlsxwriter(tmp_excel): + openpyxl = pytest.importorskip("openpyxl") + + df = DataFrame( + [[1, 2, 3, 4], [5, 6, 7, 8]], + columns=pd.MultiIndex.from_tuples( + [("A", "a"), ("A", "b"), ("B", "a"), ("B", "b")] + ), + ) + df.to_excel( + tmp_excel, + engine="xlsxwriter", + index=False, + autofilter=True, + merge_cells=True, + ) + + wb = openpyxl.load_workbook(tmp_excel) + try: + ws = wb[wb.sheetnames[0]] + assert ws.auto_filter is not None + assert ws.auto_filter.ref is not None + finally: + wb.close() + + +def test_to_excel_autofilter_multiindex_no_merge_xlsxwriter(tmp_excel): + openpyxl = pytest.importorskip("openpyxl") + + df = DataFrame( + [[1, 2, 3, 4], [5, 6, 7, 8]], + columns=pd.MultiIndex.from_tuples( + [("A", "a"), ("A", "b"), ("B", "a"), ("B", "b")] + ), + ) + df.to_excel( + tmp_excel, + engine="xlsxwriter", + index=False, + autofilter=True, + merge_cells=False, + ) + wb = openpyxl.load_workbook(tmp_excel) try: ws = wb[wb.sheetnames[0]] From e4012f037806c2fda9d913121d7fe45591977a3e Mon Sep 17 00:00:00 2001 From: antznette1 Date: Tue, 4 Nov 2025 23:29:01 +0100 Subject: [PATCH 4/5] FIX: Fix docstring formatting, missing imports, and undefined variables --- pandas/core/generic.py | 25 +++++++++++++++--------- pandas/tests/io/excel/test_xlsxwriter.py | 3 ++- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 66e292476c21f..0553141d7d151 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -182,6 +182,8 @@ Window, ) +# Import ExcelFormatter at runtime since it's used in to_excel method +from pandas.io.formats.excel import ExcelFormatter from pandas.io.formats.format import ( DataFrameFormatter, DataFrameRenderer, @@ -189,8 +191,8 @@ from pandas.io.formats.printing import pprint_thing if TYPE_CHECKING: - from collections.abc import Callable from collections.abc import ( + Callable, Hashable, Iterator, Mapping, @@ -202,15 +204,17 @@ from pandas import ( DataFrame, - ExcelWriter, HDFStore, Series, ) from pandas.core.indexers.objects import BaseIndexer from pandas.core.resample import Resampler +# Import ExcelWriter at runtime since it's used in to_excel method import textwrap +from pandas import ExcelWriter + # goal is to be able to define the docs close to function, while still being # able to share _shared_docs = {**_shared_docs} @@ -773,6 +777,7 @@ def _set_axis(self, axis: AxisInt, labels: AnyArrayLike | list) -> None: """ labels = ensure_index(labels) self._mgr.set_axis(axis, labels) + @final @doc(klass=_shared_doc_kwargs["klass"]) def droplevel(self, level: IndexLabel, axis: Axis = 0) -> Self: @@ -1514,6 +1519,7 @@ def __bool__(self) -> NoReturn: f"The truth value of a {type(self).__name__} is ambiguous. " "Use a.empty, a.bool(), a.item(), a.any() or a.all()." ) + @final def abs(self) -> Self: """ @@ -2265,7 +2271,7 @@ def to_excel( Examples -------- - >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + >>> df = pd.DataFrame({{"A": [1, 2, 3], "B": [4, 5, 6]}}) >>> df.to_excel("pandas_simple.xlsx") >>> df.to_excel("pandas_simple.xlsx", engine="openpyxl") """ @@ -2279,11 +2285,7 @@ def to_excel( excel_writer = ExcelWriter( excel_writer, engine=engine, - mode=mode, - if_sheet_exists=if_sheet_exists, engine_kwargs=engine_kwargs, - date_format=date_format, - datetime_format=datetime_format, storage_options=storage_options, ) @@ -5616,6 +5618,7 @@ def f(x) -> bool: return self.loc(axis=axis)[values] else: raise TypeError("Must pass either `items`, `like`, or `regex`") + @final def head(self, n: int = 5) -> Self: """ @@ -6088,7 +6091,8 @@ def __finalize__(self, other, method: str | None = None, **kwargs) -> Self: ---------- other : the object from which to get the attributes that we are going to propagate. If ``other`` has an ``input_objs`` attribute, then - this attribute must contain an iterable of objects, each with an ``attrs`` attribute. + this attribute must contain an iterable of objects, each with an + ``attrs`` attribute. method : str, optional A passed method name providing context on where ``__finalize__`` was called. @@ -9693,6 +9697,7 @@ def align( left = left.__finalize__(self) right = right.__finalize__(other) return left, right + @final def _align_frame( self, @@ -12811,6 +12816,8 @@ def last_valid_index(self) -> Hashable: The required number of valid values to perform the operation. If fewer than ``min_count`` non-NA values are present the result will be NA. """ + + def make_doc(name: str, ndim: int) -> str: """ Generate the docstring for a Series/DataFrame reduction. @@ -13177,4 +13184,4 @@ def make_doc(name: str, ndim: int) -> str: examples=examples, **kwargs, ) - return docstr \ No newline at end of file + return docstr diff --git a/pandas/tests/io/excel/test_xlsxwriter.py b/pandas/tests/io/excel/test_xlsxwriter.py index 62822ae3d7291..2637337e3f0c7 100644 --- a/pandas/tests/io/excel/test_xlsxwriter.py +++ b/pandas/tests/io/excel/test_xlsxwriter.py @@ -3,6 +3,7 @@ import pytest +import pandas as pd from pandas import DataFrame from pandas.io.excel import ExcelWriter @@ -179,4 +180,4 @@ def test_to_excel_autofilter_multiindex_no_merge_xlsxwriter(tmp_excel): assert ws.auto_filter is not None assert ws.auto_filter.ref is not None finally: - wb.close() \ No newline at end of file + wb.close() From 763090d004459b8cdeaf0f6f53fdefc3fc883ec7 Mon Sep 17 00:00:00 2001 From: antznette1 Date: Wed, 5 Nov 2025 01:03:20 +0100 Subject: [PATCH 5/5] FIX: Move ExcelWriter and ExcelFormatter imports to avoid circular import - Keep ExcelWriter and ExcelFormatter in TYPE_CHECKING for type hints - Import both at runtime inside to_excel method to avoid circular import --- pandas/core/generic.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0553141d7d151..32976a3cc578e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -182,8 +182,6 @@ Window, ) -# Import ExcelFormatter at runtime since it's used in to_excel method -from pandas.io.formats.excel import ExcelFormatter from pandas.io.formats.format import ( DataFrameFormatter, DataFrameRenderer, @@ -204,17 +202,15 @@ from pandas import ( DataFrame, + ExcelWriter, HDFStore, Series, ) from pandas.core.indexers.objects import BaseIndexer from pandas.core.resample import Resampler -# Import ExcelWriter at runtime since it's used in to_excel method import textwrap -from pandas import ExcelWriter - # goal is to be able to define the docs close to function, while still being # able to share _shared_docs = {**_shared_docs} @@ -2275,6 +2271,9 @@ def to_excel( >>> df.to_excel("pandas_simple.xlsx") >>> df.to_excel("pandas_simple.xlsx", engine="openpyxl") """ + # Import ExcelWriter here to avoid circular import + from pandas import ExcelWriter + if isinstance(excel_writer, ExcelWriter): if engine is not None: raise ValueError( @@ -2289,6 +2288,9 @@ def to_excel( storage_options=storage_options, ) + # Import ExcelFormatter here to avoid circular import + from pandas.io.formats.excel import ExcelFormatter + formatter = ExcelFormatter( self, na_rep=na_rep,