CLN: Make ExcelWriter more pluggable

Make ExcelWriter an ABC and add ExcelWriter config to core/config_init.
ENH: Allow Panel.to_excel to pass keyword arguments
pandas-dev · Sep 13, 2013 · 42e06a7 · 42e06a7
1 parent c0198b4
commit 42e06a7
Show file tree

Hide file tree

Showing 9 changed files with 415 additions and 139 deletions.
diff --git a/doc/source/io.rst b/doc/source/io.rst
@@ -1681,6 +1681,24 @@ one can use the ExcelWriter class, as in the following example:
    df2.to_excel(writer, sheet_name='sheet2')
    writer.save()
 
+.. _io.excel.writers:
+
+Excel writer engines
+~~~~~~~~~~~~~~~~~~~~
+
+.. versionadded:: 0.13
+
+``pandas`` chooses an Excel writer via two methods:
+
+1. the ``engine`` keyword argument
+2. the filename extension (via the default specified in config options)
+
+``pandas`` currently supports only ``openpyxl`` for ``.xlsx`` and ``.xlsm`` files
+and ``xlwt`` for ``.xls`` files.  If you have multiple engines installed, you can
+choose the engine to use by default via the options ``io.excel.xlsx.writer``,
+``io.excel.xlsm.writer``, and ``io.excel.xls.writer``.
+
+
 .. _io.hdf5:
 
 HDF5 (PyTables)

diff --git a/doc/source/release.rst b/doc/source/release.rst
@@ -106,6 +106,13 @@ Improvements to existing features
   - Add ``axis`` and ``level`` keywords to ``where``, so that the ``other`` argument
     can now be an alignable pandas object.
   - ``to_datetime`` with a format of '%Y%m%d' now parses much faster
+  - It's now easier to hook new Excel writers into pandas (just subclass
+    ``ExcelWriter`` and register your engine). You can specify an ``engine`` in
+    ``to_excel`` or in ``ExcelWriter``.  You can also specify which writers you
+    want to use by default with config options ``io.excel.xlsx.writer`` and
+    ``io.excel.xls.writer``. (:issue:`4745`, :issue:`4750`)
+  - ``Panel.to_excel()`` now accepts keyword arguments that will be passed to
+    its ``DataFrame``'s ``to_excel()`` methods. (:issue:`4750`)
 
 API Changes
 ~~~~~~~~~~~
@@ -194,6 +201,7 @@ API Changes
   - default for ``tupleize_cols`` is now ``False`` for both ``to_csv`` and ``read_csv``. Fair warning in 0.12 (:issue:`3604`)
   - moved timedeltas support to pandas.tseries.timedeltas.py; add timedeltas string parsing,
     add top-level ``to_timedelta`` function
+  - ``NDFrame`` is now compatible with Python's top-level ``abs()`` function (:issue:`4821`).
 
 Internal Refactoring
 ~~~~~~~~~~~~~~~~~~~~

diff --git a/pandas/core/config.py b/pandas/core/config.py
@@ -73,7 +73,7 @@ def _get_single_key(pat, silent):
     if len(keys) == 0:
         if not silent:
             _warn_if_deprecated(pat)
-        raise KeyError('No such keys(s)')
+        raise KeyError('No such keys(s): %r' % pat)
     if len(keys) > 1:
         raise KeyError('Pattern matched multiple keys')
     key = keys[0]

diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
@@ -279,3 +279,24 @@ def use_inf_as_null_cb(key):
 with cf.config_prefix('mode'):
     cf.register_option('use_inf_as_null', False, use_inf_as_null_doc,
                        cb=use_inf_as_null_cb)
+
+
+# Set up the io.excel specific configuration.
+writer_engine_doc = """
+: string
+    The default Excel writer engine for '{ext}' files. Available options: '{default}' (the default){others}.
+"""
+
+with cf.config_prefix('io.excel'):
+    # going forward, will be additional writers
+    for ext, options in [('xls', ['xlwt']),
+                         ('xlsm', ['openpyxl']),
+                         ('xlsx', ['openpyxl'])]:
+        default = options.pop(0)
+        if options:
+            options = " " + ", ".join(options)
+        else:
+            options = ""
+        doc = writer_engine_doc.format(ext=ext, default=default,
+                                       others=options)
+        cf.register_option(ext + '.writer', default, doc, validator=str)
diff --git a/pandas/core/format.py b/pandas/core/format.py
@@ -1146,15 +1146,8 @@ class ExcelFormatter(object):
             sequence should be given if the DataFrame uses MultiIndex.
     """
 
-    def __init__(self,
-                 df,
-                 na_rep='',
-                 float_format=None,
-                 cols=None,
-                 header=True,
-                 index=True,
-                 index_label=None
-                 ):
+    def __init__(self, df, na_rep='', float_format=None, cols=None,
+                 header=True, index=True, index_label=None):
         self.df = df
         self.rowcounter = 0
         self.na_rep = na_rep

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -1354,7 +1354,7 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None,
 
     def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='',
                  float_format=None, cols=None, header=True, index=True,
-                 index_label=None, startrow=0, startcol=0):
+                 index_label=None, startrow=0, startcol=0, engine=None):
         """
         Write DataFrame to an excel sheet
 
@@ -1381,6 +1381,10 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='',
             sequence should be given if the DataFrame uses MultiIndex.
         startrow : upper left cell row to dump data frame
         startcol : upper left cell column to dump data frame
+        engine : string, default None
+            write engine to use - you can also set this via the options
+            ``io.excel.xlsx.writer``, ``io.excel.xls.writer``, and
+            ``io.excel.xlsm.writer``.
 
 
         Notes
@@ -1396,7 +1400,7 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='',
         from pandas.io.excel import ExcelWriter
         need_save = False
         if isinstance(excel_writer, compat.string_types):
-            excel_writer = ExcelWriter(excel_writer)
+            excel_writer = ExcelWriter(excel_writer, engine=engine)
             need_save = True
 
         formatter = fmt.ExcelFormatter(self,

diff --git a/pandas/core/panel.py b/pandas/core/panel.py
@@ -458,22 +458,53 @@ def to_sparse(self, fill_value=None, kind='block'):
                            default_kind=kind,
                            default_fill_value=fill_value)
 
-    def to_excel(self, path, na_rep=''):
+    def to_excel(self, path, na_rep='', engine=None, **kwargs):
         """
         Write each DataFrame in Panel to a separate excel sheet
 
         Parameters
         ----------
-        excel_writer : string or ExcelWriter object
+        path : string or ExcelWriter object
             File path or existing ExcelWriter
         na_rep : string, default ''
             Missing data representation
+        engine : string, default None
+            write engine to use - you can also set this via the options
+            ``io.excel.xlsx.writer``, ``io.excel.xls.writer``, and
+            ``io.excel.xlsm.writer``.
+
+        Keyword Arguments
+        -----------------
+        float_format : string, default None
+            Format string for floating point numbers
+        cols : sequence, optional
+            Columns to write
+        header : boolean or list of string, default True
+            Write out column names. If a list of string is given it is
+            assumed to be aliases for the column names
+        index : boolean, default True
+            Write row names (index)
+        index_label : string or sequence, default None
+            Column label for index column(s) if desired. If None is given, and
+            `header` and `index` are True, then the index names are used. A
+            sequence should be given if the DataFrame uses MultiIndex.
+        startrow : upper left cell row to dump data frame
+        startcol : upper left cell column to dump data frame
+
+        Keyword arguments (and na_rep) are passed to the ``to_excel`` method
+        for each DataFrame written.
         """
         from pandas.io.excel import ExcelWriter
-        writer = ExcelWriter(path)
+
+        if isinstance(path, compat.string_types):
+            writer = ExcelWriter(path, engine=engine)
+        else:
+            writer = path
+        kwargs['na_rep'] = na_rep
+
         for item, df in compat.iteritems(self):
             name = str(item)
-            df.to_excel(writer, name, na_rep=na_rep)
+            df.to_excel(writer, name, **kwargs)
         writer.save()
 
     def as_matrix(self):