Skip to content

Commit

Permalink
CLN: Make ExcelWriter more pluggable
Browse files Browse the repository at this point in the history
Make ExcelWriter an ABC and add ExcelWriter config to core/config_init

ENH: Allow Panel.to_excel to pass keyword arguments
  • Loading branch information
jtratner committed Sep 13, 2013
1 parent c0198b4 commit 42e06a7
Show file tree
Hide file tree
Showing 9 changed files with 415 additions and 139 deletions.
18 changes: 18 additions & 0 deletions doc/source/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1681,6 +1681,24 @@ one can use the ExcelWriter class, as in the following example:
df2.to_excel(writer, sheet_name='sheet2')
writer.save()
.. _io.excel.writers:
Excel writer engines
~~~~~~~~~~~~~~~~~~~~
.. versionadded:: 0.13
``pandas`` chooses an Excel writer via two methods:
1. the ``engine`` keyword argument
2. the filename extension (via the default specified in config options)
``pandas`` only supports ``openpyxl`` for ``.xlsx`` and ``.xlsm`` files and
``xlwt`` for ``.xls`` files. If you have multiple engines installed, you can choose the
engine to use by default via the options ``io.excel.xlsx.writer`` and
``io.excel.xls.writer``.
.. _io.hdf5:
HDF5 (PyTables)
Expand Down
8 changes: 8 additions & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,13 @@ Improvements to existing features
- Add ``axis`` and ``level`` keywords to ``where``, so that the ``other`` argument
can now be an alignable pandas object.
- ``to_datetime`` with a format of '%Y%m%d' now parses much faster
- It's now easier to hook new Excel writers into pandas (just subclass
``ExcelWriter`` and register your engine). You can specify an ``engine`` in
``to_excel`` or in ``ExcelWriter``. You can also specify which writers you
want to use by default with config options ``io.excel.xlsx.writer`` and
``io.excel.xls.writer``. (:issue:`4745`, :issue:`4750`)
- ``Panel.to_excel()`` now accepts keyword arguments that will be passed to
its ``DataFrame``'s ``to_excel()`` methods. (:issue:`4750`)

API Changes
~~~~~~~~~~~
Expand Down Expand Up @@ -194,6 +201,7 @@ API Changes
- default for ``tupleize_cols`` is now ``False`` for both ``to_csv`` and ``read_csv``. Fair warning in 0.12 (:issue:`3604`)
- moved timedeltas support to pandas.tseries.timedeltas.py; add timedeltas string parsing,
add top-level ``to_timedelta`` function
- ``NDFrame`` is now compatible with Python's top-level ``abs()`` function (:issue:`4821`).

Internal Refactoring
~~~~~~~~~~~~~~~~~~~~
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def _get_single_key(pat, silent):
if len(keys) == 0:
if not silent:
_warn_if_deprecated(pat)
raise KeyError('No such keys(s)')
raise KeyError('No such keys(s): %r' % pat)
if len(keys) > 1:
raise KeyError('Pattern matched multiple keys')
key = keys[0]
Expand Down
21 changes: 21 additions & 0 deletions pandas/core/config_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,3 +279,24 @@ def use_inf_as_null_cb(key):
with cf.config_prefix('mode'):
cf.register_option('use_inf_as_null', False, use_inf_as_null_doc,
cb=use_inf_as_null_cb)


# Set up the io.excel specific configuration.
# Template for the option description; filled in once per file extension.
writer_engine_doc = """
: string
The default Excel writer engine for '{ext}' files. Available options: '{default}' (the default){others}.
"""

with cf.config_prefix('io.excel'):
    # Each extension maps to its candidate engines; the first entry is the
    # default. More writers are expected to be added to these lists later.
    for ext, options in [('xls', ['xlwt']),
                         ('xlsm', ['openpyxl']),
                         ('xlsx', ['openpyxl'])]:
        default = options[0]
        alternatives = options[1:]
        # Only mention alternative engines in the doc text when some exist.
        others = " " + ", ".join(alternatives) if alternatives else ""
        doc = writer_engine_doc.format(ext=ext, default=default,
                                       others=others)
        # Registers e.g. ``io.excel.xlsx.writer`` with its default engine.
        cf.register_option(ext + '.writer', default, doc, validator=str)
11 changes: 2 additions & 9 deletions pandas/core/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1146,15 +1146,8 @@ class ExcelFormatter(object):
sequence should be given if the DataFrame uses MultiIndex.
"""

def __init__(self,
df,
na_rep='',
float_format=None,
cols=None,
header=True,
index=True,
index_label=None
):
def __init__(self, df, na_rep='', float_format=None, cols=None,
header=True, index=True, index_label=None):
self.df = df
self.rowcounter = 0
self.na_rep = na_rep
Expand Down
8 changes: 6 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1354,7 +1354,7 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None,

def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='',
float_format=None, cols=None, header=True, index=True,
index_label=None, startrow=0, startcol=0):
index_label=None, startrow=0, startcol=0, engine=None):
"""
Write DataFrame to an Excel sheet
Expand All @@ -1381,6 +1381,10 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='',
sequence should be given if the DataFrame uses MultiIndex.
startrow : upper left cell row to dump data frame
startcol : upper left cell column to dump data frame
engine : string, default None
write engine to use - you can also set this via the options
``io.excel.xlsx.writer``, ``io.excel.xls.writer``, and
``io.excel.xlsm.writer``.
Notes
Expand All @@ -1396,7 +1400,7 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='',
from pandas.io.excel import ExcelWriter
need_save = False
if isinstance(excel_writer, compat.string_types):
excel_writer = ExcelWriter(excel_writer)
excel_writer = ExcelWriter(excel_writer, engine=engine)
need_save = True

formatter = fmt.ExcelFormatter(self,
Expand Down
39 changes: 35 additions & 4 deletions pandas/core/panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -458,22 +458,53 @@ def to_sparse(self, fill_value=None, kind='block'):
default_kind=kind,
default_fill_value=fill_value)

def to_excel(self, path, na_rep=''):
def to_excel(self, path, na_rep='', engine=None, **kwargs):
"""
Write each DataFrame in Panel to a separate Excel sheet
Parameters
----------
excel_writer : string or ExcelWriter object
path : string or ExcelWriter object
File path or existing ExcelWriter
na_rep : string, default ''
Missing data representation
engine : string, default None
write engine to use - you can also set this via the options
``io.excel.xlsx.writer``, ``io.excel.xls.writer``, and
``io.excel.xlsm.writer``.
Keyword Arguments
-----------------
float_format : string, default None
Format string for floating point numbers
cols : sequence, optional
Columns to write
header : boolean or list of string, default True
Write out column names. If a list of string is given it is
assumed to be aliases for the column names
index : boolean, default True
Write row names (index)
index_label : string or sequence, default None
Column label for index column(s) if desired. If None is given, and
`header` and `index` are True, then the index names are used. A
sequence should be given if the DataFrame uses MultiIndex.
startrow : upper left cell row to dump data frame
startcol : upper left cell column to dump data frame
Keyword arguments (and na_rep) are passed to the ``to_excel`` method
for each DataFrame written.
"""
from pandas.io.excel import ExcelWriter
writer = ExcelWriter(path)

if isinstance(path, compat.string_types):
writer = ExcelWriter(path, engine=engine)
else:
writer = path
kwargs['na_rep'] = na_rep

for item, df in compat.iteritems(self):
name = str(item)
df.to_excel(writer, name, na_rep=na_rep)
df.to_excel(writer, name, **kwargs)
writer.save()

def as_matrix(self):
Expand Down
Loading

0 comments on commit 42e06a7

Please sign in to comment.