Skip to content

Commit

Permalink
ENH: kind option for Excel file. close #2613
Browse files Browse the repository at this point in the history
  • Loading branch information
wesm committed Jan 20, 2013
1 parent ad9bba3 commit 61f7320
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 6 deletions.
3 changes: 3 additions & 0 deletions RELEASE.rst
Expand Up @@ -63,6 +63,8 @@ pandas 0.10.1
- Attempt to parse ISO8601 format dates when parse_dates=True in read_csv for
major performance boost in such cases (GH2698_)
- Add methods ``neg`` and ``inv`` to Series
- Implement ``kind`` option in ``ExcelFile`` to indicate whether it's an XLS
or XLSX file (GH2613_)

**Bug fixes**

Expand Down Expand Up @@ -114,6 +116,7 @@ pandas 0.10.1
.. _GH2599: https://github.com/pydata/pandas/issues/2599
.. _GH2604: https://github.com/pydata/pandas/issues/2604
.. _GH2576: https://github.com/pydata/pandas/issues/2576
.. _GH2613: https://github.com/pydata/pandas/issues/2613
.. _GH2616: https://github.com/pydata/pandas/issues/2616
.. _GH2621: https://github.com/pydata/pandas/issues/2621
.. _GH2625: https://github.com/pydata/pandas/issues/2625
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/indexing.py
Expand Up @@ -298,6 +298,9 @@ def _getitem_lowerdim(self, tup):
except KeyError:
raise e1

if len(tup) > self.obj.ndim:
raise IndexingError

# to avoid wasted computation
# df.ix[d1:d2, 0] -> columns first (True)
# df.ix[0, ['C', 'B', 'A']] -> rows first (False)
Expand Down
26 changes: 20 additions & 6 deletions pandas/io/parsers.py
Expand Up @@ -1798,18 +1798,23 @@ class ExcelFile(object):
----------
path : string or file-like object
Path to xls or xlsx file
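kind : {'xls', 'xlsx', None}, default None
    File format to assume. If None, a string path is treated as XLS when
    it ends with '.xls' and as XLSX otherwise; a file-like object is tried
    as XLS first, then as XLSX.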
"""
def __init__(self, path_or_buf):
self.use_xlsx = True
def __init__(self, path_or_buf, kind=None):
self.kind = kind
self.use_xlsx = kind == 'xls'

self.path_or_buf = path_or_buf
self.tmpfile = None

if isinstance(path_or_buf, basestring):
if path_or_buf.endswith('.xls'):
if kind == 'xls' or (kind is None and
path_or_buf.endswith('.xls')):
self.use_xlsx = False
import xlrd
self.book = xlrd.open_workbook(path_or_buf)
else:
self.use_xlsx = True
try:
from openpyxl.reader.excel import load_workbook
self.book = load_workbook(path_or_buf, use_iterators=True)
Expand All @@ -1818,14 +1823,23 @@ def __init__(self, path_or_buf):
else:
data = path_or_buf.read()

try:
if self.kind == 'xls':
import xlrd
self.book = xlrd.open_workbook(file_contents=data)
self.use_xlsx = False
except Exception:
elif self.kind == 'xlsx':
from openpyxl.reader.excel import load_workbook
buf = py3compat.BytesIO(data)
self.book = load_workbook(buf, use_iterators=True)
else:
try:
import xlrd
self.book = xlrd.open_workbook(file_contents=data)
self.use_xlsx = False
except Exception:
self.use_xlsx = True
from openpyxl.reader.excel import load_workbook
buf = py3compat.BytesIO(data)
self.book = load_workbook(buf, use_iterators=True)

def __repr__(self):
return object.__repr__(self)
Expand Down
14 changes: 14 additions & 0 deletions pandas/io/tests/test_excel.py
Expand Up @@ -234,6 +234,20 @@ def test_xlsx_table(self):
tm.assert_frame_equal(df4, df.ix[:-1])
tm.assert_frame_equal(df4, df5)

def test_specify_kind(self):
    """Check that ``ExcelFile`` honours an explicit ``kind`` argument.

    A ``kind`` that contradicts the real file format must raise, both for
    string paths and for open file objects; a matching ``kind`` on a file
    object must load without error.
    """
    xlsx_file = os.path.join(self.dirpath, 'test.xlsx')
    xls_file = os.path.join(self.dirpath, 'test.xls')

    # String paths: forcing the wrong reader has to fail.
    self.assertRaises(Exception, ExcelFile, xlsx_file, kind='xls')
    self.assertRaises(Exception, ExcelFile, xls_file, kind='xlsx')

    # File objects are opened in ``with`` blocks so each handle is closed
    # even when the constructor raises (the bare ``open`` calls leaked them).
    with open(xlsx_file, 'rb') as fh:
        ExcelFile(fh, kind='xlsx')
    with open(xls_file, 'rb') as fh:
        ExcelFile(fh, kind='xls')

    with open(xlsx_file, 'rb') as fh:
        self.assertRaises(Exception, ExcelFile, fh, kind='xls')
    with open(xls_file, 'rb') as fh:
        self.assertRaises(Exception, ExcelFile, fh, kind='xlsx')

def read_csv(self, *args, **kwds):
kwds = kwds.copy()
kwds['engine'] = 'python'
Expand Down

0 comments on commit 61f7320

Please sign in to comment.