diff --git a/RELEASE.rst b/RELEASE.rst
index 9cd91c6ef7401..6d34faa2aa93b 100644
--- a/RELEASE.rst
+++ b/RELEASE.rst
@@ -63,6 +63,8 @@ pandas 0.10.1
   - Attempt to parse ISO8601 format dates when parse_dates=True in read_csv for
     major performance boost in such cases (GH2698_)
   - Add methods ``neg`` and ``inv`` to Series
+  - Implement ``kind`` option in ``ExcelFile`` to indicate whether it's an XLS
+    or XLSX file (GH2613_)
 
 **Bug fixes**
 
@@ -114,6 +116,7 @@ pandas 0.10.1
 .. _GH2599: https://github.com/pydata/pandas/issues/2599
 .. _GH2604: https://github.com/pydata/pandas/issues/2604
 .. _GH2576: https://github.com/pydata/pandas/issues/2576
+.. _GH2613: https://github.com/pydata/pandas/issues/2613
 .. _GH2616: https://github.com/pydata/pandas/issues/2616
 .. _GH2621: https://github.com/pydata/pandas/issues/2621
 .. _GH2625: https://github.com/pydata/pandas/issues/2625
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index a2aca3be39811..6966b0ab76aed 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -298,6 +298,9 @@ def _getitem_lowerdim(self, tup):
                 except KeyError:
                     raise e1
 
+        if len(tup) > self.obj.ndim:
+            raise IndexingError
+
         # to avoid wasted computation
         # df.ix[d1:d2, 0] -> columns first (True)
         # df.ix[0, ['C', 'B', A']] -> rows first (False)
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 0447c05cb55cb..20dd24829461a 100644
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -1798,18 +1798,23 @@ class ExcelFile(object):
     ----------
     path : string or file-like object
         Path to xls or xlsx file
+    kind : {'xls', 'xlsx', None}, default None
     """
-    def __init__(self, path_or_buf):
-        self.use_xlsx = True
+    def __init__(self, path_or_buf, kind=None):
+        self.kind = kind
+        self.use_xlsx = kind == 'xlsx'
+
         self.path_or_buf = path_or_buf
         self.tmpfile = None
 
         if isinstance(path_or_buf, basestring):
-            if path_or_buf.endswith('.xls'):
+            if kind == 'xls' or (kind is None and
+                                 path_or_buf.endswith('.xls')):
                 self.use_xlsx = False
                 import xlrd
                 self.book = xlrd.open_workbook(path_or_buf)
             else:
+                self.use_xlsx = True
                 try:
                     from openpyxl.reader.excel import load_workbook
                     self.book = load_workbook(path_or_buf, use_iterators=True)
@@ -1818,14 +1823,23 @@ def __init__(self, path_or_buf):
         else:
             data = path_or_buf.read()
 
-            try:
+            if self.kind == 'xls':
                 import xlrd
                 self.book = xlrd.open_workbook(file_contents=data)
-                self.use_xlsx = False
-            except Exception:
+            elif self.kind == 'xlsx':
                 from openpyxl.reader.excel import load_workbook
                 buf = py3compat.BytesIO(data)
                 self.book = load_workbook(buf, use_iterators=True)
+            else:
+                try:
+                    import xlrd
+                    self.book = xlrd.open_workbook(file_contents=data)
+                    self.use_xlsx = False
+                except Exception:
+                    self.use_xlsx = True
+                    from openpyxl.reader.excel import load_workbook
+                    buf = py3compat.BytesIO(data)
+                    self.book = load_workbook(buf, use_iterators=True)
 
     def __repr__(self):
         return object.__repr__(self)
diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py
index 1b29a4bdd9bf2..bf7161cc23e86 100644
--- a/pandas/io/tests/test_excel.py
+++ b/pandas/io/tests/test_excel.py
@@ -234,6 +234,20 @@ def test_xlsx_table(self):
         tm.assert_frame_equal(df4, df.ix[:-1])
         tm.assert_frame_equal(df4, df5)
 
+    def test_specify_kind(self):
+        xlsx_file = os.path.join(self.dirpath, 'test.xlsx')
+        xls_file = os.path.join(self.dirpath, 'test.xls')
+
+        self.assertRaises(Exception, ExcelFile, xlsx_file, kind='xls')
+        self.assertRaises(Exception, ExcelFile, xls_file, kind='xlsx')
+
+        ExcelFile(open(xlsx_file, 'rb'), kind='xlsx')
+        ExcelFile(open(xls_file, 'rb'), kind='xls')
+        self.assertRaises(Exception, ExcelFile, open(xlsx_file, 'rb'),
+                          kind='xls')
+        self.assertRaises(Exception, ExcelFile, open(xls_file, 'rb'),
+                          kind='xlsx')
+
     def read_csv(self, *args, **kwds):
         kwds = kwds.copy()
         kwds['engine'] = 'python'