ENH: kind option for Excel file. close #2613

pandas-dev · Jan 20, 2013 · 61f7320 · 61f7320
1 parent ad9bba3
commit 61f7320
Show file tree

Hide file tree

Showing 4 changed files with 40 additions and 6 deletions.
diff --git a/RELEASE.rst b/RELEASE.rst
@@ -63,6 +63,8 @@ pandas 0.10.1
   - Attempt to parse ISO8601 format dates when parse_dates=True in read_csv for
     major performance boost in such cases (GH2698_)
   - Add methods ``neg`` and ``inv`` to Series
+  - Implement ``kind`` option in ``ExcelFile`` to indicate whether it's an XLS
+    or XLSX file (GH2613_)
 
 **Bug fixes**
 
@@ -114,6 +116,7 @@ pandas 0.10.1
 .. _GH2599: https://github.com/pydata/pandas/issues/2599
 .. _GH2604: https://github.com/pydata/pandas/issues/2604
 .. _GH2576: https://github.com/pydata/pandas/issues/2576
+.. _GH2613: https://github.com/pydata/pandas/issues/2613
 .. _GH2616: https://github.com/pydata/pandas/issues/2616
 .. _GH2621: https://github.com/pydata/pandas/issues/2621
 .. _GH2625: https://github.com/pydata/pandas/issues/2625

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -298,6 +298,9 @@ def _getitem_lowerdim(self, tup):
                 except KeyError:
                     raise e1
 
+        if len(tup) > self.obj.ndim:
+            raise IndexingError
+
         # to avoid wasted computation
         # df.ix[d1:d2, 0] -> columns first (True)
         # df.ix[0, ['C', 'B', 'A']] -> rows first (False)

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -1798,18 +1798,23 @@ class ExcelFile(object):
     ----------
     path : string or file-like object
         Path to xls or xlsx file
+    kind : {'xls', 'xlsx', None}, default None
     """
-    def __init__(self, path_or_buf):
-        self.use_xlsx = True
+    def __init__(self, path_or_buf, kind=None):
+        self.kind = kind
+        self.use_xlsx = kind == 'xls'
+
         self.path_or_buf = path_or_buf
         self.tmpfile = None
 
         if isinstance(path_or_buf, basestring):
-            if path_or_buf.endswith('.xls'):
+            if kind == 'xls' or (kind is None and
+                                 path_or_buf.endswith('.xls')):
                 self.use_xlsx = False
                 import xlrd
                 self.book = xlrd.open_workbook(path_or_buf)
             else:
+                self.use_xlsx = True
                 try:
                     from openpyxl.reader.excel import load_workbook
                     self.book = load_workbook(path_or_buf, use_iterators=True)
@@ -1818,14 +1823,23 @@ def __init__(self, path_or_buf):
         else:
             data = path_or_buf.read()
 
-            try:
+            if self.kind == 'xls':
                 import xlrd
                 self.book = xlrd.open_workbook(file_contents=data)
-                self.use_xlsx = False
-            except Exception:
+            elif self.kind == 'xlsx':
                 from openpyxl.reader.excel import load_workbook
                 buf = py3compat.BytesIO(data)
                 self.book = load_workbook(buf, use_iterators=True)
+            else:
+                try:
+                    import xlrd
+                    self.book = xlrd.open_workbook(file_contents=data)
+                    self.use_xlsx = False
+                except Exception:
+                    self.use_xlsx = True
+                    from openpyxl.reader.excel import load_workbook
+                    buf = py3compat.BytesIO(data)
+                    self.book = load_workbook(buf, use_iterators=True)
 
     def __repr__(self):
         return object.__repr__(self)

diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py
@@ -234,6 +234,20 @@ def test_xlsx_table(self):
         tm.assert_frame_equal(df4, df.ix[:-1])
         tm.assert_frame_equal(df4, df5)
 
+    def test_specify_kind(self):
+        xlsx_file = os.path.join(self.dirpath, 'test.xlsx')
+        xls_file = os.path.join(self.dirpath, 'test.xls')
+
+        self.assertRaises(Exception, ExcelFile, xlsx_file, kind='xls')
+        self.assertRaises(Exception, ExcelFile, xls_file, kind='xlsx')
+
+        ExcelFile(open(xlsx_file, 'rb'), kind='xlsx')
+        ExcelFile(open(xls_file, 'rb'), kind='xls')
+        self.assertRaises(Exception, ExcelFile, open(xlsx_file, 'rb'),
+                          kind='xls')
+        self.assertRaises(Exception, ExcelFile, open(xls_file, 'rb'),
+                          kind='xlsx')
+
     def read_csv(self, *args, **kwds):
         kwds = kwds.copy()
         kwds['engine'] = 'python'