
Commit

Merge branch 'test_branch' of https://github.com/aileronajay/pandas into test_branch

pull from github
Ajay Saxena authored and Ajay Saxena committed Nov 12, 2016
2 parents f708c2e + ed21736 commit c5a87d8
Showing 17 changed files with 34 additions and 20 deletions.
4 changes: 4 additions & 0 deletions doc/source/indexing.rst
@@ -1467,6 +1467,10 @@ with duplicates dropped.
idx1.symmetric_difference(idx2)
idx1 ^ idx2
+.. note::
+
+   The resulting index from a set operation will be sorted in ascending order.
+
Missing values
~~~~~~~~~~~~~~

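As a quick illustration of the behaviour this note documents (a sketch; the exact Index repr depends on the pandas version, and the data here is made up):

    import pandas as pd

    idx1 = pd.Index([5, 2, 9, 1])
    idx2 = pd.Index([2, 9, 3])

    # Whatever the input order, the set-operation results come back
    # sorted in ascending order.
    idx1.symmetric_difference(idx2)  # e.g. Int64Index([1, 3, 5], dtype='int64')
    idx1.union(idx2)                 # e.g. Int64Index([1, 2, 3, 5, 9], dtype='int64')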
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.20.0.txt
@@ -30,7 +30,7 @@ New features
Other enhancements
^^^^^^^^^^^^^^^^^^


+- ``pd.read_excel`` now preserves sheet order when using ``sheetname=None`` (:issue:`9930`)


.. _whatsnew_0200.api_breaking:
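A usage sketch of the enhancement above; the file name and sheet names are hypothetical, and the ordering follows the whatsnew entry:

    import pandas as pd

    # With sheetname=None all sheets are read; after this change the result
    # keeps the workbook's sheet order instead of an arbitrary dict order.
    dfs = pd.read_excel('workbook.xlsx', sheetname=None)  # hypothetical file
    list(dfs.keys())  # e.g. ['Charlie', 'Alpha', 'Beta'], matching the workbook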
20 changes: 12 additions & 8 deletions pandas/io/clipboard.py
@@ -3,12 +3,16 @@
from pandas.compat import StringIO, PY2


-def read_clipboard(**kwargs): # pragma: no cover
-"""
+def read_clipboard(sep='\s+', **kwargs): # pragma: no cover
+r"""
Read text from clipboard and pass to read_table. See read_table for the
full argument list
-If unspecified, `sep` defaults to '\s+'
+Parameters
+----------
+sep : str, default '\s+'.
+A string or regex delimiter. The default of '\s+' denotes
+one or more whitespace characters.
Returns
-------
@@ -29,7 +33,7 @@ def read_clipboard(**kwargs): # pragma: no cover
except:
pass

-# Excel copies into clipboard with \t seperation
+# Excel copies into clipboard with \t separation
# inspect no more then the 10 first lines, if they
# all contain an equal number (>0) of tabs, infer
# that this came from excel and set 'sep' accordingly
@@ -43,12 +47,12 @@ def read_clipboard(**kwargs): # pragma: no cover

counts = set([x.lstrip().count('\t') for x in lines])
if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0:
-kwargs['sep'] = '\t'
+sep = '\t'

-if kwargs.get('sep') is None and kwargs.get('delim_whitespace') is None:
-kwargs['sep'] = '\s+'
+if sep is None and kwargs.get('delim_whitespace') is None:
+sep = '\s+'

-return read_table(StringIO(text), **kwargs)
+return read_table(StringIO(text), sep=sep, **kwargs)


def to_clipboard(obj, excel=None, sep=None, **kwargs): # pragma: no cover
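For reference, a standalone sketch of the tab-detection heuristic used above (not the pandas function itself, just the same idea applied to a plain string; the helper name and sample data are made up):

    def guess_clipboard_sep(text, default=r'\s+'):
        # Look at up to the first 10 lines; if they all contain the same
        # non-zero number of tabs, the text most likely came from Excel,
        # so use '\t' as the separator.
        lines = text[:10000].split('\n')[:-1][:10]
        counts = set(line.lstrip().count('\t') for line in lines)
        if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0:
            return '\t'
        return default

    sample = "a\tb\tc\n1\t2\t3\n4\t5\t6\n"
    guess_clipboard_sep(sample)  # returns '\t'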
6 changes: 3 additions & 3 deletions pandas/io/excel.py
@@ -21,7 +21,7 @@
from pandas.tseries.period import Period
from pandas import json
from pandas.compat import (map, zip, reduce, range, lrange, u, add_metaclass,
-string_types)
+string_types, OrderedDict)
from pandas.core import config
from pandas.formats.printing import pprint_thing
import pandas.compat as compat
@@ -418,9 +418,9 @@ def _parse_cell(cell_contents, cell_typ):
sheets = [sheetname]

# handle same-type duplicates.
-sheets = list(set(sheets))
+sheets = list(OrderedDict.fromkeys(sheets).keys())

-output = {}
+output = OrderedDict()

for asheetname in sheets:
if verbose:
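A minimal sketch of why the substitution works: OrderedDict.fromkeys removes same-type duplicates like set does, but keeps the first-seen order (the sheet names here are hypothetical; the diff imports OrderedDict from pandas.compat):

    from collections import OrderedDict

    sheets = ['Charlie', 'Alpha', 'Beta', 'Alpha']
    list(set(sheets))                          # duplicates dropped, order arbitrary
    list(OrderedDict.fromkeys(sheets).keys())  # ['Charlie', 'Alpha', 'Beta']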
Binary file modified pandas/io/tests/data/test_multisheet.xls
Binary file modified pandas/io/tests/data/test_multisheet.xlsm
Binary file modified pandas/io/tests/data/test_multisheet.xlsx
6 changes: 3 additions & 3 deletions pandas/io/tests/parser/common.py
@@ -630,18 +630,18 @@ def test_read_csv_parse_simple_list(self):
def test_url(self):
# HTTP(S)
url = ('https://raw.github.com/pandas-dev/pandas/master/'
-'pandas/io/tests/parser/data/salary.table.csv')
+'pandas/io/tests/parser/data/salaries.csv')
url_table = self.read_table(url)
dirpath = tm.get_data_path()
-localtable = os.path.join(dirpath, 'salary.table.csv')
+localtable = os.path.join(dirpath, 'salaries.csv')
local_table = self.read_table(localtable)
tm.assert_frame_equal(url_table, local_table)
# TODO: ftp testing

@tm.slow
def test_file(self):
dirpath = tm.get_data_path()
-localtable = os.path.join(dirpath, 'salary.table.csv')
+localtable = os.path.join(dirpath, 'salaries.csv')
local_table = self.read_table(localtable)

try:
File renamed without changes.
Binary file added pandas/io/tests/parser/data/salaries.csv.bz2
File renamed without changes.
Binary file added pandas/io/tests/parser/data/salaries.csv.xz
Binary file added pandas/io/tests/parser/data/salaries.csv.zip
4 changes: 2 additions & 2 deletions pandas/io/tests/parser/test_network.py
@@ -18,13 +18,13 @@ class TestUrlGz(tm.TestCase):

def setUp(self):
dirpath = tm.get_data_path()
-localtable = os.path.join(dirpath, 'salary.table.csv')
+localtable = os.path.join(dirpath, 'salaries.csv')
self.local_table = read_table(localtable)

@tm.network
def test_url_gz(self):
url = ('https://raw.github.com/pandas-dev/pandas/'
-'master/pandas/io/tests/parser/data/salary.table.gz')
+'master/pandas/io/tests/parser/data/salaries.csv.gz')
url_table = read_table(url, compression="gzip", engine="python")
tm.assert_frame_equal(url_table, self.local_table)

2 changes: 2 additions & 0 deletions pandas/io/tests/test_clipboard.py
@@ -74,6 +74,8 @@ def check_round_trip_frame(self, data_type, excel=None, sep=None):
def test_round_trip_frame_sep(self):
for dt in self.data_types:
self.check_round_trip_frame(dt, sep=',')
+self.check_round_trip_frame(dt, sep='\s+')
+self.check_round_trip_frame(dt, sep='|')

def test_round_trip_frame_string(self):
for dt in self.data_types:
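Roughly what the new test cases exercise, as a usage sketch; it needs a working system clipboard (a GUI session), and the DataFrame here is made up:

    import pandas as pd

    df = pd.DataFrame({'a': [1, 2], 'b': ['x', 'y']})
    df.to_clipboard(excel=True, sep='|')              # write with an explicit separator
    result = pd.read_clipboard(sep='|', index_col=0)  # read it back with the same sep
    # result should now match df if the clipboard round-trip succeeded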
6 changes: 5 additions & 1 deletion pandas/io/tests/test_excel.py
@@ -379,8 +379,12 @@ def test_reading_all_sheets(self):
# See PR #9450
basename = 'test_multisheet'
dfs = self.get_exceldf(basename, sheetname=None)
-expected_keys = ['Alpha', 'Beta', 'Charlie']
+# ensure this is not alphabetical to test order preservation
+expected_keys = ['Charlie', 'Alpha', 'Beta']
tm.assert_contains_all(expected_keys, dfs.keys())
+# Issue 9930
+# Ensure sheet order is preserved
+tm.assert_equal(expected_keys, list(dfs.keys()))

def test_reading_multiple_specific_sheets(self):
# Test reading specific sheetnames by specifying a mixed list
4 changes: 2 additions & 2 deletions pandas/tools/merge.py
@@ -816,8 +816,8 @@ def _validate_specification(self):
self.left_on = self.right_on = common_cols
elif self.on is not None:
if self.left_on is not None or self.right_on is not None:
-raise MergeError('Can only pass on OR left_on and '
-'right_on')
+raise MergeError('Can only pass argument "on" OR "left_on" '
+'and "right_on", not a combination of both.')
self.left_on = self.right_on = self.on
elif self.left_on is not None:
n = len(self.left_on)
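A small example of the situation that triggers the reworded error (the column names and data are made up):

    import pandas as pd

    left = pd.DataFrame({'key': [1, 2], 'lval': [10, 20]})
    right = pd.DataFrame({'key': [1, 2], 'rval': [30, 40]})

    # Passing both `on` and `left_on`/`right_on` is ambiguous and raises a
    # MergeError carrying the clearer message introduced above.
    try:
        pd.merge(left, right, on='key', left_on='key', right_on='key')
    except Exception as err:
        print(err)  # Can only pass argument "on" OR "left_on" and "right_on", ...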
