
Commit

Merge branch 'test_branch' of https://github.com/aileronajay/pandas into test_branch

pull from github
Ajay Saxena authored and Ajay Saxena committed Nov 12, 2016
2 parents f708c2e + ed21736 commit c5a87d8
Showing 17 changed files with 34 additions and 20 deletions.
4 changes: 4 additions & 0 deletions doc/source/indexing.rst
@@ -1467,6 +1467,10 @@ with duplicates dropped.
idx1.symmetric_difference(idx2)
idx1 ^ idx2
+.. note::
+
+   The resulting index from a set operation will be sorted in ascending order.
+
Missing values
~~~~~~~~~~~~~~

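As a quick illustration of the behaviour this note documents (a sketch; the exact Index repr depends on the pandas version, and the data here is made up):

    import pandas as pd

    idx1 = pd.Index([5, 2, 9, 1])
    idx2 = pd.Index([2, 9, 3])

    # Whatever the input order, the set-operation results come back
    # sorted in ascending order.
    idx1.symmetric_difference(idx2)  # e.g. Int64Index([1, 3, 5], dtype='int64')
    idx1.union(idx2)                 # e.g. Int64Index([1, 2, 3, 5, 9], dtype='int64')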
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.20.0.txt
@@ -30,7 +30,7 @@ New features
Other enhancements
^^^^^^^^^^^^^^^^^^


+- ``pd.read_excel`` now preserves sheet order when using ``sheetname=None`` (:issue:`9930`)


.. _whatsnew_0200.api_breaking:
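A usage sketch of the enhancement above; the file name and sheet names are hypothetical, and the ordering follows the whatsnew entry:

    import pandas as pd

    # With sheetname=None all sheets are read; after this change the result
    # keeps the workbook's sheet order instead of an arbitrary dict order.
    dfs = pd.read_excel('workbook.xlsx', sheetname=None)  # hypothetical file
    list(dfs.keys())  # e.g. ['Charlie', 'Alpha', 'Beta'], matching the workbook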
20 changes: 12 additions & 8 deletions pandas/io/clipboard.py
@@ -3,12 +3,16 @@
from pandas.compat import StringIO, PY2


-def read_clipboard(**kwargs): # pragma: no cover
-"""
+def read_clipboard(sep='\s+', **kwargs): # pragma: no cover
+r"""
Read text from clipboard and pass to read_table. See read_table for the
full argument list
-If unspecified, `sep` defaults to '\s+'
+Parameters
+----------
+sep : str, default '\s+'.
+A string or regex delimiter. The default of '\s+' denotes
+one or more whitespace characters.
Returns
-------
@@ -29,7 +33,7 @@ def read_clipboard(**kwargs): # pragma: no cover
except:
pass

-# Excel copies into clipboard with \t seperation
+# Excel copies into clipboard with \t separation
# inspect no more then the 10 first lines, if they
# all contain an equal number (>0) of tabs, infer
# that this came from excel and set 'sep' accordingly
@@ -43,12 +47,12 @@ def read_clipboard(**kwargs): # pragma: no cover

counts = set([x.lstrip().count('\t') for x in lines])
if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0:
-kwargs['sep'] = '\t'
+sep = '\t'

-if kwargs.get('sep') is None and kwargs.get('delim_whitespace') is None:
-kwargs['sep'] = '\s+'
+if sep is None and kwargs.get('delim_whitespace') is None:
+sep = '\s+'

-return read_table(StringIO(text), **kwargs)
+return read_table(StringIO(text), sep=sep, **kwargs)


def to_clipboard(obj, excel=None, sep=None, **kwargs): # pragma: no cover
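For reference, a standalone sketch of the tab-detection heuristic used above (not the pandas function itself, just the same idea applied to a plain string; the helper name and sample data are made up):

    def guess_clipboard_sep(text, default=r'\s+'):
        # Look at up to the first 10 lines; if they all contain the same
        # non-zero number of tabs, the text most likely came from Excel,
        # so use '\t' as the separator.
        lines = text[:10000].split('\n')[:-1][:10]
        counts = set(line.lstrip().count('\t') for line in lines)
        if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0:
            return '\t'
        return default

    sample = "a\tb\tc\n1\t2\t3\n4\t5\t6\n"
    guess_clipboard_sep(sample)  # returns '\t'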
6 changes: 3 additions & 3 deletions pandas/io/excel.py
@@ -21,7 +21,7 @@
from pandas.tseries.period import Period
from pandas import json
from pandas.compat import (map, zip, reduce, range, lrange, u, add_metaclass,
-string_types)
+string_types, OrderedDict)
from pandas.core import config
from pandas.formats.printing import pprint_thing
import pandas.compat as compat
@@ -418,9 +418,9 @@ def _parse_cell(cell_contents, cell_typ):
sheets = [sheetname]

# handle same-type duplicates.
-sheets = list(set(sheets))
+sheets = list(OrderedDict.fromkeys(sheets).keys())

-output = {}
+output = OrderedDict()

for asheetname in sheets:
if verbose:
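A minimal sketch of why the substitution works: OrderedDict.fromkeys removes same-type duplicates like set does, but keeps the first-seen order (the sheet names here are hypothetical; the diff imports OrderedDict from pandas.compat):

    from collections import OrderedDict

    sheets = ['Charlie', 'Alpha', 'Beta', 'Alpha']
    list(set(sheets))                          # duplicates dropped, order arbitrary
    list(OrderedDict.fromkeys(sheets).keys())  # ['Charlie', 'Alpha', 'Beta']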
Binary file modified pandas/io/tests/data/test_multisheet.xls
Binary file modified pandas/io/tests/data/test_multisheet.xlsm
Binary file modified pandas/io/tests/data/test_multisheet.xlsx
6 changes: 3 additions & 3 deletions pandas/io/tests/parser/common.py
@@ -630,18 +630,18 @@ def test_read_csv_parse_simple_list(self):
def test_url(self):
# HTTP(S)
url = ('https://raw.github.com/pandas-dev/pandas/master/'
-'pandas/io/tests/parser/data/salary.table.csv')
+'pandas/io/tests/parser/data/salaries.csv')
url_table = self.read_table(url)
dirpath = tm.get_data_path()
-localtable = os.path.join(dirpath, 'salary.table.csv')
+localtable = os.path.join(dirpath, 'salaries.csv')
local_table = self.read_table(localtable)
tm.assert_frame_equal(url_table, local_table)
# TODO: ftp testing

@tm.slow
def test_file(self):
dirpath = tm.get_data_path()
-localtable = os.path.join(dirpath, 'salary.table.csv')
+localtable = os.path.join(dirpath, 'salaries.csv')
local_table = self.read_table(localtable)

try:
File renamed without changes.
Binary file added pandas/io/tests/parser/data/salaries.csv.bz2
File renamed without changes.
Binary file added pandas/io/tests/parser/data/salaries.csv.xz
Binary file added pandas/io/tests/parser/data/salaries.csv.zip
4 changes: 2 additions & 2 deletions pandas/io/tests/parser/test_network.py
@@ -18,13 +18,13 @@ class TestUrlGz(tm.TestCase):

def setUp(self):
dirpath = tm.get_data_path()
-localtable = os.path.join(dirpath, 'salary.table.csv')
+localtable = os.path.join(dirpath, 'salaries.csv')
self.local_table = read_table(localtable)

@tm.network
def test_url_gz(self):
url = ('https://raw.github.com/pandas-dev/pandas/'
-'master/pandas/io/tests/parser/data/salary.table.gz')
+'master/pandas/io/tests/parser/data/salaries.csv.gz')
url_table = read_table(url, compression="gzip", engine="python")
tm.assert_frame_equal(url_table, self.local_table)

2 changes: 2 additions & 0 deletions pandas/io/tests/test_clipboard.py
@@ -74,6 +74,8 @@ def check_round_trip_frame(self, data_type, excel=None, sep=None):
def test_round_trip_frame_sep(self):
for dt in self.data_types:
self.check_round_trip_frame(dt, sep=',')
+self.check_round_trip_frame(dt, sep='\s+')
+self.check_round_trip_frame(dt, sep='|')

def test_round_trip_frame_string(self):
for dt in self.data_types:
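Roughly what the new test cases exercise, as a usage sketch; it needs a working system clipboard (a GUI session), and the DataFrame here is made up:

    import pandas as pd

    df = pd.DataFrame({'a': [1, 2], 'b': ['x', 'y']})
    df.to_clipboard(excel=True, sep='|')              # write with an explicit separator
    result = pd.read_clipboard(sep='|', index_col=0)  # read it back with the same sep
    # result should now match df if the clipboard round-trip succeeded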
6 changes: 5 additions & 1 deletion pandas/io/tests/test_excel.py
@@ -379,8 +379,12 @@ def test_reading_all_sheets(self):
# See PR #9450
basename = 'test_multisheet'
dfs = self.get_exceldf(basename, sheetname=None)
-expected_keys = ['Alpha', 'Beta', 'Charlie']
+# ensure this is not alphabetical to test order preservation
+expected_keys = ['Charlie', 'Alpha', 'Beta']
tm.assert_contains_all(expected_keys, dfs.keys())
+# Issue 9930
+# Ensure sheet order is preserved
+tm.assert_equal(expected_keys, list(dfs.keys()))

def test_reading_multiple_specific_sheets(self):
# Test reading specific sheetnames by specifying a mixed list
4 changes: 2 additions & 2 deletions pandas/tools/merge.py
@@ -816,8 +816,8 @@ def _validate_specification(self):
self.left_on = self.right_on = common_cols
elif self.on is not None:
if self.left_on is not None or self.right_on is not None:
-raise MergeError('Can only pass on OR left_on and '
-'right_on')
+raise MergeError('Can only pass argument "on" OR "left_on" '
+'and "right_on", not a combination of both.')
self.left_on = self.right_on = self.on
elif self.left_on is not None:
n = len(self.left_on)
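A small example of the situation that triggers the reworded error (the column names and data are made up):

    import pandas as pd

    left = pd.DataFrame({'key': [1, 2], 'lval': [10, 20]})
    right = pd.DataFrame({'key': [1, 2], 'rval': [30, 40]})

    # Passing both `on` and `left_on`/`right_on` is ambiguous and raises a
    # MergeError carrying the clearer message introduced above.
    try:
        pd.merge(left, right, on='key', left_on='key', right_on='key')
    except Exception as err:
        print(err)  # Can only pass argument "on" OR "left_on" and "right_on", ...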
