Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
BUG: Accept unicode quotechars again in pd.read_csv
Restores support for unicode quote characters in pd.read_csv. Affects Python 2.x only. Closes #14477.

Author: gfyoung <gfyoung17@gmail.com>

Closes #14492 from gfyoung/quotechar-unicode-2.x and squashes the following commits:

ec9f59a [gfyoung] BUG: Accept unicode quotechars again in pd.read_csv
  • Loading branch information
gfyoung authored and jreback committed Oct 26, 2016
1 parent 66b4c83 commit 6130e77
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 2 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.19.1.txt
Expand Up @@ -36,6 +36,7 @@ Bug Fixes
- Compat with Cython 0.25 for building (:issue:`14496`)


- Bug in ``pd.read_csv`` for Python 2.x in which Unicode quote characters were no longer being respected (:issue:`14477`)
- Bug in localizing an ambiguous timezone when a boolean is passed (:issue:`14402`)
- Bug in ``TimedeltaIndex`` addition with a Datetime-like object where addition overflow in the negative direction was not being caught (:issue:`14068`, :issue:`14453`)

Expand Down
3 changes: 3 additions & 0 deletions pandas/io/parsers.py
Expand Up @@ -1759,6 +1759,9 @@ def __init__(self, f, **kwds):
self.delimiter = kwds['delimiter']

self.quotechar = kwds['quotechar']
if isinstance(self.quotechar, compat.text_type):
self.quotechar = str(self.quotechar)

self.escapechar = kwds['escapechar']
self.doublequote = kwds['doublequote']
self.skipinitialspace = kwds['skipinitialspace']
Expand Down
15 changes: 14 additions & 1 deletion pandas/io/tests/parser/quoting.py
Expand Up @@ -9,7 +9,7 @@
import pandas.util.testing as tm

from pandas import DataFrame
from pandas.compat import StringIO
from pandas.compat import PY3, StringIO, u


class QuotingTests(object):
Expand Down Expand Up @@ -138,3 +138,16 @@ def test_double_quote(self):
result = self.read_csv(StringIO(data), quotechar='"',
doublequote=False)
tm.assert_frame_equal(result, expected)

def test_quotechar_unicode(self):
    """Check that ``read_csv`` accepts a unicode ``quotechar``."""
    # See gh-14477
    data = 'a\n1'
    expected = DataFrame({'a': [1]})

    # An ASCII quote character passed as a unicode string is always
    # exercised.
    quotechars = [u('"')]

    # Compared to Python 3.x, Python 2.x does not handle unicode well,
    # so the non-ASCII quote character is only exercised on Python 3.x.
    if PY3:
        quotechars.append(u('\u0394'))

    for quotechar in quotechars:
        result = self.read_csv(StringIO(data), quotechar=quotechar)
        tm.assert_frame_equal(result, expected)
3 changes: 2 additions & 1 deletion pandas/parser.pyx
Expand Up @@ -570,7 +570,8 @@ cdef class TextReader:
if not QUOTE_MINIMAL <= quoting <= QUOTE_NONE:
raise TypeError('bad "quoting" value')

if not isinstance(quote_char, (str, bytes)) and quote_char is not None:
if not isinstance(quote_char, (str, compat.text_type,
bytes)) and quote_char is not None:
dtype = type(quote_char).__name__
raise TypeError('"quotechar" must be string, '
'not {dtype}'.format(dtype=dtype))
Expand Down

0 comments on commit 6130e77

Please sign in to comment.