Skip to content

Commit

Permalink
GH14671 - ERR: Raise ValueError if a usecols column doesn't exist when usecols and the header have the same length
Browse files Browse the repository at this point in the history
- Updated tests
- Updated whatsnew 0.19.2 note
- Added new attribute file_header on TextReader (read by CParserWrapper
via self._reader) to hold the original header read from the file, so it
can be compared against usecols
  • Loading branch information
GGordonGordon committed Nov 18, 2016
1 parent c045e1d commit a985129
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 7 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.19.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ Bug Fixes



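- Bug in ``pd.read_csv`` in which a ``usecols`` entry that does not exist in the header was not caught when ``usecols`` and the header had the same length; a ``ValueError`` is now raised (:issue:`14671`)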



Expand Down
14 changes: 8 additions & 6 deletions pandas/io/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1425,12 +1425,14 @@ def __init__(self, src, **kwds):
self.orig_names = self.names[:]

if self.usecols:
if len(self.names) > len(self.usecols):
self.names = [n for i, n in enumerate(self.names)
if (i in self.usecols or n in self.usecols)]

if len(self.names) < len(self.usecols):
raise ValueError("Usecols do not match names.")
if self._reader.file_header is not None:
h = self._reader.file_header[0]
usecol_len = len(set(self.usecols) - set(h))
usecoli_len = len(set(self.usecols) - set(range(0, len(h))))
if usecol_len > 0 and usecoli_len > 0:
raise ValueError("Usecols do not match names.")

self.names = self._filter_usecols(self.names)

self._set_noconvert_columns()

Expand Down
4 changes: 4 additions & 0 deletions pandas/io/tests/parser/usecols.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@ def test_usecols(self):
expected.columns = ['foo', 'bar']
tm.assert_frame_equal(result, expected)

# usecols has the same length as the header but names a column that doesn't exist - see gh-14671
self.assertRaises(ValueError, self.read_csv, StringIO(data),
usecols=['a', 'b', 'z'])

data = """\
1,2,3
4,5,6
Expand Down
8 changes: 7 additions & 1 deletion pandas/parser.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ cdef class TextReader:
object na_values
object memory_map
object as_recarray
object header, orig_header, names, header_start, header_end
object header, orig_header, names, header_start, header_end, file_header
object index_col
object low_memory
object skiprows
Expand Down Expand Up @@ -775,6 +775,12 @@ cdef class TextReader:
data_line = hr + 1
header.append(this_header)

self.file_header = header[:]

#if self.usecols is not None:
# if len(set(self.usecols) - set(header[0])) > 0 and len(set(self.usecols) - set(range(0,field_count))) > 0:
# raise ValueError("Usecols do not match names.")

if self.names is not None:
header = [ self.names ]

Expand Down

0 comments on commit a985129

Please sign in to comment.