diff --git a/doc/source/whatsnew/v0.19.2.txt b/doc/source/whatsnew/v0.19.2.txt index 4e2c6e2faeaa5..54d0a212f6924 100644 --- a/doc/source/whatsnew/v0.19.2.txt +++ b/doc/source/whatsnew/v0.19.2.txt @@ -32,6 +32,7 @@ Bug Fixes +- Bug in ``pd.read_csv()`` where ``usecols`` entries missing from the header were not detected when ``usecols`` and the header had the same length (:issue:`14671`) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 092cba093421a..7c2db9ed39b5f 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1425,12 +1425,14 @@ def __init__(self, src, **kwds): self.orig_names = self.names[:] if self.usecols: - if len(self.names) > len(self.usecols): - self.names = [n for i, n in enumerate(self.names) - if (i in self.usecols or n in self.usecols)] - - if len(self.names) < len(self.usecols): - raise ValueError("Usecols do not match names.") + if self._reader.file_header is not None: + h = self._reader.file_header[0] + usecol_len = len(set(self.usecols) - set(h)) + usecoli_len = len(set(self.usecols) - set(range(0, len(h)))) + if usecol_len > 0 and usecoli_len > 0: + raise ValueError("Usecols do not match names.") + + self.names = self._filter_usecols(self.names) self._set_noconvert_columns() diff --git a/pandas/io/tests/parser/usecols.py b/pandas/io/tests/parser/usecols.py index 5051171ccb8f0..0358f1fba3d8d 100644 --- a/pandas/io/tests/parser/usecols.py +++ b/pandas/io/tests/parser/usecols.py @@ -54,6 +54,10 @@ def test_usecols(self): expected.columns = ['foo', 'bar'] tm.assert_frame_equal(result, expected) + # same length but usecols column doesn't exist - see gh-14671 + self.assertRaises(ValueError, self.read_csv, StringIO(data), + usecols=['a', 'b', 'z']) + data = """\ 1,2,3 4,5,6 diff --git a/pandas/parser.pyx b/pandas/parser.pyx index 9fb99637731be..666ab882bba3b 100644 --- a/pandas/parser.pyx +++ b/pandas/parser.pyx @@ -290,7 +290,7 @@ cdef class TextReader: object na_values object memory_map object as_recarray - object header, orig_header, names, header_start, header_end + object header, 
orig_header, names, header_start, header_end, file_header object index_col object low_memory object skiprows @@ -775,6 +775,12 @@ cdef class TextReader: data_line = hr + 1 header.append(this_header) + self.file_header = header[:] + + #if self.usecols is not None: + # if len(set(self.usecols) - set(header[0])) > 0 and len(set(self.usecols) - set(range(0,field_count))) > 0: + # raise ValueError("Usecols do not match names.") + if self.names is not None: header = [ self.names ]