Skip to content

Commit

Permalink
BUG: read_csv with empty df
Browse files Browse the repository at this point in the history
read_csv would fail when the header rows passed in accounted for every line
in the file, leaving no data rows. This commit fixes that bug.
  • Loading branch information
Ben Kandel committed Nov 22, 2016
1 parent f26b049 commit b8200e4
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 2 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.19.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -57,5 +57,6 @@ Bug Fixes
- Bug in ``DataFrame.to_json`` where ``lines=True`` and a value contained a ``}`` character (:issue:`14391`)
- Bug in ``df.groupby`` causing an ``AttributeError`` when grouping a single index frame by a column and the index level (:issue:`14327`)
- Bug in ``df.groupby`` where ``TypeError`` raised when ``pd.Grouper(key=...)`` is passed in a list (:issue:`14334`)
- Bug in ``pd.read_csv`` where reading files fails if the number of headers is equal to the number of lines in the file (:issue:`14515`)
- Bug in ``pd.pivot_table`` where ``TypeError`` or ``ValueError`` may be raised when ``index`` or ``columns``
is not scalar and ``values`` is not specified (:issue:`14380`)
18 changes: 18 additions & 0 deletions pandas/io/tests/parser/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -606,6 +606,24 @@ def test_multi_index_no_level_names(self):
expected = self.read_csv(StringIO(data), index_col=[1, 0])
tm.assert_frame_equal(df, expected, check_names=False)

def test_multi_index_blank_df(self):
    """Parse CSV input in which the header rows are the only lines present.

    Covers a single header row (``header=[0]``), a round trip of the
    resulting empty frame through ``to_csv``, and two header rows
    (``header=[0, 1]``).
    """
    # GH 14545
    # NOTE(review): the whatsnew entry for this fix cites issue 14515 --
    # confirm which number is correct.
    single_header_csv = "a,b\n"
    parsed = self.read_csv(StringIO(single_header_csv), header=[0])
    expected = DataFrame(columns=['a', 'b'])
    tm.assert_frame_equal(parsed, expected)

    # Write the expected frame out and read it back with default options.
    round_trip = self.read_csv(StringIO(expected.to_csv()))
    tm.assert_frame_equal(expected, round_trip)

    # Both lines of the input are consumed as header rows.
    two_header_csv = "a,b\nc,d\n"
    parsed_multi = self.read_csv(StringIO(two_header_csv), header=[0, 1])
    expected_multi = DataFrame(columns=[('a', 'c'), ('b', 'd')])
    tm.assert_frame_equal(parsed_multi, expected_multi)

def test_no_unnamed_index(self):
data = """ id c0 c1 c2
0 1 0 a b
Expand Down
6 changes: 4 additions & 2 deletions pandas/parser.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -717,7 +717,9 @@ cdef class TextReader:
start = self.parser.line_start[0]

# e.g., if header=3 and file only has 2 lines
elif self.parser.lines < hr + 1:
if (self.parser.lines < hr + 1
and not isinstance(self.orig_header, list)) or (
self.parser.lines < hr):
msg = self.orig_header
if isinstance(msg, list):
msg = "[%s], len of %d," % (
Expand Down Expand Up @@ -940,7 +942,7 @@ cdef class TextReader:
raise_parser_error('Error tokenizing data', self.parser)
footer = self.skipfooter

if self.parser_start == self.parser.lines:
if self.parser_start >= self.parser.lines:
raise StopIteration
self._end_clock('Tokenization')

Expand Down

0 comments on commit b8200e4

Please sign in to comment.