Skip to content

Commit

Permalink
BUG: Don't print stray newline with MultiIndex
Browse files Browse the repository at this point in the history
Title is self-explanatory. Closes #6618.

Author: gfyoung <gfyoung17@gmail.com>

Closes #14132 from gfyoung/to-csv-newline and squashes the following commits:

d1a600f [gfyoung] BUG: Don't print stray newline with MultiIndex
  • Loading branch information
gfyoung authored and jreback committed Sep 2, 2016
1 parent 58199c5 commit 362a561
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 24 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.19.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1355,6 +1355,7 @@ Bug Fixes
- Bug in using a NumPy ufunc with ``PeriodIndex`` to add or subtract an integer, which raised ``IncompatibleFrequency``. Note that using a standard operator like ``+`` or ``-`` is recommended, because standard operators use a more efficient path (:issue:`13980`)
- Bug in operations on ``NaT`` returning ``float`` instead of ``datetime64[ns]`` (:issue:`12941`)
- Bug in ``Series`` flexible arithmetic methods (like ``.add()``) raising ``ValueError`` when ``axis=None`` (:issue:`13894`)
- Bug in ``DataFrame.to_csv()`` with ``MultiIndex`` columns in which a stray empty line was added (:issue:`6618`)


- Bug in ``Index`` raising a ``KeyError`` that displays an incorrect column when the column is not in the df and the columns contain duplicate values (:issue:`13822`)
Expand Down
18 changes: 9 additions & 9 deletions pandas/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1524,9 +1524,9 @@ def _save_header(self):

if not has_mi_columns:
encoded_labels += list(write_cols)

# write out the mi
if has_mi_columns:
writer.writerow(encoded_labels)
else:
# write out the mi
columns = obj.columns

# write out the names for each level, then ALL of the values for
Expand All @@ -1547,12 +1547,12 @@ def _save_header(self):

writer.writerow(col_line)

# add blanks for the columns, so that we
# have consistent seps
encoded_labels.extend([''] * len(columns))

# write out the index label line
writer.writerow(encoded_labels)
# Write out the index line if it's not empty.
# Otherwise, we will print out an extraneous
# blank line between the mi and the data rows.
if encoded_labels and set(encoded_labels) != set(['']):
encoded_labels.extend([''] * len(columns))
writer.writerow(encoded_labels)

def _save(self):

Expand Down
27 changes: 27 additions & 0 deletions pandas/tests/formats/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -3327,6 +3327,33 @@ def test_to_csv_date_format(self):
self.assertEqual(df_sec_grouped.mean().to_csv(date_format='%Y-%m-%d'),
expected_ymd_sec)

def test_to_csv_multi_index(self):
    """Check ``to_csv`` output for frames with ``MultiIndex`` columns.

    Regression test for gh-6618. Each frame holds the single value 1
    under two-level ``MultiIndex`` columns and is serialized both with
    and without its index; the expected strings contain no empty line
    between the column header rows and the data row.
    """
    # Integer column levels, no explicit row index.
    frame = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1], [2]]))
    self.assertEqual(frame.to_csv(), ",1\n,2\n0,1\n")
    self.assertEqual(frame.to_csv(index=False), "1\n2\n1\n")

    # Integer column levels, with a two-level MultiIndex on the rows too.
    col_index = pd.MultiIndex.from_arrays([[1], [2]])
    row_index = pd.MultiIndex.from_arrays([[1], [2]])
    frame = DataFrame([1], columns=col_index, index=row_index)
    self.assertEqual(frame.to_csv(), ",,1\n,,2\n1,2,1\n")
    self.assertEqual(frame.to_csv(index=False), "1\n2\n1\n")

    # String column levels, no explicit row index.
    str_columns = pd.MultiIndex.from_arrays([['foo'], ['bar']])
    frame = DataFrame([1], columns=str_columns)
    self.assertEqual(frame.to_csv(), ",foo\n,bar\n0,1\n")
    self.assertEqual(frame.to_csv(index=False), "foo\nbar\n1\n")

def test_period(self):
# GH 12615
df = pd.DataFrame({'A': pd.period_range('2013-01',
Expand Down
18 changes: 3 additions & 15 deletions pandas/tests/frame/test_to_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -587,21 +587,9 @@ def _make_frame(names=None):
df = _make_frame(True)
df.to_csv(path, tupleize_cols=False)

# catch invalid headers
with assertRaisesRegexp(CParserError,
'Passed header=\[0,1,2\] are too many '
'rows for this multi_index of columns'):
read_csv(path, tupleize_cols=False,
header=lrange(3), index_col=0)

with assertRaisesRegexp(CParserError,
'Passed header=\[0,1,2,3,4,5,6\], len of '
'7, but only 6 lines in file'):
read_csv(path, tupleize_cols=False,
header=lrange(7), index_col=0)

for i in [4, 5, 6]:
with tm.assertRaises(CParserError):
for i in [5, 6, 7]:
msg = 'len of {i}, but only 5 lines in file'.format(i=i)
with assertRaisesRegexp(CParserError, msg):
read_csv(path, tupleize_cols=False,
header=lrange(i), index_col=0)

Expand Down

0 comments on commit 362a561

Please sign in to comment.