BUG: Don't print stray newline with MultiIndex

Title is self-explanatory.

Closes #6618.

Author: gfyoung <gfyoung17@gmail.com>

Closes #14132 from gfyoung/to-csv-newline and squashes the following commits:

d1a600f [gfyoung] BUG: Don't print stray newline with MultiIndex
pandas-dev · Sep 2, 2016 · 362a561
1 parent 58199c5
commit 362a561
Show file tree

Hide file tree

Showing 4 changed files with 40 additions and 24 deletions.
diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
@@ -1355,6 +1355,7 @@ Bug Fixes
 - Bug in using NumPy ufunc with ``PeriodIndex`` to add or subtract integer raise ``IncompatibleFrequency``. Note that using standard operator like ``+`` or ``-`` is recommended, because standard operators use more efficient path (:issue:`13980`)
 - Bug in operations on ``NaT`` returning ``float`` instead of ``datetime64[ns]`` (:issue:`12941`)
 - Bug in ``Series`` flexible arithmetic methods (like ``.add()``) raises ``ValueError`` when ``axis=None`` (:issue:`13894`)
+- Bug in ``DataFrame.to_csv()`` with ``MultiIndex`` columns in which a stray empty line was written between the header and the data rows (:issue:`6618`)
 
 
 - Bug in ``Index`` raises ``KeyError`` displaying incorrect column when column is not in the df and columns contains duplicate values (:issue:`13822`)

diff --git a/pandas/formats/format.py b/pandas/formats/format.py
@@ -1524,9 +1524,9 @@ def _save_header(self):
 
         if not has_mi_columns:
             encoded_labels += list(write_cols)
-
-        # write out the mi
-        if has_mi_columns:
+            writer.writerow(encoded_labels)
+        else:
+            # write out the mi
             columns = obj.columns
 
             # write out the names for each level, then ALL of the values for
@@ -1547,12 +1547,12 @@ def _save_header(self):
 
                 writer.writerow(col_line)
 
-            # add blanks for the columns, so that we
-            # have consistent seps
-            encoded_labels.extend([''] * len(columns))
-
-        # write out the index label line
-        writer.writerow(encoded_labels)
+            # Write out the index line if it's not empty.
+            # Otherwise, we will print out an extraneous
+            # blank line between the mi and the data rows.
+            if encoded_labels and set(encoded_labels) != set(['']):
+                encoded_labels.extend([''] * len(columns))
+                writer.writerow(encoded_labels)
 
     def _save(self):
 

diff --git a/pandas/tests/formats/test_format.py b/pandas/tests/formats/test_format.py
@@ -3327,6 +3327,33 @@ def test_to_csv_date_format(self):
         self.assertEqual(df_sec_grouped.mean().to_csv(date_format='%Y-%m-%d'),
                          expected_ymd_sec)
 
+    def test_to_csv_multi_index(self):
+        # see gh-6618
+        df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1],[2]]))
+
+        exp = ",1\n,2\n0,1\n"
+        self.assertEqual(df.to_csv(), exp)
+
+        exp = "1\n2\n1\n"
+        self.assertEqual(df.to_csv(index=False), exp)
+
+        df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1],[2]]),
+                       index=pd.MultiIndex.from_arrays([[1],[2]]))
+
+        exp = ",,1\n,,2\n1,2,1\n"
+        self.assertEqual(df.to_csv(), exp)
+
+        exp = "1\n2\n1\n"
+        self.assertEqual(df.to_csv(index=False), exp)
+
+        df = DataFrame([1], columns=pd.MultiIndex.from_arrays([['foo'],['bar']]))
+
+        exp = ",foo\n,bar\n0,1\n"
+        self.assertEqual(df.to_csv(), exp)
+
+        exp = "foo\nbar\n1\n"
+        self.assertEqual(df.to_csv(index=False), exp)
+
     def test_period(self):
         # GH 12615
         df = pd.DataFrame({'A': pd.period_range('2013-01',

diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py
@@ -587,21 +587,9 @@ def _make_frame(names=None):
             df = _make_frame(True)
             df.to_csv(path, tupleize_cols=False)
 
-            # catch invalid headers
-            with assertRaisesRegexp(CParserError,
-                                    'Passed header=\[0,1,2\] are too many '
-                                    'rows for this multi_index of columns'):
-                read_csv(path, tupleize_cols=False,
-                         header=lrange(3), index_col=0)
-
-            with assertRaisesRegexp(CParserError,
-                                    'Passed header=\[0,1,2,3,4,5,6\], len of '
-                                    '7, but only 6 lines in file'):
-                read_csv(path, tupleize_cols=False,
-                         header=lrange(7), index_col=0)
-
-            for i in [4, 5, 6]:
-                with tm.assertRaises(CParserError):
+            for i in [5, 6, 7]:
+                msg = 'len of {i}, but only 5 lines in file'.format(i=i)
+                with assertRaisesRegexp(CParserError, msg):
                     read_csv(path, tupleize_cols=False,
                              header=lrange(i), index_col=0)