Backport PR #25625: BUG: to_csv line endings with compression (#25663)

pandas-dev · Mar 11, 2019 · 0fa9580 · 0fa9580
1 parent 05fe1dc
commit 0fa9580
Show file tree

Hide file tree

Showing 3 changed files with 14 additions and 1 deletion.
diff --git a/doc/source/whatsnew/v0.24.2.rst b/doc/source/whatsnew/v0.24.2.rst
@@ -32,6 +32,7 @@ Fixed Regressions
 - Fixed regression in creating a period-dtype array from a read-only NumPy array of period objects. (:issue:`25403`)
 - Fixed regression in :class:`Categorical`, where constructing it from a categorical ``Series`` and an explicit ``categories=`` that differed from that in the ``Series`` created an invalid object which could trigger segfaults. (:issue:`25318`)
 - Fixed pip installing from source into an environment without NumPy (:issue:`25193`)
+- Fixed regression in :meth:`DataFrame.to_csv` writing duplicate line endings with gzip compression (:issue:`25311`)
 
 .. _whatsnew_0242.enhancements:
 

diff --git a/pandas/io/common.py b/pandas/io/common.py
@@ -434,7 +434,7 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None,
     if (compat.PY3 and is_text and
             (compression or isinstance(f, need_text_wrapping))):
         from io import TextIOWrapper
-        f = TextIOWrapper(f, encoding=encoding)
+        f = TextIOWrapper(f, encoding=encoding, newline='')
         handles.append(f)
 
     if memory_map and hasattr(f, 'fileno'):

diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py
@@ -1220,3 +1220,15 @@ def test_multi_index_header(self):
                          '1,5,6,7,8']
         expected = tm.convert_rows_list_to_csv_str(expected_rows)
         assert result == expected
+
+    def test_gz_lineend(self):
+        # GH 25311
+        df = pd.DataFrame({'a': [1, 2]})
+        expected_rows = ['a', '1', '2']
+        expected = tm.convert_rows_list_to_csv_str(expected_rows)
+        with ensure_clean('__test_gz_lineend.csv.gz') as path:
+            df.to_csv(path, index=False)
+            with tm.decompress_file(path, compression='gzip') as f:
+                result = f.read().decode('utf-8')
+
+        assert result == expected