Skip to content

Commit

Permalink
Backport PR pandas-dev#39253: REGR: codecs.open() is always opened in…
Browse files Browse the repository at this point in the history
… text mode
  • Loading branch information
twoertwein authored and meeseeksmachine committed Jan 19, 2021
1 parent c7aaa49 commit f67a8f4
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 4 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ including other versions of pandas.
Fixed regressions
~~~~~~~~~~~~~~~~~
- Fixed regression in :meth:`~DataFrame.to_csv` that created corrupted zip files when there were more rows than ``chunksize`` (:issue:`38714`)
- Fixed regression in :meth:`~DataFrame.to_csv` opening ``codecs.StreamReaderWriter`` in binary mode instead of in text mode (:issue:`39247`)
- Fixed regression in :meth:`read_csv` and other read functions where the encoding error policy (``errors``) did not default to ``"replace"`` when no encoding was specified (:issue:`38989`)
- Fixed regression in :func:`read_excel` with non-rawbyte file handles (:issue:`38788`)
- Fixed regression in :meth:`DataFrame.to_stata` not removing the created file when an error occurred (:issue:`39202`)
Expand Down
12 changes: 8 additions & 4 deletions pandas/io/common.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Common IO api utilities"""

import bz2
import codecs
from collections import abc
import dataclasses
import gzip
Expand Down Expand Up @@ -844,9 +845,12 @@ def file_exists(filepath_or_buffer: FilePathOrBuffer) -> bool:

def _is_binary_mode(handle: FilePathOrBuffer, mode: str) -> bool:
"""Whether the handle is opened in binary mode"""
# classes that expect string but have 'b' in mode
text_classes = (codecs.StreamReaderWriter,)
if isinstance(handle, text_classes):
return False

# classes that expect bytes
binary_classes = [BufferedIOBase, RawIOBase]
binary_classes = (BufferedIOBase, RawIOBase)

return isinstance(handle, tuple(binary_classes)) or "b" in getattr(
handle, "mode", mode
)
return isinstance(handle, binary_classes) or "b" in getattr(handle, "mode", mode)
17 changes: 17 additions & 0 deletions pandas/tests/io/test_common.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Tests for the pandas.io.common functionalities
"""
import codecs
from io import StringIO
import mmap
import os
Expand Down Expand Up @@ -426,3 +427,19 @@ def test_default_errors():
file = Path(path)
file.write_bytes(b"\xe4\na\n1")
tm.assert_frame_equal(pd.read_csv(file, skiprows=[0]), pd.DataFrame({"a": [1]}))


@pytest.mark.parametrize("encoding", [None, "utf-8"])
@pytest.mark.parametrize("format", ["csv", "json"])
def test_codecs_encoding(encoding, format):
    """Round-trip a frame through handles returned by ``codecs.open``."""
    # GH39247
    expected = tm.makeDataFrame()
    with tm.ensure_clean() as path:
        # Write via the ``to_<format>`` method using a codecs handle.
        with codecs.open(path, mode="w", encoding=encoding) as handle:
            write = getattr(expected, f"to_{format}")
            write(handle)
        # Read the same file back through a codecs handle.
        with codecs.open(path, mode="r", encoding=encoding) as handle:
            df = (
                pd.read_csv(handle, index_col=0)
                if format == "csv"
                else pd.read_json(handle)
            )
    tm.assert_frame_equal(expected, df)

0 comments on commit f67a8f4

Please sign in to comment.