Skip to content

Commit

Permalink
use compression=None (again) to avoid inferring compression
Browse files Browse the repository at this point in the history
  • Loading branch information
twoertwein committed Nov 17, 2020
1 parent bb929a6 commit c2f0ff6
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 4 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,7 @@ I/O
- Bug in :func:`read_html` was raising a ``TypeError`` when supplying a ``pathlib.Path`` argument to the ``io`` parameter (:issue:`37705`)
- :meth:`to_excel` and :meth:`to_markdown` support writing to fsspec URLs such as S3 and Google Cloud Storage (:issue:`33987`)
- Bug in :func:`read_fwf` was not skipping blank lines (even with ``skip_blank_lines=True``) (:issue:`37758`)
- Bug in :func:`read_fwf` was inferring compression with ``compression=None``, which was not consistent with the other ``read_*`` functions (:issue:`37909`)

Period
^^^^^^
Expand Down
5 changes: 4 additions & 1 deletion pandas/io/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -468,8 +468,11 @@ def infer_compression(
------
ValueError on invalid compression specified.
"""
if compression is None:
return None

# Infer compression
if compression in ("infer", None):
if compression == "infer":
# Convert all path types (e.g. pathlib.Path) to strings
filepath_or_buffer = stringify_path(filepath_or_buffer)
if not isinstance(filepath_or_buffer, str):
Expand Down
17 changes: 15 additions & 2 deletions pandas/tests/io/parser/test_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@
"""

import os
from pathlib import Path
import zipfile

import pytest

import pandas as pd
from pandas import DataFrame
import pandas._testing as tm


Expand Down Expand Up @@ -130,7 +131,7 @@ def test_compression_utf_encoding(all_parsers, csv_dir_path, utf_value, encoding
path = os.path.join(csv_dir_path, f"utf{utf_value}_ex_small.zip")

result = parser.read_csv(path, encoding=encoding, compression="zip", sep="\t")
expected = pd.DataFrame(
expected = DataFrame(
{
"Country": ["Venezuela", "Venezuela"],
"Twitter": ["Hugo Chávez Frías", "Henrique Capriles R."],
Expand All @@ -149,3 +150,15 @@ def test_invalid_compression(all_parsers, invalid_compression):

with pytest.raises(ValueError, match=msg):
parser.read_csv("test_file.zip", **compress_kwargs)


def test_ignore_compression_extension(all_parsers):
    """
    Check that ``compression=None`` turns off extension-based inference.

    Plain, un-compressed CSV content is stored in a file whose name ends in
    ``.zip``; reading it back with ``compression=None`` must return the
    original frame.
    """
    parser = all_parsers
    df = DataFrame({"a": [0, 1]})
    with tm.ensure_clean("test.csv.zip") as path_zip:
        # to_csv() without a target returns the CSV text, so the content is
        # guaranteed to be un-compressed despite the zip extension; writing
        # bytes avoids any newline translation by the text layer
        Path(path_zip).write_bytes(df.to_csv(index=False).encode("utf-8"))

        tm.assert_frame_equal(parser.read_csv(path_zip, compression=None), df)
2 changes: 1 addition & 1 deletion pandas/tests/io/parser/test_read_fwf.py
Original file line number Diff line number Diff line change
Expand Up @@ -638,7 +638,7 @@ def test_default_delimiter():
tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("infer", [True, False, None])
@pytest.mark.parametrize("infer", [True, False])
def test_fwf_compression(compression_only, infer):
data = """1111111111
2222222222
Expand Down

0 comments on commit c2f0ff6

Please sign in to comment.