use compression=None (again) to avoid inferring compression

pandas-dev · Nov 17, 2020 · c2f0ff6 · c2f0ff6
1 parent bb929a6
commit c2f0ff6
Show file tree

Hide file tree

Showing 4 changed files with 21 additions and 4 deletions.
diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
@@ -630,6 +630,7 @@ I/O
 - Bug in :func:`read_html` was raising a ``TypeError`` when supplying a ``pathlib.Path`` argument to the ``io`` parameter (:issue:`37705`)
 - :meth:`to_excel` and :meth:`to_markdown` support writing to fsspec URLs such as S3 and Google Cloud Storage (:issue:`33987`)
 - Bug in :meth:`read_fw` was not skipping blank lines (even with ``skip_blank_lines=True``) (:issue:`37758`)
+- Bug in :func:`read_fwf` was inferring compression with ``compression=None``, which was not consistent with the other ``read_*`` functions (:issue:`37909`)
 
 Period
 ^^^^^^

diff --git a/pandas/io/common.py b/pandas/io/common.py
@@ -468,8 +468,11 @@ def infer_compression(
     ------
     ValueError on invalid compression specified.
     """
+    if compression is None:
+        return None
+
     # Infer compression
-    if compression in ("infer", None):
+    if compression == "infer":
         # Convert all path types (e.g. pathlib.Path) to strings
         filepath_or_buffer = stringify_path(filepath_or_buffer)
         if not isinstance(filepath_or_buffer, str):

diff --git a/pandas/tests/io/parser/test_compression.py b/pandas/tests/io/parser/test_compression.py
@@ -4,11 +4,12 @@
 """
 
 import os
+from pathlib import Path
 import zipfile
 
 import pytest
 
-import pandas as pd
+from pandas import DataFrame
 import pandas._testing as tm
 
 
@@ -130,7 +131,7 @@ def test_compression_utf_encoding(all_parsers, csv_dir_path, utf_value, encoding
     path = os.path.join(csv_dir_path, f"utf{utf_value}_ex_small.zip")
 
     result = parser.read_csv(path, encoding=encoding, compression="zip", sep="\t")
-    expected = pd.DataFrame(
+    expected = DataFrame(
         {
             "Country": ["Venezuela", "Venezuela"],
             "Twitter": ["Hugo Chávez Frías", "Henrique Capriles R."],
@@ -149,3 +150,15 @@ def test_invalid_compression(all_parsers, invalid_compression):
 
     with pytest.raises(ValueError, match=msg):
         parser.read_csv("test_file.zip", **compress_kwargs)
+
+
+def test_ignore_compression_extension(all_parsers):
+    parser = all_parsers
+    df = DataFrame({"a": [0, 1]})
+    with tm.ensure_clean("test.csv") as path_csv:
+        with tm.ensure_clean("test.csv.zip") as path_zip:
+            # make sure to create un-compressed file with zip extension
+            df.to_csv(path_csv, index=False)
+            Path(path_zip).write_text(Path(path_csv).read_text())
+
+            tm.assert_frame_equal(parser.read_csv(path_zip, compression=None), df)
diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py
@@ -638,7 +638,7 @@ def test_default_delimiter():
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.parametrize("infer", [True, False, None])
+@pytest.mark.parametrize("infer", [True, False])
 def test_fwf_compression(compression_only, infer):
     data = """1111111111
     2222222222