FIX-#5097: Stop using deprecated mangle_dupe_cols. (#5104)

Signed-off-by: mvashishtha <mahesh@ponder.io>
modin-project · Oct 10, 2022 · d005429 · d005429
1 parent 9013f54
commit d005429
Show file tree

Hide file tree

Showing 3 changed files with 34 additions and 3 deletions.
diff --git a/modin/_compat/core/latest/base_io.py b/modin/_compat/core/latest/base_io.py
@@ -72,7 +72,6 @@ def read_csv(
         usecols=None,
         squeeze=False,
         prefix=lib.no_default,
-        mangle_dupe_cols=True,
         dtype=None,
         engine=None,
         converters=None,
@@ -126,7 +125,6 @@ def read_csv(
             usecols=usecols,
             squeeze=squeeze,
             prefix=prefix,
-            mangle_dupe_cols=mangle_dupe_cols,
             dtype=dtype,
             engine=engine,
             converters=converters,

diff --git a/modin/_compat/pandas_api/latest/io.py b/modin/_compat/pandas_api/latest/io.py
@@ -149,6 +149,9 @@ def read_csv(
         val.name for val in inspect.signature(pandas.read_csv).parameters.values()
     }
     _, _, _, f_locals = inspect.getargvalues(inspect.currentframe())
+    # mangle_dupe_cols has no effect starting in pandas 1.5. Exclude it from
+    # kwargs so pandas doesn't spuriously warn people not to use it.
+    f_locals.pop("mangle_dupe_cols", None)
     kwargs = {k: v for k, v in f_locals.items() if k in _pd_read_csv_signature}
     return _read(**kwargs)
 
@@ -264,6 +267,9 @@ def read_table(
     _, _, _, f_locals = inspect.getargvalues(inspect.currentframe())
     if f_locals.get("sep", sep) is False or f_locals.get("sep", sep) is no_default:
         f_locals["sep"] = "\t"
+    # mangle_dupe_cols has no effect starting in pandas 1.5. Exclude it from
+    # kwargs so pandas doesn't spuriously warn people not to use it.
+    f_locals.pop("mangle_dupe_cols", None)
     kwargs = {k: v for k, v in f_locals.items() if k in _pd_read_table_signature}
     return _read(**kwargs)
 
@@ -380,6 +386,9 @@ def read_excel(
     storage_options: "StorageOptions" = None,
 ) -> "DataFrame | dict[IntStrT, DataFrame]":
     _, _, _, kwargs = inspect.getargvalues(inspect.currentframe())
+    # mangle_dupe_cols has no effect starting in pandas 1.5. Exclude it from
+    # kwargs so pandas doesn't spuriously warn people not to use it.
+    kwargs.pop("mangle_dupe_cols", None)
 
     Engine.subscribe(_update_engine)
     from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

diff --git a/modin/pandas/test/test_io.py b/modin/pandas/test/test_io.py
@@ -11,6 +11,7 @@
 # ANY KIND, either express or implied. See the License for the specific language
 # governing permissions and limitations under the License.
 
+import contextlib
 import pytest
 import numpy as np
 from packaging import version
@@ -117,6 +118,12 @@
 }
 
 
+@contextlib.contextmanager
+def _nullcontext():
+    """Replacement for contextlib.nullcontext missing in older Python."""
+    yield
+
+
 def assert_files_eq(path1, path2):
     with open(path1, "rb") as file1, open(path2, "rb") as file2:
         file1_content = file1.read()
@@ -490,12 +497,29 @@ def test_read_csv_mangle_dupe_cols(self):
             pytest.xfail(
                 "processing of duplicated columns in HDK storage format is not supported yet - issue #3080"
             )
-        with ensure_clean() as unique_filename:
+        with ensure_clean() as unique_filename, (
+            pytest.warns(
+                FutureWarning, match="'mangle_dupe_cols' keyword is deprecated"
+            )
+            if PandasCompatVersion.CURRENT == PandasCompatVersion.LATEST
+            else _nullcontext()
+        ):
             str_non_unique_cols = "col,col,col,col\n5, 6, 7, 8\n9, 10, 11, 12\n"
             eval_io_from_str(
                 str_non_unique_cols, unique_filename, mangle_dupe_cols=True
             )
 
+    # Putting this filterwarnings in setup.cfg doesn't seem to catch the error.
+    @pytest.mark.filterwarnings(
+        "error:.*'mangle_dupe_cols' keyword is deprecated:FutureWarning"
+    )
+    def test_read_csv_does_not_warn_mangle_dupe_cols_kwarg(self):
+        with ensure_clean() as unique_filename:
+            eval_io_from_str(
+                "a,b,c\n1,2,3\n",
+                unique_filename,
+            )
+
     # NA and Missing Data Handling tests
     @pytest.mark.parametrize("na_values", ["custom_nan", "73"])
     @pytest.mark.parametrize("keep_default_na", [True, False])