Skip to content

Commit

Permalink
FIX-#5097: Stop using deprecated mangle_dupe_cols. (#5104)
Browse files Browse the repository at this point in the history
Signed-off-by: mvashishtha <mahesh@ponder.io>
  • Loading branch information
mvashishtha committed Oct 10, 2022
1 parent 9013f54 commit d005429
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 3 deletions.
2 changes: 0 additions & 2 deletions modin/_compat/core/latest/base_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@ def read_csv(
usecols=None,
squeeze=False,
prefix=lib.no_default,
mangle_dupe_cols=True,
dtype=None,
engine=None,
converters=None,
Expand Down Expand Up @@ -126,7 +125,6 @@ def read_csv(
usecols=usecols,
squeeze=squeeze,
prefix=prefix,
mangle_dupe_cols=mangle_dupe_cols,
dtype=dtype,
engine=engine,
converters=converters,
Expand Down
9 changes: 9 additions & 0 deletions modin/_compat/pandas_api/latest/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,9 @@ def read_csv(
val.name for val in inspect.signature(pandas.read_csv).parameters.values()
}
_, _, _, f_locals = inspect.getargvalues(inspect.currentframe())
# mangle_dupe_cols has no effect starting in pandas 1.5. Exclude it from
# kwargs so pandas doesn't spuriously warn people not to use it.
f_locals.pop("mangle_dupe_cols", None)
kwargs = {k: v for k, v in f_locals.items() if k in _pd_read_csv_signature}
return _read(**kwargs)

Expand Down Expand Up @@ -264,6 +267,9 @@ def read_table(
_, _, _, f_locals = inspect.getargvalues(inspect.currentframe())
if f_locals.get("sep", sep) is False or f_locals.get("sep", sep) is no_default:
f_locals["sep"] = "\t"
# mangle_dupe_cols has no effect starting in pandas 1.5. Exclude it from
# kwargs so pandas doesn't spuriously warn people not to use it.
f_locals.pop("mangle_dupe_cols", None)
kwargs = {k: v for k, v in f_locals.items() if k in _pd_read_table_signature}
return _read(**kwargs)

Expand Down Expand Up @@ -380,6 +386,9 @@ def read_excel(
storage_options: "StorageOptions" = None,
) -> "DataFrame | dict[IntStrT, DataFrame]":
_, _, _, kwargs = inspect.getargvalues(inspect.currentframe())
# mangle_dupe_cols has no effect starting in pandas 1.5. Exclude it from
# kwargs so pandas doesn't spuriously warn people not to use it.
kwargs.pop("mangle_dupe_cols", None)

Engine.subscribe(_update_engine)
from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher
Expand Down
26 changes: 25 additions & 1 deletion modin/pandas/test/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import contextlib
import pytest
import numpy as np
from packaging import version
Expand Down Expand Up @@ -117,6 +118,12 @@
}


@contextlib.contextmanager
def _nullcontext():
"""Replacement for contextlib.nullcontext missing in older Python."""
yield


def assert_files_eq(path1, path2):
with open(path1, "rb") as file1, open(path2, "rb") as file2:
file1_content = file1.read()
Expand Down Expand Up @@ -490,12 +497,29 @@ def test_read_csv_mangle_dupe_cols(self):
pytest.xfail(
"processing of duplicated columns in HDK storage format is not supported yet - issue #3080"
)
with ensure_clean() as unique_filename:
with ensure_clean() as unique_filename, (
pytest.warns(
FutureWarning, match="'mangle_dupe_cols' keyword is deprecated"
)
if PandasCompatVersion.CURRENT == PandasCompatVersion.LATEST
else _nullcontext()
):
str_non_unique_cols = "col,col,col,col\n5, 6, 7, 8\n9, 10, 11, 12\n"
eval_io_from_str(
str_non_unique_cols, unique_filename, mangle_dupe_cols=True
)

# The same filterwarnings entry in setup.cfg does not seem to take effect,
# so the filter is attached to this test directly.
@pytest.mark.filterwarnings(
    "error:.*'mangle_dupe_cols' keyword is deprecated:FutureWarning"
)
def test_read_csv_does_not_warn_mangle_dupe_cols_kwarg(self):
    """
    Run ``eval_io_from_str`` on a small CSV without passing ``mangle_dupe_cols``.

    The ``filterwarnings`` mark turns the ``mangle_dupe_cols`` deprecation
    ``FutureWarning`` into an error, so the test fails if that warning is
    emitted.
    """
    csv_text = "a,b,c\n1,2,3\n"
    with ensure_clean() as csv_path:
        eval_io_from_str(csv_text, csv_path)

# NA and Missing Data Handling tests
@pytest.mark.parametrize("na_values", ["custom_nan", "73"])
@pytest.mark.parametrize("keep_default_na", [True, False])
Expand Down

0 comments on commit d005429

Please sign in to comment.