Skip to content

Commit

Permalink
Deprecate prefix argument in read_csv and read_table (#44713)
Browse files Browse the repository at this point in the history
  • Loading branch information
phofl committed Dec 3, 2021
1 parent 1895062 commit 8eb0b1b
Show file tree
Hide file tree
Showing 7 changed files with 84 additions and 26 deletions.
12 changes: 12 additions & 0 deletions doc/source/user_guide/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,18 @@ squeeze : boolean, default ``False``
the data.
prefix : str, default ``None``
Prefix to add to column numbers when no header, e.g. 'X' for X0, X1, ...

.. deprecated:: 1.4.0
Use a list comprehension on the DataFrame's columns after calling ``read_csv``.

.. ipython:: python
data = "col1,col2,col3\na,b,1"
df = pd.read_csv(StringIO(data))
df.columns = [f"pre_{col}" for col in df.columns]
df
mangle_dupe_cols : boolean, default ``True``
Duplicate columns will be specified as 'X', 'X.1'...'X.N', rather than 'X'...'X'.
Passing in ``False`` will cause data to be overwritten if there are duplicate
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -517,6 +517,7 @@ Other Deprecations
- Deprecated casting behavior when setting timezone-aware value(s) into a timezone-aware :class:`Series` or :class:`DataFrame` column when the timezones do not match. Previously this cast to object dtype. In a future version, the values being inserted will be converted to the series or column's existing timezone (:issue:`37605`)
- Deprecated casting behavior when passing an item with mismatched-timezone to :meth:`DatetimeIndex.insert`, :meth:`DatetimeIndex.putmask`, :meth:`DatetimeIndex.where`, :meth:`DatetimeIndex.fillna`, :meth:`Series.mask`, :meth:`Series.where`, :meth:`Series.fillna`, :meth:`Series.shift`, :meth:`Series.replace`, :meth:`Series.reindex` (and :class:`DataFrame` column analogues). In the past this has cast to object dtype. In a future version, these will cast the passed item to the index or series's timezone (:issue:`37605`)
- Deprecated the 'errors' keyword argument in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, and :meth:`DataFrame.mask`; in a future version the argument will be removed (:issue:`44294`)
- Deprecated the ``prefix`` keyword argument in :func:`read_csv` and :func:`read_table`; in a future version the argument will be removed (:issue:`43396`)
- Deprecated :meth:`PeriodIndex.astype` to ``datetime64[ns]`` or ``DatetimeTZDtype``, use ``obj.to_timestamp(how).tz_localize(dtype.tz)`` instead (:issue:`44398`)
- Deprecated passing non boolean argument to sort in :func:`concat` (:issue:`41518`)
- Deprecated passing arguments as positional for :func:`read_fwf` other than ``filepath_or_buffer`` (:issue:`41485`):
Expand Down
6 changes: 6 additions & 0 deletions pandas/io/parsers/readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,9 @@
the data.
prefix : str, optional
Prefix to add to column numbers when no header, e.g. 'X' for X0, X1, ...
.. deprecated:: 1.4.0
Use a list comprehension on the DataFrame's columns after calling ``read_csv``.
mangle_dupe_cols : bool, default True
Duplicate columns will be specified as 'X', 'X.1', ...'X.N', rather than
'X'...'X'. Passing in False will cause data to be overwritten if there
Expand Down Expand Up @@ -461,6 +464,9 @@ class _DeprecationConfig(NamedTuple):
"squeeze": _DeprecationConfig(
None, 'Append .squeeze("columns") to the call to squeeze.'
),
"prefix": _DeprecationConfig(
None, "Use a list comprehension on the column names in the future."
),
}


Expand Down
13 changes: 11 additions & 2 deletions pandas/tests/io/parser/common/test_common_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -823,7 +823,8 @@ def test_names_and_prefix_not_None_raises(all_parsers, func):
parser = all_parsers
msg = "Specified named and prefix; you can only specify one."
with pytest.raises(ValueError, match=msg):
getattr(parser, func)(f, names=["a", "b"], prefix="x")
with tm.assert_produces_warning(FutureWarning):
getattr(parser, func)(f, names=["a", "b"], prefix="x")


@pytest.mark.parametrize("func", ["read_csv", "read_table"])
Expand All @@ -833,7 +834,15 @@ def test_names_and_prefix_explicit_None(all_parsers, names, prefix, func):
f = StringIO("a,b\n1,2")
expected = DataFrame({"x0": ["a", "1"], "x1": ["b", "2"]})
parser = all_parsers
result = getattr(parser, func)(f, names=names, sep=",", prefix=prefix, header=None)
if prefix is not None:
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = getattr(parser, func)(
f, names=names, sep=",", prefix=prefix, header=None
)
else:
result = getattr(parser, func)(
f, names=names, sep=",", prefix=prefix, header=None
)
tm.assert_frame_equal(result, expected)


Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/io/parser/common/test_read_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,8 @@ def test_read_csv_raises_on_header_prefix(all_parsers):
s = StringIO("0,1\n2,3")

with pytest.raises(ValueError, match=msg):
parser.read_csv(s, header=0, prefix="_X")
with tm.assert_produces_warning(FutureWarning):
parser.read_csv(s, header=0, prefix="_X")


def test_unexpected_keyword_parameter_exception(all_parsers):
Expand Down
9 changes: 7 additions & 2 deletions pandas/tests/io/parser/test_header.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,8 @@ def test_no_header_prefix(all_parsers):
6,7,8,9,10
11,12,13,14,15
"""
result = parser.read_csv(StringIO(data), prefix="Field", header=None)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = parser.read_csv(StringIO(data), prefix="Field", header=None)
expected = DataFrame(
[[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]],
columns=["Field0", "Field1", "Field2", "Field3", "Field4"],
Expand Down Expand Up @@ -457,7 +458,11 @@ def test_no_header(all_parsers, kwargs, names):
expected = DataFrame(
[[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]], columns=names
)
result = parser.read_csv(StringIO(data), header=None, **kwargs)
if "prefix" in kwargs.keys():
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = parser.read_csv(StringIO(data), header=None, **kwargs)
else:
result = parser.read_csv(StringIO(data), header=None, **kwargs)
tm.assert_frame_equal(result, expected)


Expand Down
66 changes: 45 additions & 21 deletions pandas/tests/io/parser/test_parse_dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,14 +171,21 @@ def date_parser(*date_cols):
"""
return parsing.try_parse_dates(parsing.concat_date_cols(date_cols))

result = parser.read_csv(
kwds = {
"header": None,
"date_parser": date_parser,
"prefix": "X",
"parse_dates": {"actual": [1, 2], "nominal": [1, 3]},
"keep_date_col": keep_date_col,
}
result = parser.read_csv_check_warnings(
FutureWarning,
"The prefix argument has been deprecated "
"and will be removed in a future version. .*\n\n",
StringIO(data),
header=None,
date_parser=date_parser,
prefix="X",
parse_dates={"actual": [1, 2], "nominal": [1, 3]},
keep_date_col=keep_date_col,
**kwds,
)

expected = DataFrame(
[
[
Expand Down Expand Up @@ -309,13 +316,20 @@ def test_multiple_date_col(all_parsers, keep_date_col):
KORD,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000
"""
parser = all_parsers
result = parser.read_csv(
kwds = {
"header": None,
"prefix": "X",
"parse_dates": [[1, 2], [1, 3]],
"keep_date_col": keep_date_col,
}
result = parser.read_csv_check_warnings(
FutureWarning,
"The prefix argument has been deprecated "
"and will be removed in a future version. .*\n\n",
StringIO(data),
header=None,
prefix="X",
parse_dates=[[1, 2], [1, 3]],
keep_date_col=keep_date_col,
**kwds,
)

expected = DataFrame(
[
[
Expand Down Expand Up @@ -427,8 +441,13 @@ def test_date_col_as_index_col(all_parsers):
KORD,19990127 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000
"""
parser = all_parsers
result = parser.read_csv(
StringIO(data), header=None, prefix="X", parse_dates=[1], index_col=1
kwds = {"header": None, "prefix": "X", "parse_dates": [1], "index_col": 1}
result = parser.read_csv_check_warnings(
FutureWarning,
"The prefix argument has been deprecated "
"and will be removed in a future version. .*\n\n",
StringIO(data),
**kwds,
)

index = Index(
Expand Down Expand Up @@ -477,14 +496,19 @@ def test_multiple_date_cols_int_cast(all_parsers, date_parser, warning):
parse_dates = {"actual": [1, 2], "nominal": [1, 3]}
parser = all_parsers

with tm.assert_produces_warning(warning, check_stacklevel=False):
result = parser.read_csv(
StringIO(data),
header=None,
date_parser=date_parser,
parse_dates=parse_dates,
prefix="X",
)
kwds = {
"header": None,
"prefix": "X",
"parse_dates": parse_dates,
"date_parser": date_parser,
}
result = parser.read_csv_check_warnings(
FutureWarning,
"The prefix argument has been deprecated "
"and will be removed in a future version. .*\n\n",
StringIO(data),
**kwds,
)

expected = DataFrame(
[
Expand Down

0 comments on commit 8eb0b1b

Please sign in to comment.