Skip to content

Commit

Permalink
Backport PR #52195 on branch 2.0.x (WARN: Only warn about inconsistent parsing if there are multiple non-null elements) (#52242)
Browse files Browse the repository at this point in the history

Backport PR #52195: WARN: Only warn about inconsistent parsing if there are multiple non-null elements

Co-authored-by: Marco Edward Gorelli <33491632+MarcoGorelli@users.noreply.github.com>
  • Loading branch information
meeseeksmachine and MarcoGorelli committed Mar 27, 2023
1 parent 454e820 commit 2c21af5
Show file tree
Hide file tree
Showing 5 changed files with 127 additions and 158 deletions.
17 changes: 10 additions & 7 deletions pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,13 +139,16 @@ def _guess_datetime_format_for_array(arr, dayfirst: bool | None = False) -> str
)
if guessed_format is not None:
return guessed_format
warnings.warn(
"Could not infer format, so each element will be parsed "
"individually, falling back to `dateutil`. To ensure parsing is "
"consistent and as-expected, please specify a format.",
UserWarning,
stacklevel=find_stack_level(),
)
# If there are multiple non-null elements, warn about
# how parsing might not be consistent
if tslib.first_non_null(arr[first_non_null + 1 :]) != -1:
warnings.warn(
"Could not infer format, so each element will be parsed "
"individually, falling back to `dateutil`. To ensure parsing is "
"consistent and as-expected, please specify a format.",
UserWarning,
stacklevel=find_stack_level(),
)
return None


Expand Down
16 changes: 9 additions & 7 deletions pandas/tests/io/parser/test_parse_dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -1252,13 +1252,15 @@ def test_bad_date_parse(all_parsers, cache_dates, value):
parser = all_parsers
s = StringIO((f"{value},\n") * 50000)

if parser.engine == "pyarrow":
if parser.engine == "pyarrow" and not cache_dates:
# None in input gets converted to 'None', for which
# pandas tries to guess the datetime format, triggering
# the warning. TODO: parse dates directly in pyarrow, see
# https://github.com/pandas-dev/pandas/issues/48017
warn = UserWarning
else:
# Note: warning is not raised if 'cache_dates', because here there is only a
# single unique date and hence no risk of inconsistent parsing.
warn = None
parser.read_csv_check_warnings(
warn,
Expand All @@ -1285,6 +1287,10 @@ def test_bad_date_parse_with_warning(all_parsers, cache_dates, value):
# TODO: parse dates directly in pyarrow, see
# https://github.com/pandas-dev/pandas/issues/48017
warn = None
elif cache_dates:
# Note: warning is not raised if 'cache_dates', because here there is only a
# single unique date and hence no risk of inconsistent parsing.
warn = None
else:
warn = UserWarning
parser.read_csv_check_warnings(
Expand Down Expand Up @@ -1737,9 +1743,7 @@ def test_parse_timezone(all_parsers):
def test_invalid_parse_delimited_date(all_parsers, date_string):
parser = all_parsers
expected = DataFrame({0: [date_string]}, dtype="object")
result = parser.read_csv_check_warnings(
UserWarning,
"Could not infer format",
result = parser.read_csv(
StringIO(date_string),
header=None,
parse_dates=[0],
Expand Down Expand Up @@ -2063,9 +2067,7 @@ def test_infer_first_column_as_index(all_parsers):
# GH#11019
parser = all_parsers
data = "a,b,c\n1970-01-01,2,3,4"
result = parser.read_csv_check_warnings(
UserWarning,
"Could not infer format",
result = parser.read_csv(
StringIO(data),
parse_dates=["a"],
)
Expand Down
4 changes: 1 addition & 3 deletions pandas/tests/io/parser/usecols/test_parse_dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,7 @@ def test_usecols_with_parse_dates4(all_parsers):
}
expected = DataFrame(cols, columns=["a_b"] + list("cdefghij"))

result = parser.read_csv_check_warnings(
UserWarning,
"Could not infer format",
result = parser.read_csv(
StringIO(data),
usecols=usecols,
parse_dates=parse_dates,
Expand Down
3 changes: 1 addition & 2 deletions pandas/tests/test_algos.py
Original file line number Diff line number Diff line change
Expand Up @@ -1231,8 +1231,7 @@ def test_value_counts_datetime_outofbounds(self):
tm.assert_series_equal(res, exp)

# GH 12424
with tm.assert_produces_warning(UserWarning, match="Could not infer format"):
res = to_datetime(Series(["2362-01-01", np.nan]), errors="ignore")
res = to_datetime(Series(["2362-01-01", np.nan]), errors="ignore")
exp = Series(["2362-01-01", np.nan], dtype=object)
tm.assert_series_equal(res, exp)

Expand Down

0 comments on commit 2c21af5

Please sign in to comment.