Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.3.3.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ become the default string dtype in pandas 3.0. See

Bug fixes
^^^^^^^^^
- Fix bug in :meth:`Series.str.replace` where using named capture groups (e.g., ``\g<name>``) in the replacement string with the Arrow-backed dtype would raise an error (:issue:`57636`)
- Fix regression in :meth:`~Series.str.contains`, :meth:`~Series.str.match` and :meth:`~Series.str.fullmatch`
  with a compiled regex and custom flags (:issue:`62240`)

Expand Down
14 changes: 12 additions & 2 deletions pandas/core/arrays/_arrow_string_mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,10 +168,20 @@ def _str_replace(
flags: int = 0,
regex: bool = True,
) -> Self:
if isinstance(pat, re.Pattern) or callable(repl) or not case or flags:
if (
isinstance(pat, re.Pattern)
or callable(repl)
or not case
or flags
or (
isinstance(repl, str)
and (r"\g<" in repl or re.search(r"\\\d", repl) is not None)
)
):
raise NotImplementedError(
"replace is not supported with a re.Pattern, callable repl, "
"case=False, or flags!=0"
"case=False, flags!=0, or when the replacement string contains "
"named group references (\\g<...>, \\d+)"
)

func = pc.replace_substring_regex if regex else pc.replace_substring
Expand Down
12 changes: 11 additions & 1 deletion pandas/core/arrays/string_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,17 @@ def _str_replace(
flags: int = 0,
regex: bool = True,
):
if isinstance(pat, re.Pattern) or callable(repl) or not case or flags:
if (
isinstance(pat, re.Pattern)
or callable(repl)
or not case
or flags
or ( # substitution contains a named group pattern
# https://docs.python.org/3/library/re.html
isinstance(repl, str)
and (r"\g<" in repl or re.search(r"\\\d", repl) is not None)
)
):
return super()._str_replace(pat, repl, n, case, flags, regex)

return ArrowStringArrayMixin._str_replace(
Expand Down
62 changes: 62 additions & 0 deletions pandas/tests/strings/test_find_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -594,6 +594,68 @@ def test_replace_callable_raises(any_string_dtype, repl):
values.str.replace("a", repl, regex=True)


@pytest.mark.parametrize(
    "repl, expected_list",
    [
        pytest.param(
            r"\g<three> \g<two> \g<one>",
            ["Three Two One", "Baz Bar Foo"],
            id="named_groups_full_swap",
        ),
        pytest.param(
            r"\g<3> \g<2> \g<1>",
            ["Three Two One", "Baz Bar Foo"],
            id="numbered_groups_full_swap",
        ),
        pytest.param(
            r"\g<2>0",
            ["Two0", "Bar0"],
            id="single_group_with_literal",
        ),
        pytest.param(
            r"\g<2>0 \1",
            ["Two0 One", "Bar0 Foo"],
            id="mixed_group_reference_with_literal",
        ),
    ],
)
@pytest.mark.parametrize("use_compile", [True, False])
def test_replace_named_groups_regex_swap(
    any_string_dtype, use_compile, repl, expected_list
):
    # GH#57636
    # Group references in the replacement string (by name, by number, or
    # followed by a literal digit) must be expanded, whether the pattern is
    # passed as a plain string or as a compiled regex.
    raw_pattern = r"(?P<one>\w+) (?P<two>\w+) (?P<three>\w+)"
    pat = re.compile(raw_pattern) if use_compile else raw_pattern

    values = Series(["One Two Three", "Foo Bar Baz"], dtype=any_string_dtype)
    expected = Series(expected_list, dtype=any_string_dtype)

    result = values.str.replace(pat, repl, regex=True)
    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("repl", [r"\g<20>", r"\20"])
@pytest.mark.parametrize("use_compile", [True, False])
def test_replace_named_groups_regex_swap_expected_fail(
    any_string_dtype, repl, use_compile
):
    # GH#57636
    # The pattern defines only three groups, so a replacement that refers to
    # group 20 is expected to raise re.error.
    raw_pattern = r"(?P<one>\w+) (?P<two>\w+) (?P<three>\w+)"
    pat = re.compile(raw_pattern) if use_compile else raw_pattern
    values = Series(["One Two Three", "Foo Bar Baz"], dtype=any_string_dtype)

    with pytest.raises(re.error, match="invalid group reference"):
        values.str.replace(pat, repl, regex=True)


def test_replace_callable_named_groups(any_string_dtype):
# test regex named groups
ser = Series(["Foo Bar Baz", np.nan], dtype=any_string_dtype)
Expand Down
Loading