diff --git a/llvm/include/llvm/Support/Regex.h b/llvm/include/llvm/Support/Regex.h index ae4b9516f194e..bb7a8009b6bd0 100644 --- a/llvm/include/llvm/Support/Regex.h +++ b/llvm/include/llvm/Support/Regex.h @@ -85,8 +85,9 @@ namespace llvm { std::string *Error = nullptr) const; /// sub - Return the result of replacing the first match of the regex in - /// \p String with the \p Repl string. Backreferences like "\0" in the - /// replacement string are replaced with the appropriate match substring. + /// \p String with the \p Repl string. Backreferences like "\0" and "\g<1>" + /// in the replacement string are replaced with the appropriate match + /// substring. /// /// Note that the replacement string has backslash escaping performed on /// it. Invalid backreferences are ignored (replaced by empty strings). diff --git a/llvm/lib/Support/Regex.cpp b/llvm/lib/Support/Regex.cpp index dfbd373e4a980..8fa71a749cc8e 100644 --- a/llvm/lib/Support/Regex.cpp +++ b/llvm/lib/Support/Regex.cpp @@ -163,6 +163,25 @@ std::string Regex::sub(StringRef Repl, StringRef String, // FIXME: We should have a StringExtras function for mapping C99 escapes. switch (Repl[0]) { + + // Backreference with the "\g" syntax + case 'g': + if (Repl.size() >= 4 && Repl[1] == '<') { + size_t End = Repl.find('>'); + StringRef Ref = Repl.slice(2, End); + unsigned RefValue; + if (End != StringRef::npos && !Ref.getAsInteger(10, RefValue)) { + Repl = Repl.substr(End + 1); + if (RefValue < Matches.size()) + Res += Matches[RefValue]; + else if (Error && Error->empty()) + *Error = + ("invalid backreference string 'g<" + Twine(Ref) + ">'").str(); + break; + } + } + [[fallthrough]]; + // Treat all unrecognized characters as self-quoting. default: Res += Repl[0]; diff --git a/llvm/unittests/Support/RegexTest.cpp b/llvm/unittests/Support/RegexTest.cpp index 78f37cdbd1ef8..e3c721b466c6c 100644 --- a/llvm/unittests/Support/RegexTest.cpp +++ b/llvm/unittests/Support/RegexTest.cpp @@ -127,6 +127,34 @@ TEST_F(RegexTest, Substitution) { EXPECT_EQ("aber", Regex("a[0-9]+b").sub("a\\100b", "a1234ber", &Error)); EXPECT_EQ(Error, "invalid backreference string '100'"); + + EXPECT_EQ("012345", Regex("a([0-9]+).*").sub("0\\g<1>5", "a1234ber", &Error)); + EXPECT_EQ("", Error); + + EXPECT_EQ("0a1234ber5", + Regex("a([0-9]+).*").sub("0\\g<0>5", "a1234ber", &Error)); + EXPECT_EQ("", Error); + + EXPECT_EQ("0A5", Regex("a(.)(.)(.)(.)(.)(.)(.)(.)(.)(.).*") + .sub("0\\g<10>5", "a123456789Aber", &Error)); + EXPECT_EQ("", Error); + + EXPECT_EQ("0g<-1>5", + Regex("a([0-9]+).*").sub("0\\g<-1>5", "a1234ber", &Error)); + EXPECT_EQ("", Error); + + EXPECT_EQ("0g<15", Regex("a([0-9]+).*").sub("0\\g<15", "a1234ber", &Error)); + EXPECT_EQ("", Error); + + EXPECT_EQ("0g<>15", Regex("a([0-9]+).*").sub("0\\g<>15", "a1234ber", &Error)); + EXPECT_EQ("", Error); + + EXPECT_EQ("0g<3e>1", + Regex("a([0-9]+).*").sub("0\\g<3e>1", "a1234ber", &Error)); + EXPECT_EQ("", Error); + + EXPECT_EQ("aber", Regex("a([0-9]+)b").sub("a\\g<100>b", "a1234ber", &Error)); + EXPECT_EQ(Error, "invalid backreference string 'g<100>'"); } TEST_F(RegexTest, IsLiteralERE) {