-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[NFC][SpecialCaseList] Split Matcher into RegexMatcher and GlobMatcher #162303
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
vitalybuka
merged 7 commits into
main
from
users/vitalybuka/spr/nfcspecialcaselist-split-matcher-into-regexmatcher-and-globmatcher
Oct 8, 2025
Merged
[NFC][SpecialCaseList] Split Matcher into RegexMatcher and GlobMatcher #162303
vitalybuka
merged 7 commits into
main
from
users/vitalybuka/spr/nfcspecialcaselist-split-matcher-into-regexmatcher-and-globmatcher
Oct 8, 2025
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Created using spr 1.3.6 [skip ci]
Created using spr 1.3.6
@llvm/pr-subscribers-llvm-support Author: Vitaly Buka (vitalybuka) Changes: I am going to optimize the latter (GlobMatcher), but keep the former (RegexMatcher) intact. Full diff: https://github.com/llvm/llvm-project/pull/162303.diff 2 Files Affected:
diff --git a/llvm/include/llvm/Support/SpecialCaseList.h b/llvm/include/llvm/Support/SpecialCaseList.h
index e824cd44f6297..eff36569fcab7 100644
--- a/llvm/include/llvm/Support/SpecialCaseList.h
+++ b/llvm/include/llvm/Support/SpecialCaseList.h
@@ -19,6 +19,7 @@
#include <memory>
#include <string>
#include <utility>
+#include <variant>
#include <vector>
namespace llvm {
@@ -118,18 +119,20 @@ class SpecialCaseList {
SpecialCaseList(SpecialCaseList const &) = delete;
SpecialCaseList &operator=(SpecialCaseList const &) = delete;
- /// Represents a set of globs and their line numbers
- class Matcher {
+ // Lagacy v1 matcher.
+ class RegexMatcher {
+ public:
+ LLVM_ABI unsigned match(StringRef Query) const;
+ LLVM_ABI Error insert(StringRef Pattern, unsigned LineNumber);
+ std::vector<std::pair<std::unique_ptr<Regex>, unsigned>> RegExes;
+ };
+
+ class GlobMatcher {
public:
// Returns the line number in the source file that this query matches to.
// Returns zero if no match is found.
LLVM_ABI unsigned match(StringRef Query) const;
-
- private:
- friend class SpecialCaseList;
- LLVM_ABI Error insert(StringRef Pattern, unsigned LineNumber,
- bool UseRegex);
-
+ LLVM_ABI Error insert(StringRef Pattern, unsigned LineNumber);
struct Glob {
std::string Name;
unsigned LineNo;
@@ -140,15 +143,29 @@ class SpecialCaseList {
Glob() = default;
};
- std::vector<std::unique_ptr<Matcher::Glob>> Globs;
- std::vector<std::pair<std::unique_ptr<Regex>, unsigned>> RegExes;
+ std::vector<std::unique_ptr<GlobMatcher::Glob>> Globs;
+ };
+
+ /// Represents a set of globs and their line numbers
+ class Matcher {
+ public:
+ LLVM_ABI explicit Matcher(bool UseGlobs);
+ // Returns the line number in the source file that this query matches to.
+ // Returns zero if no match is found.
+ LLVM_ABI unsigned match(StringRef Query) const;
+
+ private:
+ friend class SpecialCaseList;
+ LLVM_ABI Error insert(StringRef Pattern, unsigned LineNumber);
+
+ std::variant<RegexMatcher, GlobMatcher> M;
};
using SectionEntries = StringMap<StringMap<Matcher>>;
struct Section {
- Section(StringRef Str, unsigned FileIdx)
- : SectionStr(Str), FileIdx(FileIdx) {};
+ Section(StringRef Str, unsigned FileIdx, bool UseGlobs)
+ : SectionMatcher(UseGlobs), SectionStr(Str), FileIdx(FileIdx) {};
Section(Section &&) = default;
@@ -162,7 +179,7 @@ class SpecialCaseList {
LLVM_ABI Expected<Section *> addSection(StringRef SectionStr,
unsigned FileIdx, unsigned LineNo,
- bool UseGlobs = true);
+ bool UseGlobs);
/// Parses just-constructed SpecialCaseList entries from a memory buffer.
LLVM_ABI bool parse(unsigned FileIdx, const MemoryBuffer *MB,
diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp
index 4b038850b62ca..71f7b9aa65796 100644
--- a/llvm/lib/Support/SpecialCaseList.cpp
+++ b/llvm/lib/Support/SpecialCaseList.cpp
@@ -25,36 +25,48 @@
namespace llvm {
-Error SpecialCaseList::Matcher::insert(StringRef Pattern, unsigned LineNumber,
- bool UseGlobs) {
- if (Pattern.empty())
+Error SpecialCaseList::RegexMatcher::insert(StringRef Pattern,
+ unsigned LineNumber) {
+ if (Pattern.empty()) {
return createStringError(errc::invalid_argument,
- Twine("Supplied ") +
- (UseGlobs ? "glob" : "regex") + " was blank");
-
- if (!UseGlobs) {
- // Replace * with .*
- auto Regexp = Pattern.str();
- for (size_t pos = 0; (pos = Regexp.find('*', pos)) != std::string::npos;
- pos += strlen(".*")) {
- Regexp.replace(pos, strlen("*"), ".*");
- }
+ "Supplied regex was blank");
+ }
+
+ // Replace * with .*
+ auto Regexp = Pattern.str();
+ for (size_t pos = 0; (pos = Regexp.find('*', pos)) != std::string::npos;
+ pos += strlen(".*")) {
+ Regexp.replace(pos, strlen("*"), ".*");
+ }
+
+ Regexp = (Twine("^(") + StringRef(Regexp) + ")$").str();
+
+ // Check that the regexp is valid.
+ Regex CheckRE(Regexp);
+ std::string REError;
+ if (!CheckRE.isValid(REError))
+ return createStringError(errc::invalid_argument, REError);
- Regexp = (Twine("^(") + StringRef(Regexp) + ")$").str();
+ RegExes.emplace_back(
+ std::make_pair(std::make_unique<Regex>(std::move(CheckRE)), LineNumber));
- // Check that the regexp is valid.
- Regex CheckRE(Regexp);
- std::string REError;
- if (!CheckRE.isValid(REError))
- return createStringError(errc::invalid_argument, REError);
+ return Error::success();
+}
- RegExes.emplace_back(std::make_pair(
- std::make_unique<Regex>(std::move(CheckRE)), LineNumber));
+unsigned SpecialCaseList::RegexMatcher::match(StringRef Query) const {
+ for (const auto &[Regex, LineNumber] : reverse(RegExes))
+ if (Regex->match(Query))
+ return LineNumber;
+ return 0;
+}
- return Error::success();
+Error SpecialCaseList::GlobMatcher::insert(StringRef Pattern,
+ unsigned LineNumber) {
+ if (Pattern.empty()) {
+ return createStringError(errc::invalid_argument, "Supplied glob was blank");
}
- auto Glob = std::make_unique<Matcher::Glob>();
+ auto Glob = std::make_unique<GlobMatcher::Glob>();
Glob->Name = Pattern.str();
Glob->LineNo = LineNumber;
// We must be sure to use the string in `Glob` rather than the provided
@@ -66,16 +78,28 @@ Error SpecialCaseList::Matcher::insert(StringRef Pattern, unsigned LineNumber,
return Error::success();
}
-unsigned SpecialCaseList::Matcher::match(StringRef Query) const {
+unsigned SpecialCaseList::GlobMatcher::match(StringRef Query) const {
for (const auto &Glob : reverse(Globs))
if (Glob->Pattern.match(Query))
return Glob->LineNo;
- for (const auto &[Regex, LineNumber] : reverse(RegExes))
- if (Regex->match(Query))
- return LineNumber;
return 0;
}
+SpecialCaseList::Matcher::Matcher(bool UseGlobs) {
+ if (UseGlobs)
+ M.emplace<GlobMatcher>();
+ else
+ M.emplace<RegexMatcher>();
+}
+
+unsigned SpecialCaseList::Matcher::match(StringRef Query) const {
+ return std::visit([&](auto &V) { return V.match(Query); }, M);
+}
+
+Error SpecialCaseList::Matcher::insert(StringRef Pattern, unsigned LineNumber) {
+ return std::visit([&](auto &V) { return V.insert(Pattern, LineNumber); }, M);
+}
+
// TODO: Refactor this to return Expected<...>
std::unique_ptr<SpecialCaseList>
SpecialCaseList::create(const std::vector<std::string> &Paths,
@@ -132,10 +156,10 @@ bool SpecialCaseList::createInternal(const MemoryBuffer *MB,
Expected<SpecialCaseList::Section *>
SpecialCaseList::addSection(StringRef SectionStr, unsigned FileNo,
unsigned LineNo, bool UseGlobs) {
- Sections.emplace_back(SectionStr, FileNo);
+ Sections.emplace_back(SectionStr, FileNo, UseGlobs);
auto &Section = Sections.back();
- if (auto Err = Section.SectionMatcher.insert(SectionStr, LineNo, UseGlobs)) {
+ if (auto Err = Section.SectionMatcher.insert(SectionStr, LineNo)) {
return createStringError(errc::invalid_argument,
"malformed section at line " + Twine(LineNo) +
": '" + SectionStr +
@@ -148,7 +172,7 @@ SpecialCaseList::addSection(StringRef SectionStr, unsigned FileNo,
bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB,
std::string &Error) {
Section *CurrentSection;
- if (auto Err = addSection("*", FileIdx, 1).moveInto(CurrentSection)) {
+ if (auto Err = addSection("*", FileIdx, 1, true).moveInto(CurrentSection)) {
Error = toString(std::move(Err));
return false;
}
@@ -194,8 +218,9 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB,
}
auto [Pattern, Category] = Postfix.split("=");
- auto &Entry = CurrentSection->Entries[Prefix][Category];
- if (auto Err = Entry.insert(Pattern, LineNo, UseGlobs)) {
+ auto [It, _] =
+ CurrentSection->Entries[Prefix].try_emplace(Category, UseGlobs);
+ if (auto Err = It->second.insert(Pattern, LineNo)) {
Error =
(Twine("malformed ") + (UseGlobs ? "glob" : "regex") + " in line " +
Twine(LineNo) + ": '" + Pattern + "': " + toString(std::move(Err)))
|
Created using spr 1.3.6 [skip ci]
Created using spr 1.3.6 [skip ci]
fmayer
reviewed
Oct 8, 2025
fmayer
approved these changes
Oct 8, 2025
svkeerthy
pushed a commit
that referenced
this pull request
Oct 9, 2025
#162303) Glob will be optimized; Regex we will keep intact. Using std::variant to avoid virtual methods, and to allow switching from unique_ptr to move in the future.
clingfei
pushed a commit
to clingfei/llvm-project
that referenced
this pull request
Oct 10, 2025
llvm#162303) Glob will be optimized; Regex we will keep intact. Using std::variant to avoid virtual methods, and to allow switching from unique_ptr to move in the future.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Glob will be optimized;
Regex we will keep intact.
Using std::variant to avoid virtual methods,
and to allow switching from unique_ptr to move in the future.