Skip to content

Conversation

vitalybuka
Copy link
Collaborator

@vitalybuka vitalybuka commented Oct 7, 2025

Glob will be optimized.
Regex we will keep intact.

Using std::variant to avoid virtual methods,
and to allow switching from unique_ptr to move in the future.

@llvmbot
Copy link
Member

llvmbot commented Oct 7, 2025

@llvm/pr-subscribers-llvm-support

Author: Vitaly Buka (vitalybuka)

Changes

I am going to optimize the latter, but keep the former intact.


Full diff: https://github.com/llvm/llvm-project/pull/162303.diff

2 Files Affected:

  • (modified) llvm/include/llvm/Support/SpecialCaseList.h (+30-13)
  • (modified) llvm/lib/Support/SpecialCaseList.cpp (+57-32)
diff --git a/llvm/include/llvm/Support/SpecialCaseList.h b/llvm/include/llvm/Support/SpecialCaseList.h
index e824cd44f6297..eff36569fcab7 100644
--- a/llvm/include/llvm/Support/SpecialCaseList.h
+++ b/llvm/include/llvm/Support/SpecialCaseList.h
@@ -19,6 +19,7 @@
 #include <memory>
 #include <string>
 #include <utility>
+#include <variant>
 #include <vector>
 
 namespace llvm {
@@ -118,18 +119,20 @@ class SpecialCaseList {
   SpecialCaseList(SpecialCaseList const &) = delete;
   SpecialCaseList &operator=(SpecialCaseList const &) = delete;
 
-  /// Represents a set of globs and their line numbers
-  class Matcher {
+  // Legacy v1 matcher.
+  class RegexMatcher {
+  public:
+    LLVM_ABI unsigned match(StringRef Query) const;
+    LLVM_ABI Error insert(StringRef Pattern, unsigned LineNumber);
+    std::vector<std::pair<std::unique_ptr<Regex>, unsigned>> RegExes;
+  };
+
+  class GlobMatcher {
   public:
     // Returns the line number in the source file that this query matches to.
     // Returns zero if no match is found.
     LLVM_ABI unsigned match(StringRef Query) const;
-
-  private:
-    friend class SpecialCaseList;
-    LLVM_ABI Error insert(StringRef Pattern, unsigned LineNumber,
-                          bool UseRegex);
-
+    LLVM_ABI Error insert(StringRef Pattern, unsigned LineNumber);
     struct Glob {
       std::string Name;
       unsigned LineNo;
@@ -140,15 +143,29 @@ class SpecialCaseList {
       Glob() = default;
     };
 
-    std::vector<std::unique_ptr<Matcher::Glob>> Globs;
-    std::vector<std::pair<std::unique_ptr<Regex>, unsigned>> RegExes;
+    std::vector<std::unique_ptr<GlobMatcher::Glob>> Globs;
+  };
+
+  /// Represents a set of globs and their line numbers
+  class Matcher {
+  public:
+    LLVM_ABI explicit Matcher(bool UseGlobs);
+    // Returns the line number in the source file that this query matches to.
+    // Returns zero if no match is found.
+    LLVM_ABI unsigned match(StringRef Query) const;
+
+  private:
+    friend class SpecialCaseList;
+    LLVM_ABI Error insert(StringRef Pattern, unsigned LineNumber);
+
+    std::variant<RegexMatcher, GlobMatcher> M;
   };
 
   using SectionEntries = StringMap<StringMap<Matcher>>;
 
   struct Section {
-    Section(StringRef Str, unsigned FileIdx)
-        : SectionStr(Str), FileIdx(FileIdx) {};
+    Section(StringRef Str, unsigned FileIdx, bool UseGlobs)
+        : SectionMatcher(UseGlobs), SectionStr(Str), FileIdx(FileIdx) {};
 
     Section(Section &&) = default;
 
@@ -162,7 +179,7 @@ class SpecialCaseList {
 
   LLVM_ABI Expected<Section *> addSection(StringRef SectionStr,
                                           unsigned FileIdx, unsigned LineNo,
-                                          bool UseGlobs = true);
+                                          bool UseGlobs);
 
   /// Parses just-constructed SpecialCaseList entries from a memory buffer.
   LLVM_ABI bool parse(unsigned FileIdx, const MemoryBuffer *MB,
diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp
index 4b038850b62ca..71f7b9aa65796 100644
--- a/llvm/lib/Support/SpecialCaseList.cpp
+++ b/llvm/lib/Support/SpecialCaseList.cpp
@@ -25,36 +25,48 @@
 
 namespace llvm {
 
-Error SpecialCaseList::Matcher::insert(StringRef Pattern, unsigned LineNumber,
-                                       bool UseGlobs) {
-  if (Pattern.empty())
+Error SpecialCaseList::RegexMatcher::insert(StringRef Pattern,
+                                            unsigned LineNumber) {
+  if (Pattern.empty()) {
     return createStringError(errc::invalid_argument,
-                             Twine("Supplied ") +
-                                 (UseGlobs ? "glob" : "regex") + " was blank");
-
-  if (!UseGlobs) {
-    // Replace * with .*
-    auto Regexp = Pattern.str();
-    for (size_t pos = 0; (pos = Regexp.find('*', pos)) != std::string::npos;
-         pos += strlen(".*")) {
-      Regexp.replace(pos, strlen("*"), ".*");
-    }
+                             "Supplied regex was blank");
+  }
+
+  // Replace * with .*
+  auto Regexp = Pattern.str();
+  for (size_t pos = 0; (pos = Regexp.find('*', pos)) != std::string::npos;
+       pos += strlen(".*")) {
+    Regexp.replace(pos, strlen("*"), ".*");
+  }
+
+  Regexp = (Twine("^(") + StringRef(Regexp) + ")$").str();
+
+  // Check that the regexp is valid.
+  Regex CheckRE(Regexp);
+  std::string REError;
+  if (!CheckRE.isValid(REError))
+    return createStringError(errc::invalid_argument, REError);
 
-    Regexp = (Twine("^(") + StringRef(Regexp) + ")$").str();
+  RegExes.emplace_back(
+      std::make_pair(std::make_unique<Regex>(std::move(CheckRE)), LineNumber));
 
-    // Check that the regexp is valid.
-    Regex CheckRE(Regexp);
-    std::string REError;
-    if (!CheckRE.isValid(REError))
-      return createStringError(errc::invalid_argument, REError);
+  return Error::success();
+}
 
-    RegExes.emplace_back(std::make_pair(
-        std::make_unique<Regex>(std::move(CheckRE)), LineNumber));
+unsigned SpecialCaseList::RegexMatcher::match(StringRef Query) const {
+  for (const auto &[Regex, LineNumber] : reverse(RegExes))
+    if (Regex->match(Query))
+      return LineNumber;
+  return 0;
+}
 
-    return Error::success();
+Error SpecialCaseList::GlobMatcher::insert(StringRef Pattern,
+                                           unsigned LineNumber) {
+  if (Pattern.empty()) {
+    return createStringError(errc::invalid_argument, "Supplied glob was blank");
   }
 
-  auto Glob = std::make_unique<Matcher::Glob>();
+  auto Glob = std::make_unique<GlobMatcher::Glob>();
   Glob->Name = Pattern.str();
   Glob->LineNo = LineNumber;
   // We must be sure to use the string in `Glob` rather than the provided
@@ -66,16 +78,28 @@ Error SpecialCaseList::Matcher::insert(StringRef Pattern, unsigned LineNumber,
   return Error::success();
 }
 
-unsigned SpecialCaseList::Matcher::match(StringRef Query) const {
+unsigned SpecialCaseList::GlobMatcher::match(StringRef Query) const {
   for (const auto &Glob : reverse(Globs))
     if (Glob->Pattern.match(Query))
       return Glob->LineNo;
-  for (const auto &[Regex, LineNumber] : reverse(RegExes))
-    if (Regex->match(Query))
-      return LineNumber;
   return 0;
 }
 
+SpecialCaseList::Matcher::Matcher(bool UseGlobs) {
+  if (UseGlobs)
+    M.emplace<GlobMatcher>();
+  else
+    M.emplace<RegexMatcher>();
+}
+
+unsigned SpecialCaseList::Matcher::match(StringRef Query) const {
+  return std::visit([&](auto &V) { return V.match(Query); }, M);
+}
+
+Error SpecialCaseList::Matcher::insert(StringRef Pattern, unsigned LineNumber) {
+  return std::visit([&](auto &V) { return V.insert(Pattern, LineNumber); }, M);
+}
+
 // TODO: Refactor this to return Expected<...>
 std::unique_ptr<SpecialCaseList>
 SpecialCaseList::create(const std::vector<std::string> &Paths,
@@ -132,10 +156,10 @@ bool SpecialCaseList::createInternal(const MemoryBuffer *MB,
 Expected<SpecialCaseList::Section *>
 SpecialCaseList::addSection(StringRef SectionStr, unsigned FileNo,
                             unsigned LineNo, bool UseGlobs) {
-  Sections.emplace_back(SectionStr, FileNo);
+  Sections.emplace_back(SectionStr, FileNo, UseGlobs);
   auto &Section = Sections.back();
 
-  if (auto Err = Section.SectionMatcher.insert(SectionStr, LineNo, UseGlobs)) {
+  if (auto Err = Section.SectionMatcher.insert(SectionStr, LineNo)) {
     return createStringError(errc::invalid_argument,
                              "malformed section at line " + Twine(LineNo) +
                                  ": '" + SectionStr +
@@ -148,7 +172,7 @@ SpecialCaseList::addSection(StringRef SectionStr, unsigned FileNo,
 bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB,
                             std::string &Error) {
   Section *CurrentSection;
-  if (auto Err = addSection("*", FileIdx, 1).moveInto(CurrentSection)) {
+  if (auto Err = addSection("*", FileIdx, 1, true).moveInto(CurrentSection)) {
     Error = toString(std::move(Err));
     return false;
   }
@@ -194,8 +218,9 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB,
     }
 
     auto [Pattern, Category] = Postfix.split("=");
-    auto &Entry = CurrentSection->Entries[Prefix][Category];
-    if (auto Err = Entry.insert(Pattern, LineNo, UseGlobs)) {
+    auto [It, _] =
+        CurrentSection->Entries[Prefix].try_emplace(Category, UseGlobs);
+    if (auto Err = It->second.insert(Pattern, LineNo)) {
       Error =
           (Twine("malformed ") + (UseGlobs ? "glob" : "regex") + " in line " +
            Twine(LineNo) + ": '" + Pattern + "': " + toString(std::move(Err)))

Created using spr 1.3.6

[skip ci]
Created using spr 1.3.6
@vitalybuka vitalybuka requested review from qinkunbao and fmayer October 8, 2025 00:39
Created using spr 1.3.6

[skip ci]
Created using spr 1.3.6
@vitalybuka vitalybuka requested a review from fmayer October 8, 2025 19:39
@vitalybuka vitalybuka changed the base branch from users/vitalybuka/spr/main.nfcspecialcaselist-split-matcher-into-regexmatcher-and-globmatcher to main October 8, 2025 20:38
Created using spr 1.3.6
@vitalybuka vitalybuka enabled auto-merge (squash) October 8, 2025 20:39
@vitalybuka vitalybuka disabled auto-merge October 8, 2025 22:25
@vitalybuka vitalybuka merged commit a2723dd into main Oct 8, 2025
6 of 8 checks passed
@vitalybuka vitalybuka deleted the users/vitalybuka/spr/nfcspecialcaselist-split-matcher-into-regexmatcher-and-globmatcher branch October 8, 2025 22:26
svkeerthy pushed a commit that referenced this pull request Oct 9, 2025
#162303)

Glob will be optimized.
Regex we will keep intact.

Using std::variant to avoid virtual methods,
and to allow switching from unique_ptr to move in the future.
clingfei pushed a commit to clingfei/llvm-project that referenced this pull request Oct 10, 2025
llvm#162303)

Glob will be optimized.
Regex we will keep intact.

Using std::variant to avoid virtual methods,
and to allow switching from unique_ptr to move in the future.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants