-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[Support] Extract simple suffix from GlobPattern #162118
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
vitalybuka
merged 1 commit into
main
from
users/vitalybuka/spr/support-extract-simple-suffix-from-globpattern
Oct 6, 2025
Merged
[Support] Extract simple suffix from GlobPattern #162118
vitalybuka
merged 1 commit into
main
from
users/vitalybuka/spr/support-extract-simple-suffix-from-globpattern
Oct 6, 2025
+89
−0
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Created using spr 1.3.6
@llvm/pr-subscribers-llvm-support Author: Vitaly Buka (vitalybuka) ChangesFull diff: https://github.com/llvm/llvm-project/pull/162118.diff 3 Files Affected:
diff --git a/llvm/include/llvm/Support/GlobPattern.h b/llvm/include/llvm/Support/GlobPattern.h
index 62ed4a0f23fd9..c1b44849b9794 100644
--- a/llvm/include/llvm/Support/GlobPattern.h
+++ b/llvm/include/llvm/Support/GlobPattern.h
@@ -65,13 +65,19 @@ class GlobPattern {
bool isTrivialMatchAll() const {
if (!Prefix.empty())
return false;
+ if (!Suffix.empty())
+ return false;
if (SubGlobs.size() != 1)
return false;
return SubGlobs[0].getPat() == "*";
}
+ StringRef prefix() const { return Prefix; }
+ StringRef suffix() const { return Suffix; }
+
private:
StringRef Prefix;
+ StringRef Suffix;
struct SubGlobPattern {
/// \param Pat the pattern to match against
diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp
index 7004adf461a0c..0ecf47dc1d3d1 100644
--- a/llvm/lib/Support/GlobPattern.cpp
+++ b/llvm/lib/Support/GlobPattern.cpp
@@ -143,6 +143,15 @@ GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
return Pat;
S = S.substr(PrefixSize);
+ // Just in case we stop on unmatched opening brackets.
+ size_t SuffixStart = S.find_last_of("?*[]{}\\");
+ assert(SuffixStart != std::string::npos);
+ if (S[SuffixStart] == '\\')
+ ++SuffixStart;
+ ++SuffixStart;
+ Pat.Suffix = S.substr(SuffixStart);
+ S = S.substr(0, SuffixStart);
+
SmallVector<std::string, 1> SubPats;
if (auto Err = parseBraceExpansions(S, MaxSubPatterns).moveInto(SubPats))
return std::move(Err);
@@ -193,6 +202,8 @@ GlobPattern::SubGlobPattern::create(StringRef S) {
bool GlobPattern::match(StringRef S) const {
if (!S.consume_front(Prefix))
return false;
+ if (!S.consume_back(Suffix))
+ return false;
if (SubGlobs.empty() && S.empty())
return true;
for (auto &Glob : SubGlobs)
diff --git a/llvm/unittests/Support/GlobPatternTest.cpp b/llvm/unittests/Support/GlobPatternTest.cpp
index e4f1025b00956..58fd7678131c6 100644
--- a/llvm/unittests/Support/GlobPatternTest.cpp
+++ b/llvm/unittests/Support/GlobPatternTest.cpp
@@ -257,6 +257,78 @@ TEST_F(GlobPatternTest, NUL) {
}
}
+TEST_F(GlobPatternTest, PrefixSuffix) {
+ auto Pat = GlobPattern::create("");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("", Pat->prefix());
+ EXPECT_EQ("", Pat->suffix());
+
+ Pat = GlobPattern::create("abcd");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("abcd", Pat->prefix());
+ EXPECT_EQ("", Pat->suffix());
+
+ Pat = GlobPattern::create("*abcd");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("", Pat->prefix());
+ EXPECT_EQ("abcd", Pat->suffix());
+
+ Pat = GlobPattern::create("abcd*");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("abcd", Pat->prefix());
+ EXPECT_EQ("", Pat->suffix());
+
+ Pat = GlobPattern::create("ab*cd");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("ab", Pat->prefix());
+ EXPECT_EQ("cd", Pat->suffix());
+
+ Pat = GlobPattern::create("ab?cd");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("ab", Pat->prefix());
+ EXPECT_EQ("cd", Pat->suffix());
+
+ Pat = GlobPattern::create("ab[n]cd");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("ab", Pat->prefix());
+ EXPECT_EQ("cd", Pat->suffix());
+
+ Pat = GlobPattern::create("ab{}cd");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("ab", Pat->prefix());
+ EXPECT_EQ("cd", Pat->suffix());
+
+ Pat = GlobPattern::create("ab{cd");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("ab", Pat->prefix());
+ EXPECT_EQ("cd", Pat->suffix());
+
+ Pat = GlobPattern::create("ab]cd");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("ab]cd", Pat->prefix());
+ EXPECT_EQ("", Pat->suffix());
+
+ Pat = GlobPattern::create("ab\\cd");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("ab", Pat->prefix());
+ EXPECT_EQ("d", Pat->suffix());
+
+ Pat = GlobPattern::create("ab\\\\cd");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("ab", Pat->prefix());
+ EXPECT_EQ("d", Pat->suffix());
+
+ Pat = GlobPattern::create("ab?cd?");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("ab", Pat->prefix());
+ EXPECT_EQ("", Pat->suffix());
+
+ Pat = GlobPattern::create("?ab?cd");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("", Pat->prefix());
+ EXPECT_EQ("cd", Pat->suffix());
+}
+
TEST_F(GlobPatternTest, Pathological) {
std::string P, S(40, 'a');
StringRef Pieces[] = {"a*", "[ba]*", "{b*,a*}*"};
|
fmayer
reviewed
Oct 6, 2025
fmayer
approved these changes
Oct 6, 2025
400 seconds for glob matching ? |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Existing glob is optimized with prefix for "src:/dir1/dir2/*",
but I notices we often use patterns like "src:*dir1/dir2/file.h".
So suffix will help.
It will be hard to notice in most cases, but I use ignore list to bisect some falures.
E.g. put 100k entries in the file, and build/test as needed.
On one of hard compilation units glob matching was 400s, after the change 20s.
Still, there is higher level inefficiency in ignore list matching, which I will
address in followup patches and remove 20s above.