-
Couldn't load subscription status.
- Fork 15k
[NFC][GlobPattern] Add GlobPattern::longest_substr() #164512
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[NFC][GlobPattern] Add GlobPattern::longest_substr() #164512
Conversation
Created using spr 1.3.6
|
@llvm/pr-subscribers-llvm-support Author: Vitaly Buka (vitalybuka) ChangesFinds longest (almost) plain substring in the pattern. Implementation is conservative to avoid false positives. The result is not used to optimize Full diff: https://github.com/llvm/llvm-project/pull/164512.diff 3 Files Affected:
diff --git a/llvm/include/llvm/Support/GlobPattern.h b/llvm/include/llvm/Support/GlobPattern.h
index c1b44849b9794..4824f3fa01e5b 100644
--- a/llvm/include/llvm/Support/GlobPattern.h
+++ b/llvm/include/llvm/Support/GlobPattern.h
@@ -63,22 +63,30 @@ class GlobPattern {
// Returns true for glob pattern "*". Can be used to avoid expensive
// preparation/acquisition of the input for match().
bool isTrivialMatchAll() const {
- if (!Prefix.empty())
+ if (PrefixSize)
return false;
- if (!Suffix.empty())
+ if (SuffixSize)
return false;
if (SubGlobs.size() != 1)
return false;
return SubGlobs[0].getPat() == "*";
}
- StringRef prefix() const { return Prefix; }
- StringRef suffix() const { return Suffix; }
+ // The followind functions are as shortcuts to some matching. They are
+ // conservative to simplify implementations.
-private:
- StringRef Prefix;
- StringRef Suffix;
+ // Returns plain prefix of the pattern.
+ StringRef prefix() const { return Pattern.take_front(PrefixSize); }
+ // Returns plain suffix of the pattern.
+ StringRef suffix() const { return Pattern.take_back(SuffixSize); }
+ // Returns the longest plain substring of the pattern between of prefix and
+ // suffix.
+ StringRef longest_substr() const;
+private:
+ StringRef Pattern;
+ size_t PrefixSize = 0;
+ size_t SuffixSize = 0;
struct SubGlobPattern {
/// \param Pat the pattern to match against
LLVM_ABI static Expected<SubGlobPattern> create(StringRef Pat);
diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp
index 0ecf47dc1d3d1..dfc1508ce63af 100644
--- a/llvm/lib/Support/GlobPattern.cpp
+++ b/llvm/lib/Support/GlobPattern.cpp
@@ -132,24 +132,60 @@ parseBraceExpansions(StringRef S, std::optional<size_t> MaxSubPatterns) {
return std::move(SubPatterns);
}
+static StringRef maxPlainSubstring(StringRef S) {
+ StringRef R;
+ while (!S.empty()) {
+ size_t PrefixSize = S.find_first_of("?*[{\\");
+ if (PrefixSize == std::string::npos)
+ PrefixSize = S.size();
+
+ if (R.size() < PrefixSize)
+ R = S.take_front(PrefixSize);
+ S = S.drop_front(PrefixSize);
+
+ switch (S.front()) {
+ case '\\':
+ S = S.drop_front(2);
+ break;
+ case '[': {
+ size_t EndBracket = S.find_first_of("]");
+ if (EndBracket == std::string::npos)
+ return R; // Incorrect, but let SubGlobPattern::create handle it.
+ S = S.drop_front(EndBracket + 1);
+ break;
+ }
+ case '{':
+ // TODO: implement.
+ return {};
+ default:
+ S = S.drop_front(1);
+ }
+ }
+
+ return R;
+}
+
Expected<GlobPattern>
GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
GlobPattern Pat;
+ Pat.Pattern = S;
// Store the prefix that does not contain any metacharacter.
- size_t PrefixSize = S.find_first_of("?*[{\\");
- Pat.Prefix = S.substr(0, PrefixSize);
- if (PrefixSize == std::string::npos)
+ Pat.PrefixSize = S.find_first_of("?*[{\\");
+ if (Pat.PrefixSize == std::string::npos) {
+ Pat.PrefixSize = S.size();
return Pat;
- S = S.substr(PrefixSize);
+ }
+ S = S.substr(Pat.PrefixSize);
// Just in case we stop on unmatched opening brackets.
size_t SuffixStart = S.find_last_of("?*[]{}\\");
assert(SuffixStart != std::string::npos);
if (S[SuffixStart] == '\\')
++SuffixStart;
- ++SuffixStart;
- Pat.Suffix = S.substr(SuffixStart);
+ if (SuffixStart < S.size())
+ ++SuffixStart;
+ Pat.SuffixSize = S.size() - SuffixStart;
S = S.substr(0, SuffixStart);
SmallVector<std::string, 1> SubPats;
@@ -199,10 +235,15 @@ GlobPattern::SubGlobPattern::create(StringRef S) {
return Pat;
}
+StringRef GlobPattern::longest_substr() const {
+ return maxPlainSubstring(
+ Pattern.drop_front(PrefixSize).drop_back(SuffixSize));
+}
+
bool GlobPattern::match(StringRef S) const {
- if (!S.consume_front(Prefix))
+ if (!S.consume_front(prefix()))
return false;
- if (!S.consume_back(Suffix))
+ if (!S.consume_back(suffix()))
return false;
if (SubGlobs.empty() && S.empty())
return true;
diff --git a/llvm/unittests/Support/GlobPatternTest.cpp b/llvm/unittests/Support/GlobPatternTest.cpp
index 58fd7678131c6..a0e0d1415f383 100644
--- a/llvm/unittests/Support/GlobPatternTest.cpp
+++ b/llvm/unittests/Support/GlobPatternTest.cpp
@@ -329,6 +329,64 @@ TEST_F(GlobPatternTest, PrefixSuffix) {
EXPECT_EQ("cd", Pat->suffix());
}
+TEST_F(GlobPatternTest, Substr) {
+ auto Pat = GlobPattern::create("");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("", Pat->longest_substr());
+
+ Pat = GlobPattern::create("abcd");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bcd");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("", Pat->longest_substr());
+
+ Pat = GlobPattern::create("*abcd");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("", Pat->longest_substr());
+
+ Pat = GlobPattern::create("abcd*");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bc*d");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bc", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bc*def*g");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("def", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bcd*ef*g");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bcd", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bcd*efg*h");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bcd", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bcd[ef]g*h");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bcd", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bcde\\fg*h");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bcde", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bcde\\[fg*h");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bcde", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bcde?fg*h");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bcde", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bcdef{g}*h");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("", Pat->longest_substr());
+}
+
TEST_F(GlobPatternTest, Pathological) {
std::string P, S(40, 'a');
StringRef Pieces[] = {"a*", "[ba]*", "{b*,a*}*"};
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Pull Request Overview
This PR adds a new longest_substr() method to GlobPattern that finds the longest plain (non-wildcard) substring within a glob pattern, excluding the prefix and suffix. The implementation is conservative to avoid false positives and is calculated on-demand rather than during pattern matching.
Key Changes:
- Added
longest_substr()method to find the longest plain substring between wildcards - Refactored internal storage from
StringRef Prefix/Suffixto store original pattern with size offsets - Added comprehensive test coverage for various glob pattern scenarios
Reviewed Changes
Copilot reviewed 3 out of 3 changed files in this pull request and generated 3 comments.
| File | Description |
|---|---|
| llvm/include/llvm/Support/GlobPattern.h | Added longest_substr() declaration and refactored internal storage to use pattern with size offsets |
| llvm/lib/Support/GlobPattern.cpp | Implemented maxPlainSubstring() helper and longest_substr() method, updated pattern storage |
| llvm/unittests/Support/GlobPatternTest.cpp | Added comprehensive test cases for longest_substr() functionality |
Created using spr 1.3.6 [skip ci]
Created using spr 1.3.6 [skip ci]
Created using spr 1.3.7 [skip ci]
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Pull Request Overview
Copilot reviewed 3 out of 3 changed files in this pull request and generated 3 comments.
Created using spr 1.3.7
Created using spr 1.3.7 [skip ci]
Replace two StringRefs with One StringRef + 2 x size_t. Prepare for: * #164512
…n (#164513) Replace two StringRefs with One StringRef + 2 x size_t. Prepare for: * llvm/llvm-project#164512
Created using spr 1.3.7 [skip ci]
) Replace two StringRefs with One StringRef + 2 x size_t. Prepare for: * llvm#164512
Finds longest (almost) plain substring in the pattern. Implementation is conservative to avoid false positives. The result is not used to optimize `GlobPattern::match()` so it's calculated on request. For * llvm#164545 --------- Co-authored-by: Luke Lau <luke@igalia.com>
) Replace two StringRefs with One StringRef + 2 x size_t. Prepare for: * llvm#164512
Finds longest (almost) plain substring in the pattern. Implementation is conservative to avoid false positives. The result is not used to optimize `GlobPattern::match()` so it's calculated on request. For * llvm#164545 --------- Co-authored-by: Luke Lau <luke@igalia.com>
Finds longest (almost) plain substring in the pattern.
Implementation is conservative to avoid false positives.
The result is not used to optimize
GlobPattern::match()so it's calculated onrequest.
For