diff --git a/llvm/include/llvm/Support/GlobPattern.h b/llvm/include/llvm/Support/GlobPattern.h
index 6ebf64565559b..8cae6a38d8326 100644
--- a/llvm/include/llvm/Support/GlobPattern.h
+++ b/llvm/include/llvm/Support/GlobPattern.h
@@ -79,6 +79,9 @@ class GlobPattern {
   StringRef prefix() const { return Pattern.take_front(PrefixSize); }
   // Returns plain suffix of the pattern.
   StringRef suffix() const { return Pattern.take_back(SuffixSize); }
+  // Returns the longest plain substring of the pattern between prefix and
+  // suffix. Text following the first '{' is currently not considered.
+  StringRef longest_substr() const;
 
 private:
   StringRef Pattern;
diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp
index f56a8fcf4bf9d..2715229c65be1 100644
--- a/llvm/lib/Support/GlobPattern.cpp
+++ b/llvm/lib/Support/GlobPattern.cpp
@@ -132,6 +132,62 @@ parseBraceExpansions(StringRef S, std::optional<size_t> MaxSubPatterns) {
   return std::move(SubPatterns);
 }
 
+/// Returns the longest run of plain (non-metacharacter) characters in \p S.
+///
+/// Runs are delimited by the glob metacharacters '?', '*', '[', '{' and '\'.
+/// An escaped character and the contents of a bracket expression are skipped
+/// and never counted as plain text. When several runs share the maximum
+/// length the leftmost one is returned. Scanning stops at the first '{', so
+/// only the text in front of it is considered.
+static StringRef maxPlainSubstring(StringRef S) {
+  StringRef Best;
+  while (!S.empty()) {
+    size_t PrefixSize = S.find_first_of("?*[{\\");
+    if (PrefixSize == std::string::npos)
+      PrefixSize = S.size();
+
+    if (Best.size() < PrefixSize)
+      Best = S.take_front(PrefixSize);
+
+    S = S.drop_front(PrefixSize);
+
+    // The run may extend to the end of the input: ']' and '}' are not in the
+    // set searched above, so trailing text such as "b]" is consumed whole.
+    // Stop rather than inspect the front of an empty string.
+    if (S.empty())
+      break;
+
+    switch (S.front()) {
+    case '\\':
+      // Drop the backslash together with the character it escapes.
+      S = S.drop_front(2);
+      break;
+    case '[': {
+      // Drop '[' and the first character which can be ']'.
+      S = S.drop_front(2);
+      size_t EndBracket = S.find(']');
+      // Should not be possible, SubGlobPattern::create should fail on invalid
+      // pattern before we get here.
+      assert(EndBracket != std::string::npos);
+      // Without assertions EndBracket + 1 would wrap to 0 and the bracket
+      // contents would be scanned as plain text, so bail out explicitly.
+      if (EndBracket == std::string::npos)
+        return Best;
+      S = S.drop_front(EndBracket + 1);
+      break;
+    }
+    case '{':
+      // TODO: implement.
+      // Fallback to whatever is best for now.
+      return Best;
+    default:
+      S = S.drop_front(1);
+    }
+  }
+
+  return Best;
+}
+
 Expected<GlobPattern>
 GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
   GlobPattern Pat;
@@ -202,6 +258,11 @@ GlobPattern::SubGlobPattern::create(StringRef S) {
   return Pat;
 }
 
+StringRef GlobPattern::longest_substr() const {
+  return maxPlainSubstring(
+      Pattern.drop_front(PrefixSize).drop_back(SuffixSize));
+}
+
 bool GlobPattern::match(StringRef S) const {
   if (!S.consume_front(prefix()))
     return false;
diff --git a/llvm/unittests/Support/GlobPatternTest.cpp b/llvm/unittests/Support/GlobPatternTest.cpp
index 58fd7678131c6..872a21e948d7a 100644
--- a/llvm/unittests/Support/GlobPatternTest.cpp
+++ b/llvm/unittests/Support/GlobPatternTest.cpp
@@ -329,6 +329,72 @@ TEST_F(GlobPatternTest, PrefixSuffix) {
   EXPECT_EQ("cd", Pat->suffix());
 }
 
+TEST_F(GlobPatternTest, Substr) {
+  auto Pat = GlobPattern::create("");
+  ASSERT_TRUE((bool)Pat);
+  EXPECT_EQ("", Pat->longest_substr());
+
+  Pat = GlobPattern::create("abcd");
+  ASSERT_TRUE((bool)Pat);
+  EXPECT_EQ("", Pat->longest_substr());
+
+  Pat = GlobPattern::create("a*bcd");
+  ASSERT_TRUE((bool)Pat);
+  EXPECT_EQ("", Pat->longest_substr());
+
+  Pat = GlobPattern::create("*abcd");
+  ASSERT_TRUE((bool)Pat);
+  EXPECT_EQ("", Pat->longest_substr());
+
+  Pat = GlobPattern::create("abcd*");
+  ASSERT_TRUE((bool)Pat);
+  EXPECT_EQ("", Pat->longest_substr());
+
+  Pat = GlobPattern::create("a*bc*d");
+  ASSERT_TRUE((bool)Pat);
+  EXPECT_EQ("bc", Pat->longest_substr());
+
+  Pat = GlobPattern::create("a*bc*def*g");
+  ASSERT_TRUE((bool)Pat);
+  EXPECT_EQ("def", Pat->longest_substr());
+
+  Pat = GlobPattern::create("a*bcd*ef*g");
+  ASSERT_TRUE((bool)Pat);
+  EXPECT_EQ("bcd", Pat->longest_substr());
+
+  Pat = GlobPattern::create("a*bcd*efg*h");
+  ASSERT_TRUE((bool)Pat);
+  EXPECT_EQ("bcd", Pat->longest_substr());
+
+  Pat = GlobPattern::create("a*bcd[ef]g*h");
+  ASSERT_TRUE((bool)Pat);
+  EXPECT_EQ("bcd", Pat->longest_substr());
+
+  Pat = GlobPattern::create("a*bc[d]efg*h");
+  ASSERT_TRUE((bool)Pat);
+  EXPECT_EQ("efg", Pat->longest_substr());
+
+  Pat = GlobPattern::create("a*bc[]]efg*h");
+  ASSERT_TRUE((bool)Pat);
+  EXPECT_EQ("efg", Pat->longest_substr());
+
+  Pat = GlobPattern::create("a*bcde\\fg*h");
+  ASSERT_TRUE((bool)Pat);
+  EXPECT_EQ("bcde", Pat->longest_substr());
+
+  Pat = GlobPattern::create("a*bcde\\[fg*h");
+  ASSERT_TRUE((bool)Pat);
+  EXPECT_EQ("bcde", Pat->longest_substr());
+
+  Pat = GlobPattern::create("a*bcde?fg*h");
+  ASSERT_TRUE((bool)Pat);
+  EXPECT_EQ("bcde", Pat->longest_substr());
+
+  Pat = GlobPattern::create("a*bcdef{g}*h");
+  ASSERT_TRUE((bool)Pat);
+  EXPECT_EQ("bcdef", Pat->longest_substr());
+}
+
 TEST_F(GlobPatternTest, Pathological) {
   std::string P, S(40, 'a');
   StringRef Pieces[] = {"a*", "[ba]*", "{b*,a*}*"};