From b386ed04a29d51c6f4e322d5cb5ca4ae484d1afa Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Tue, 21 Oct 2025 19:00:07 -0700 Subject: [PATCH 1/5] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20ch?= =?UTF-8?q?anges=20to=20main=20this=20commit=20is=20based=20on?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.7 [skip ci] --- llvm/include/llvm/Support/GlobPattern.h | 22 +- llvm/include/llvm/Support/RadixTree.h | 345 +++++++++++++++++++ llvm/lib/Support/GlobPattern.cpp | 67 +++- llvm/unittests/Support/CMakeLists.txt | 1 + llvm/unittests/Support/GlobPatternTest.cpp | 66 ++++ llvm/unittests/Support/RadixTreeTest.cpp | 372 +++++++++++++++++++++ 6 files changed, 858 insertions(+), 15 deletions(-) create mode 100644 llvm/include/llvm/Support/RadixTree.h create mode 100644 llvm/unittests/Support/RadixTreeTest.cpp diff --git a/llvm/include/llvm/Support/GlobPattern.h b/llvm/include/llvm/Support/GlobPattern.h index c1b44849b9794..8b8ac89304e31 100644 --- a/llvm/include/llvm/Support/GlobPattern.h +++ b/llvm/include/llvm/Support/GlobPattern.h @@ -63,22 +63,30 @@ class GlobPattern { // Returns true for glob pattern "*". Can be used to avoid expensive // preparation/acquisition of the input for match(). bool isTrivialMatchAll() const { - if (!Prefix.empty()) + if (PrefixSize) return false; - if (!Suffix.empty()) + if (SuffixSize) return false; if (SubGlobs.size() != 1) return false; return SubGlobs[0].getPat() == "*"; } - StringRef prefix() const { return Prefix; } - StringRef suffix() const { return Suffix; } + // The following functions are just shortcuts for faster matching. They are + // conservative to simplify implementations. -private: - StringRef Prefix; - StringRef Suffix; + // Returns plain prefix of the pattern. + StringRef prefix() const { return Pattern.take_front(PrefixSize); } + // Returns plain suffix of the pattern. 
+ StringRef suffix() const { return Pattern.take_back(SuffixSize); } + // Returns the longest plain substring of the pattern between prefix and + // suffix. + StringRef longest_substr() const; +private: + StringRef Pattern; + size_t PrefixSize = 0; + size_t SuffixSize = 0; struct SubGlobPattern { /// \param Pat the pattern to match against LLVM_ABI static Expected create(StringRef Pat); diff --git a/llvm/include/llvm/Support/RadixTree.h b/llvm/include/llvm/Support/RadixTree.h new file mode 100644 index 0000000000000..05697fc267628 --- /dev/null +++ b/llvm/include/llvm/Support/RadixTree.h @@ -0,0 +1,345 @@ +//===-- RadixTree.h - Radix Tree implementation -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +//===----------------------------------------------------------------------===// +// +// This file implements a Radix Tree. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_RADIXTREE_H +#define LLVM_SUPPORT_RADIXTREE_H + +#include "llvm/ADT/ADL.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/iterator.h" +#include "llvm/ADT/iterator_range.h" +#include +#include +#include +#include +#include +#include + +namespace llvm { + +/// \brief A Radix Tree implementation. +/// +/// A Radix Tree (also known as a compact prefix tree or radix trie) is a +/// data structure that stores a dynamic set or associative array where keys +/// are strings and values are associated with these keys. Unlike a regular +/// trie, the edges of a radix tree can be labeled with sequences of characters +/// as well as single characters. This makes radix trees more efficient for +/// storing sparse data sets, where many nodes in a regular trie would have +/// only one child. 
+/// +/// This implementation supports arbitrary key types that can be iterated over +/// (e.g., `std::string`, `std::vector`, `ArrayRef`). The key type +/// must provide `begin()` and `end()` for iteration. +/// +/// The tree stores `std::pair` as its value type. +/// +/// Example usage: +/// \code +/// llvm::RadixTree Tree; +/// Tree.emplace("apple", 1); +/// Tree.emplace("grapefruit", 2); +/// Tree.emplace("grape", 3); +/// +/// // Find prefixes +/// for (const auto &pair : Tree.find_prefixes("grapefruit juice")) { +/// // pair will be {"grape", 3} +/// // pair will be {"grapefruit", 2} +/// llvm::outs() << pair.first << ": " << pair.second << "\n"; +/// } +/// +/// // Iterate over all elements +/// for (const auto &pair : Tree) { +/// llvm::outs() << pair.first << ": " << pair.second << "\n"; +/// } +/// \endcode +/// +/// \note +/// The `RadixTree` takes ownership of the `KeyType` and `T` objects +/// inserted into it. When an element is removed or the tree is destroyed, +/// these objects will be destructed. +/// However, if `KeyType` is a reference-like type, e.g. StringRef or range, +/// User must guarantee that destination has lifetime longer than the tree. +template class RadixTree { +public: + using key_type = KeyType; + using mapped_type = T; + using value_type = std::pair; + +private: + using KeyConstIteratorType = + decltype(adl_begin(std::declval())); + using KeyConstIteratorRangeType = iterator_range; + using KeyValueType = + remove_cvref_t()))>; + using ContainerType = std::list; + + /// Represents an internal node in the Radix Tree. + struct Node { + KeyConstIteratorRangeType Key = {KeyConstIteratorType{}, + KeyConstIteratorType{}}; + std::vector Children; + + /// An iterator to the value associated with this node. + /// + /// If this node does not have a value (i.e., it's an internal node that + /// only serves as a path to other values), this iterator will be equal + /// to default constructed `ContainerType::iterator()`. 
+ typename ContainerType::iterator Value; + + /// The first character of the Key. Used for fast child lookup. + KeyValueType KeyFront; + + Node() = default; + Node(const KeyConstIteratorRangeType &Key) + : Key(Key), KeyFront(*Key.begin()) { + assert(!Key.empty()); + } + + Node(Node &&) = default; + Node &operator=(Node &&) = default; + + Node(const Node &) = delete; + Node &operator=(const Node &) = delete; + + const Node *findChild(const KeyConstIteratorRangeType &Key) const { + if (Key.empty()) + return nullptr; + for (const auto &Child : Children) { + assert(!Child.Key.empty()); // Only root can be empty. + if (Child.KeyFront == *Key.begin()) + return &Child; + } + return nullptr; + } + + Node *findChild(const KeyConstIteratorRangeType &Query) { + const Node *This = this; + return const_cast(This->findChild(Query)); + } + + size_t countNodes() const { + size_t R = 1; + for (const auto &C : Children) + R += C.countNodes(); + return R; + } + + /// + /// Splits the current node into two. + /// + /// This function is used when a new key needs to be inserted that shares + /// a common prefix with the current node's key, but then diverges. + /// The current `Key` is truncated to the common prefix, and a new child + /// node is created for the remainder of the original node's `Key`. + /// + /// \param SplitPoint An iterator pointing to the character in the current + /// `Key` where the split should occur. + void split(KeyConstIteratorType SplitPoint) { + Node Child(make_range(SplitPoint, Key.end())); + Key = make_range(Key.begin(), SplitPoint); + + Children.swap(Child.Children); + std::swap(Value, Child.Value); + + Children.emplace_back(std::move(Child)); + } + }; + + Node Root; // Root is always for empty range. + ContainerType Values; + + /// Finds or creates a new tail or leaf node corresponding to the `Key`. 
+ Node &findOrCreate(KeyConstIteratorRangeType Key) { + Node *Curr = &Root; + if (Key.empty()) + return *Curr; + + for (;;) { + auto [I1, I2] = llvm::mismatch(Key, Curr->Key); + Key = make_range(I1, Key.end()); + + if (I2 != Curr->Key.end()) { + // Match is partial. Either query is too short, or there is mismatching + // character. Split either way, and put new node in between of the + // current and its children. + Curr->split(I2); + + // Split was caused by mismatch, so `findChild` will fail. + break; + } + + Node *Child = Curr->findChild(Key); + if (!Child) + break; + + // Move to child with the same first character. + Curr = Child; + } + + if (Key.empty()) { + // The current node completely matches the key, return it. + return *Curr; + } + + // `Key` a suffix of original `Key` unmatched by path from the `Root` to the + // `Curr`, and we have no candidate in the children to match more. Create a + // new one. + return Curr->Children.emplace_back(Key); + } + + /// + /// An iterator for traversing prefixes search results. + /// + /// This iterator is used by `find_prefixes` to traverse the tree and find + /// elements that are prefixes to the given key. It's a forward iterator. + /// + /// \tparam MappedType The type of the value pointed to by the iterator. + /// This will be `value_type` for non-const iterators + /// and `const value_type` for const iterators. 
+ template + class IteratorImpl + : public iterator_facade_base, + std::forward_iterator_tag, MappedType> { + const Node *Curr = nullptr; + KeyConstIteratorRangeType Query; + + void findNextValid() { + while (Curr && Curr->Value == typename ContainerType::iterator()) + advance(); + } + + void advance() { + assert(Curr); + if (Query.empty()) { + Curr = nullptr; + return; + } + + Curr = Curr->findChild(Query); + if (!Curr) { + Curr = nullptr; + return; + } + + auto [I1, I2] = llvm::mismatch(Query, Curr->Key); + if (I2 != Curr->Key.end()) { + Curr = nullptr; + return; + } + Query = make_range(I1, Query.end()); + } + + friend class RadixTree; + IteratorImpl(const Node *C, const KeyConstIteratorRangeType &Q) + : Curr(C), Query(Q) { + findNextValid(); + } + + public: + IteratorImpl() : Query{{}, {}} {} + + MappedType &operator*() const { return *Curr->Value; } + + IteratorImpl &operator++() { + advance(); + findNextValid(); + return *this; + } + + bool operator==(const IteratorImpl &Other) const { + return Curr == Other.Curr; + } + }; + +public: + RadixTree() = default; + RadixTree(RadixTree &&) = default; + RadixTree &operator=(RadixTree &&) = default; + + using prefix_iterator = IteratorImpl; + using const_prefix_iterator = IteratorImpl; + + using iterator = typename ContainerType::iterator; + using const_iterator = typename ContainerType::const_iterator; + + /// Returns true if the tree is empty. + bool empty() const { return Values.empty(); } + + /// Returns the number of elements in the tree. + size_t size() const { return Values.size(); } + + /// Returns the number of nodes in the tree. + /// + /// This function counts all internal nodes in the tree. It can be useful for + /// understanding the memory footprint or complexity of the tree structure. + size_t countNodes() const { return Root.countNodes(); } + + /// Returns an iterator to the first element. 
+ iterator begin() { return Values.begin(); } + const_iterator begin() const { return Values.begin(); } + + /// Returns an iterator to the end of the tree. + iterator end() { return Values.end(); } + const_iterator end() const { return Values.end(); } + + /// Constructs and inserts a new element into the tree. + /// + /// This function constructs an element in-place within the tree. If an + /// element with the same key already exists, the insertion fails and the + /// function returns an iterator to the existing element along with `false`. + /// Otherwise, the new element is inserted and the function returns an + /// iterator to the new element along with `true`. + /// + /// \param Key The key of the element to construct. + /// \param Args Arguments to forward to the constructor of the mapped_type. + /// \return A pair consisting of an iterator to the inserted element (or to + /// the element that prevented insertion) and a boolean value + /// indicating whether the insertion took place. + template + std::pair emplace(key_type &&Key, Ts &&...Args) { + const value_type &NewValue = + Values.emplace_front(std::move(Key), T(std::move(Args)...)); + Node &Node = findOrCreate(NewValue.first); + bool HasValue = Node.Value != typename ContainerType::iterator(); + if (!HasValue) { + Node.Value = Values.begin(); + } else { + Values.pop_front(); + } + return std::make_pair(Node.Value, !HasValue); + } + + /// + /// Finds all elements whose keys are prefixes of the given `Key`. + /// + /// This function returns an iterator range over all elements in the tree + /// whose keys are prefixes of the provided `Key`. For example, if the tree + /// contains "abcde", "abc", "abcdefgh", and `Key` is "abcde", this function + /// would return iterators to "abcde" and "abc". + /// + /// \param Key The key to search for prefixes of. + /// \return An `iterator_range` of `const_prefix_iterator`s, allowing + /// iteration over the found prefix elements. 
+ /// \note The returned iterators reference the `Key` provided by the caller. + /// The caller must ensure that `Key` remains valid for the lifetime + /// of the iterators. + iterator_range + find_prefixes(const key_type &Key) const { + return iterator_range{ + const_prefix_iterator( + &Root, KeyConstIteratorRangeType{adl_begin(Key), adl_end(Key)}), + const_prefix_iterator{}}; + } +}; + +} // namespace llvm + +#endif // LLVM_SUPPORT_RADIXTREE_H diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp index 0ecf47dc1d3d1..2715229c65be1 100644 --- a/llvm/lib/Support/GlobPattern.cpp +++ b/llvm/lib/Support/GlobPattern.cpp @@ -132,24 +132,70 @@ parseBraceExpansions(StringRef S, std::optional MaxSubPatterns) { return std::move(SubPatterns); } +static StringRef maxPlainSubstring(StringRef S) { + StringRef Best; + while (!S.empty()) { + size_t PrefixSize = S.find_first_of("?*[{\\"); + if (PrefixSize == std::string::npos) + PrefixSize = S.size(); + + if (Best.size() < PrefixSize) + Best = S.take_front(PrefixSize); + + S = S.drop_front(PrefixSize); + + // It's impossible, as the first and last characters of the input string + // must be Glob special characters, otherwise they would be parts of + // the prefix or the suffix. + assert(!S.empty()); + + switch (S.front()) { + case '\\': + S = S.drop_front(2); + break; + case '[': { + // Drop '[' and the first character which can be ']'. + S = S.drop_front(2); + size_t EndBracket = S.find_first_of("]"); + // Should not be possible, SubGlobPattern::create should fail on invalid + // pattern before we get here. + assert(EndBracket != std::string::npos); + S = S.drop_front(EndBracket + 1); + break; + } + case '{': + // TODO: implement. + // Fallback to whatever is best for now. 
+ return Best; + default: + S = S.drop_front(1); + } + } + + return Best; +} + Expected GlobPattern::create(StringRef S, std::optional MaxSubPatterns) { GlobPattern Pat; + Pat.Pattern = S; // Store the prefix that does not contain any metacharacter. - size_t PrefixSize = S.find_first_of("?*[{\\"); - Pat.Prefix = S.substr(0, PrefixSize); - if (PrefixSize == std::string::npos) + Pat.PrefixSize = S.find_first_of("?*[{\\"); + if (Pat.PrefixSize == std::string::npos) { + Pat.PrefixSize = S.size(); return Pat; - S = S.substr(PrefixSize); + } + S = S.substr(Pat.PrefixSize); // Just in case we stop on unmatched opening brackets. size_t SuffixStart = S.find_last_of("?*[]{}\\"); assert(SuffixStart != std::string::npos); if (S[SuffixStart] == '\\') ++SuffixStart; - ++SuffixStart; - Pat.Suffix = S.substr(SuffixStart); + if (SuffixStart < S.size()) + ++SuffixStart; + Pat.SuffixSize = S.size() - SuffixStart; S = S.substr(0, SuffixStart); SmallVector SubPats; @@ -199,10 +245,15 @@ GlobPattern::SubGlobPattern::create(StringRef S) { return Pat; } +StringRef GlobPattern::longest_substr() const { + return maxPlainSubstring( + Pattern.drop_front(PrefixSize).drop_back(SuffixSize)); +} + bool GlobPattern::match(StringRef S) const { - if (!S.consume_front(Prefix)) + if (!S.consume_front(prefix())) return false; - if (!S.consume_back(Suffix)) + if (!S.consume_back(suffix())) return false; if (SubGlobs.empty() && S.empty()) return true; diff --git a/llvm/unittests/Support/CMakeLists.txt b/llvm/unittests/Support/CMakeLists.txt index 21f10eb610f11..80646cfc0ef1f 100644 --- a/llvm/unittests/Support/CMakeLists.txt +++ b/llvm/unittests/Support/CMakeLists.txt @@ -76,6 +76,7 @@ add_llvm_unittest(SupportTests ProcessTest.cpp ProgramTest.cpp ProgramStackTest.cpp + RadixTreeTest.cpp RecyclerTest.cpp RegexTest.cpp ReverseIterationTest.cpp diff --git a/llvm/unittests/Support/GlobPatternTest.cpp b/llvm/unittests/Support/GlobPatternTest.cpp index 58fd7678131c6..872a21e948d7a 100644 --- 
a/llvm/unittests/Support/GlobPatternTest.cpp +++ b/llvm/unittests/Support/GlobPatternTest.cpp @@ -329,6 +329,72 @@ TEST_F(GlobPatternTest, PrefixSuffix) { EXPECT_EQ("cd", Pat->suffix()); } +TEST_F(GlobPatternTest, Substr) { + auto Pat = GlobPattern::create(""); + ASSERT_TRUE((bool)Pat); + EXPECT_EQ("", Pat->longest_substr()); + + Pat = GlobPattern::create("abcd"); + ASSERT_TRUE((bool)Pat); + EXPECT_EQ("", Pat->longest_substr()); + + Pat = GlobPattern::create("a*bcd"); + ASSERT_TRUE((bool)Pat); + EXPECT_EQ("", Pat->longest_substr()); + + Pat = GlobPattern::create("*abcd"); + ASSERT_TRUE((bool)Pat); + EXPECT_EQ("", Pat->longest_substr()); + + Pat = GlobPattern::create("abcd*"); + ASSERT_TRUE((bool)Pat); + EXPECT_EQ("", Pat->longest_substr()); + + Pat = GlobPattern::create("a*bc*d"); + ASSERT_TRUE((bool)Pat); + EXPECT_EQ("bc", Pat->longest_substr()); + + Pat = GlobPattern::create("a*bc*def*g"); + ASSERT_TRUE((bool)Pat); + EXPECT_EQ("def", Pat->longest_substr()); + + Pat = GlobPattern::create("a*bcd*ef*g"); + ASSERT_TRUE((bool)Pat); + EXPECT_EQ("bcd", Pat->longest_substr()); + + Pat = GlobPattern::create("a*bcd*efg*h"); + ASSERT_TRUE((bool)Pat); + EXPECT_EQ("bcd", Pat->longest_substr()); + + Pat = GlobPattern::create("a*bcd[ef]g*h"); + ASSERT_TRUE((bool)Pat); + EXPECT_EQ("bcd", Pat->longest_substr()); + + Pat = GlobPattern::create("a*bc[d]efg*h"); + ASSERT_TRUE((bool)Pat); + EXPECT_EQ("efg", Pat->longest_substr()); + + Pat = GlobPattern::create("a*bc[]]efg*h"); + ASSERT_TRUE((bool)Pat); + EXPECT_EQ("efg", Pat->longest_substr()); + + Pat = GlobPattern::create("a*bcde\\fg*h"); + ASSERT_TRUE((bool)Pat); + EXPECT_EQ("bcde", Pat->longest_substr()); + + Pat = GlobPattern::create("a*bcde\\[fg*h"); + ASSERT_TRUE((bool)Pat); + EXPECT_EQ("bcde", Pat->longest_substr()); + + Pat = GlobPattern::create("a*bcde?fg*h"); + ASSERT_TRUE((bool)Pat); + EXPECT_EQ("bcde", Pat->longest_substr()); + + Pat = GlobPattern::create("a*bcdef{g}*h"); + ASSERT_TRUE((bool)Pat); + EXPECT_EQ("bcdef", 
Pat->longest_substr()); +} + TEST_F(GlobPatternTest, Pathological) { std::string P, S(40, 'a'); StringRef Pieces[] = {"a*", "[ba]*", "{b*,a*}*"}; diff --git a/llvm/unittests/Support/RadixTreeTest.cpp b/llvm/unittests/Support/RadixTreeTest.cpp new file mode 100644 index 0000000000000..e94a40eaf0264 --- /dev/null +++ b/llvm/unittests/Support/RadixTreeTest.cpp @@ -0,0 +1,372 @@ +//===- llvm/unittest/Support/RadixTreeTypeTest.cpp ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/RadixTree.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include +#include +#include + +using namespace llvm; +namespace { + +using ::testing::ElementsAre; +using ::testing::ElementsAreArray; +using ::testing::Pair; +using ::testing::UnorderedElementsAre; + +// Test with StringRef. 
+ +TEST(RadixTreeTest, Empty) { + RadixTree T; + EXPECT_TRUE(T.empty()); + EXPECT_EQ(0u, T.size()); + + EXPECT_TRUE(T.find_prefixes("").empty()); + EXPECT_TRUE(T.find_prefixes("A").empty()); + + EXPECT_EQ(1u, T.countNodes()); +} + +TEST(RadixTreeTest, InsertEmpty) { + RadixTree T; + auto [It, IsNew] = T.emplace("", 4); + EXPECT_TRUE(!T.empty()); + EXPECT_EQ(1u, T.size()); + EXPECT_TRUE(IsNew); + const auto &[K, V] = *It; + EXPECT_TRUE(K.empty()); + EXPECT_EQ(4, V); + + EXPECT_THAT(T, ElementsAre(Pair("", 4))); + + EXPECT_THAT(T.find_prefixes(""), ElementsAre(Pair("", 4))); + + EXPECT_THAT(T.find_prefixes("a"), ElementsAre(Pair("", 4))); + + EXPECT_EQ(1u, T.countNodes()); +} + +TEST(RadixTreeTest, Complex) { + RadixTree T; + T.emplace("abcd", 1); + EXPECT_EQ(2u, T.countNodes()); + T.emplace("abklm", 2); + EXPECT_EQ(4u, T.countNodes()); + T.emplace("123abklm", 3); + EXPECT_EQ(5u, T.countNodes()); + T.emplace("123abklm", 4); + EXPECT_EQ(5u, T.countNodes()); + T.emplace("ab", 5); + EXPECT_EQ(5u, T.countNodes()); + T.emplace("1234567", 6); + EXPECT_EQ(7u, T.countNodes()); + T.emplace("123456", 7); + EXPECT_EQ(8u, T.countNodes()); + T.emplace("123456789", 8); + EXPECT_EQ(9u, T.countNodes()); + + EXPECT_THAT(T, UnorderedElementsAre(Pair("abcd", 1), Pair("abklm", 2), + Pair("123abklm", 3), Pair("ab", 5), + Pair("1234567", 6), Pair("123456", 7), + Pair("123456789", 8))); + + EXPECT_THAT(T.find_prefixes("1234567890"), + UnorderedElementsAre(Pair("1234567", 6), Pair("123456", 7), + Pair("123456789", 8))); + + EXPECT_THAT(T.find_prefixes("123abklm"), + UnorderedElementsAre(Pair("123abklm", 3))); + + EXPECT_THAT(T.find_prefixes("abcdefg"), + UnorderedElementsAre(Pair("abcd", 1), Pair("ab", 5))); + + EXPECT_EQ(9u, T.countNodes()); +} + +// Test different types, less readable. 
+ +template struct TestData { + static const T Data1[]; + static const T Data2[]; +}; + +template <> const char TestData::Data1[] = "abcdedcba"; +template <> const char TestData::Data2[] = "abCDEDCba"; + +template <> const int TestData::Data1[] = {1, 2, 3, 4, 5, 4, 3, 2, 1}; +template <> const int TestData::Data2[] = {1, 2, 4, 8, 16, 8, 4, 2, 1}; + +template class RadixTreeTypeTest : public ::testing::Test { +public: + using IteratorType = decltype(adl_begin(std::declval())); + using CharType = remove_cvref_t()))>; + + T make(const CharType *Data, size_t N) { return T(StringRef(Data, N)); } + + T make1(size_t N) { return make(TestData::Data1, N); } + T make2(size_t N) { return make(TestData::Data2, N); } +}; + +template <> +iterator_range +RadixTreeTypeTest>::make( + const char *Data, size_t N) { + return StringRef(Data).take_front(N); +} + +template <> +iterator_range +RadixTreeTypeTest>::make( + const char *Data, size_t N) { + return reverse(StringRef(Data).take_back(N)); +} + +template <> +ArrayRef RadixTreeTypeTest>::make(const int *Data, + size_t N) { + return ArrayRef(Data, Data + N); +} + +template <> +std::vector RadixTreeTypeTest>::make(const int *Data, + size_t N) { + return std::vector(Data, Data + N); +} + +template <> +std::list RadixTreeTypeTest>::make(const int *Data, + size_t N) { + return std::list(Data, Data + N); +} + +class TypeNameGenerator { +public: + template static std::string GetName(int) { + if (std::is_same_v) + return "StringRef"; + if (std::is_same_v) + return "string"; + if (std::is_same_v>) + return "iterator_range"; + if (std::is_same_v>) + return "reverse_iterator_range"; + if (std::is_same_v>) + return "ArrayRef"; + if (std::is_same_v>) + return "vector"; + if (std::is_same_v>) + return "list"; + return "Unknown"; + } +}; + +using TestTypes = + ::testing::Types, + iterator_range, + ArrayRef, std::vector, std::list>; + +TYPED_TEST_SUITE(RadixTreeTypeTest, TestTypes, TypeNameGenerator); + +TYPED_TEST(RadixTreeTypeTest, Helpers) { + 
for (size_t i = 0; i < 9; ++i) { + auto R1 = this->make1(i); + auto R2 = this->make2(i); + EXPECT_EQ(i, llvm::range_size(R1)); + EXPECT_EQ(i, llvm::range_size(R2)); + auto [I1, I2] = llvm::mismatch(R1, R2); + // Exactly 2 first elements of Data1 and Data2 must match. + EXPECT_EQ(std::min(2, i), std::distance(R1.begin(), I1)); + } +} + +TYPED_TEST(RadixTreeTypeTest, Empty) { + RadixTree T; + EXPECT_TRUE(T.empty()); + EXPECT_EQ(0u, T.size()); + + EXPECT_TRUE(T.find_prefixes(this->make1(0)).empty()); + EXPECT_TRUE(T.find_prefixes(this->make2(1)).empty()); + + EXPECT_EQ(1u, T.countNodes()); +} + +TYPED_TEST(RadixTreeTypeTest, InsertEmpty) { + using TreeType = RadixTree; + TreeType T; + auto [It, IsNew] = T.emplace(this->make1(0), 5); + EXPECT_TRUE(!T.empty()); + EXPECT_EQ(1u, T.size()); + EXPECT_TRUE(IsNew); + const auto &[K, V] = *It; + EXPECT_TRUE(K.empty()); + EXPECT_EQ(5, V); + + EXPECT_THAT(T.find_prefixes(this->make1(0)), + ElementsAre(Pair(ElementsAre(), 5))); + + EXPECT_THAT(T.find_prefixes(this->make2(1)), + ElementsAre(Pair(ElementsAre(), 5))); + + EXPECT_THAT(T, ElementsAre(Pair(ElementsAre(), 5))); + + EXPECT_EQ(1u, T.countNodes()); +} + +TYPED_TEST(RadixTreeTypeTest, InsertEmptyTwice) { + using TreeType = RadixTree; + TreeType T; + T.emplace(this->make1(0), 5); + auto [It, IsNew] = T.emplace(this->make1(0), 6); + EXPECT_TRUE(!T.empty()); + EXPECT_EQ(1u, T.size()); + EXPECT_TRUE(!IsNew); + const auto &[K, V] = *It; + EXPECT_TRUE(K.empty()); + EXPECT_EQ(5, V); + + EXPECT_THAT(T.find_prefixes(this->make1(0)), + ElementsAre(Pair(ElementsAre(), 5))); + + EXPECT_THAT(T.find_prefixes(this->make2(1)), + ElementsAre(Pair(ElementsAre(), 5))); + + EXPECT_THAT(T, ElementsAre(Pair(ElementsAre(), 5))); + + EXPECT_EQ(1u, T.countNodes()); +} + +TYPED_TEST(RadixTreeTypeTest, InsertOne) { + using TreeType = RadixTree; + TreeType T; + auto [It, IsNew] = T.emplace(this->make1(1), 4); + EXPECT_TRUE(!T.empty()); + EXPECT_EQ(1u, T.size()); + EXPECT_TRUE(IsNew); + const auto &[K, 
V] = *It; + EXPECT_THAT(K, ElementsAreArray(this->make1(1))); + EXPECT_EQ(4, V); + + EXPECT_THAT(T, ElementsAre(Pair(ElementsAreArray(this->make1(1)), 4))); + + EXPECT_THAT(T.find_prefixes(this->make1(1)), + ElementsAre(Pair(ElementsAreArray(this->make1(1)), 4))); + + EXPECT_THAT(T.find_prefixes(this->make1(2)), + ElementsAre(Pair(ElementsAreArray(this->make1(1)), 4))); + + EXPECT_EQ(2u, T.countNodes()); +} + +TYPED_TEST(RadixTreeTypeTest, InsertOneTwice) { + using TreeType = RadixTree; + TreeType T; + T.emplace(this->make1(1), 4); + auto [It, IsNew] = T.emplace(this->make1(1), 4); + EXPECT_TRUE(!T.empty()); + EXPECT_EQ(1u, T.size()); + EXPECT_TRUE(!IsNew); + + EXPECT_THAT(T, ElementsAre(Pair(ElementsAreArray(this->make1(1)), 4))); + EXPECT_EQ(2u, T.countNodes()); +} + +TYPED_TEST(RadixTreeTypeTest, InsertSuperStrings) { + using TreeType = RadixTree; + TreeType T; + + for (size_t Len = 0; Len < 7; Len += 2) { + auto [It, IsNew] = T.emplace(this->make1(Len), Len); + EXPECT_TRUE(IsNew); + } + + EXPECT_THAT(T, + UnorderedElementsAre(Pair(ElementsAreArray(this->make1(0)), 0), + Pair(ElementsAreArray(this->make1(2)), 2), + Pair(ElementsAreArray(this->make1(4)), 4), + Pair(ElementsAreArray(this->make1(6)), 6))); + + EXPECT_THAT(T.find_prefixes(this->make1(0)), + UnorderedElementsAre(Pair(ElementsAreArray(this->make1(0)), 0))); + + EXPECT_THAT(T.find_prefixes(this->make1(3)), + UnorderedElementsAre(Pair(ElementsAreArray(this->make1(0)), 0), + Pair(ElementsAreArray(this->make1(2)), 2))); + + EXPECT_THAT(T.find_prefixes(this->make1(7)), + UnorderedElementsAre(Pair(ElementsAreArray(this->make1(0)), 0), + Pair(ElementsAreArray(this->make1(2)), 2), + Pair(ElementsAreArray(this->make1(4)), 4), + Pair(ElementsAreArray(this->make1(6)), 6))); + + EXPECT_EQ(4u, T.countNodes()); +} + +TYPED_TEST(RadixTreeTypeTest, InsertSubStrings) { + using TreeType = RadixTree; + TreeType T; + + for (size_t Len = 0; Len < 7; Len += 2) { + auto [It, IsNew] = T.emplace(this->make1(7 - Len), 7 - 
Len); + EXPECT_TRUE(IsNew); + } + + EXPECT_THAT(T, + UnorderedElementsAre(Pair(ElementsAreArray(this->make1(1)), 1), + Pair(ElementsAreArray(this->make1(3)), 3), + Pair(ElementsAreArray(this->make1(5)), 5), + Pair(ElementsAreArray(this->make1(7)), 7))); + + EXPECT_THAT(T.find_prefixes(this->make1(0)), UnorderedElementsAre()); + + EXPECT_THAT(T.find_prefixes(this->make1(3)), + UnorderedElementsAre(Pair(ElementsAreArray(this->make1(1)), 1), + Pair(ElementsAreArray(this->make1(3)), 3))); + + EXPECT_THAT(T.find_prefixes(this->make1(6)), + UnorderedElementsAre(Pair(ElementsAreArray(this->make1(1)), 1), + Pair(ElementsAreArray(this->make1(3)), 3), + Pair(ElementsAreArray(this->make1(5)), 5))); + + EXPECT_EQ(5u, T.countNodes()); +} + +TYPED_TEST(RadixTreeTypeTest, InsertVShape) { + using TreeType = RadixTree; + TreeType T; + + EXPECT_EQ(1u, T.countNodes()); + T.emplace(this->make1(5), 15); + EXPECT_EQ(2u, T.countNodes()); + T.emplace(this->make2(6), 26); + EXPECT_EQ(4u, T.countNodes()); + T.emplace(this->make2(1), 21); + EXPECT_EQ(5u, T.countNodes()); + + EXPECT_THAT(T, + UnorderedElementsAre(Pair(ElementsAreArray(this->make1(5)), 15), + Pair(ElementsAreArray(this->make2(6)), 26), + Pair(ElementsAreArray(this->make2(1)), 21))); + + EXPECT_THAT(T.find_prefixes(this->make1(7)), + UnorderedElementsAre(Pair(ElementsAreArray(this->make2(1)), 21), + Pair(ElementsAreArray(this->make1(5)), 15))); + + EXPECT_THAT(T.find_prefixes(this->make2(7)), + UnorderedElementsAre(Pair(ElementsAreArray(this->make2(1)), 21), + Pair(ElementsAreArray(this->make2(6)), 26))); + + EXPECT_EQ(5u, T.countNodes()); +} + +} // namespace From a3e5a1347f1ebe4a42fa90acc757d467caf40466 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Thu, 23 Oct 2025 00:32:35 -0700 Subject: [PATCH 2/5] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20ch?= =?UTF-8?q?anges=20introduced=20through=20rebase?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.7 [skip 
ci] --- llvm/docs/ProgrammersManual.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/llvm/docs/ProgrammersManual.rst b/llvm/docs/ProgrammersManual.rst index 9cdac9c59fa9b..26865e4f671d7 100644 --- a/llvm/docs/ProgrammersManual.rst +++ b/llvm/docs/ProgrammersManual.rst @@ -2161,6 +2161,16 @@ that are not simple pointers (use :ref:`SmallPtrSet <dss_smallptrset>` for pointers). Note that ``DenseSet`` has the same requirements for the value type that :ref:`DenseMap <dss_densemap>` has. +.. _dss_radixtree: + +llvm/ADT/RadixTree.h +^^^^^^^^^^^^^^^^^^^^ + +``RadixTree`` is a trie-based data structure that stores range-like keys and +their associated values. It is particularly efficient for storing keys that +share common prefixes, as it can compress these prefixes to save memory. It +supports efficient search of matching prefixes. + .. _dss_sparseset: llvm/ADT/SparseSet.h From 7fbe75b0b3c6af24fee60cb755c2be0fb7b28794 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Thu, 23 Oct 2025 23:20:07 -0700 Subject: [PATCH 3/5] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20ch?= =?UTF-8?q?anges=20introduced=20through=20rebase?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.7 [skip ci] --- llvm/unittests/ADT/RadixTreeTest.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/unittests/ADT/RadixTreeTest.cpp b/llvm/unittests/ADT/RadixTreeTest.cpp index c9fd44e3497ff..2c92e9d939852 100644 --- a/llvm/unittests/ADT/RadixTreeTest.cpp +++ b/llvm/unittests/ADT/RadixTreeTest.cpp @@ -1,4 +1,4 @@ -//===- llvm/unittest/ADT/RadixTreeTypeTest.cpp ------------------------===// +//===- llvm/unittest/ADT/RadixTreeTest.cpp --------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
From d9944aaf92fcee109368c8cb07129f0bbfc09ff5 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Fri, 24 Oct 2025 22:02:39 -0700 Subject: [PATCH 4/5] Update llvm/lib/Support/SpecialCaseList.cpp Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- llvm/lib/Support/SpecialCaseList.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp index 567cc9421020c..041626876d1fb 100644 --- a/llvm/lib/Support/SpecialCaseList.cpp +++ b/llvm/lib/Support/SpecialCaseList.cpp @@ -104,7 +104,7 @@ void SpecialCaseList::GlobMatcher::match( if (!PrefixToGlob.empty()) { for (const auto &[_, V] : PrefixToGlob.find_prefixes(Query)) { for (const auto *G : V) { - // Each value of the map is vector of globs sorted as from best to + // Each value of the map is a vector of globs sorted from best to // worst. if (G->Pattern.match(Query)) { Cb(G->Name, G->LineNo); From 40cdcee733c5f7be511c9240ce8b4762b15e8ad7 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Fri, 24 Oct 2025 22:02:49 -0700 Subject: [PATCH 5/5] Update llvm/lib/Support/SpecialCaseList.cpp Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- llvm/lib/Support/SpecialCaseList.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp index 041626876d1fb..642ec34ce43e1 100644 --- a/llvm/lib/Support/SpecialCaseList.cpp +++ b/llvm/lib/Support/SpecialCaseList.cpp @@ -108,7 +108,7 @@ void SpecialCaseList::GlobMatcher::match( // worst. if (G->Pattern.match(Query)) { Cb(G->Name, G->LineNo); - // As soon as we find match in the vector we can break for the vector, + // As soon as we find a match in the vector we can break for the vector, // but we still need to continue for other values in the map, as they // may contain a better match. break;