From 17412b03b2019cf0df75b16f8264a696724f45fe Mon Sep 17 00:00:00 2001 From: Zachary Turner Date: Sat, 12 Nov 2016 17:17:12 +0000 Subject: [PATCH] [Support] Add StringRef::find_lower and contains_lower. Differential Revision: https://reviews.llvm.org/D25299 llvm-svn: 286724 --- llvm/include/llvm/ADT/StringRef.h | 36 +++++++++++++ llvm/lib/Support/StringRef.cpp | 39 ++++++++++++++ llvm/unittests/ADT/StringRefTest.cpp | 76 ++++++++++++++++++++++------ 3 files changed, 136 insertions(+), 15 deletions(-) diff --git a/llvm/include/llvm/ADT/StringRef.h b/llvm/include/llvm/ADT/StringRef.h index 97359dd2630bb..56f1ab167f133 100644 --- a/llvm/include/llvm/ADT/StringRef.h +++ b/llvm/include/llvm/ADT/StringRef.h @@ -286,6 +286,12 @@ namespace llvm { return npos; } + /// Search for the first character \p C in the string, ignoring ASCII case. + /// + /// \returns The index of the first occurrence of \p C at or after \p From, + /// or npos if not found. + size_t find_lower(char C, size_t From = 0) const; + /// Search for the first character satisfying the predicate \p F /// /// \returns The index of the first character satisfying \p F starting from @@ -318,6 +324,12 @@ namespace llvm { /// found. size_t find(StringRef Str, size_t From = 0) const; + /// Search for the first string \p Str in the string, ignoring case. + /// + /// \returns The index of the first occurrence of \p Str at or after + /// \p From, or npos if not found. + size_t find_lower(StringRef Str, size_t From = 0) const; + /// Search for the last character \p C in the string. /// /// \returns The index of the last occurrence of \p C, or npos if not @@ -333,12 +345,24 @@ namespace llvm { return npos; } + /// Search for the last character \p C in the string, ignoring ASCII case. + /// + /// \returns The index of the last occurrence of \p C before index + /// \p From, or npos if not found. + size_t rfind_lower(char C, size_t From = npos) const; + /// Search for the last string \p Str in the string. /// /// \returns The index of the last occurrence of \p Str, or npos if not /// found. 
size_t rfind(StringRef Str) const; + /// Search for the last string \p Str in the string, ignoring case. + /// + /// \returns The index of the last occurrence of \p Str, or npos if not + /// found. + size_t rfind_lower(StringRef Str) const; + /// Find the first character in the string that is \p C, or npos if not /// found. Same as find. size_t find_first_of(char C, size_t From = 0) const { @@ -393,6 +417,18 @@ namespace llvm { LLVM_ATTRIBUTE_ALWAYS_INLINE bool contains(char C) const { return find_first_of(C) != npos; } + /// Return true if the given string is a substring of *this, ignoring case, + /// and false otherwise. + LLVM_ATTRIBUTE_ALWAYS_INLINE + bool contains_lower(StringRef Other) const { + return find_lower(Other) != npos; + } + + /// Return true if the given character is contained in *this, ignoring + /// case, and false otherwise. + LLVM_ATTRIBUTE_ALWAYS_INLINE + bool contains_lower(char C) const { return find_lower(C) != npos; } + /// @} /// @name Helpful Algorithms /// @{ diff --git a/llvm/lib/Support/StringRef.cpp b/llvm/lib/Support/StringRef.cpp index 51e0394d8bd50..b2cc793d99b44 100644 --- a/llvm/lib/Support/StringRef.cpp +++ b/llvm/lib/Support/StringRef.cpp @@ -69,6 +69,11 @@ bool StringRef::endswith_lower(StringRef Suffix) const { ascii_strncasecmp(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0; } +size_t StringRef::find_lower(char C, size_t From) const { + char L = ascii_tolower(C); + return find_if([L](char D) { return ascii_tolower(D) == L; }, From); +} + /// compare_numeric - Compare strings, handle embedded numbers. 
int StringRef::compare_numeric(StringRef RHS) const { for (size_t I = 0, E = std::min(Length, RHS.Length); I != E; ++I) { @@ -182,6 +187,28 @@ size_t StringRef::find(StringRef Str, size_t From) const { return npos; } +size_t StringRef::find_lower(StringRef Str, size_t From) const { + // A start offset past the end can never match, even for an empty needle. + if (From > Length) + return npos; + for (StringRef This = substr(From); This.size() >= Str.size(); + This = This.drop_front(), ++From) + if (This.startswith_lower(Str)) + return From; + return npos; +} + +size_t StringRef::rfind_lower(char C, size_t From) const { + // Fold the needle once up front, as find_lower(char) does. + const char L = ascii_tolower(C); + for (size_t i = std::min(From, Length); i != 0;) { + --i; + if (ascii_tolower(Data[i]) == L) + return i; + } + return npos; +} + /// rfind - Search for the last string \arg Str in the string. /// /// \return - The index of the last occurrence of \arg Str, or npos if not @@ -198,6 +225,18 @@ size_t StringRef::rfind(StringRef Str) const { return npos; } +size_t StringRef::rfind_lower(StringRef Str) const { + size_t N = Str.size(); + if (N > Length) + return npos; + for (size_t i = Length - N + 1, e = 0; i != e;) { + --i; + if (substr(i, N).equals_lower(Str)) + return i; + } + return npos; +} + /// find_first_of - Find the first character in the string that is in \arg /// Chars, or npos if not found. 
/// diff --git a/llvm/unittests/ADT/StringRefTest.cpp b/llvm/unittests/ADT/StringRefTest.cpp index 9c65a4b3589b8..c1cc558b5f73b 100644 --- a/llvm/unittests/ADT/StringRefTest.cpp +++ b/llvm/unittests/ADT/StringRefTest.cpp @@ -410,21 +410,58 @@ TEST(StringRefTest, ConsumeBack) { } TEST(StringRefTest, Find) { - StringRef Str("hello"); - EXPECT_EQ(2U, Str.find('l')); - EXPECT_EQ(StringRef::npos, Str.find('z')); - EXPECT_EQ(StringRef::npos, Str.find("helloworld")); - EXPECT_EQ(0U, Str.find("hello")); - EXPECT_EQ(1U, Str.find("ello")); - EXPECT_EQ(StringRef::npos, Str.find("zz")); - EXPECT_EQ(2U, Str.find("ll", 2)); - EXPECT_EQ(StringRef::npos, Str.find("ll", 3)); - EXPECT_EQ(0U, Str.find("")); - StringRef LongStr("hellx xello hell ello world foo bar hello"); - EXPECT_EQ(36U, LongStr.find("hello")); - EXPECT_EQ(28U, LongStr.find("foo")); - EXPECT_EQ(12U, LongStr.find("hell", 2)); - EXPECT_EQ(0U, LongStr.find("")); + StringRef Str("helloHELLO"); + StringRef LongStr("hellx xello hell ello world foo bar hello HELLO"); + + struct { + StringRef Str; + char C; + std::size_t From; + std::size_t Pos; + std::size_t LowerPos; + } CharExpectations[] = { + {Str, 'h', 0U, 0U, 0U}, + {Str, 'e', 0U, 1U, 1U}, + {Str, 'l', 0U, 2U, 2U}, + {Str, 'l', 3U, 3U, 3U}, + {Str, 'o', 0U, 4U, 4U}, + {Str, 'L', 0U, 7U, 2U}, + {Str, 'z', 0U, StringRef::npos, StringRef::npos}, + }; + + struct { + StringRef Str; + llvm::StringRef S; + std::size_t From; + std::size_t Pos; + std::size_t LowerPos; + } StrExpectations[] = { + {Str, "helloworld", 0U, StringRef::npos, StringRef::npos}, + {Str, "hello", 0U, 0U, 0U}, + {Str, "ello", 0U, 1U, 1U}, + {Str, "zz", 0U, StringRef::npos, StringRef::npos}, + {Str, "ll", 2U, 2U, 2U}, + {Str, "ll", 3U, StringRef::npos, 7U}, + {Str, "LL", 2U, 7U, 2U}, + {Str, "LL", 3U, 7U, 7U}, + {Str, "", 0U, 0U, 0U}, + {LongStr, "hello", 0U, 36U, 36U}, + {LongStr, "foo", 0U, 28U, 28U}, + {LongStr, "hell", 2U, 12U, 12U}, + {LongStr, "HELL", 2U, 42U, 12U}, + {LongStr, "", 0U, 0U, 0U}}; + + for 
(auto &E : CharExpectations) { + EXPECT_EQ(E.Pos, E.Str.find(E.C, E.From)); + EXPECT_EQ(E.LowerPos, E.Str.find_lower(E.C, E.From)); + EXPECT_EQ(E.LowerPos, E.Str.find_lower(toupper(E.C), E.From)); + } + + for (auto &E : StrExpectations) { + EXPECT_EQ(E.Pos, E.Str.find(E.S, E.From)); + EXPECT_EQ(E.LowerPos, E.Str.find_lower(E.S, E.From)); + EXPECT_EQ(E.LowerPos, E.Str.find_lower(E.S.upper(), E.From)); + } EXPECT_EQ(3U, Str.rfind('l')); EXPECT_EQ(StringRef::npos, Str.rfind('z')); @@ -433,10 +470,19 @@ TEST(StringRefTest, Find) { EXPECT_EQ(1U, Str.rfind("ello")); EXPECT_EQ(StringRef::npos, Str.rfind("zz")); + EXPECT_EQ(8U, Str.rfind_lower('l')); + EXPECT_EQ(8U, Str.rfind_lower('L')); + EXPECT_EQ(StringRef::npos, Str.rfind_lower('z')); + EXPECT_EQ(StringRef::npos, Str.rfind_lower("HELLOWORLD")); + EXPECT_EQ(5U, Str.rfind_lower("HELLO")); + EXPECT_EQ(6U, Str.rfind_lower("ELLO")); + EXPECT_EQ(StringRef::npos, Str.rfind_lower("ZZ")); + EXPECT_EQ(2U, Str.find_first_of('l')); EXPECT_EQ(1U, Str.find_first_of("el")); EXPECT_EQ(StringRef::npos, Str.find_first_of("xyz")); + Str = "hello"; EXPECT_EQ(1U, Str.find_first_not_of('h')); EXPECT_EQ(4U, Str.find_first_not_of("hel")); EXPECT_EQ(StringRef::npos, Str.find_first_not_of("hello"));