Skip to content

Commit

Permalink
Move GlobPattern class from LLD to llvm/Support.
Browse files Browse the repository at this point in the history
GlobPattern is a class to handle glob pattern matching. Currently
only LLD is using that, but technically that feature is not specific
to linkers, so in this patch I move that file to LLVM.

Differential Revision: https://reviews.llvm.org/D27969

llvm-svn: 290212
  • Loading branch information
rui314 committed Dec 20, 2016
1 parent e909a6e commit f7a7ab5
Show file tree
Hide file tree
Showing 7 changed files with 297 additions and 153 deletions.
134 changes: 7 additions & 127 deletions lld/ELF/Strings.cpp
Expand Up @@ -21,134 +21,14 @@ using namespace llvm;
using namespace lld;
using namespace lld::elf;

// This is a scanner for the glob pattern.
// A glob pattern token is one of "*", "?", "[<chars>]", "[^<chars>]"
// (which is a negative form of "[<chars>]"), or a non-meta character.
// This function returns the first token in S.
BitVector GlobPattern::scan(StringRef &S) {
switch (S[0]) {
case '*':
S = S.substr(1);
// '*' is represented by an empty bitvector.
// All other bitvectors are 256-bit long.
return BitVector();
case '?':
S = S.substr(1);
return BitVector(256, true);
case '[': {
size_t End = S.find(']', 1);
if (End == StringRef::npos) {
error("invalid glob pattern: " + Original);
S = "";
return BitVector(256, false);
}
StringRef Chars = S.substr(1, End - 1);
S = S.substr(End + 1);
if (Chars.startswith("^"))
return expand(Chars.substr(1)).flip();
return expand(Chars);
}
default:
BitVector BV(256, false);
BV[S[0]] = true;
S = S.substr(1);
return BV;
}
}

// Expands character ranges and returns a bitmap.
// For example, "a-cf-hz" is expanded to "abcfghz".
BitVector GlobPattern::expand(StringRef S) {
BitVector BV(256, false);

// Expand "x-y".
for (;;) {
if (S.size() < 3)
break;

// If it doesn't start with something like "x-y",
// consume the first character and proceed.
if (S[1] != '-') {
BV[S[0]] = true;
S = S.substr(1);
continue;
}

// It must be in the form of "x-y".
// Validate it and then interpret the range.
if (S[0] > S[2]) {
error("invalid glob pattern: " + Original);
return BV;
}
for (int C = S[0]; C <= S[2]; ++C)
BV[C] = true;
S = S.substr(3);
}

for (char C : S)
BV[C] = true;
return BV;
}

GlobPattern::GlobPattern(StringRef S) : Original(S) {
if (!hasWildcard(S)) {
// S doesn't contain any metacharacter,
// so the regular string comparison should work.
Exact = S;
} else if (S.endswith("*") && !hasWildcard(S.drop_back())) {
// S is something like "foo*". We can use startswith().
Prefix = S.drop_back();
} else if (S.startswith("*") && !hasWildcard(S.drop_front())) {
// S is something like "*foo". We can use endswith().
Suffix = S.drop_front();
} else {
// Otherwise, we need to do real glob pattern matching.
// Parse the pattern now.
while (!S.empty())
Tokens.push_back(scan(S));
}
}

bool GlobPattern::match(StringRef S) const {
if (Exact)
return S == *Exact;
if (Prefix)
return S.startswith(*Prefix);
if (Suffix)
return S.endswith(*Suffix);
return matchOne(Tokens, S);
}

// Runs glob pattern Pats against string S.
bool GlobPattern::matchOne(ArrayRef<BitVector> Pats, StringRef S) const {
for (;;) {
if (Pats.empty())
return S.empty();

// If Pats[0] is '*', try to match Pats[1..] against all possible
// substrings of S to see at least one pattern succeeds.
if (Pats[0].size() == 0) {
Pats = Pats.slice(1);
if (Pats.empty())
// Fast path. If a pattern is '*', it matches anything.
return true;
for (size_t I = 0, E = S.size(); I < E; ++I)
if (matchOne(Pats, S.substr(I)))
return true;
return false;
}

// If Pats[0] is not '*', it must consume one character.
if (S.empty() || !Pats[0][S[0]])
return false;
Pats = Pats.slice(1);
S = S.substr(1);
}
}

StringMatcher::StringMatcher(const std::vector<StringRef> &Pat) {
for (StringRef S : Pat)
Patterns.push_back(GlobPattern(S));
for (StringRef S : Pat) {
Expected<GlobPattern> Pat = GlobPattern::create(S);
if (!Pat)
error(toString(Pat.takeError()));
else
Patterns.push_back(*Pat);
}
}

bool StringMatcher::match(StringRef S) const {
Expand Down
27 changes: 2 additions & 25 deletions lld/ELF/Strings.h
Expand Up @@ -15,6 +15,7 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/GlobPattern.h"
#include <vector>

namespace lld {
Expand Down Expand Up @@ -56,30 +57,6 @@ class StringRefZ {
mutable size_t Size;
};

// This class represents a glob pattern. Supported metacharacters
// are "*", "?", "[<chars>]" and "[^<chars>]".
class GlobPattern {
public:
explicit GlobPattern(StringRef Pat);
bool match(StringRef S) const;

private:
bool matchOne(ArrayRef<llvm::BitVector> Pat, StringRef S) const;
llvm::BitVector scan(StringRef &S);
llvm::BitVector expand(StringRef S);

// Parsed glob pattern.
std::vector<llvm::BitVector> Tokens;

// A glob pattern given to this class. This is for error reporting.
StringRef Original;

// The following members are for optimization.
llvm::Optional<StringRef> Exact;
llvm::Optional<StringRef> Prefix;
llvm::Optional<StringRef> Suffix;
};

// This class represents multiple glob patterns.
class StringMatcher {
public:
Expand All @@ -89,7 +66,7 @@ class StringMatcher {
bool match(StringRef S) const;

private:
std::vector<GlobPattern> Patterns;
std::vector<llvm::GlobPattern> Patterns;
};

// Returns a demangled C++ symbol name. If Name is not a mangled
Expand Down
48 changes: 48 additions & 0 deletions llvm/include/llvm/Support/GlobPattern.h
@@ -0,0 +1,48 @@
//===-- GlobPattern.h - glob pattern matcher implementation -*- C++ -*-----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements a glob pattern matcher. The glob pattern is the
// rule used by the shell.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_SUPPORT_GLOB_PATTERN_H
#define LLVM_SUPPORT_GLOB_PATTERN_H

#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
#include <vector>

// This class represents a glob pattern. Supported metacharacters
// are "*", "?", "[<chars>]" and "[^<chars>]".
namespace llvm {
class BitVector;
template <typename T> class ArrayRef;

class GlobPattern {
public:
static Expected<GlobPattern> create(StringRef Pat);
bool match(StringRef S) const;

private:
bool matchOne(ArrayRef<BitVector> Pat, StringRef S) const;

// Parsed glob pattern.
std::vector<BitVector> Tokens;

// The following members are for optimization.
Optional<StringRef> Exact;
Optional<StringRef> Prefix;
Optional<StringRef> Suffix;
};
}

#endif // LLVM_SUPPORT_GLOB_PATTERN_H
1 change: 1 addition & 0 deletions llvm/lib/Support/CMakeLists.txt
Expand Up @@ -56,6 +56,7 @@ add_llvm_library(LLVMSupport
FoldingSet.cpp
FormattedStream.cpp
FormatVariadic.cpp
GlobPattern.cpp
GraphWriter.cpp
Hashing.cpp
IntEqClasses.cpp
Expand Down

0 comments on commit f7a7ab5

Please sign in to comment.