-
Notifications
You must be signed in to change notification settings - Fork 12.3k
/
GlobPattern.h
93 lines (84 loc) · 3.46 KB
/
GlobPattern.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
//===-- GlobPattern.h - glob pattern matcher implementation -*- C++ -*-----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a glob pattern matcher.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_SUPPORT_GLOBPATTERN_H
#define LLVM_SUPPORT_GLOBPATTERN_H
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
#include <optional>
namespace llvm {
/// This class implements a glob pattern matcher similar to the one found in
/// bash, but with some key differences. Notably, \p "*" matches all
/// characters, including path separators.
///
/// * \p "?" matches a single character.
/// * \p "*" matches zero or more characters.
/// * \p "[<chars>]" matches one character in the bracket. Character ranges,
/// e.g., \p "[a-z]", and negative sets via \p "[^ab]" or \p "[!ab]" are also
/// supported.
/// * \p "{<glob>,...}" matches one of the globs in the list. Nested brace
/// expansions are not supported. If \p MaxSubPatterns is empty then
/// brace expansions are not supported and characters \p "{,}" are treated as
/// literals.
/// * \p "\\" (a single backslash) escapes the next character so it is treated
/// as a literal.
///
/// Some known edge cases are:
/// * \p "]" is allowed as the first character in a character class, i.e.,
/// \p "[]]" is valid and matches the literal \p "]".
/// * The empty character class, i.e., \p "[]", is invalid.
/// * Empty or singleton brace expansions, e.g., \p "{}", \p "{a}", are invalid.
/// * \p "}" and \p "," that are not inside a brace expansion are taken as
/// literals, e.g., \p ",}" is valid but \p "{" is not.
///
/// For example, \p "*[/\\\\]foo.{c,cpp}" (with two backslashes) will match
/// (unix or windows) paths to all files named \p "foo.c" or \p "foo.cpp".
class GlobPattern {
public:
  /// Builds a matcher from the glob expression \p Pat.
  ///
  /// \param Pat the glob expression to match against.
  /// \param MaxSubPatterns when set, caps the number of subpatterns that brace
  ///                       expansion is allowed to create; when left empty,
  ///                       brace expansion is disabled.
  /// \returns an \c Expected holding either the constructed pattern or an
  ///          error.
  static Expected<GlobPattern>
  create(StringRef Pat, std::optional<size_t> MaxSubPatterns = {});

  /// \returns \p true if \p S matches this glob pattern.
  bool match(StringRef S) const;

  /// \returns \p true when this pattern is exactly the glob \p "*", i.e. there
  /// is no prefix and the single stored sub-glob spells \p "*".
  ///
  /// Callers can use this to avoid expensive preparation/acquisition of the
  /// input that would otherwise be handed to match().
  bool isTrivialMatchAll() const {
    // Short-circuit evaluation guarantees the element access only happens
    // once we know exactly one sub-glob is present.
    return Prefix.empty() && SubGlobs.size() == 1 &&
           SubGlobs.front().getPat() == "*";
  }

private:
  // NOTE(review): presumably the leading part of the pattern that is handled
  // separately from the sub-globs; this is a non-owning view, so the
  // referenced storage must outlive this object -- confirm against the
  // implementation file.
  StringRef Prefix;

  /// One glob alternative, holding its own copy of the pattern text.
  struct SubGlobPattern {
    /// \param Pat the pattern to match against.
    /// \returns an \c Expected holding either the constructed sub-glob or an
    ///          error.
    static Expected<SubGlobPattern> create(StringRef Pat);

    /// \returns \p true if \p S matches this glob pattern.
    bool match(StringRef S) const;

    /// \returns a non-owning view over the characters stored in \c Pat.
    StringRef getPat() const { return {Pat.data(), Pat.size()}; }

    /// A bracket expression, recorded with its end position and the bytes it
    /// matches.
    struct Bracket {
      size_t NextOffset; ///< End position of the bracket.
      BitVector Bytes;   ///< Bytes matched by the bracket.
    };

    SmallVector<Bracket, 0> Brackets;
    SmallVector<char, 0> Pat;
  };

  SmallVector<SubGlobPattern, 1> SubGlobs;
};
}
#endif // LLVM_SUPPORT_GLOBPATTERN_H