Skip to content

Commit f08d2db

Browse files
committed
add and document regex support for FileCheck. You can now do stuff like:
; CHECK: movl {{%e[a-z][xi]}}, %eax or whatever. llvm-svn: 82717
1 parent abab11a commit f08d2db

File tree

5 files changed

+161
-15
lines changed

5 files changed

+161
-15
lines changed

llvm/docs/TestingGuide.html

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -624,6 +624,40 @@
624624

625625
</div>
626626

627+
<!-- _______________________________________________________________________ -->
628+
<div class="doc_subsubsection"><a
629+
name="FileCheck-Matching">FileCheck Pattern Matching Syntax</a></div>
630+
631+
<div class="doc_text">
632+
633+
<p>The CHECK: and CHECK-NOT: directives both take a pattern to match. For most
634+
uses of FileCheck, fixed string matching is perfectly sufficient. For some
635+
things, a more flexible form of matching is desired. To support this, FileCheck
636+
allows you to specify regular expressions in matching strings, surrounded by
637+
double braces: <b>{{yourregex}}</b>. Because we want to use fixed string
638+
matching for a majority of what we do, FileCheck has been designed to support
639+
mixing and matching fixed string matching with regular expressions. This allows
640+
you to write things like this:</p>
641+
642+
<div class="doc_code">
643+
<pre>
644+
; CHECK: movhpd <b>{{[0-9]+}}</b>(%esp), <b>{{%xmm[0-7]}}</b>
645+
</pre>
646+
</div>
647+
648+
<p>In this case, any offset from the ESP register will be allowed, and any xmm
649+
register will be allowed.</p>
650+
651+
<p>Because regular expressions are enclosed with double braces, they are
652+
visually distinct, and you don't need to use escape characters within the double
653+
braces like you would in C. In the rare case that you want to match double
654+
braces explicitly from the input, you can use something ugly like
655+
<b>{{[{][{]}}</b> as your pattern.</p>
656+
657+
</div>
658+
659+
660+
627661
<!-- _______________________________________________________________________ -->
628662
<div class="doc_subsection"><a name="dgvars">Variables and
629663
substitutions</a></div>

llvm/include/llvm/Support/Regex.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,14 @@
1111
//
1212
//===----------------------------------------------------------------------===//
1313

14-
#include "llvm/ADT/SmallVector.h"
15-
#include "llvm/ADT/StringRef.h"
14+
#include <string>
1615

1716
struct llvm_regex;
17+
1818
namespace llvm {
19+
class StringRef;
20+
template<typename T> class SmallVectorImpl;
21+
1922
class Regex {
2023
public:
2124
enum {
@@ -54,6 +57,8 @@ namespace llvm {
5457
/// Matches.
5558
/// For this feature to be enabled you must construct the regex using
5659
/// Regex("...", Regex::Sub) constructor.
60+
///
61+
/// This returns true on a successful match.
5762
bool match(const StringRef &String, SmallVectorImpl<StringRef> *Matches=0);
5863
private:
5964
struct llvm_regex *preg;

llvm/lib/Support/Regex.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,14 @@
1414
#include "llvm/Support/Regex.h"
1515
#include "llvm/Support/ErrorHandling.h"
1616
#include "llvm/Support/raw_ostream.h"
17+
#include "llvm/ADT/SmallVector.h"
1718
#include "regex_impl.h"
1819
#include <string>
1920
using namespace llvm;
2021

2122
Regex::Regex(const StringRef &regex, unsigned Flags) {
2223
unsigned flags = 0;
23-
preg = new struct llvm_regex;
24+
preg = new llvm_regex();
2425
preg->re_endp = regex.end();
2526
if (Flags & IgnoreCase)
2627
flags |= REG_ICASE;
@@ -60,7 +61,7 @@ bool Regex::match(const StringRef &String, SmallVectorImpl<StringRef> *Matches){
6061
}
6162

6263
// pmatch needs to have at least one element.
63-
SmallVector<llvm_regmatch_t, 2> pm;
64+
SmallVector<llvm_regmatch_t, 8> pm;
6465
pm.resize(nmatch > 0 ? nmatch : 1);
6566
pm[0].rm_so = 0;
6667
pm[0].rm_eo = String.size();

llvm/unittests/Support/RegexTest.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
#include "gtest/gtest.h"
1111
#include "llvm/Support/Regex.h"
12+
#include "llvm/ADT/SmallVector.h"
1213
#include <cstring>
1314

1415
using namespace llvm;

llvm/utils/FileCheck/FileCheck.cpp

Lines changed: 116 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "llvm/Support/CommandLine.h"
2020
#include "llvm/Support/MemoryBuffer.h"
2121
#include "llvm/Support/PrettyStackTrace.h"
22+
#include "llvm/Support/Regex.h"
2223
#include "llvm/Support/SourceMgr.h"
2324
#include "llvm/Support/raw_ostream.h"
2425
#include "llvm/System/Signals.h"
@@ -44,8 +45,9 @@ NoCanonicalizeWhiteSpace("strict-whitespace",
4445
//===----------------------------------------------------------------------===//
4546

4647
class Pattern {
47-
/// Str - The string to match.
48-
StringRef Str;
48+
/// Chunks - The pattern chunks to match. If the bool is false, it is a fixed
49+
/// string match; if it is true, it is a regex match.
50+
SmallVector<std::pair<StringRef, bool>, 4> Chunks;
4951
public:
5052

5153
Pattern() { }
@@ -55,10 +57,7 @@ class Pattern {
5557
/// Match - Match the pattern string against the input buffer Buffer. This
5658
/// returns the position that is matched or npos if there is no match. If
5759
/// there is a match, the size of the matched string is returned in MatchLen.
58-
size_t Match(StringRef Buffer, size_t &MatchLen) const {
59-
MatchLen = Str.size();
60-
return Buffer.find(Str);
61-
}
60+
size_t Match(StringRef Buffer, size_t &MatchLen) const;
6261
};
6362

6463
bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {
@@ -74,11 +73,117 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {
7473
"error");
7574
return true;
7675
}
76+
77+
// Scan the pattern to break it into regex and non-regex pieces.
78+
while (!PatternStr.empty()) {
79+
// Handle fixed string matches.
80+
if (PatternStr.size() < 2 ||
81+
PatternStr[0] != '{' || PatternStr[1] != '{') {
82+
// Find the end, which is the start of the next regex.
83+
size_t FixedMatchEnd = PatternStr.find("{{");
84+
85+
Chunks.push_back(std::make_pair(PatternStr.substr(0, FixedMatchEnd),
86+
false));
87+
PatternStr = PatternStr.substr(FixedMatchEnd);
88+
continue;
89+
}
90+
91+
// Otherwise, this is the start of a regex match. Scan for the }}.
92+
size_t End = PatternStr.find("}}");
93+
if (End == StringRef::npos) {
94+
SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
95+
"found start of regex string with no end '}}'", "error");
96+
return true;
97+
}
98+
99+
Regex R(PatternStr.substr(2, End-2));
100+
std::string Error;
101+
if (!R.isValid(Error)) {
102+
SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()+2),
103+
"invalid regex: " + Error, "error");
104+
return true;
105+
}
106+
107+
Chunks.push_back(std::make_pair(PatternStr.substr(2, End-2), true));
108+
PatternStr = PatternStr.substr(End+2);
109+
}
110+
111+
return false;
112+
}
77113

114+
/// Match - Match the pattern string against the input buffer Buffer. This
115+
/// returns the position that is matched or npos if there is no match. If
116+
/// there is a match, the size of the matched string is returned in MatchLen.
117+
size_t Pattern::Match(StringRef Buffer, size_t &MatchLen) const {
118+
size_t FirstMatch = StringRef::npos;
119+
MatchLen = 0;
78120

121+
SmallVector<StringRef, 4> MatchInfo;
79122

80-
Str = PatternStr;
81-
return false;
123+
while (!Buffer.empty()) {
124+
StringRef MatchAttempt = Buffer;
125+
126+
unsigned ChunkNo = 0, e = Chunks.size();
127+
for (; ChunkNo != e; ++ChunkNo) {
128+
StringRef PatternStr = Chunks[ChunkNo].first;
129+
130+
size_t ThisMatch = StringRef::npos;
131+
size_t ThisLength = StringRef::npos;
132+
if (!Chunks[ChunkNo].second) {
133+
// Fixed string match.
134+
ThisMatch = MatchAttempt.find(Chunks[ChunkNo].first);
135+
ThisLength = Chunks[ChunkNo].first.size();
136+
} else if (Regex(Chunks[ChunkNo].first, Regex::Sub).match(MatchAttempt, &MatchInfo)) {
137+
// Successful regex match.
138+
assert(!MatchInfo.empty() && "Didn't get any match");
139+
StringRef FullMatch = MatchInfo[0];
140+
MatchInfo.clear();
141+
142+
ThisMatch = FullMatch.data()-MatchAttempt.data();
143+
ThisLength = FullMatch.size();
144+
}
145+
146+
// Otherwise, what we do depends on if this is the first match or not. If
147+
// this is the first match, it doesn't have to match at the start of
148+
// MatchAttempt.
149+
if (ChunkNo == 0) {
150+
// If the first match fails then this pattern will never match in
151+
// Buffer.
152+
if (ThisMatch == StringRef::npos)
153+
return ThisMatch;
154+
155+
FirstMatch = ThisMatch;
156+
MatchAttempt = MatchAttempt.substr(FirstMatch);
157+
ThisMatch = 0;
158+
}
159+
160+
// If this chunk didn't match, then the entire pattern didn't match from
161+
// FirstMatch, try later in the buffer.
162+
if (ThisMatch == StringRef::npos)
163+
break;
164+
165+
// Ok, if the match didn't match at the beginning of MatchAttempt, then we
166+
// have something like "ABC{{DEF}}" and something was in-between. Reject
167+
// the match.
168+
if (ThisMatch != 0)
169+
break;
170+
171+
// Otherwise, match the string and move to the next chunk.
172+
MatchLen += ThisLength;
173+
MatchAttempt = MatchAttempt.substr(ThisLength);
174+
}
175+
176+
// If the whole thing matched, we win.
177+
if (ChunkNo == e)
178+
return FirstMatch;
179+
180+
// Otherwise, try matching again after FirstMatch to see if this pattern
181+
// matches later in the buffer.
182+
Buffer = Buffer.substr(FirstMatch+1);
183+
}
184+
185+
// If we ran out of stuff to scan, then we didn't match.
186+
return StringRef::npos;
82187
}
83188

84189

@@ -367,14 +472,14 @@ int main(int argc, char **argv) {
367472

368473
// If this match had "not strings", verify that they don't exist in the
369474
// skipped region.
370-
for (unsigned i = 0, e = CheckStr.NotStrings.size(); i != e; ++i) {
475+
for (unsigned ChunkNo = 0, e = CheckStr.NotStrings.size(); ChunkNo != e; ++ChunkNo) {
371476
size_t MatchLen = 0;
372-
size_t Pos = CheckStr.NotStrings[i].second.Match(SkippedRegion, MatchLen);
477+
size_t Pos = CheckStr.NotStrings[ChunkNo].second.Match(SkippedRegion, MatchLen);
373478
if (Pos == StringRef::npos) continue;
374479

375480
SM.PrintMessage(SMLoc::getFromPointer(LastMatch+Pos),
376481
CheckPrefix+"-NOT: string occurred!", "error");
377-
SM.PrintMessage(CheckStr.NotStrings[i].first,
482+
SM.PrintMessage(CheckStr.NotStrings[ChunkNo].first,
378483
CheckPrefix+"-NOT: pattern specified here", "note");
379484
return 1;
380485
}

0 commit comments

Comments
 (0)