1919#include " llvm/Support/CommandLine.h"
2020#include " llvm/Support/MemoryBuffer.h"
2121#include " llvm/Support/PrettyStackTrace.h"
22+ #include " llvm/Support/Regex.h"
2223#include " llvm/Support/SourceMgr.h"
2324#include " llvm/Support/raw_ostream.h"
2425#include " llvm/System/Signals.h"
@@ -44,8 +45,9 @@ NoCanonicalizeWhiteSpace("strict-whitespace",
4445// ===----------------------------------------------------------------------===//
4546
4647class Pattern {
47- // / Str - The string to match.
48- StringRef Str;
48+ // / Chunks - The pattern chunks to match. If the bool is false, it is a fixed
49+ // / string match, if it is true, it is a regex match.
50+ SmallVector<std::pair<StringRef, bool >, 4 > Chunks;
4951public:
5052
5153 Pattern () { }
@@ -55,10 +57,7 @@ class Pattern {
5557 // / Match - Match the pattern string against the input buffer Buffer. This
5658 // / returns the position that is matched or npos if there is no match. If
5759 // / there is a match, the size of the matched string is returned in MatchLen.
58- size_t Match (StringRef Buffer, size_t &MatchLen) const {
59- MatchLen = Str.size ();
60- return Buffer.find (Str);
61- }
60+ size_t Match (StringRef Buffer, size_t &MatchLen) const ;
6261};
6362
6463bool Pattern::ParsePattern (StringRef PatternStr, SourceMgr &SM) {
@@ -74,11 +73,117 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {
7473 " error" );
7574 return true ;
7675 }
76+
77+ // Scan the pattern to break it into regex and non-regex pieces.
78+ while (!PatternStr.empty ()) {
79+ // Handle fixed string matches.
80+ if (PatternStr.size () < 2 ||
81+ PatternStr[0 ] != ' {' || PatternStr[1 ] != ' {' ) {
82+ // Find the end, which is the start of the next regex.
83+ size_t FixedMatchEnd = PatternStr.find (" {{" );
84+
85+ Chunks.push_back (std::make_pair (PatternStr.substr (0 , FixedMatchEnd),
86+ false ));
87+ PatternStr = PatternStr.substr (FixedMatchEnd);
88+ continue ;
89+ }
90+
91+ // Otherwise, this is the start of a regex match. Scan for the }}.
92+ size_t End = PatternStr.find (" }}" );
93+ if (End == StringRef::npos) {
94+ SM.PrintMessage (SMLoc::getFromPointer (PatternStr.data ()),
95+ " found start of regex string with no end '}}'" , " error" );
96+ return true ;
97+ }
98+
99+ Regex R (PatternStr.substr (2 , End-2 ));
100+ std::string Error;
101+ if (!R.isValid (Error)) {
102+ SM.PrintMessage (SMLoc::getFromPointer (PatternStr.data ()+2 ),
103+ " invalid regex: " + Error, " error" );
104+ return true ;
105+ }
106+
107+ Chunks.push_back (std::make_pair (PatternStr.substr (2 , End-2 ), true ));
108+ PatternStr = PatternStr.substr (End+2 );
109+ }
110+
111+ return false ;
112+ }
77113
114+ // / Match - Match the pattern string against the input buffer Buffer. This
115+ // / returns the position that is matched or npos if there is no match. If
116+ // / there is a match, the size of the matched string is returned in MatchLen.
117+ size_t Pattern::Match (StringRef Buffer, size_t &MatchLen) const {
118+ size_t FirstMatch = StringRef::npos;
119+ MatchLen = 0 ;
78120
121+ SmallVector<StringRef, 4 > MatchInfo;
79122
80- Str = PatternStr;
81- return false ;
123+ while (!Buffer.empty ()) {
124+ StringRef MatchAttempt = Buffer;
125+
126+ unsigned ChunkNo = 0 , e = Chunks.size ();
127+ for (; ChunkNo != e; ++ChunkNo) {
128+ StringRef PatternStr = Chunks[ChunkNo].first ;
129+
130+ size_t ThisMatch = StringRef::npos;
131+ size_t ThisLength = StringRef::npos;
132+ if (!Chunks[ChunkNo].second ) {
133+ // Fixed string match.
134+ ThisMatch = MatchAttempt.find (Chunks[ChunkNo].first );
135+ ThisLength = Chunks[ChunkNo].first .size ();
136+ } else if (Regex (Chunks[ChunkNo].first , Regex::Sub).match (MatchAttempt, &MatchInfo)) {
137+ // Successful regex match.
138+ assert (!MatchInfo.empty () && " Didn't get any match" );
139+ StringRef FullMatch = MatchInfo[0 ];
140+ MatchInfo.clear ();
141+
142+ ThisMatch = FullMatch.data ()-MatchAttempt.data ();
143+ ThisLength = FullMatch.size ();
144+ }
145+
146+ // Otherwise, what we do depends on if this is the first match or not. If
147+ // this is the first match, it doesn't match to match at the start of
148+ // MatchAttempt.
149+ if (ChunkNo == 0 ) {
150+ // If the first match fails then this pattern will never match in
151+ // Buffer.
152+ if (ThisMatch == StringRef::npos)
153+ return ThisMatch;
154+
155+ FirstMatch = ThisMatch;
156+ MatchAttempt = MatchAttempt.substr (FirstMatch);
157+ ThisMatch = 0 ;
158+ }
159+
160+ // If this chunk didn't match, then the entire pattern didn't match from
161+ // FirstMatch, try later in the buffer.
162+ if (ThisMatch == StringRef::npos)
163+ break ;
164+
165+ // Ok, if the match didn't match at the beginning of MatchAttempt, then we
166+ // have something like "ABC{{DEF}} and something was in-between. Reject
167+ // the match.
168+ if (ThisMatch != 0 )
169+ break ;
170+
171+ // Otherwise, match the string and move to the next chunk.
172+ MatchLen += ThisLength;
173+ MatchAttempt = MatchAttempt.substr (ThisLength);
174+ }
175+
176+ // If the whole thing matched, we win.
177+ if (ChunkNo == e)
178+ return FirstMatch;
179+
180+ // Otherwise, try matching again after FirstMatch to see if this pattern
181+ // matches later in the buffer.
182+ Buffer = Buffer.substr (FirstMatch+1 );
183+ }
184+
185+ // If we ran out of stuff to scan, then we didn't match.
186+ return StringRef::npos;
82187}
83188
84189
@@ -367,14 +472,14 @@ int main(int argc, char **argv) {
367472
368473 // If this match had "not strings", verify that they don't exist in the
369474 // skipped region.
370- for (unsigned i = 0 , e = CheckStr.NotStrings .size (); i != e; ++i ) {
475+ for (unsigned ChunkNo = 0 , e = CheckStr.NotStrings .size (); ChunkNo != e; ++ChunkNo ) {
371476 size_t MatchLen = 0 ;
372- size_t Pos = CheckStr.NotStrings [i ].second .Match (SkippedRegion, MatchLen);
477+ size_t Pos = CheckStr.NotStrings [ChunkNo ].second .Match (SkippedRegion, MatchLen);
373478 if (Pos == StringRef::npos) continue ;
374479
375480 SM.PrintMessage (SMLoc::getFromPointer (LastMatch+Pos),
376481 CheckPrefix+" -NOT: string occurred!" , " error" );
377- SM.PrintMessage (CheckStr.NotStrings [i ].first ,
482+ SM.PrintMessage (CheckStr.NotStrings [ChunkNo ].first ,
378483 CheckPrefix+" -NOT: pattern specified here" , " note" );
379484 return 1 ;
380485 }
0 commit comments