-
Notifications
You must be signed in to change notification settings - Fork 47
/
parser.dart
447 lines (407 loc) · 16.6 KB
/
parser.dart
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
// Copyright (c) 2013, Lukas Renggli <renggli@gmail.com>
part of petitparser;
/**
* Abstract base class of all parsers.
*/
abstract class Parser {

  /**
   * Maximum repetition count handed to the repeating parsers by the
   * "unbounded" operators ([Parser.star], [Parser.plus] and their greedy and
   * lazy variants).
   */
  static const int _unbounded = 65536;

  /**
   * Translates a possibly negative [index] into an offset into [list].
   * Negative values count from the end of the list, so [:-1:] designates the
   * last element.
   */
  static int _offsetIn(List list, int index) {
    return index < 0 ? list.length + index : index;
  }

  /**
   * Primitive method doing the actual parsing.
   *
   * The method is overridden in concrete subclasses to implement the
   * parser specific logic. The method takes a parse [context] and
   * returns the resulting context, which is either a [Success] or
   * [Failure] context.
   */
  Result parseOn(Context context);

  /**
   * Returns the parse result of the [input].
   *
   * The implementation creates a default parse context on the input, starting
   * at position [:0:], and calls the internal parsing logic of the receiving
   * parser.
   *
   * For example, [:letter().plus().parse('abc'):] results in an instance of
   * [Success], where [Result.position] is [:3:] and [Success.value] is
   * [:[a, b, c]:].
   *
   * Similarly, [:letter().plus().parse('123'):] results in an instance of
   * [Failure], where [Result.position] is [:0:] and [Failure.message] is
   * [:'letter expected':].
   */
  Result parse(dynamic input) {
    var start = new Context(input, 0);
    return parseOn(start);
  }

  /**
   * Tests if the [input] can be successfully parsed.
   *
   * For example, [:letter().plus().accept('abc'):] returns [:true:], and
   * [:letter().plus().accept('123'):] returns [:false:].
   */
  bool accept(dynamic input) => parse(input).isSuccess;

  /**
   * Returns a list of all successful overlapping parses of the [input].
   *
   * For example, [:letter().plus().matches('abc de'):] results in the list
   * [:[['a', 'b', 'c'], ['b', 'c'], ['c'], ['d', 'e'], ['e']]:]. See
   * [Parser.matchesSkipping] to retrieve non-overlapping parse results.
   */
  Iterable matches(dynamic input) {
    var found = [];
    // The and-predicate records a match without consuming it; the following
    // any() then advances by a single element, so the receiver is retried at
    // every position of the input.
    var recorder = and().map((each) => found.add(each));
    recorder.seq(any()).or(any()).star().parse(input);
    return found;
  }

  /**
   * Returns a list of all successful non-overlapping parses of the [input].
   *
   * For example, [:letter().plus().matchesSkipping('abc de'):] results in the
   * list [:[['a', 'b', 'c'], ['d', 'e']]:]. See [Parser.matches] to retrieve
   * overlapping parse results.
   */
  Iterable matchesSkipping(dynamic input) {
    var found = [];
    // Where the receiver does not match, any() skips one element instead.
    var recorder = map((each) => found.add(each));
    recorder.or(any()).star().parse(input);
    return found;
  }

  /**
   * Returns a new parser that accepts the receiver, if possible. The resulting
   * parser returns the result of the receiver, or [:null:] if not applicable.
   * The value returned in the latter case can be provided as the optional
   * argument [otherwise].
   *
   * For example, the parser [:letter().optional():] accepts a letter as input
   * and returns that letter. When given something else the parser succeeds as
   * well, does not consume anything and returns [:null:].
   */
  Parser optional([dynamic otherwise]) {
    return new _OptionalParser(this, otherwise);
  }

  /**
   * Returns a parser that accepts the receiver zero or more times. The
   * resulting parser returns a list of the parse results of the receiver.
   *
   * This is a greedy and blind implementation that tries to consume as much
   * input as possible and that does not consider what comes afterwards. The
   * number of repetitions is capped at 65536.
   *
   * For example, the parser [:letter().star():] accepts the empty string or
   * any sequence of letters and returns a possibly empty list of the parsed
   * letters.
   */
  Parser star() => repeat(0, _unbounded);

  /**
   * Returns a parser that parses the receiver zero or more times until it
   * reaches a [limit]. This is a greedy non-blind implementation of the
   * [Parser.star] operator. The [limit] is not consumed.
   */
  Parser starGreedy(Parser limit) => repeatGreedy(limit, 0, _unbounded);

  /**
   * Returns a parser that parses the receiver zero or more times until it
   * reaches a [limit]. This is a lazy non-blind implementation of the
   * [Parser.star] operator. The [limit] is not consumed.
   */
  Parser starLazy(Parser limit) => repeatLazy(limit, 0, _unbounded);

  /**
   * Returns a parser that accepts the receiver one or more times. The
   * resulting parser returns a list of the parse results of the receiver.
   *
   * This is a greedy and blind implementation that tries to consume as much
   * input as possible and that does not consider what comes afterwards. The
   * number of repetitions is capped at 65536.
   *
   * For example, the parser [:letter().plus():] accepts any sequence of
   * letters and returns a list of the parsed letters.
   */
  Parser plus() => repeat(1, _unbounded);

  /**
   * Returns a parser that parses the receiver one or more times until it
   * reaches a [limit]. This is a greedy non-blind implementation of the
   * [Parser.plus] operator. The [limit] is not consumed.
   */
  Parser plusGreedy(Parser limit) => repeatGreedy(limit, 1, _unbounded);

  /**
   * Returns a parser that parses the receiver one or more times until it
   * reaches a [limit]. This is a lazy non-blind implementation of the
   * [Parser.plus] operator. The [limit] is not consumed.
   */
  Parser plusLazy(Parser limit) => repeatLazy(limit, 1, _unbounded);

  /**
   * Returns a parser that accepts the receiver between [min] and [max] times.
   * The resulting parser returns a list of the parse results of the receiver.
   *
   * This is a greedy and blind implementation that tries to consume as much
   * input as possible and that does not consider what comes afterwards.
   *
   * For example, the parser [:letter().repeat(2, 4):] accepts a sequence of
   * two, three, or four letters and returns the accepted letters as a list.
   */
  Parser repeat(int min, int max) {
    return new _PossessiveRepeatingParser(this, min, max);
  }

  /**
   * Returns a parser that parses the receiver at least [min] and at most [max]
   * times until it reaches a [limit]. This is a greedy non-blind
   * implementation of the [Parser.repeat] operator. The [limit] is not
   * consumed.
   */
  Parser repeatGreedy(Parser limit, int min, int max) =>
      new _GreedyRepeatingParser(this, limit, min, max);

  /**
   * Returns a parser that parses the receiver at least [min] and at most [max]
   * times until it reaches a [limit]. This is a lazy non-blind implementation
   * of the [Parser.repeat] operator. The [limit] is not consumed.
   */
  Parser repeatLazy(Parser limit, int min, int max) =>
      new _LazyRepeatingParser(this, limit, min, max);

  /**
   * Returns a parser that accepts the receiver exactly [count] times. The
   * resulting parser returns a list of the parse results of the receiver.
   *
   * For example, the parser [:letter().times(2):] accepts two letters and
   * returns a list of the two parsed letters.
   */
  Parser times(int count) => repeat(count, count);

  /**
   * Returns a parser that accepts the receiver followed by [other]. The
   * resulting parser returns a list of the parse result of the receiver
   * followed by the parse result of [other]. Calling this method on an
   * existing sequence does not nest this sequence into a new one, but
   * instead augments the existing sequence with [other].
   *
   * For example, the parser [:letter().seq(digit()).seq(letter()):] accepts a
   * letter followed by a digit and another letter. The parse result of the
   * input string [:'a1b':] is the list [:['a', '1', 'b']:].
   */
  Parser seq(Parser other) {
    return new _SequenceParser([this, other]);
  }

  /**
   * Convenience operator returning a parser that accepts the receiver
   * followed by [other]. See [Parser.seq] for details.
   */
  Parser operator & (Parser other) => seq(other);

  /**
   * Returns a parser that accepts the receiver or [other]. The resulting
   * parser returns the parse result of the receiver, if the receiver fails
   * it returns the parse result of [other] (exclusive ordered choice).
   *
   * For example, the parser [:letter().or(digit()):] accepts a letter or a
   * digit. An example where the order matters is the following choice between
   * overlapping parsers: [:letter().or(char('a')):]. In the example the parser
   * [:char('a'):] will never be activated, because the input is always
   * consumed by [:letter():]. This can be problematic if the author intended
   * to attach a production action to [:char('a'):].
   */
  Parser or(Parser other) {
    return new _ChoiceParser([this, other]);
  }

  /**
   * Convenience operator returning a parser that accepts the receiver or
   * [other]. See [Parser.or] for details.
   */
  Parser operator | (Parser other) => or(other);

  /**
   * Returns a parser (logical and-predicate) that succeeds whenever the
   * receiver does, but never consumes input.
   *
   * For example, the parser [:char('_').and().seq(identifier):] accepts
   * identifiers that start with an underscore character. Since the predicate
   * does not consume accepted input, the parser [:identifier:] is given the
   * ability to process the complete identifier.
   */
  Parser and() {
    return new _AndParser(this);
  }

  /**
   * Returns a parser (logical not-predicate) that succeeds whenever the
   * receiver fails, but never consumes input. The optional [message] is
   * handed to the resulting parser.
   *
   * For example, the parser [:char('_').not().seq(identifier):] accepts
   * identifiers that do not start with an underscore character. If the parser
   * [:char('_'):] accepts the input, the negation and subsequently the
   * complete parser fails. Otherwise the parser [:identifier:] is given the
   * ability to process the complete identifier.
   */
  Parser not([String message]) {
    return new _NotParser(this, message);
  }

  /**
   * Returns a parser that consumes any input token (character), but the
   * receiver.
   *
   * For example, the parser [:letter().neg():] accepts any input but a letter.
   * The parser fails for inputs like [:'a':] or [:'Z':], but succeeds for
   * input like [:'1':], [:'_':] or [:'$':].
   */
  Parser neg([String message]) {
    // Sequence of [not-predicate, any()]; only the element consumed by any()
    // (index 1) is of interest to the caller.
    var guarded = not(message).seq(any());
    return guarded.pick(1);
  }

  /**
   * Returns a parser that discards the result of the receiver, and returns
   * a sub-string of the consumed elements in the string/list being parsed.
   *
   * For example, the parser [:letter().plus().flatten():] returns [:'abc':]
   * for the input [:'abc':]. In contrast, the parser [:letter().plus():] would
   * return [:['a', 'b', 'c']:] for the same input instead.
   */
  Parser flatten() {
    return new _FlattenParser(this);
  }

  /**
   * Returns a parser that discards the result of the receiver and returns
   * a [Token]. The token carries information about where the token started and
   * stopped in the input stream.
   *
   * For example, the parser [:letter().plus().token():] returns the token
   * [:Token[start: 0, stop: 3, value: abc]:] for the input [:'abc':].
   */
  Parser token() {
    return new _TokenParser(this);
  }

  /**
   * Returns a parser that consumes input before and after the receiver. The
   * optional argument [trimmer] is a parser that consumes the excess input. By
   * default [:whitespace():] is used.
   *
   * For example, the parser [:letter().plus().trim():] returns [:['a', 'b']:]
   * for the input [:' ab\n':] and consumes the complete input string.
   */
  Parser trim([Parser trimmer]) {
    if (trimmer == null) {
      trimmer = whitespace();
    }
    return new _TrimmingParser(this, trimmer);
  }

  /**
   * Returns a parser that succeeds only if the receiver consumes the complete
   * input, otherwise returns a failure with the optional [message].
   *
   * For example, the parser [:letter().end():] succeeds on the input [:'a':]
   * and fails on [:'ab':]. In contrast the parser [:letter():] alone would
   * succeed on both inputs, but not consume everything for the second input.
   */
  Parser end([String message = 'end of input expected']) =>
      new _EndOfInputParser(this, message);

  /**
   * Returns a parser that points to the receiver, but can be changed to point
   * to something else at a later point in time.
   *
   * For example, the parser [:letter().setable():] behaves exactly the same
   * as [:letter():], but it can be replaced with another parser using
   * [SetableParser.set].
   */
  SetableParser setable() {
    return new _SetableParser(this);
  }

  /**
   * Returns a parser that evaluates [function] as action handler on success
   * of the receiver.
   *
   * For example, the parser [:digit().map((char) => int.parse(char)):] returns
   * the number [:1:] for the input string [:'1':].
   */
  Parser map(Function function) {
    return new _ActionParser(this, function);
  }

  /**
   * Returns a parser that transforms a successful parse result by returning
   * the element at [index] of a list. A negative index can be used to access
   * the elements from the back of the list.
   *
   * For example, the parser [:letter().star().pick(-1):] returns the last
   * letter parsed. For the input [:'abc':] it returns [:'c':].
   */
  Parser pick(int index) {
    return map((List list) => list[_offsetIn(list, index)]);
  }

  /**
   * Returns a parser that transforms a successful parse result by returning
   * the permuted elements at [indexes] of a list. Negative indexes can be
   * used to access the elements from the back of the list.
   *
   * For example, the parser [:letter().star().permute([0, -1]):] returns the
   * first and last letter parsed. For the input [:'abc':] it returns
   * [:['a', 'c']:].
   */
  Parser permute(List<int> indexes) {
    return map((List list) {
      var picked = indexes.map((index) => list[_offsetIn(list, index)]);
      return picked.toList();
    });
  }

  /**
   * Returns a parser that consumes the receiver one or more times separated
   * by the [separator] parser. The resulting parser returns a flat list of
   * the parse results of the receiver interleaved with the parse result of the
   * separator parser.
   *
   * If the optional argument [includeSeparators] is set to [:false:], then the
   * separators are not included in the parse result. If the optional argument
   * [optionalSeparatorAtEnd] is set to [:true:] the parser also accepts an
   * optional separator at the end.
   *
   * For example, the parser [:digit().separatedBy(char('-')):] returns a parser
   * that consumes input like [:'1-2-3':] and returns a list of the elements and
   * separators: [:['1', '-', '2', '-', '3']:].
   */
  Parser separatedBy(Parser separator, {bool includeSeparators: true,
      bool optionalSeparatorAtEnd: false}) {
    // Grammar: receiver (separator receiver)* [separator]
    var repeater = new _SequenceParser([separator, this]).star();
    var parts = [this, repeater];
    if (optionalSeparatorAtEnd) {
      // The separator parser object itself is used as the "nothing matched"
      // marker of the optional trailing separator; it is recognized below
      // with identical().
      parts.add(separator.optional(separator));
    }
    return new _SequenceParser(parts).map((List list) {
      var result = [list[0]];
      // list[1] holds the [separator, element] pairs produced by the repeater.
      for (var tuple in list[1]) {
        if (includeSeparators) {
          result.add(tuple[0]);
        }
        result.add(tuple[1]);
      }
      // list[2] only exists when a trailing separator is allowed; it is
      // appended only if a separator was actually parsed.
      if (includeSeparators && optionalSeparatorAtEnd) {
        var trailing = list[2];
        if (!identical(trailing, separator)) {
          result.add(trailing);
        }
      }
      return result;
    });
  }

  /**
   * Returns a shallow copy of the receiver.
   */
  Parser copy();

  /**
   * Recursively tests for the equality of two parsers.
   *
   * The code automatically deals with recursive parsers and parsers that
   * refer to other parsers: the optional [seen] set collects the parsers
   * already visited, and a parser that is encountered again is considered
   * matching. This code is supposed to be overridden by parsers that add
   * other state.
   */
  bool match(Parser other, [Set<Parser> seen]) {
    if (seen == null) {
      seen = new Set();
    }
    if (this == other) {
      return true;
    }
    if (seen.contains(this)) {
      // Already visited on this traversal; stops cyclic grammars from
      // recursing forever.
      return true;
    }
    seen.add(this);
    if (runtimeType != other.runtimeType) {
      return false;
    }
    return _matchChildren(other, seen);
  }

  /**
   * Tests whether the receiver and [other] have the same number of children
   * and whether the children match pairwise, in order. The [seen] set is
   * passed on to [Parser.match].
   */
  bool _matchChildren(Parser other, [Set<Parser> seen]) {
    var mine = children, theirs = other.children;
    if (mine.length != theirs.length) {
      return false;
    }
    for (var i = 0; i < mine.length; i++) {
      if (!mine[i].match(theirs[i], seen)) {
        return false;
      }
    }
    return true;
  }

  /**
   * Returns a list of directly referenced parsers.
   *
   * For example, [:letter().children:] returns the empty collection [:[]:],
   * because the letter parser is a primitive or leaf parser that does not
   * depend on or call any other parser.
   *
   * In contrast, [:letter().or(digit()).children:] returns a collection
   * containing both the [:letter():] and [:digit():] parser.
   */
  List<Parser> get children => [];

  /**
   * Changes the receiver by replacing [source] with [target]. Does nothing
   * if [source] does not exist in [Parser.children].
   *
   * The following example creates a letter parser and then defines a parser
   * called [:example:] that accepts one or more letters. Eventually the parser
   * [:example:] is modified by replacing the [:source:] parser with a new
   * parser that accepts a digit. The resulting [:example:] parser accepts one
   * or more digits.
   *
   *     var source = letter();
   *     var example = source.plus();
   *     example.replace(source, digit());
   */
  void replace(Parser source, Parser target) {
    // no children, nothing to do
  }

}