forked from rebolsource/r3
/
maxmatch.parse.test.reb
312 lines (286 loc) · 10.9 KB
/
maxmatch.parse.test.reb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
; %maxmatch.parse.test.reb
;
; MAXMATCH was a thought experiment proposed as a way of illustrating that
; there can be multiple ideas for the "rollback" semantics of certain
; constructs--so there has to be a way for the combinator to decide vs.
; just assuming a simple rule. We try to offer the best of both worlds
; with a simple automatic rollback mechanism that can be overridden if
; the combinator expresses an intent to be directly involved.
;
; It's not intended to be particularly useful--it doesn't have an obvious
; right answer if the matches are both equal length. But it's the kind of
; combinator that a user could write and use off the cuff.
; In the first variation of the combinator we will use the default
; "rollback" mechanism. This happens when you do not mention the
; pending list of accrued items at all.
[(
maxmatch-D: combinator [ ; "(D)efault"
{Match maximum of two rules, keeping side effects of both if match}
return: "Result of the longest match (favors first parser if equal)"
[<opt> any-value!]
;
; Note there is no `pending:` output in this spec.  Per the notes that
; introduce this variation, not mentioning the pending list is what opts
; this combinator in to the default rollback mechanism.
;
parser1 [action!]
parser2 [action!]
<local> result1' result2' remainder1 remainder2
][
; Both parsers are run against the same `input`, so each remainder
; reports how far that rule got on its own from the same start point.
;
[^result1' remainder1]: parser1 input
[^result2' remainder2]: parser2 input
if null? result2' [ ; parser2 didn't succeed
if null? result1' [return null] ; neither succeeded
;
; Only parser1 succeeded: fall through to the parser1 return below.
] else [ ; parser2 succeeded
;
; Choose parser2 if parser1 failed, or if parser2 got strictly
; further.  The NULL? test is listed first so that ANY stops before
; the index comparison when parser1 did not succeed.
;
any [
null? result1'
(index of remainder1) < (index of remainder2)
] then [
set remainder remainder2
return unmeta result2'
]
]
;
; Reached when parser1 succeeded and parser2 either failed or did not
; get strictly further--so a tie in position goes to parser1.
;
set remainder remainder1
return unmeta result1'
]
true
)
; NON COLLECT VARIATIONS
;
; Eight "a": the 2 "a" rule matches 4 times (all 8 characters) while the
; 3 "a" rule matches only 2 times (6 characters).  The 2 "a" rule gets
; further, so its tally of 4 is the result no matter which rule is first.
;
(4 = uparse "aaaaaaaa" [maxmatch-D [tally 2 "a"] [tally 3 "a"]])
(4 = uparse "aaaaaaaa" [maxmatch-D [tally 3 "a"] [tally 2 "a"]])
; With nine "a", the 3 "a" rule matches 3 times (all 9 characters) and gets
; further than the 2 "a" rule (4 matches, 8 characters), so the result is 3.
; Just for fun show different bracketing options. :-)
;
(3 = uparse "aaaaaaaaa" [maxmatch-D tally 2 "a" tally 3 "a"])
(3 = uparse "aaaaaaaaa" [maxmatch-D tally [3 "a"] tally [2 "a"]])
; As long as one rule succeeds, it's the longest match.  Here the
; [some "a" some "b"] rule fails because the input contains no "b".
;
(4 = uparse "aaaaaaaa" [maxmatch-D [tally 2 "a"] [some "a" some "b"]])
(4 = uparse "aaaaaaaa" [maxmatch-D [some "a" some "b"] [tally 2 "a"]])
; If neither rule succeeds the maxmatch fails.  There are only eight "a",
; so [100 "a"] cannot match, and there is no "b" for the other rule.
;
(null = uparse "aaaaaaaa" [maxmatch-D [100 "a"] [some "a" some "b"]])
(null = uparse "aaaaaaaa" [maxmatch-D [some "a" some "b"] [100 "a"]])
; COLLECT VARIATIONS - DEMONSTRATE THE AUTOMATIC ROLLBACK VARIANT
;
; If a parser is successful its results are kept, if it fails then
; not. This does not account for the potential subtlety that the
; creator of MAXMATCH might have wanted the less matching combinator
; to have its accrued results disregarded. That requires using the
; manual rollback interface.
;
; Eight "a", so it's possible to get 4 matches of 2 "a" in but only
; 2 matches of 3 "a".
; (Both rules are successful, so by default both results are kept.  The
; kept items appear in the order the parsers were given, not in order of
; which match was longer.)
;
(["aa" "aa" "aa" "aa" "aaa" "aaa"] = uparse "aaaaaaaa" [
collect [
maxmatch-D [some keep across 2 "a"] [some keep across 3 "a"]
]
])
(["aaa" "aaa" "aa" "aa" "aa" "aa"] = uparse "aaaaaaaa" [
collect [
maxmatch-D [some keep across 3 "a"] [some keep across 2 "a"]
]
])
; With 9, it's the 3 "a" rule that gets further than the 2 "a" rule
; Just for fun show different bracketing options. :-)
; (Both rules are successful, so by default both results are kept)
;
(["aa" "aa" "aa" "aa" "aaa" "aaa" "aaa"] = uparse "aaaaaaaaa" [
collect [
maxmatch-D some [keep across 2 "a"] some [keep across 3 "a"]
]
])
(["aaa" "aaa" "aaa" "aa" "aa" "aa" "aa"] = uparse "aaaaaaaaa" [
collect [
maxmatch-D [some keep [across 3 "a"]] [some keep [across 2 "a"]]
]
])
; As long as one rule succeeds, it's the longest match
; (Failing rule has its results discarded, automatically: none of the
; single "a" items kept before the missing "b" show up in the result.)
;
(["aa" "aa" "aa" "aa"] = uparse "aaaaaaaa" [
collect [
maxmatch-D [some keep across 2 "a"] [some keep "a" some keep "b"]
]
])
(["aa" "aa" "aa" "aa"] = uparse "aaaaaaaa" [
collect [
maxmatch-D [some keep "a" some keep "b"] [some keep across 2 "a"]
]
])
; If neither rule succeeds the maxmatch fails
; (Nothing is collected, returns null)
;
(null = uparse "aaaaaaaa" [
collect [
maxmatch-D [100 keep "a"] [some keep "a" some keep "b"]
]
])
(null = uparse "aaaaaaaa" [
collect [
maxmatch-D [some keep "a" some keep "b"] [100 keep "a"]
]
])
; Mix it up with both a collect and a gather in effect...
; (The z and "aaa" triples from shorter match are still in final result)
;
; The first rule consumes all eight "a" (three pairs, then one "a" each for
; x and y).  The second rule consumes only six (two triples; the emit of z
; matches no input).  The first rule is the longer match, but since both
; rules succeed, the default rollback keeps the second rule's kept and
; emitted items as well.
(
g: ~
did all [
["aa" "aa" "aa" "aaa" "aaa"] = uparse "aaaaaaaa" [collect [
g: gather [
maxmatch-D [
3 keep across 2 "a"
emit x: ["a" (10)] emit y: ["a" (20)]
] [
emit z: (304)
some keep across 3 "a"
]
]
]]
g = make object! [
x: 10
y: 20
z: 304
]
]
)
]
; In the second variation of the combinator we will get involved directly with
; "rollback" and ask to not use the contributions from a successful parser
; if it was not the maximum match. This involves becoming manually involved
; with `pending`, both as a return result and as a parameter to the parsers
; that are called.
[(
maxmatch-C: combinator [  ; "(C)ustom"
    {Match maximum of two rules, keeping only the longest match's pending items}
    return: "Result of the longest match (favors first parser if equal)"
        [<opt> any-value!]
    ;
    ; Declaring `pending:` as an output is what makes this the manual
    ; rollback variant: the combinator itself decides which parser's
    ; accrued items are passed along.
    ;
    pending: [blank! block!]
    parser1 [action!]
    parser2 [action!]
    <local> result1' result2' remainder1 remainder2 pending1 pending2
][
    ; Both parsers are run against the same `input`.  Each call also
    ; requests that parser's own pending list, so the two sets of accrued
    ; items stay separate until a winner is picked.
    ;
    [^result1' remainder1 pending1]: parser1 input
    [^result2' remainder2 pending2]: parser2 input

    if null? result2' [  ; parser2 didn't succeed
        if null? result1' [return null]  ; neither succeeded
        ;
        ; Only parser1 succeeded: fall through to the parser1 return below.
    ] else [  ; parser2 succeeded
        ;
        ; Choose parser2 if parser1 failed, or if parser2 got strictly
        ; further.  The NULL? test is listed first so that ANY stops
        ; before the index comparison when parser1 did not succeed.
        ;
        any [
            null? result1'
            (index of remainder1) < (index of remainder2)
        ] then [
            set remainder remainder2
            set pending pending2  ; pending1 is not forwarded
            return unmeta result2'
        ]
    ]

    ; Reached when parser1 succeeded and parser2 either failed or did not
    ; get strictly further--so a tie in position goes to parser1.
    ;
    set remainder remainder1
    set pending pending1  ; pending2 is not forwarded
    return unmeta result1'
]
true
)
; NON COLLECT VARIATIONS
;
; Eight "a": the 2 "a" rule matches 4 times (all 8 characters) while the
; 3 "a" rule matches only 2 times (6 characters).  The 2 "a" rule gets
; further, so its tally of 4 is the result no matter which rule is first.
;
(4 = uparse "aaaaaaaa" [maxmatch-C [tally 2 "a"] [tally 3 "a"]])
(4 = uparse "aaaaaaaa" [maxmatch-C [tally 3 "a"] [tally 2 "a"]])
; With nine "a", the 3 "a" rule matches 3 times (all 9 characters) and gets
; further than the 2 "a" rule (4 matches, 8 characters), so the result is 3.
; Just for fun show different bracketing options. :-)
;
(3 = uparse "aaaaaaaaa" [maxmatch-C tally 2 "a" tally 3 "a"])
(3 = uparse "aaaaaaaaa" [maxmatch-C tally [3 "a"] tally [2 "a"]])
; As long as one rule succeeds, it's the longest match.  Here the
; [some "a" some "b"] rule fails because the input contains no "b".
;
(4 = uparse "aaaaaaaa" [maxmatch-C [tally 2 "a"] [some "a" some "b"]])
(4 = uparse "aaaaaaaa" [maxmatch-C [some "a" some "b"] [tally 2 "a"]])
; If neither rule succeeds the maxmatch fails.  There are only eight "a",
; so [100 "a"] cannot match, and there is no "b" for the other rule.
;
(null = uparse "aaaaaaaa" [maxmatch-C [100 "a"] [some "a" some "b"]])
(null = uparse "aaaaaaaa" [maxmatch-C [some "a" some "b"] [100 "a"]])
; COLLECT VARIATIONS - DEMONSTRATE THE MANUAL ROLLBACK VARIANT
;
; MAXMATCH-C sets `pending` itself, passing along only the pending list
; of the parser it picked as the maximum match.  So a parser that is
; successful--but is not the longest match--has its accrued results
; disregarded.  This is the subtlety that the creator of MAXMATCH might
; have wanted, which the automatic rollback cannot express.
;
; Eight "a", so it's possible to get 4 matches of 2 "a" in but only
; 2 matches of 3 "a".
; (This version of maxmatch only keeps the maxmatch's contributions; the
; lesser match--though successful--has its contributions discarded)
;
(["aa" "aa" "aa" "aa"] = uparse "aaaaaaaa" [
collect [
maxmatch-C [some keep across 2 "a"] [some keep across 3 "a"]
]
])
(["aa" "aa" "aa" "aa"] = uparse "aaaaaaaa" [
collect [
maxmatch-C [some keep across 3 "a"] [some keep across 2 "a"]
]
])
; With 9, it's the 3 "a" rule that gets further than the 2 "a" rule
; Just for fun show different bracketing options. :-)
; (This version of maxmatch only keeps the maxmatch's contributions; the
; lesser match--though successful--has its contributions discarded)
;
(["aaa" "aaa" "aaa"] = uparse "aaaaaaaaa" [
collect [
maxmatch-C some [keep across 2 "a"] some [keep across 3 "a"]
]
])
(["aaa" "aaa" "aaa"] = uparse "aaaaaaaaa" [
collect [
maxmatch-C [some keep [across 3 "a"]] [some keep [across 2 "a"]]
]
])
; As long as one rule succeeds, it's the longest match
; (Failing rule has its results discarded, that's always the case: none of
; the single "a" items kept before the missing "b" show up in the result.)
;
(["aa" "aa" "aa" "aa"] = uparse "aaaaaaaa" [
collect [
maxmatch-C [some keep across 2 "a"] [some keep "a" some keep "b"]
]
])
(["aa" "aa" "aa" "aa"] = uparse "aaaaaaaa" [
collect [
maxmatch-C [some keep "a" some keep "b"] [some keep across 2 "a"]
]
])
; If neither rule succeeds the maxmatch fails
; (Nothing is collected, returns null)
;
(null = uparse "aaaaaaaa" [
collect [
maxmatch-C [100 keep "a"] [some keep "a" some keep "b"]
]
])
(null = uparse "aaaaaaaa" [
collect [
maxmatch-C [some keep "a" some keep "b"] [100 keep "a"]
]
])
; Mix it up with both a collect and a gather in effect...
; (The z and "aaa" triples from shorter match won't be in the final result)
;
; The first rule consumes all eight "a" (three pairs, then one "a" each for
; x and y).  The second rule consumes only six (two triples; the emit of z
; matches no input).  The first rule is the longer match, so only its kept
; and emitted items survive: no "aaa" in the collected block and no z field
; in the gathered object.
(
g: ~
did all [
["aa" "aa" "aa"] = uparse "aaaaaaaa" [collect [
g: gather [
maxmatch-C [
3 keep across 2 "a"
emit x: ["a" (10)] emit y: ["a" (20)]
] [
emit z: (304)
some keep across 3 "a"
]
]
]]
g = make object! [
x: 10
y: 20
]
]
)
]