-
Notifications
You must be signed in to change notification settings - Fork 138
/
Grammar.pir
238 lines (189 loc) · 5.75 KB
/
Grammar.pir
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
# Copyright (C) 2007-2008, Parrot Foundation.
=head1 NAME
PCT::Grammar - base grammar with useful rules
=head1 SYNOPSIS
grammar MyGrammar is PCT::Grammar;
rule abc { [ word | <panic: word not found> ] }
rule quote {
[ \' <string_literal: '> \'
| \" <string_literal: "> \"
]
}
=head1 DESCRIPTION
This file implements C<PCT::Grammar>, which is a basic grammar object
with a few useful methods for parsing thrown in.
=head2 Methods
=over 4
=item panic(match [, message, ...] )
Throws an exception at the current point of the match, with message
as part of the exception payload. The message doesn't end with
a newline, then the line number and offset of the match are
also included.
=cut
.namespace [ 'PCT';'Grammar' ]
.sub 'onload' :anon :init :load
load_bytecode 'PGE.pbc'
load_bytecode 'PGE/Util.pbc'
.local pmc p6meta
p6meta = new 'P6metaclass'
p6meta.'new_class'('PCT::Grammar', 'parent'=>'PGE::Grammar')
$P0 = split '::', 'PCT::Grammar'
$P0 = get_class $P0
$P1 = get_hll_global ['PGE';'Util'], 'die'
$P0.'add_method'('panic', $P1)
.return ()
.end
=item FAILGOAL($goal [, 'dba'=>dba])
Invoked when goal matching fails to find the goal. Builds an appropriate
error message and delegates the rest to C<panic>.
=cut
.sub 'FAILGOAL' :method
.param string goal
.param pmc options :named :slurpy
.local string dba
dba = options['dba']
if dba goto have_dba
## if no dba supplied, use the name of the caller sub
$P0 = getinterp
$P0 = $P0['sub';1]
dba = $P0
have_dba:
.tailcall self.'panic'("Unable to parse ", dba, "; couldn't find final ", goal)
.end
=item item()
Here we overload the item() method from PGE::Match to
throw an exception if a result object hasn't been set.
=cut
.sub 'ast' :method
.local pmc obj
obj = getattribute self, '$!ast'
unless null obj goto end
die "No result object"
end:
.return (obj)
.end
=item ww()
Special-purpose rule to return true if we're in the middle
of a word -- i.e., if the previous and next character are
both "word characters". This is roughly equivalent to
C<< <?after \w><?before \w> >> except it's much quicker.
In particular, C<< <!ww> >> can be used by :sigspace rules
to enforce whitespace between lexical words.
=cut
.include 'cclass.pasm'
.sub 'ww' :method
.param pmc adverbs :slurpy :named
.local pmc mob
.local int pos
.local string target
$P0 = get_hll_global ['PGE'], 'Match'
(mob, pos, target) = $P0.'new'(self)
if pos == 0 goto fail
$I0 = is_cclass .CCLASS_WORD, target, pos
unless $I0 goto fail
$I1 = pos - 1
$I0 = is_cclass .CCLASS_WORD, target, $I1
unless $I0 goto fail
mob.'to'(pos)
fail:
.return (mob)
.end
.sub 'string_literal' :method
.param string stop
.param pmc adverbs :slurpy :named
## create a new match object, get the new match position
.local pmc mob
.local int pos, lastpos, stoplen
.local string target, escapechars
(mob, pos, target) = self.'new'(self)
lastpos = length target
stoplen = length stop
$S0 = substr stop, 0, 1
escapechars = concat "\\", $S0
## leave space for close delimiter
lastpos -= stoplen
## now initialize and loop through target
literal_init:
.local string literal, litchar
literal = ''
literal_loop:
## if we're beyond the last possible position, fail
if pos > lastpos goto fail
## if ending delimiter, then we're done
$S0 = substr target, pos, stoplen
if $S0 == stop goto literal_end
if pos >= lastpos goto fail
## get next character in literal
litchar = substr target, pos, 1
inc pos
## add non-escape characters to literal
if litchar != "\\" goto add_litchar
## look at the next character, if it's always escaped, add it and
## move on
.local string escaped
escaped = substr target, pos, 1
$I0 = index escapechars, escaped
if $I0 < 0 goto interpolated_escape
inc pos
literal .= escaped
goto literal_loop
interpolated_escape:
## if not double-quoted delim, no interpolation
if stop != '"' goto add_litchar
litchar = escaped
inc pos
$I0 = index "abefnrt0xdo", litchar
if $I0 < 0 goto add_litchar
## if it's one of "xdo", then handle that specially
if $I0 >= 8 goto literal_xdo
litchar = substr "\a\b\e\f\n\r\t\0", $I0, 1
goto add_litchar
literal_xdo:
## handle \x, \d, and \o escapes. start by converting
## the 'o', 'd', or 'x' into 8, 10, or 16 (yes, it's hack
## but it works). Then loop through the characters that
## follow to compute the integer value of the codepoint,
## and add that codepoint to our literal.
.local int base, codepoint, isbracketed
base = index ' o d x', litchar
codepoint = 0
$S0 = substr target, pos, 1
isbracketed = iseq $S0, '['
pos += isbracketed
literal_xdo_char_loop:
$S0 = substr target, pos, 1
$I0 = index '0123456789abcdef', $S0
if $I0 < 0 goto literal_xdo_char_end
if $I0 >= base goto literal_xdo_char_end
codepoint *= base
codepoint += $I0
inc pos
goto literal_xdo_char_loop
literal_xdo_char_end:
$S1 = chr codepoint
literal = concat literal, $S1
unless isbracketed goto literal_xdo_end
if $S0 == ']' goto literal_xdo_end
if $S0 != ',' goto fail
inc pos
codepoint = 0
goto literal_xdo_char_loop
literal_xdo_end:
pos += isbracketed
goto literal_loop
add_litchar:
literal .= litchar
goto literal_loop
literal_end:
mob.'to'(pos)
mob.'!make'(literal)
.return (mob)
fail:
mob.'to'(-1)
.return (mob)
.end
# Local Variables:
# mode: pir
# fill-column: 100
# End:
# vim: expandtab shiftwidth=4 ft=pir: