-
Notifications
You must be signed in to change notification settings - Fork 0
/
larkparse.py
425 lines (369 loc) · 11.7 KB
/
larkparse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
import ast
import sys

from ply import *

import larklex
from core import Val, nil, true, false, SyntaxError
# Terminal names for the grammar, taken from the lexer module. PLY's yacc
# looks up this module-level `tokens` name when it builds the parser.
tokens = larklex.tokens
# Operator precedence table read by PLY's yacc. Levels are listed from the
# loosest-binding to the tightest-binding; each entry is
# (associativity, token, token, ...).
precedence = (
    ('left', 'PEVAL'),
    ('left', 'EQ', 'GT', 'LT', 'GTE', 'LTE', 'INEQ'),
    ('left', 'PLUS', 'MINUS'),
    # MOD appears in the additive_expression rules; without an entry here its
    # shift/reduce conflicts fall back to PLY's default (shift), so it would
    # not bind like the other multiplicative operators.
    ('left', 'TIMES', 'DIVIDE', 'MOD'),
    # ('right', 'UMINUS'),
    ('right', 'NOT'),
)
def p_all(p):
    '''all : program expression SEMI NEWLINE
           | program expression SEMI
           | program expression NEWLINE
           | program expression
           | program
           | expression'''
    # Result is always a list of nodes.
    head = p[1]
    if len(p) > 2:
        # program + trailing expression: build a new list, leaving the
        # program's own list untouched.
        p[0] = head + [p[2]]
        return
    # Single symbol: a program is already a list, a lone expression is wrapped.
    p[0] = head if isinstance(head, list) else [head]
def p_program(p):
    '''program : program statement
               | program NEWLINE
               | statement
               | NEWLINE'''
    first = p[1]
    if len(p) != 3:
        # Base cases: a None value yields an empty program, anything else
        # becomes a one-element program.
        p[0] = [] if first is None else [first]
        return
    # Recursive cases: extend the existing program in place, skipping
    # second symbols whose value is None.
    if p[2] is not None:
        first += [p[2]]
    p[0] = first
def p_statement(p):
    '''statement : expression SEMI NEWLINE
                 | expression SEMI
                 | expression NEWLINE'''
    # The terminator tokens are dropped; the statement is just its expression.
    expr = p[1]
    p[0] = expr
# def p_program_error(p):
# '''program : error'''
# p[0] = None
# p.parser.error = 1
def p_extern_import(p):
    '''statement : extern import identifier'''
    # Node shape: ('extern-import', <identifier>).
    target = p[3]
    p[0] = ('extern-import', target)
def p_import_statement(p):
    '''statement : import identifier as ID
                 | import identifier'''
    module = p[2]
    if len(p) != 5:
        # Plain import.
        p[0] = ('import', module)
    else:
        # Import with an alias (the ID after `as`).
        p[0] = ('import-as', module, p[4])
def p_primary_expression(p):
    '''primary_expression : evaluation
                          | extern_block
                          | param_val
                          | dot_op
                          | primitive
                          | LPAREN NEWLINE all NEWLINE RPAREN
                          | LPAREN NEWLINE all RPAREN
                          | LPAREN all NEWLINE RPAREN
                          | LPAREN all RPAREN'''
    size = len(p)
    if size not in (4, 5, 6):
        # Single-symbol alternatives pass their value straight through.
        p[0] = p[1]
        return
    # Parenthesised alternatives: locate the `all` value among the symbols.
    if size == 6:
        body = p[3]
    elif size == 5:
        # A None in slot 2 is taken to be the leading NEWLINE, so `all`
        # is in slot 3; otherwise slot 2 holds `all`.
        body = p[3] if p[2] is None else p[2]
    else:
        body = p[2]
    p[0] = ('group', body)
def p_ref(p):
    '''ref : HAT identifier'''
    name = p[2]
    # A name counts as local only if it is unqualified (no '::') and already
    # recorded in the innermost defs set; everything else is added to the
    # innermost refs set.
    is_local = '::' not in name and name in p.parser.defs[-1]
    if not is_local:
        p.parser.refs[-1].add(name)
    p[0] = ('ref', name)
def p_namespace_block(p):
    '''statement : namespace ID LCURLY all RCURLY'''
    # Node shape: ('namespace', <name>, <body>).
    name, body = p[2], p[4]
    p[0] = ('namespace', name, body)
def p_extern_block(p):
    '''extern_block : extern DOCSTRING'''
    # Node shape: ('extern', <DOCSTRING token value>).
    payload = p[2]
    p[0] = ('extern', payload)
def p_extern_expr(p):
    '''extern_block : extern STRING'''
    # Node shape: ('extern-expr', <STRING token value>).
    payload = p[2]
    p[0] = ('extern-expr', payload)
def p_expression(p):
    '''expression : assignment
                  | conditional_expression
                  | loop_expression
                  | tuple
                  | additive_expression'''
    # Pure pass-through: an expression is whatever its single child produced.
    child = p[1]
    p[0] = child
def p_conditional_expression(p):
    '''conditional_expression : if_start all else_ifs else all end
                              | if_start all else all end
                              | if_start all else_ifs end
                              | if_start all end''' # should include "then" here?
    condition = p[1]
    then_branch = ('group', p[2])
    arity = len(p)
    if arity == 7:
        # if / elif... / else
        node = ('cond-else', condition, then_branch, p[3], ('group', p[5]))
    elif arity == 6:
        # if / else
        node = ('cond-else', condition, then_branch, ('group', p[4]))
    elif arity == 5:
        # if / elif...
        node = ('cond', condition, then_branch, p[3])
    else:
        # bare if
        node = ('cond', condition, then_branch)
    p[0] = node
def p_loop_expression(p):
    '''loop_expression : loop expression all end'''
    # Node shape: ('loop', <expression>, <body list>).
    head, body = p[2], p[3]
    p[0] = ('loop', head, body)
def p_break_statement(p):
    '''expression : break'''
    # One-element tuple node; there is no payload.
    node = ('break',)
    p[0] = node
def p_return_statement(p):
    '''expression : return expression
                  | return'''
    # A bare `return` yields the module's `nil` value.
    value = p[2] if len(p) == 3 else nil
    p[0] = ('return', value)
def p_continue_statement(p):
    '''expression : continue'''
    # One-element tuple node; there is no payload.
    node = ('continue',)
    p[0] = node
def p_if_start(p):
    '''if_start : if statement
                | if expression'''
    # Drop the `if` keyword; the value is the condition itself.
    condition = p[2]
    p[0] = condition
def p_else_ifs(p):
    '''else_ifs : else_ifs elif expression all
                | elif expression all'''
    # Builds a list of (condition, ('group', body)) pairs, one per `elif`.
    #
    # The recursive alternative has four right-hand symbols, so len(p) is 5
    # (slot 0 is the result). Testing for 6 could never match, which sent
    # chained elifs down the base-case branch: earlier clauses were dropped
    # and the `elif` token itself was used as the condition.
    if len(p) == 5:
        p[0] = p[1] + [(p[3], ('group', p[4]))]
    else:
        p[0] = [(p[2], ('group', p[3]))]
def p_additive_expression(p):
    '''additive_expression : expression PLUS expression
                           | expression MINUS expression
                           | expression TIMES expression
                           | expression DIVIDE expression
                           | expression MOD expression
                           | expression LT expression
                           | expression EQ expression
                           | expression GT expression
                           | expression INEQ expression
                           | expression LTE expression
                           | expression GTE expression
                           | MINUS primary_expression
                           | NOT primary_expression
                           | primary_expression'''
    count = len(p)
    if count == 4:
        # Infix form: the operator goes first in the node, then operands.
        lhs, op, rhs = p[1], p[2], p[3]
        p[0] = ('binary', op, lhs, rhs)
    elif count == 3:
        # Prefix form: operator then operand.
        op, operand = p[1], p[2]
        p[0] = ('unary', op, operand)
    else:
        # Bare primary expression passes through.
        p[0] = p[1]
def p_assignment(p):
    '''assignment : HAT ID ASSIGN expression
                  | dot_op ASSIGN expression
                  | identifier ASSIGN expression'''
    if len(p) != 4:
        # HAT ID ASSIGN expression: record the name in the innermost refs set.
        name = p[2]
        p.parser.refs[-1].add(name)
        p[0] = ('upval-assign', name, p[4])
        return

    target, value = p[1], p[3]
    if target[0] in ('dot', 'indirect-dot'):
        # Target is a dot_op node (its first element is the node tag).
        p[0] = ('member-assign', target, value)
        return

    # Plain identifier: unqualified names are recorded in the innermost
    # defs set.
    if '::' not in target:
        p.parser.defs[-1].add(target)
    p[0] = ('assign', target, value)
def p_op_assign(p):
    '''assignment : identifier assignment_op expression
                  | dot_op assignment_op expression'''
    # Node shape: ('op-assign', <first char of the operator value>, <target>,
    # <expression>).
    target = p[1]
    # Only identifier targets are names that belong in the refs set. A dot_op
    # target is an AST tuple: adding it would put a non-name into refs, or
    # raise TypeError when the tuple contains a list and cannot be hashed.
    if not isinstance(target, tuple):
        if '::' in target or target not in p.parser.defs[-1]:
            p.parser.refs[-1].add(target)
    p[0] = ('op-assign', p[2][0], target, p[3])
def p_assignment_op(p):
    '''assignment_op : PLUS_ASSIGN
                     | MINUS_ASSIGN
                     | TIMES_ASSIGN
                     | DIVIDE_ASSIGN'''
    # The operator token's value is passed through unchanged.
    op = p[1]
    p[0] = op
def p_param_val(p):
    '''param_val : LSQUARE param_names_list RSQUARE LCURLY clear_defs all RCURLY
                 | LCURLY clear_defs all RCURLY'''
    # clear_defs pushed one refs set and one defs set for this block; both
    # are popped here. The refs set is stored on the node as a list.
    captured = list(p.parser.refs.pop())
    if len(p) == 5:
        # No parameter list: ('pval', <body>, <refs>).
        node = ('pval', p[3], captured)
    else:
        # With parameter list: ('pval', <params>, <body>, <refs>).
        node = ('pval', p[2], p[6], captured)
    p[0] = node
    p.parser.defs.pop()
def p_dot_op(p):
    '''dot_op : primary_expression DOT LPAREN expression RPAREN
              | primary_expression DOT ID
              | primary_expression DOT INTEGER'''
    owner = p[1]
    if len(p) == 6:
        # Member selected by a parenthesised expression.
        p[0] = ('indirect-dot', owner, p[4])
        return
    member = p[3]
    try:
        # Members that parse as integers are stored as ints.
        member = int(member)
    except ValueError:
        # Anything else is kept as given.
        pass
    p[0] = ('dot', owner, member)
def p_clear_defs(p):
    '''clear_defs :'''
    # Empty production used as a mid-rule action: pushes a fresh refs set
    # and a fresh defs set onto the parser's stacks.
    state = p.parser
    state.refs.append(set())
    state.defs.append(set())
def p_parameters(p):
    '''parameters : parameters tuple_sep expression
                  | ref
                  | expression'''
    if len(p) == 2:
        # First argument starts a new list.
        p[0] = [p[1]]
        return
    # Later arguments are appended to the existing list in place.
    args = p[1]
    args.append(p[3])
    p[0] = args
def p_param_names_list(p):
    '''param_names_list : param_names_with_defaults
                        | param_names'''
    # Either kind of list passes through unchanged.
    names = p[1]
    p[0] = names
def p_param_names_with_defaults(p):
    '''param_names_with_defaults : param_names_with_defaults tuple_sep default_param
                                 | param_names tuple_sep default_param
                                 | default_param'''
    if len(p) != 4:
        # A lone default parameter starts a new list.
        p[0] = [p[1]]
        return
    # Otherwise append to the existing list in place.
    entries = p[1]
    entries.append(p[3])
    p[0] = entries
def p_param_names(p):
    '''param_names : param_names tuple_sep param_definition
                   | param_definition'''
    if len(p) != 4:
        # A lone parameter definition starts a new list.
        p[0] = [p[1]]
        return
    # Otherwise append to the existing list in place.
    entries = p[1]
    entries.append(p[3])
    p[0] = entries
def p_default_param(p):
    '''default_param : ID ASSIGN primary_expression'''
    # Node shape: ('default', <name>, <default value expression>).
    name, default_value = p[1], p[3]
    p[0] = ('default', name, default_value)
def p_param_definition(p):
    '''param_definition : HAT ID
                        | ID'''
    if len(p) != 3:
        # Plain parameter; nothing is recorded in defs for this form.
        p[0] = ('param', p[1])
        return
    # HAT-prefixed parameter: emitted as a 'ref' node and its name is added
    # to the innermost defs set.
    # NOTE(review): in param_val the parameter list is reduced before
    # clear_defs runs, so defs[-1] here looks like the enclosing scope's
    # set -- confirm this is intended.
    name = p[2]
    p[0] = ('ref', name)
    p.parser.defs[-1].add(name)
def p_evaluation(p):
    '''evaluation : primary_expression param_open parameters param_close %prec PEVAL
                  | identifier'''
    if len(p) == 5:
        # Callee followed by a bracketed argument list.
        p[0] = ('param-eval', p[1], p[3])
        return
    name = p[1]
    # Unqualified names already in the innermost defs set are local; every
    # other name is added to the innermost refs set.
    known_local = '::' not in name and name in p.parser.defs[-1]
    if not known_local:
        p.parser.refs[-1].add(name)
    p[0] = ('evaluation', name)
def p_identifier(p):
    '''identifier : identifier NSSEP ID
                  | ID'''
    # Qualified names are flattened into a single '::'-joined string.
    if len(p) != 4:
        p[0] = p[1]
        return
    prefix, leaf = p[1], p[3]
    p[0] = '{0}::{1}'.format(prefix, leaf)
def p_param_open(p):
    '''param_open : param_open NEWLINE
                  | LSQUARE'''
    # Trailing newlines after the opening bracket are absorbed; the value is
    # always that of the first symbol.
    opener = p[1]
    p[0] = opener
def p_param_close(p):
    '''param_close : NEWLINE param_close
                   | RSQUARE'''
    # Leading newlines before the closing bracket are absorbed; the value is
    # a fixed ']' for both alternatives.
    closer = ']'
    p[0] = closer
def p_primitive(p):
    '''primitive : numval
                 | stringval
                 | boolval
                 | nilval'''
    # Pass-through for every literal kind.
    literal = p[1]
    p[0] = literal
def p_int(p):
    '''numval : INTEGER'''
    # Wraps the literal's value in Val('int', ...).
    # ast.literal_eval parses Python literals only, so the token text can
    # never be executed as code the way it could be with eval().
    p[0] = Val('int', ast.literal_eval(p[1]))
def p_float(p):
    '''numval : FLOAT'''
    # Wraps the literal's value in Val('float', ...).
    # ast.literal_eval parses Python literals only, so the token text can
    # never be executed as code the way it could be with eval().
    p[0] = Val('float', ast.literal_eval(p[1]))
def p_tuple(p):
    '''tuple : tuple_contents NEWLINE RPAREN
             | tuple_contents RPAREN
             | tuple_start NEWLINE RPAREN
             | tuple_start RPAREN'''
    # Node shape: ('tuple', <list of members>); closing tokens are dropped.
    members = p[1]
    p[0] = ('tuple', members)
def p_tuple_contents(p):
    '''tuple_contents : tuple_contents tuple_sep labelled_member
                      | tuple_contents tuple_sep expression
                      | tuple_start labelled_member
                      | tuple_start expression'''
    # The new member is the last symbol: slot 3 when a separator is present,
    # slot 2 otherwise. A new list is built rather than mutating p[1].
    member = p[3] if len(p) == 4 else p[2]
    p[0] = p[1] + [member]
def p_member_label(p):
    '''member_label : LPAREN additive_expression RPAREN
                    | STRING
                    | ID'''
    if len(p) != 4:
        # STRING or ID: the token value is the label itself.
        p[0] = ('member-label-literal', p[1])
    else:
        # Parenthesised form: the label is given by an expression.
        p[0] = ('member-label', p[2])
def p_tuple_start(p):
    '''tuple_start : LPAREN NEWLINE labelled_member tuple_sep
                   | LPAREN NEWLINE expression tuple_sep
                   | LPAREN labelled_member tuple_sep
                   | LPAREN expression tuple_sep'''
    # The first member sits in slot 3 when a NEWLINE follows the paren,
    # slot 2 otherwise; it becomes a one-element list.
    first_member = p[3] if len(p) == 5 else p[2]
    p[0] = [first_member]
def p_labelled_member(p):
    '''labelled_member : member_label COLON NEWLINE expression
                       | member_label COLON expression'''
    # The value expression is in slot 4 when a NEWLINE follows the colon,
    # slot 3 otherwise.
    label = p[1]
    value = p[4] if len(p) == 5 else p[3]
    p[0] = ('named-member', label, value)
def p_tuple_sep(p): # optional newline
    '''tuple_sep : tuple_sep NEWLINE
                 | COMMA'''
    # Newlines after a comma are absorbed; the value is a fixed ','.
    separator = ','
    p[0] = separator
def p_stringval(p):
    '''stringval : DOCSTRING
                 | STRING'''
    # Both string token kinds are wrapped identically as Val('string', ...).
    text = p[1]
    p[0] = Val('string', text)
def p_boolval(p):
    '''boolval : true
               | false'''
    # Map the keyword text onto the `true` / `false` values imported from
    # core.
    if p[1] == 'true':
        p[0] = true
    else:
        p[0] = false
def p_nilval(p):
    '''nilval : nil'''
    # The keyword maps onto the `nil` value imported from core.
    value = nil
    p[0] = value
def p_error(p):
    # PLY error hook: raises SyntaxError describing the offending token.
    #
    # p is the token that could not be parsed, or None when the input ended
    # unexpectedly (PLY passes None at end of input).
    if p is None:
        # There is no token to inspect, so no position or source line is
        # available; the original code crashed with AttributeError here.
        raise SyntaxError("unexpected end of input")
    lines = p.lexer.lexdata.split('\n')
    # Column = length of the text between the last newline and the token.
    col = len(p.lexer.lexdata[:p.lexpos].rsplit('\n')[-1])
    # Message: location and token type, the source line, then a caret
    # under the token's column.
    line = "(l{0},c{1}) unexpected token {2}\n{3}\n{4}^".format(
        p.lineno, col+1, p.type, lines[p.lineno-1], ' '*col)
    raise SyntaxError(line)
# Build the parser once at import time; PLY collects the grammar from the
# p_* functions (and `tokens` / `precedence`) defined in this module.
parser = yacc.yacc()
def parse(data, debug=0):
    """Parse *data* with the module-level parser and return the result.

    The lexer's line counter and the parser's error flag and refs/defs
    stacks are reset before each run. Returns None if the parser's error
    flag is set after parsing.
    """
    lexer = larklex.lexer
    lexer.lineno = 1
    parser.error = 0
    # Each stack starts with a single empty set.
    parser.refs, parser.defs = [set()], [set()]
    tree = parser.parse(data, lexer=lexer, debug=debug)
    return None if parser.error else tree
if __name__ == '__main__':
    # Script entry point: parse the first command-line argument if given,
    # otherwise a single line read from stdin, and print the result.
    # print(...) with one argument behaves the same on Python 2 and 3,
    # unlike the print statement, which is a syntax error on Python 3.
    if len(sys.argv) < 2:
        try:
            line = sys.stdin.readline()
            print(parse(line))
        except KeyboardInterrupt:
            # Ctrl-C while waiting for input exits quietly.
            pass
    else:
        print(parse(sys.argv[1]))