/
parse36.py
458 lines (411 loc) · 21.5 KB
/
parse36.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
# Copyright (c) 2016-2019 Rocky Bernstein
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
spark grammar differences over Python 3.5 for Python 3.6.
"""
from __future__ import print_function
from uncompyle6.parser import PythonParserSingle, nop_func
from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG
from uncompyle6.parsers.parse35 import Python35Parser
from uncompyle6.scanners.tok import Token
class Python36Parser(Python35Parser):
def __init__(self, debug_parser=PARSER_DEFAULT_DEBUG):
super(Python36Parser, self).__init__(debug_parser)
self.customized = {}
def p_36misc(self, args):
"""sstmt ::= sstmt RETURN_LAST
# long except clauses in a loop can sometimes cause a JUMP_BACK to turn into a
# JUMP_FORWARD to a JUMP_BACK. And when this happens there is an additional
# ELSE added to the except_suite. With better flow control perhaps we can
# sort this out better.
except_suite ::= c_stmts_opt POP_EXCEPT jump_except ELSE
except_suite_finalize ::= SETUP_FINALLY c_stmts_opt except_var_finalize END_FINALLY
_jump ELSE
# 3.6 redoes how return_closure works. FIXME: Isolate to LOAD_CLOSURE
return_closure ::= LOAD_CLOSURE DUP_TOP STORE_NAME RETURN_VALUE RETURN_LAST
for_block ::= l_stmts_opt come_from_loops JUMP_BACK
come_from_loops ::= COME_FROM_LOOP*
whilestmt ::= SETUP_LOOP testexpr l_stmts_opt
JUMP_BACK come_froms POP_BLOCK COME_FROM_LOOP
whilestmt ::= SETUP_LOOP testexpr l_stmts_opt
come_froms JUMP_BACK come_froms POP_BLOCK COME_FROM_LOOP
# 3.6 due to jump optimization, we sometimes add RETURN_END_IF where
# RETURN_VALUE is meant. Specifcally this can happen in
# ifelsestmt -> ...else_suite _. suite_stmts... (last) stmt
return ::= ret_expr RETURN_END_IF
return ::= ret_expr RETURN_VALUE COME_FROM
return_stmt_lambda ::= ret_expr RETURN_VALUE_LAMBDA COME_FROM
# A COME_FROM is dropped off because of JUMP-to-JUMP optimization
and ::= expr jmp_false expr
and ::= expr jmp_false expr jmp_false
jf_cf ::= JUMP_FORWARD COME_FROM
cf_jf_else ::= come_froms JUMP_FORWARD ELSE
conditional ::= expr jmp_false expr jf_cf expr COME_FROM
async_for_stmt ::= SETUP_LOOP expr
GET_AITER
LOAD_CONST YIELD_FROM SETUP_EXCEPT GET_ANEXT LOAD_CONST
YIELD_FROM
store
POP_BLOCK JUMP_FORWARD COME_FROM_EXCEPT DUP_TOP
LOAD_GLOBAL COMPARE_OP POP_JUMP_IF_FALSE
POP_TOP POP_TOP POP_TOP POP_EXCEPT POP_BLOCK
JUMP_ABSOLUTE END_FINALLY COME_FROM
for_block POP_BLOCK
COME_FROM_LOOP
stmt ::= async_for_stmt36
async_for_stmt36 ::= SETUP_LOOP expr
GET_AITER
LOAD_CONST YIELD_FROM
SETUP_EXCEPT GET_ANEXT LOAD_CONST
YIELD_FROM
store
POP_BLOCK JUMP_BACK COME_FROM_EXCEPT DUP_TOP
LOAD_GLOBAL COMPARE_OP POP_JUMP_IF_TRUE
END_FINALLY for_block
COME_FROM
POP_TOP POP_TOP POP_TOP POP_EXCEPT
POP_TOP POP_BLOCK
COME_FROM_LOOP
async_forelse_stmt ::= SETUP_LOOP expr
GET_AITER
LOAD_CONST YIELD_FROM SETUP_EXCEPT GET_ANEXT LOAD_CONST
YIELD_FROM
store
POP_BLOCK JUMP_FORWARD COME_FROM_EXCEPT DUP_TOP
LOAD_GLOBAL COMPARE_OP POP_JUMP_IF_FALSE
POP_TOP POP_TOP POP_TOP POP_EXCEPT POP_BLOCK
JUMP_ABSOLUTE END_FINALLY COME_FROM
for_block POP_BLOCK
else_suite COME_FROM_LOOP
# Adds a COME_FROM_ASYNC_WITH over 3.5
# FIXME: remove corresponding rule for 3.5?
except_suite ::= c_stmts_opt COME_FROM POP_EXCEPT jump_except COME_FROM
jb_cfs ::= JUMP_BACK come_froms
ifelsestmtl ::= testexpr c_stmts_opt jb_cfs else_suitel
ifelsestmtl ::= testexpr c_stmts_opt cf_jf_else else_suitel
# In 3.6+, A sequence of statements ending in a RETURN can cause
# JUMP_FORWARD END_FINALLY to be omitted from try middle
except_return ::= POP_TOP POP_TOP POP_TOP returns
except_handler ::= JUMP_FORWARD COME_FROM_EXCEPT except_return
# Try middle following a returns
except_handler36 ::= COME_FROM_EXCEPT except_stmts END_FINALLY
stmt ::= try_except36
try_except36 ::= SETUP_EXCEPT returns except_handler36
opt_come_from_except
try_except36 ::= SETUP_EXCEPT suite_stmts
try_except36 ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK
except_handler36 opt_come_from_except
# 3.6 omits END_FINALLY sometimes
except_handler36 ::= COME_FROM_EXCEPT except_stmts
except_handler36 ::= JUMP_FORWARD COME_FROM_EXCEPT except_stmts
except_handler ::= jmp_abs COME_FROM_EXCEPT except_stmts
stmt ::= tryfinally36
tryfinally36 ::= SETUP_FINALLY returns
COME_FROM_FINALLY suite_stmts
tryfinally36 ::= SETUP_FINALLY returns
COME_FROM_FINALLY suite_stmts_opt END_FINALLY
except_suite_finalize ::= SETUP_FINALLY returns
COME_FROM_FINALLY suite_stmts_opt END_FINALLY _jump
stmt ::= tryfinally_return_stmt
tryfinally_return_stmt ::= SETUP_FINALLY suite_stmts_opt POP_BLOCK LOAD_CONST
COME_FROM_FINALLY
compare_chained2 ::= expr COMPARE_OP come_froms JUMP_FORWARD
"""
def customize_grammar_rules(self, tokens, customize):
# self.remove_rules("""
# """)
super(Python36Parser, self).customize_grammar_rules(tokens, customize)
self.remove_rules("""
except_handler ::= JUMP_FORWARD COME_FROM_EXCEPT except_stmts END_FINALLY COME_FROM
async_for_stmt ::= SETUP_LOOP expr
GET_AITER
LOAD_CONST YIELD_FROM SETUP_EXCEPT GET_ANEXT LOAD_CONST
YIELD_FROM
store
POP_BLOCK jump_except COME_FROM_EXCEPT DUP_TOP
LOAD_GLOBAL COMPARE_OP POP_JUMP_IF_FALSE
POP_TOP POP_TOP POP_TOP POP_EXCEPT POP_BLOCK
JUMP_ABSOLUTE END_FINALLY COME_FROM
for_block POP_BLOCK JUMP_ABSOLUTE
COME_FROM_LOOP
async_forelse_stmt ::= SETUP_LOOP expr
GET_AITER
LOAD_CONST YIELD_FROM SETUP_EXCEPT GET_ANEXT LOAD_CONST
YIELD_FROM
store
POP_BLOCK JUMP_FORWARD COME_FROM_EXCEPT DUP_TOP
LOAD_GLOBAL COMPARE_OP POP_JUMP_IF_FALSE
POP_TOP POP_TOP POP_TOP POP_EXCEPT POP_BLOCK
JUMP_ABSOLUTE END_FINALLY COME_FROM
for_block pb_ja
else_suite COME_FROM_LOOP
""")
self.check_reduce['call_kw'] = 'AST'
for i, token in enumerate(tokens):
opname = token.kind
if opname == 'FORMAT_VALUE':
rules_str = """
expr ::= formatted_value1
formatted_value1 ::= expr FORMAT_VALUE
"""
self.add_unique_doc_rules(rules_str, customize)
elif opname == 'FORMAT_VALUE_ATTR':
rules_str = """
expr ::= formatted_value2
formatted_value2 ::= expr expr FORMAT_VALUE_ATTR
"""
self.add_unique_doc_rules(rules_str, customize)
elif opname == 'MAKE_FUNCTION_8':
if 'LOAD_DICTCOMP' in self.seen_ops:
# Is there something general going on here?
rule = """
dict_comp ::= load_closure LOAD_DICTCOMP LOAD_STR
MAKE_FUNCTION_8 expr
GET_ITER CALL_FUNCTION_1
"""
self.addRule(rule, nop_func)
elif 'LOAD_SETCOMP' in self.seen_ops:
rule = """
set_comp ::= load_closure LOAD_SETCOMP LOAD_STR
MAKE_FUNCTION_8 expr
GET_ITER CALL_FUNCTION_1
"""
self.addRule(rule, nop_func)
elif opname == 'BEFORE_ASYNC_WITH':
rules_str = """
stmt ::= async_with_stmt
async_with_as_stmt ::= expr
BEFORE_ASYNC_WITH GET_AWAITABLE LOAD_CONST YIELD_FROM
SETUP_ASYNC_WITH store
suite_stmts_opt
POP_BLOCK LOAD_CONST
COME_FROM_ASYNC_WITH
WITH_CLEANUP_START
GET_AWAITABLE LOAD_CONST YIELD_FROM
WITH_CLEANUP_FINISH END_FINALLY
stmt ::= async_with_as_stmt
async_with_stmt ::= expr
BEFORE_ASYNC_WITH GET_AWAITABLE LOAD_CONST YIELD_FROM
SETUP_ASYNC_WITH POP_TOP suite_stmts_opt
POP_BLOCK LOAD_CONST
COME_FROM_ASYNC_WITH
WITH_CLEANUP_START
GET_AWAITABLE LOAD_CONST YIELD_FROM
WITH_CLEANUP_FINISH END_FINALLY
"""
self.addRule(rules_str, nop_func)
elif opname.startswith('BUILD_STRING'):
v = token.attr
rules_str = """
expr ::= joined_str
joined_str ::= %sBUILD_STRING_%d
""" % ("expr " * v, v)
self.add_unique_doc_rules(rules_str, customize)
if 'FORMAT_VALUE_ATTR' in self.seen_ops:
rules_str = """
formatted_value_attr ::= expr expr FORMAT_VALUE_ATTR expr BUILD_STRING
expr ::= formatted_value_attr
"""
self.add_unique_doc_rules(rules_str, customize)
elif opname.startswith('BUILD_MAP_UNPACK_WITH_CALL'):
v = token.attr
rule = 'build_map_unpack_with_call ::= %s%s' % ('expr ' * v, opname)
self.addRule(rule, nop_func)
elif opname.startswith('BUILD_TUPLE_UNPACK_WITH_CALL'):
v = token.attr
rule = ('build_tuple_unpack_with_call ::= ' + 'expr1024 ' * int(v//1024) +
'expr32 ' * int((v//32) % 32) +
'expr ' * (v % 32) + opname)
self.addRule(rule, nop_func)
rule = ('starred ::= %s %s' % ('expr ' * v, opname))
self.addRule(rule, nop_func)
elif opname == 'SETUP_ANNOTATIONS':
# 3.6 Variable Annotations PEP 526
# This seems to come before STORE_ANNOTATION, and doesn't
# correspond to direct Python source code.
rule = """
stmt ::= SETUP_ANNOTATIONS
stmt ::= ann_assign_init_value
stmt ::= ann_assign_no_init
ann_assign_init_value ::= expr store store_annotation
ann_assign_no_init ::= store_annotation
store_annotation ::= LOAD_NAME STORE_ANNOTATION
store_annotation ::= subscript STORE_ANNOTATION
"""
self.addRule(rule, nop_func)
# Check to combine assignment + annotation into one statement
self.check_reduce['assign'] = 'token'
elif opname == 'SETUP_WITH':
rules_str = """
withstmt ::= expr SETUP_WITH POP_TOP suite_stmts_opt COME_FROM_WITH
WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY
# Removes POP_BLOCK LOAD_CONST from 3.6-
withasstmt ::= expr SETUP_WITH store suite_stmts_opt COME_FROM_WITH
WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY
"""
if self.version < 3.8:
rules_str += """
withstmt ::= expr SETUP_WITH POP_TOP suite_stmts_opt POP_BLOCK
LOAD_CONST
WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY
"""
else:
rules_str += """
withstmt ::= expr SETUP_WITH POP_TOP suite_stmts_opt POP_BLOCK
BEGIN_FINALLY COME_FROM_WITH
WITH_CLEANUP_START WITH_CLEANUP_FINISH
END_FINALLY
"""
self.addRule(rules_str, nop_func)
pass
pass
return
def custom_classfunc_rule(self, opname, token, customize, next_token, is_pypy):
args_pos, args_kw = self.get_pos_kw(token)
# Additional exprs for * and ** args:
# 0 if neither
# 1 for CALL_FUNCTION_VAR or CALL_FUNCTION_KW
# 2 for * and ** args (CALL_FUNCTION_VAR_KW).
# Yes, this computation based on instruction name is a little bit hoaky.
nak = ( len(opname)-len('CALL_FUNCTION') ) // 3
uniq_param = args_kw + args_pos
if frozenset(('GET_AWAITABLE', 'YIELD_FROM')).issubset(self.seen_ops):
rule = ('async_call ::= expr ' +
('pos_arg ' * args_pos) +
('kwarg ' * args_kw) +
'expr ' * nak + token.kind +
' GET_AWAITABLE LOAD_CONST YIELD_FROM')
self.add_unique_rule(rule, token.kind, uniq_param, customize)
self.add_unique_rule('expr ::= async_call', token.kind, uniq_param, customize)
if opname.startswith('CALL_FUNCTION_KW'):
if is_pypy:
# PYPY doesn't follow CPython 3.6 CALL_FUNCTION_KW conventions
super(Python36Parser, self).custom_classfunc_rule(opname, token, customize, next_token, is_pypy)
else:
self.addRule("expr ::= call_kw36", nop_func)
values = 'expr ' * token.attr
rule = "call_kw36 ::= expr {values} LOAD_CONST {opname}".format(**locals())
self.add_unique_rule(rule, token.kind, token.attr, customize)
elif opname == 'CALL_FUNCTION_EX_KW':
# Note: this doesn't exist in 3.7 and later
self.addRule("""expr ::= call_ex_kw4
call_ex_kw4 ::= expr
expr
expr
CALL_FUNCTION_EX_KW
""",
nop_func)
if 'BUILD_MAP_UNPACK_WITH_CALL' in self.seen_op_basenames:
self.addRule("""expr ::= call_ex_kw
call_ex_kw ::= expr expr build_map_unpack_with_call
CALL_FUNCTION_EX_KW
""", nop_func)
if 'BUILD_TUPLE_UNPACK_WITH_CALL' in self.seen_op_basenames:
# FIXME: should this be parameterized by EX value?
self.addRule("""expr ::= call_ex_kw3
call_ex_kw3 ::= expr
build_tuple_unpack_with_call
expr
CALL_FUNCTION_EX_KW
""", nop_func)
if 'BUILD_MAP_UNPACK_WITH_CALL' in self.seen_op_basenames:
# FIXME: should this be parameterized by EX value?
self.addRule("""expr ::= call_ex_kw2
call_ex_kw2 ::= expr
build_tuple_unpack_with_call
build_map_unpack_with_call
CALL_FUNCTION_EX_KW
""", nop_func)
elif opname == 'CALL_FUNCTION_EX':
self.addRule("""
expr ::= call_ex
starred ::= expr
call_ex ::= expr starred CALL_FUNCTION_EX
""", nop_func)
if self.version >= 3.6:
if 'BUILD_MAP_UNPACK_WITH_CALL' in self.seen_ops:
self.addRule("""
expr ::= call_ex_kw
call_ex_kw ::= expr expr
build_map_unpack_with_call CALL_FUNCTION_EX
""", nop_func)
if 'BUILD_TUPLE_UNPACK_WITH_CALL' in self.seen_ops:
self.addRule("""
expr ::= call_ex_kw3
call_ex_kw3 ::= expr
build_tuple_unpack_with_call
%s
CALL_FUNCTION_EX
""" % 'expr ' * token.attr, nop_func)
pass
# FIXME: Is this right?
self.addRule("""
expr ::= call_ex_kw4
call_ex_kw4 ::= expr
expr
expr
CALL_FUNCTION_EX
""", nop_func)
pass
else:
super(Python36Parser, self).custom_classfunc_rule(opname, token, customize, next_token, is_pypy)
def reduce_is_invalid(self, rule, ast, tokens, first, last):
invalid = super(Python36Parser,
self).reduce_is_invalid(rule, ast,
tokens, first, last)
if invalid:
return invalid
if rule[0] == 'assign':
# Try to combine assignment + annotation into one statement
if (len(tokens) >= last + 1 and
tokens[last] == 'LOAD_NAME' and
tokens[last+1] == 'STORE_ANNOTATION' and
tokens[last-1].pattr == tokens[last+1].pattr):
# Will handle as ann_assign_init_value
return True
pass
if rule[0] == 'call_kw':
# Make sure we don't derive call_kw
nt = ast[0]
while not isinstance(nt, Token):
if nt[0] == 'call_kw':
return True
nt = nt[0]
pass
pass
return False
class Python36ParserSingle(Python36Parser, PythonParserSingle):
pass
if __name__ == '__main__':
# Check grammar
p = Python36Parser()
p.check_grammar()
from uncompyle6 import PYTHON_VERSION, IS_PYPY
if PYTHON_VERSION == 3.6:
lhs, rhs, tokens, right_recursive, dup_rhs = p.check_sets()
from uncompyle6.scanner import get_scanner
s = get_scanner(PYTHON_VERSION, IS_PYPY)
opcode_set = set(s.opc.opname).union(set(
"""JUMP_BACK CONTINUE RETURN_END_IF COME_FROM
LOAD_GENEXPR LOAD_ASSERT LOAD_SETCOMP LOAD_DICTCOMP LOAD_CLASSNAME
LAMBDA_MARKER RETURN_LAST
""".split()))
remain_tokens = set(tokens) - opcode_set
import re
remain_tokens = set([re.sub(r'_\d+$', '', t) for t in remain_tokens])
remain_tokens = set([re.sub('_CONT$', '', t) for t in remain_tokens])
remain_tokens = set(remain_tokens) - opcode_set
print(remain_tokens)
# print(sorted(p.rule2name.items()))