Pristine copy of a subset of lib2to3 from Python 3.6.1.
For an experiment in running/bootstrapping OSH.
Showing 10 changed files with 2,686 additions and 0 deletions.
- +173 −0 opy/Grammar.txt
- +4 −0 opy/pgen2/__init__.py
- +160 −0 opy/pgen2/driver.py
- +207 −0 opy/pgen2/grammar.py
- +201 −0 opy/pgen2/parse.py
- +386 −0 opy/pgen2/pgen.py
- +85 −0 opy/pgen2/token.py
- +576 −0 opy/pgen2/tokenize.py
- +40 −0 opy/pygram.py
- +854 −0 opy/pytree.py
opy/Grammar.txt
@@ -0,0 +1,173 @@
# Grammar for 2to3. This grammar supports Python 2.x and 3.x.

# Note: Changing the grammar specified in this file will most likely
# require corresponding changes in the parser module
# (../Modules/parsermodule.c). If you can't make the changes to
# that module yourself, please co-ordinate the required changes
# with someone who can; ask around on python-dev for help. Fred
# Drake <fdrake@acm.org> will probably be listening there.

# NOTE WELL: You should also follow all the steps listed in PEP 306,
# "How to Change Python's Grammar"

# Commands for Kees Blom's railroad program
#diagram:token NAME
#diagram:token NUMBER
#diagram:token STRING
#diagram:token NEWLINE
#diagram:token ENDMARKER
#diagram:token INDENT
#diagram:output\input python.bla
#diagram:token DEDENT
#diagram:output\textwidth 20.04cm\oddsidemargin 0.0cm\evensidemargin 0.0cm
#diagram:rules

# Start symbols for the grammar:
#   file_input is a module or sequence of commands read from an input file;
#   single_input is a single interactive statement;
#   eval_input is the input for the eval() and input() functions.
# NB: compound_stmt in single_input is followed by extra NEWLINE!
file_input: (NEWLINE | stmt)* ENDMARKER
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
eval_input: testlist NEWLINE* ENDMARKER

decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
decorators: decorator+
decorated: decorators (classdef | funcdef | async_funcdef)
async_funcdef: ASYNC funcdef
funcdef: 'def' NAME parameters ['->' test] ':' suite
parameters: '(' [typedargslist] ')'
typedargslist: ((tfpdef ['=' test] ',')*
                ('*' [tname] (',' tname ['=' test])* [',' '**' tname] | '**' tname)
                | tfpdef ['=' test] (',' tfpdef ['=' test])* [','])
tname: NAME [':' test]
tfpdef: tname | '(' tfplist ')'
tfplist: tfpdef (',' tfpdef)* [',']
varargslist: ((vfpdef ['=' test] ',')*
              ('*' [vname] (',' vname ['=' test])* [',' '**' vname] | '**' vname)
              | vfpdef ['=' test] (',' vfpdef ['=' test])* [','])
vname: NAME
vfpdef: vname | '(' vfplist ')'
vfplist: vfpdef (',' vfpdef)* [',']

stmt: simple_stmt | compound_stmt
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt |
             import_stmt | global_stmt | exec_stmt | assert_stmt)
expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
                               ('=' (yield_expr|testlist_star_expr))*)
annassign: ':' test ['=' test]
testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
            '<<=' | '>>=' | '**=' | '//=')
# For normal and annotated assignments, additional restrictions enforced by the interpreter
print_stmt: 'print' ( [ test (',' test)* [','] ] |
                      '>>' test [ (',' test)+ [','] ] )
del_stmt: 'del' exprlist
pass_stmt: 'pass'
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
break_stmt: 'break'
continue_stmt: 'continue'
return_stmt: 'return' [testlist]
yield_stmt: yield_expr
raise_stmt: 'raise' [test ['from' test | ',' test [',' test]]]
import_stmt: import_name | import_from
import_name: 'import' dotted_as_names
import_from: ('from' ('.'* dotted_name | '.'+)
              'import' ('*' | '(' import_as_names ')' | import_as_names))
import_as_name: NAME ['as' NAME]
dotted_as_name: dotted_name ['as' NAME]
import_as_names: import_as_name (',' import_as_name)* [',']
dotted_as_names: dotted_as_name (',' dotted_as_name)*
dotted_name: NAME ('.' NAME)*
global_stmt: ('global' | 'nonlocal') NAME (',' NAME)*
exec_stmt: 'exec' expr ['in' test [',' test]]
assert_stmt: 'assert' test [',' test]

compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
async_stmt: ASYNC (funcdef | with_stmt | for_stmt)
if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
while_stmt: 'while' test ':' suite ['else' ':' suite]
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
try_stmt: ('try' ':' suite
           ((except_clause ':' suite)+
            ['else' ':' suite]
            ['finally' ':' suite] |
            'finally' ':' suite))
with_stmt: 'with' with_item (',' with_item)* ':' suite
with_item: test ['as' expr]
with_var: 'as' expr
# NB compile.c makes sure that the default except clause is last
except_clause: 'except' [test [(',' | 'as') test]]
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT

# Backward compatibility cruft to support:
# [ x for x in lambda: True, lambda: False if x() ]
# even while also allowing:
# lambda x: 5 if x else 2
# (But not a mix of the two)
testlist_safe: old_test [(',' old_test)+ [',']]
old_test: or_test | old_lambdef
old_lambdef: 'lambda' [varargslist] ':' old_test

test: or_test ['if' or_test 'else' test] | lambdef
or_test: and_test ('or' and_test)*
and_test: not_test ('and' not_test)*
not_test: 'not' not_test | comparison
comparison: expr (comp_op expr)*
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
star_expr: '*' expr
expr: xor_expr ('|' xor_expr)*
xor_expr: and_expr ('^' and_expr)*
and_expr: shift_expr ('&' shift_expr)*
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
arith_expr: term (('+'|'-') term)*
term: factor (('*'|'@'|'/'|'%'|'//') factor)*
factor: ('+'|'-'|'~') factor | power
power: [AWAIT] atom trailer* ['**' factor]
atom: ('(' [yield_expr|testlist_gexp] ')' |
       '[' [listmaker] ']' |
       '{' [dictsetmaker] '}' |
       '`' testlist1 '`' |
       NAME | NUMBER | STRING+ | '.' '.' '.')
listmaker: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
testlist_gexp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
lambdef: 'lambda' [varargslist] ':' test
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
subscriptlist: subscript (',' subscript)* [',']
subscript: test | [test] ':' [test] [sliceop]
sliceop: ':' [test]
exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
testlist: test (',' test)* [',']
dictsetmaker: ( ((test ':' test | '**' expr)
                 (comp_for | (',' (test ':' test | '**' expr))* [','])) |
                ((test | star_expr)
                 (comp_for | (',' (test | star_expr))* [','])) )

classdef: 'class' NAME ['(' [arglist] ')'] ':' suite

arglist: argument (',' argument)* [',']

# "test '=' test" is really "keyword '=' test", but we have no such token.
# These need to be in a single rule to avoid grammar that is ambiguous
# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr,
# we explicitly match '*' here, too, to give it proper precedence.
# Illegal combinations and orderings are blocked in ast.c:
# multiple (test comp_for) arguments are blocked; keyword unpackings
# that precede iterable unpackings are blocked; etc.
argument: ( test [comp_for] |
            test '=' test |
            '**' expr |
            star_expr )

comp_iter: comp_for | comp_if
comp_for: [ASYNC] 'for' exprlist 'in' testlist_safe [comp_iter]
comp_if: 'if' old_test [comp_iter]

testlist1: test (',' test)*

# not used in grammar, but may appear in "node" passed from Parser to Compiler
encoding_decl: NAME

yield_expr: 'yield' [yield_arg]
yield_arg: 'from' test | testlist
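A minimal usage sketch (not part of the commit): pgen, included in this subset, reads the grammar above and builds LL(1) parse tables, assigning each nonterminal a number of 256 or higher and taking the first rule, file_input, as the start symbol. The sketch assumes it runs from the opy/ directory, so the pgen2 package below is importable and Grammar.txt is in the working directory.

# Sketch only: build parse tables from the grammar above.
from pgen2 import pgen

g = pgen.generate_grammar("Grammar.txt")
print(g.start)               # number assigned to the start symbol, file_input
print(len(g.symbol2number))  # one table entry per nonterminal rule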
opy/pgen2/__init__.py
@@ -0,0 +1,4 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

"""The pgen2 package."""
opy/pgen2/driver.py
@@ -0,0 +1,160 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

# Modifications:
# Copyright 2006 Google, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

"""Parser driver.

This provides a high-level interface to parse a file into a syntax tree.

"""

__author__ = "Guido van Rossum <guido@python.org>"

__all__ = ["Driver", "load_grammar"]

# Python imports
import codecs
import io
import os
import logging
import sys

# Pgen imports
from . import grammar, parse, token, tokenize, pgen


class Driver(object):

    def __init__(self, grammar, convert=None, logger=None):
        self.grammar = grammar
        if logger is None:
            logger = logging.getLogger()
        self.logger = logger
        self.convert = convert

    def parse_tokens(self, tokens, debug=False):
        """Parse a series of tokens and return the syntax tree."""
        # XXX Move the prefix computation into a wrapper around tokenize.
        p = parse.Parser(self.grammar, self.convert)
        p.setup()
        lineno = 1
        column = 0
        type = value = start = end = line_text = None
        prefix = ""
        for quintuple in tokens:
            type, value, start, end, line_text = quintuple
            if start != (lineno, column):
                assert (lineno, column) <= start, ((lineno, column), start)
                s_lineno, s_column = start
                if lineno < s_lineno:
                    prefix += "\n" * (s_lineno - lineno)
                    lineno = s_lineno
                    column = 0
                if column < s_column:
                    prefix += line_text[column:s_column]
                    column = s_column
            if type in (tokenize.COMMENT, tokenize.NL):
                prefix += value
                lineno, column = end
                if value.endswith("\n"):
                    lineno += 1
                    column = 0
                continue
            if type == token.OP:
                type = grammar.opmap[value]
            if debug:
                self.logger.debug("%s %r (prefix=%r)",
                                  token.tok_name[type], value, prefix)
            if p.addtoken(type, value, (prefix, start)):
                if debug:
                    self.logger.debug("Stop.")
                break
            prefix = ""
            lineno, column = end
            if value.endswith("\n"):
                lineno += 1
                column = 0
        else:
            # We never broke out -- EOF is too soon (how can this happen???)
            raise parse.ParseError("incomplete input",
                                   type, value, (prefix, start))
        return p.rootnode

    def parse_stream_raw(self, stream, debug=False):
        """Parse a stream and return the syntax tree."""
        tokens = tokenize.generate_tokens(stream.readline)
        return self.parse_tokens(tokens, debug)

    def parse_stream(self, stream, debug=False):
        """Parse a stream and return the syntax tree."""
        return self.parse_stream_raw(stream, debug)

    def parse_file(self, filename, encoding=None, debug=False):
        """Parse a file and return the syntax tree."""
        stream = codecs.open(filename, "r", encoding)
        try:
            return self.parse_stream(stream, debug)
        finally:
            stream.close()

    def parse_string(self, text, debug=False):
        """Parse a string and return the syntax tree."""
        tokens = tokenize.generate_tokens(io.StringIO(text).readline)
        return self.parse_tokens(tokens, debug)


def _generate_pickle_name(gt):
    head, tail = os.path.splitext(gt)
    if tail == ".txt":
        tail = ""
    return head + tail + ".".join(map(str, sys.version_info)) + ".pickle"


def load_grammar(gt="Grammar.txt", gp=None,
                 save=True, force=False, logger=None):
    """Load the grammar (maybe from a pickle)."""
    if logger is None:
        logger = logging.getLogger()
    gp = _generate_pickle_name(gt) if gp is None else gp
    if force or not _newer(gp, gt):
        logger.info("Generating grammar tables from %s", gt)
        g = pgen.generate_grammar(gt)
        if save:
            logger.info("Writing grammar tables to %s", gp)
            try:
                g.dump(gp)
            except OSError as e:
                logger.info("Writing failed: %s", e)
    else:
        g = grammar.Grammar()
        g.load(gp)
    return g


def _newer(a, b):
    """Inquire whether file a was written since file b."""
    if not os.path.exists(a):
        return False
    if not os.path.exists(b):
        return True
    return os.path.getmtime(a) >= os.path.getmtime(b)


def main(*args):
    """Main program, when run as a script: produce grammar pickle files.

    Calls load_grammar for each argument, a path to a grammar text file.
    """
    if not args:
        args = sys.argv[1:]
    logging.basicConfig(level=logging.INFO, stream=sys.stdout,
                        format='%(message)s')
    for gt in args:
        load_grammar(gt, save=True, force=True)
    return True

if __name__ == "__main__":
    sys.exit(int(not main()))
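A short usage sketch for the driver (again not part of the commit, and assuming the same opy/ layout): load_grammar() generates and pickles the tables the first time, then reloads the pickle on later runs via the _newer() check; with no convert callback, the parser yields raw (type, value, context, children) node tuples rather than pytree nodes.

# Sketch only: parse a string with the Driver above.
from pgen2 import driver

g = driver.load_grammar("Grammar.txt")
d = driver.Driver(g)                   # convert=None -> raw node tuples
tree = d.parse_string("x = 1 + 2\n")   # input must end with a newline
print(tree)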
Commit ad004f5