Fixes to the python grammar #1351

Draft
wants to merge 8 commits into base: master
Changes from all commits
1 change: 1 addition & 0 deletions examples/python-grammar-tests/Cpython-tests/.gitignore
@@ -0,0 +1 @@
*.py
19 changes: 19 additions & 0 deletions examples/python-grammar-tests/all_parse_succesfully.py
@@ -0,0 +1,19 @@
from __future__ import annotations

import logging
import os

from lark import Lark, UnexpectedInput, logger
from lark.indenter import PythonIndenter
from pathlib import Path
logger.setLevel(logging.DEBUG)
python_parser3 = Lark.open_from_package('lark', 'python.lark', ['grammars'],
        parser='lalr', postlex=PythonIndenter(), start=['file_input', 'single_input', 'eval_input'], debug=True)
# python_parser3.parse('def f(it, *varargs, **kwargs):\n return list(it)\n\n\n', start="single_input")

for file in (Path(__file__).parent / "Cpython-tests").glob("*.py"):
    try:
        tree = python_parser3.parse(file.read_text(encoding="utf-8"), start="file_input")
    except UnexpectedInput as e:
        print(f'File "{file}", line {e.line}')
        print(f"{e.__class__.__qualname__}: {str(e)}")
45 changes: 45 additions & 0 deletions examples/python-grammar-tests/get_std_tests.py
@@ -0,0 +1,45 @@
"""
Downloads the three test files from the Cpython repo for their parser.
These are then analyzed, preprocessed and then run by other scripts in this folder
"""
import urllib.request
import os

files = {
"Lib/test/test_grammar.py": ["test_with_statement"], # List of function names to comment out
"Lib/test/test_syntax.py": [],
"Lib/test/test_exceptions.py": [],
"Lib/test/test_patma.py": [],
"Lib/test/test_pep646_syntax.py": [],
}

url_template = "https://raw.githubusercontent.com/python/cpython/main/{}"
file_template = f"{os.path.dirname(__file__)}/CPython-tests/{{}}"

for filename in files:
    file = file_template.format(filename.rpartition("/")[2])
    print(file)
    urllib.request.urlretrieve(
        url_template.format(filename),
        file
    )
    if files[filename]:
        with open(file, "r+", encoding="utf-8") as f:
            out = []
            commenting_out = None
            f.seek(0)
            for line in f.readlines():
                if any(name in line for name in files[filename]):
                    commenting_out = line[:line.index("def")] + ' '
                    out.append(f"# {line}")
                    continue
                if commenting_out is not None and (
                        line.startswith(commenting_out) or
                        line.strip() == '' or
                        line.strip().startswith('#')):
                    out.append(f"# {line}")
                else:
                    commenting_out = None
                    out.append(line)
            f.seek(0)
            f.writelines(out)
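The commenting-out pass above is easiest to see on a small input. Here is a self-contained sketch of the same logic applied to an in-memory string (the helper name and sample text are made up for illustration, not part of the PR):

# Sketch of the filtering logic above, applied to a string instead of a downloaded file.
def comment_out(source: str, names: list[str]) -> str:
    out = []
    commenting_out = None
    for line in source.splitlines(keepends=True):
        if any(name in line for name in names):
            # Remember the indentation of the matched "def" line...
            commenting_out = line[:line.index("def")] + ' '
            out.append(f"# {line}")
            continue
        if commenting_out is not None and (
                line.startswith(commenting_out) or
                line.strip() == '' or
                line.strip().startswith('#')):
            # ...and comment out everything indented deeper, plus blank and comment lines.
            out.append(f"# {line}")
        else:
            commenting_out = None
            out.append(line)
    return "".join(out)

sample = (
    "def test_with_statement(self):\n"
    "    with open('x') as f:\n"
    "        pass\n"
    "\n"
    "def test_other(self):\n"
    "    pass\n"
)
print(comment_out(sample, ["test_with_statement"]))
# Only test_with_statement and its indented body (plus the blank line) come out commented.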
69 changes: 69 additions & 0 deletions examples/python-grammar-tests/semi_run_doctests.py
@@ -0,0 +1,69 @@
from __future__ import annotations
from __future__ import annotations

import logging
import os
import doctest

from lark import Lark, UnexpectedInput, logger
from lark.indenter import PythonIndenter
from pathlib import Path
logger.setLevel(logging.DEBUG)
python_parser3 = Lark.open_from_package('lark', 'python.lark', ['grammars'],
        parser='lalr', postlex=PythonIndenter(), start=['file_input', 'single_input', 'eval_input'], debug=True)

no_print = {
"fp": lambda *args, **kwargs: None,
"fn": lambda *args, **kwargs: None,

"tp": lambda *args, **kwargs: None,
"tn": lambda *args, **kwargs: None
}


print_all = {
"fp": print,
"fn": print,

"tp": print,
"tn": print
}

for file in (Path(__file__).parent / "Cpython-tests").glob("*.py"):
    text = file.read_text(encoding="utf-8")
    if "import doctest" in text:
        doc_parser = doctest.DocTestParser()
        docstring = next(t.value for t in python_parser3.lex(text) if "STRING" in t.type)
        examples = doc_parser.get_examples(eval(docstring), str(file))
        data = {"fp": 0, "tp": 0, "fn": 0, "tn": 0}
        if "test_pep646_syntax" in file.name:
            functions = print_all
        else:
            functions = no_print
        for example in examples:
            try:
                tree = python_parser3.parse(example.source + "\n", start="single_input")
                err = None
            except UnexpectedInput as e:
                tree = None
                err = e
            if example.exc_msg is not None:
                if err is None:
                    functions["fp"](f"Unexpected success with example:\n{example.source.rstrip()}")
                    functions["fp"]("Expected error message:", example.exc_msg.rstrip())
                    functions["fp"]()
                    data["fp"] += 1
                else:
                    functions["tn"]("Correctly errored on:\n", example.source.rstrip())
                    data["tn"] += 1
            else:
                if err is not None:
                    functions["fn"](f"Unexpected failure with example:\n{example.source.rstrip()}")
                    functions["fn"](f"Got error message: {err.__class__.__qualname__}: {str(err)}")
                    functions["fn"](repr(example.source))
                    functions["fn"]()
                    data["fn"] += 1
                else:
                    functions["tp"]("Correctly parsed:\n", example.source.rstrip())
                    data["tp"] += 1
        print(file, data)
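The fp/fn/tp/tn split above hinges on doctest's Example objects: exc_msg is non-None exactly when an example's expected output is a traceback. A minimal sketch of that API (the sample docstring is made up, not taken from the CPython tests):

# What doctest.DocTestParser().get_examples() returns for a tiny docstring.
import doctest

sample = '''
>>> 1 + 1
2
>>> int("x")
Traceback (most recent call last):
    ...
ValueError: invalid literal for int() with base 10: 'x'
'''

for ex in doctest.DocTestParser().get_examples(sample, "<sample>"):
    # ex.source is the statement to run; ex.exc_msg is the expected exception
    # message, or None when the example is expected to succeed.
    print(repr(ex.source), "->", repr(ex.exc_msg))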
70 changes: 35 additions & 35 deletions lark/grammars/python.lark
@@ -11,30 +11,31 @@
// NB: compound_stmt in single_input is followed by extra NEWLINE!
//

single_input: _NEWLINE | simple_stmt | compound_stmt _NEWLINE
single_input: _NEWLINE* (simple_stmt | compound_stmt _NEWLINE*)
file_input: (_NEWLINE | stmt)*
eval_input: testlist _NEWLINE*

decorator: "@" dotted_name [ "(" [arguments] ")" ] _NEWLINE
decorator: "@" test _NEWLINE
decorators: decorator+
decorated: decorators (classdef | funcdef | async_funcdef)

async_funcdef: "async" funcdef
funcdef: "def" name "(" [parameters] ")" ["->" test] ":" suite

parameters: paramvalue ("," paramvalue)* ["," SLASH ("," paramvalue)*] ["," [starparams | kwparams]]
| starparams
| kwparams

SLASH: "/" // Otherwise the it will completely disappear and it will be undisguisable in the result
starparams: (starparam | starguard) poststarparams
starparam: "*" typedparam
starguard: "*"
poststarparams: ("," paramvalue)* ["," kwparams]
kwparams: "**" typedparam ","?
parameters: slash_params ("," paramvalue)* ("," star_etc?)?
| paramvalue ("," paramvalue)* ("," star_etc?)?
| star_etc

?paramvalue: typedparam ("=" test)?
?typedparam: name (":" test)?
slash_params: paramvalue ("," paramvalue)* "," "/"
star_etc: kwds ","?
| "*" typedstarparam ("," paramvalue)* ("," kwds)? ","?
| "*" ("," paramvalue)+ ("," kwds)? ","?
kwds: "**" typedparam

paramvalue: typedparam ("=" test)?
typedparam: name (":" test)?
typedstarparam: name (":" (test | star_expr))?


lambdef: "lambda" [lambda_params] ":" test
@@ -95,16 +96,18 @@ for_stmt: "for" exprlist "in" testlist ":" suite ["else" ":" suite]
try_stmt: "try" ":" suite except_clauses ["else" ":" suite] [finally]
| "try" ":" suite finally -> try_finally
finally: "finally" ":" suite
except_clauses: except_clause+
except_clauses: (except_clause+ | except_star_clause+)
except_star_clause: EXCEPT_STAR [test ["as" name]] ":" suite
except_clause: "except" [test ["as" name]] ":" suite
EXCEPT_STAR.1: "except*"
// NB compile.c makes sure that the default except clause is last


with_stmt: "with" with_items ":" suite
with_items: with_item ("," with_item)*
with_item: test ["as" name]
with_item: test ["as" (name|("(" _cs_list{name} ")"))]

match_stmt: "match" test ":" _NEWLINE _INDENT case+ _DEDENT
match_stmt: "match" testlist ":" _NEWLINE _INDENT case+ _DEDENT

case: "case" pattern ["if" test] ":" suite

@@ -125,11 +128,13 @@ case: "case" pattern ["if" test] ":" suite

literal_pattern: inner_literal_pattern

?inner_literal_pattern: "None" -> const_none
| "True" -> const_true
| "False" -> const_false
| STRING -> string
| number
!?inner_literal_pattern: "None" -> const_none
| "True" -> const_true
| "False" -> const_false
| STRING -> string
| number
| "-" number -> neg_number
| "-"? number ("+"|"-") number -> complex_number

attr_pattern: NAME ("." NAME)+ -> value

@@ -142,11 +147,9 @@ _sequence_pattern: (sequence_item_pattern ("," sequence_item_pattern)* ","?)?
| "*" NAME -> star_pattern

class_pattern: name_or_attr_pattern "(" [arguments_pattern ","?] ")"
arguments_pattern: pos_arg_pattern ["," keyws_arg_pattern]
| keyws_arg_pattern -> no_pos_arguments
arguments_pattern: as_pattern ("," as_pattern)* ("," keyw_arg_pattern ("," keyw_arg_pattern)*)?
| keyw_arg_pattern ("," keyw_arg_pattern)*

pos_arg_pattern: as_pattern ("," as_pattern)*
keyws_arg_pattern: keyw_arg_pattern ("," keyw_arg_pattern)*
keyw_arg_pattern: NAME "=" as_pattern


@@ -222,12 +225,12 @@ _tuple_inner: test_or_star_expr (("," test_or_star_expr)+ [","] | ",")

?subscriptlist: subscript
| subscript (("," subscript)+ [","] | ",") -> subscript_tuple
?subscript: test | ([test] ":" [test] [sliceop]) -> slice
?subscript: test | star_expr | ([test] ":" [test] [sliceop]) -> slice
sliceop: ":" [test]
?exprlist: (expr|star_expr)
| (expr|star_expr) (("," (expr|star_expr))+ [","]|",")
?testlist: test | testlist_tuple
testlist_tuple: test (("," test)+ [","] | ",")
testlist_tuple: (test|star_expr) (("," (test|star_expr))+ [","] | ",")
_dict_exprlist: (key_value | "**" expr) ("," (key_value | "**" expr))* [","]

key_value: test ":" test
@@ -250,15 +253,12 @@ kwargs: "**" test ("," argvalue)*
?argvalue: test ("=" test)?


comprehension{comp_result}: comp_result comp_fors [comp_if]
comp_fors: comp_for+
comp_for: [ASYNC] "for" exprlist "in" or_test
comprehension{comp_result}: comp_result comp_forifs
comp_forifs: comp_forif+
comp_forif: [ASYNC] "for" exprlist "in" or_test comp_if*
ASYNC: "async"
?comp_if: "if" test_nocond

// not used in grammar, but may appear in "node" passed from Parser to Compiler
encoding_decl: name

yield_expr: "yield" [testlist]
| "yield" "from" test -> yield_from

Expand All @@ -267,7 +267,7 @@ string: STRING | LONG_STRING

// Other terminals

_NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+
_NEWLINE: (COMMENT? /\r?\n[\t ]*/ )+

%ignore /[\t \f]+/ // WS
%ignore /\\[\t \f]*\r?\n/ // LINE_CONT
@@ -281,7 +281,7 @@ _NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+
NAME: /[^\W\d]\w*/
COMMENT: /#[^\n]*/

STRING: /([ubf]?r?|r[ubf])("(?!"").*?(?<!\\)(\\\\)*?"|'(?!'').*?(?<!\\)(\\\\)*?')/i
STRING: /([ubf]?r?|r[ubf])("(?!"")([^\\\n"]|\\(.|\n))*"|'(?!'')([^\\\n']|\\(.|\n))*')/i
LONG_STRING: /([ubf]?r?|r[ubf])(""".*?(?<!\\)(\\\\)*?"""|'''.*?(?<!\\)(\\\\)*?''')/is

_SPECIAL_DEC: "0".."9" ("_"? "0".."9" )*
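To make the grammar changes concrete, here is a sketch that feeds a few of the constructs they target through the bundled grammar, loaded the same way as in the test scripts above: except* groups (PEP 654), parenthesized with targets, starred annotations and subscripts (PEP 646), and tuple subjects plus complex literals in match. The snippets are illustrative and are expected to parse only with a lark build that includes these grammar changes.

# Illustrative snippets aimed at the revised python.lark; on an older grammar
# they are expected to raise UnexpectedInput instead of parsing.
from lark import Lark
from lark.indenter import PythonIndenter

parser = Lark.open_from_package('lark', 'python.lark', ['grammars'],
                                parser='lalr', postlex=PythonIndenter(),
                                start=['file_input'])

snippets = [
    "try:\n    f()\nexcept* ValueError as e:\n    pass\n",   # except_star_clause
    "with ctx() as (a, b):\n    pass\n",                     # parenthesized with_item targets
    "def f(*args: *Ts) -> None:\n    pass\n",                # typedstarparam (PEP 646)
    "x = m[0, *idx]\n",                                      # star_expr inside a subscript
    "match p, q:\n    case 1 + 2j:\n        pass\n",         # testlist subject, complex literal pattern
]
for src in snippets:
    parser.parse(src, start='file_input')
    print("parsed:", src.splitlines()[0])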
2 changes: 1 addition & 1 deletion lark/parsers/lalr_parser.py
@@ -101,7 +101,7 @@ def parse_from_state(self, state: ParserState, last_token: Optional[Token]=None)
                assert token is not None
                state.feed_token(token)

            end_token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1)
            end_token = Token.new_borrow_pos('$END', '', token) if token is not None else Token('$END', '', 0, 1, 1)
            return state.feed_token(end_token, True)
        except UnexpectedInput as e:
            try:
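The one-line change above swaps a truthiness check for an identity check. The likely motivation: Token subclasses str, so a real token whose value happens to be the empty string is falsy, and `if token` would then wrongly fall back to the default $END position (line 1, column 1) instead of borrowing the last token's position. A small check of the distinction (illustrative, not from the PR):

# Token subclasses str, so truthiness follows the token's text, not its presence.
from lark import Token

tok = Token('_DEDENT', '')   # a real token object whose string value is empty
assert not tok               # falsy: `if tok` behaves as if there were no token at all
assert tok is not None       # the identity check used in the fix still sees it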