Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bpo-36143: Regenerate Lib/keyword.py from the Grammar and Tokens file using pgen #12456

Merged
merged 5 commits into from Mar 25, 2019
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
127 changes: 43 additions & 84 deletions Lib/keyword.py 100755 → 100644
@@ -1,98 +1,57 @@
\
#! /usr/bin/env python3
pablogsal marked this conversation as resolved.
Show resolved Hide resolved

"""Keywords (from "graminit.c")
"""Keywords (from "Grammar/Grammar")

This file is automatically generated; please don't muck it up!

To update the symbols in this file, 'cd' to the top directory of
the python source tree after building the interpreter and run:
the python source tree and run:

./python Lib/keyword.py
python -m Parser.pgen.keywordgen ./Grammar/Grammar \
./Grammar/Tokens \
./Lib/keyword.py

Alternatively, you can run 'make regen-keyword'.
"""

__all__ = ["iskeyword", "kwlist"]

kwlist = [
#--start keywords--
'False',
'None',
'True',
'and',
'as',
'assert',
'break',
'class',
'continue',
'def',
'del',
'elif',
'else',
'except',
'finally',
'for',
'from',
'global',
'if',
'import',
'in',
'is',
'lambda',
'nonlocal',
'not',
'or',
'pass',
'raise',
'return',
'try',
'while',
'with',
'yield',
#--end keywords--
]

kwlist.append('async')
kwlist.append('await')
kwlist.sort()
'False',
'None',
'True',
'and',
'as',
'assert',
'async',
'await',
'break',
'class',
'continue',
'def',
'del',
'elif',
'else',
'except',
'finally',
'for',
'from',
'global',
'if',
'import',
'in',
'is',
'lambda',
'nonlocal',
'not',
'or',
'pass',
'raise',
'return',
'try',
'while',
'with',
'yield']
pablogsal marked this conversation as resolved.
Show resolved Hide resolved

iskeyword = frozenset(kwlist).__contains__

def main():
    """Regenerate the keyword list in a keyword.py skeleton from graminit.c.

    Usage: keyword.py [graminit.c [keyword.py]]

    Scans the input file (default ``Python/graminit.c``) for keyword
    entries of the form ``{1, "name"}`` and splices the sorted names
    between the ``#--start keywords--`` and ``#--end keywords--`` marker
    lines of the output file (default ``Lib/keyword.py``), preserving the
    output file's newline convention.  Exits with status 1 if the output
    file lacks the markers.
    """
    import sys
    import re

    args = sys.argv[1:]
    # Explicit conditionals instead of the fragile ``a and b or c`` idiom,
    # which silently fell back to the default when args[0] was an empty
    # string.
    iptfile = args[0] if args else "Python/graminit.c"
    optfile = args[1] if len(args) > 1 else "Lib/keyword.py"

    # Load the output skeleton from the target, taking care to preserve its
    # newline convention.  (Named ``template`` so as not to shadow the
    # ``format`` builtin.)
    with open(optfile, newline='') as fp:
        template = fp.readlines()
    nl = template[0][len(template[0].strip()):] if template else '\n'

    # Scan the source file for keyword definitions: lines like {1, "if"},
    with open(iptfile) as fp:
        strprog = re.compile('"([^"]+)"')
        lines = []
        for line in fp:
            if '{1, "' in line:
                match = strprog.search(line)
                if match:
                    lines.append("    '" + match.group(1) + "'," + nl)
    lines.sort()

    # Insert the sorted keyword lines between the marker lines.
    try:
        start = template.index("#--start keywords--" + nl) + 1
        end = template.index("#--end keywords--" + nl)
        template[start:end] = lines
    except ValueError:
        sys.stderr.write("target does not contain format markers\n")
        sys.exit(1)

    # Write the output file, again preserving the newline convention.
    with open(optfile, 'w', newline='') as fp:
        fp.writelines(template)

if __name__ == "__main__":
    main()
119 changes: 9 additions & 110 deletions Lib/test/test_keyword.py
@@ -1,20 +1,5 @@
import keyword
import unittest
from test import support
import filecmp
import os
import sys
import subprocess
import shutil
import textwrap

# Lib/keyword.py doubles as the generator script exercised by these tests.
KEYWORD_FILE = support.findfile('keyword.py')
# C grammar tables the keywords are scraped from (exists only in a source
# build tree; tests needing it are skipped otherwise).
GRAMMAR_FILE = os.path.join(os.path.split(__file__)[0],
                            '..', '..', 'Python', 'graminit.c')
# Scratch file names created (and cleaned up) by individual tests.
TEST_PY_FILE = 'keyword_test.py'
GRAMMAR_TEST_FILE = 'graminit_test.c'
PY_FILE_WITHOUT_KEYWORDS = 'minimal_keyword.py'
# A path that must not exist, used to provoke error paths.
NONEXISTENT_FILE = 'not_here.txt'


class Test_iskeyword(unittest.TestCase):
Expand All @@ -35,103 +20,17 @@ def test_changing_the_kwlist_does_not_affect_iskeyword(self):
keyword.kwlist = ['its', 'all', 'eggs', 'beans', 'and', 'a', 'slice']
self.assertFalse(keyword.iskeyword('eggs'))

def test_all_keywords_fail_to_be_used_as_names(self):
for key in keyword.kwlist:
with self.assertRaises(SyntaxError):
exec(f"{key} = 42")

class TestKeywordGeneration(unittest.TestCase):

    def _copy_file_without_generated_keywords(self, source_file, dest_file):
        """Copy source_file to dest_file, dropping the generated keywords.

        Every line strictly between the '#--start keywords--' and
        '#--end keywords--' marker lines is omitted; the markers
        themselves are kept.  Files are handled in binary mode so the
        source's newline convention is preserved byte-for-byte.
        """
        with open(source_file, 'rb') as fp:
            lines = fp.readlines()
        # Newline convention, taken from whatever trails the first line's
        # stripped content (e.g. b'\n' or b'\r\n').
        nl = lines[0][len(lines[0].strip()):]
        with open(dest_file, 'wb') as fp:
            # Everything up to and including the start marker...
            fp.writelines(lines[:lines.index(b"#--start keywords--" + nl) + 1])
            # ...then everything from the end marker onward.
            fp.writelines(lines[lines.index(b"#--end keywords--" + nl):])

    def _generate_keywords(self, grammar_file, target_keyword_py_file):
        """Run Lib/keyword.py as a script on the given grammar and target.

        Returns a ``(returncode, stderr_bytes)`` tuple from the child
        process; a successful regeneration yields ``(0, b'')``.
        """
        proc = subprocess.Popen([sys.executable,
                                 KEYWORD_FILE,
                                 grammar_file,
                                 target_keyword_py_file], stderr=subprocess.PIPE)
        # communicate() also waits for the process, so returncode is set.
        stderr = proc.communicate()[1]
        return proc.returncode, stderr

    @unittest.skipIf(not os.path.exists(GRAMMAR_FILE),
                     'test only works from source build directory')
    def test_real_grammar_and_keyword_file(self):
        """Regenerating keyword.py from the real grammar reproduces it.

        Strips the generated keyword block from a copy of keyword.py,
        regenerates it from Python/graminit.c, and checks the result is
        byte-identical to the checked-in file.
        """
        self._copy_file_without_generated_keywords(KEYWORD_FILE, TEST_PY_FILE)
        self.addCleanup(support.unlink, TEST_PY_FILE)
        # Sanity check: the stripped copy must actually differ.
        self.assertFalse(filecmp.cmp(KEYWORD_FILE, TEST_PY_FILE))
        self.assertEqual((0, b''), self._generate_keywords(GRAMMAR_FILE,
                                                           TEST_PY_FILE))
        self.assertTrue(filecmp.cmp(KEYWORD_FILE, TEST_PY_FILE))

def test_grammar(self):
self._copy_file_without_generated_keywords(KEYWORD_FILE, TEST_PY_FILE)
self.addCleanup(support.unlink, TEST_PY_FILE)
with open(GRAMMAR_TEST_FILE, 'w') as fp:
# Some of these are probably implementation accidents.
fp.writelines(textwrap.dedent("""\
{2, 1},
{11, "encoding_decl", 0, 2, states_79,
"\000\000\040\000\000\000\000\000\000\000\000\000"
"\000\000\000\000\000\000\000\000\000"},
{1, "jello"},
{326, 0},
{1, "turnip"},
\t{1, "This one is tab indented"
{278, 0},
{1, "crazy but legal"
"also legal" {1, "
{1, "continue"},
{1, "lemon"},
{1, "tomato"},
{1, "wigii"},
{1, 'no good'}
{283, 0},
{1, "too many spaces"}"""))
self.addCleanup(support.unlink, GRAMMAR_TEST_FILE)
self._generate_keywords(GRAMMAR_TEST_FILE, TEST_PY_FILE)
expected = [
" 'This one is tab indented',",
" 'also legal',",
" 'continue',",
" 'crazy but legal',",
" 'jello',",
" 'lemon',",
" 'tomato',",
" 'turnip',",
" 'wigii',",
]
with open(TEST_PY_FILE) as fp:
lines = fp.read().splitlines()
start = lines.index("#--start keywords--") + 1
end = lines.index("#--end keywords--")
actual = lines[start:end]
self.assertEqual(actual, expected)

    def test_empty_grammar_results_in_no_keywords(self):
        """Regenerating from an empty grammar leaves the keyword list empty."""
        self._copy_file_without_generated_keywords(KEYWORD_FILE,
                                                   PY_FILE_WITHOUT_KEYWORDS)
        self.addCleanup(support.unlink, PY_FILE_WITHOUT_KEYWORDS)
        shutil.copyfile(KEYWORD_FILE, TEST_PY_FILE)
        self.addCleanup(support.unlink, TEST_PY_FILE)
        # os.devnull serves as an empty grammar file; regeneration must
        # succeed quietly...
        self.assertEqual((0, b''), self._generate_keywords(os.devnull,
                                                           TEST_PY_FILE))
        # ...and produce a file with no keyword entries at all.
        self.assertTrue(filecmp.cmp(TEST_PY_FILE, PY_FILE_WITHOUT_KEYWORDS))

    def test_keywords_py_without_markers_produces_error(self):
        """A target lacking the #--start/end markers fails with a message."""
        # os.devnull has no marker lines, so the generator must exit non-zero.
        rc, stderr = self._generate_keywords(os.devnull, os.devnull)
        self.assertNotEqual(rc, 0)
        self.assertRegex(stderr, b'does not contain format markers')

    def test_missing_grammar_file_produces_error(self):
        """A nonexistent grammar input file is reported on stderr."""
        rc, stderr = self._generate_keywords(NONEXISTENT_FILE, KEYWORD_FILE)
        self.assertNotEqual(rc, 0)
        # The error message should mention the missing file's name.
        self.assertRegex(stderr, b'(?ms)' + NONEXISTENT_FILE.encode())
def test_async_and_await_are_keywords(self):
self.assertIn("async", keyword.kwlist)
self.assertIn("await", keyword.kwlist)

    def test_missing_keywords_py_file_produces_error(self):
        """A nonexistent output target file is reported on stderr."""
        rc, stderr = self._generate_keywords(os.devnull, NONEXISTENT_FILE)
        self.assertNotEqual(rc, 0)
        # The error message should mention the missing file's name.
        self.assertRegex(stderr, b'(?ms)' + NONEXISTENT_FILE.encode())
def test_keywords_are_sorted(self):
self.assertListEqual(sorted(keyword.kwlist), keyword.kwlist)


if __name__ == "__main__":
Expand Down
11 changes: 10 additions & 1 deletion Makefile.pre.in
Expand Up @@ -724,7 +724,7 @@ regen-importlib: Programs/_freeze_importlib
# Regenerate all generated files

regen-all: regen-opcode regen-opcode-targets regen-typeslots regen-grammar \
regen-token regen-symbol regen-ast regen-importlib clinic
regen-token regen-keyword regen-symbol regen-ast regen-importlib clinic

############################################################################
# Special rules for object files
Expand Down Expand Up @@ -843,6 +843,15 @@ regen-token:
$(srcdir)/Grammar/Tokens \
$(srcdir)/Lib/token.py

.PHONY: regen-keyword
regen-keyword:
# Regenerate Lib/keyword.py from Grammar/Grammar and Grammar/Tokens
# using Parser/pgen
$(PYTHON_FOR_REGEN) -m Parser.pgen.keywordgen $(srcdir)/Grammar/Grammar \
$(srcdir)/Grammar/Tokens \
$(srcdir)/Lib/keyword.py.new
$(UPDATE_FILE) $(srcdir)/Lib/keyword.py $(srcdir)/Lib/keyword.py.new

.PHONY: regen-symbol
regen-symbol: $(srcdir)/Include/graminit.h
# Regenerate Lib/symbol.py from Include/graminit.h
Expand Down
@@ -0,0 +1,2 @@
Regenerate :mod:`keyword` from the Grammar and Tokens file using pgen. Patch
by Pablo Galindo.
61 changes: 61 additions & 0 deletions Parser/pgen/keywordgen.py
@@ -0,0 +1,61 @@
"""Generate Lib/keyword.py from the Grammar and Tokens files using pgen"""

import argparse

from .pgen import ParserGenerator

TEMPLATE = r'''\
pablogsal marked this conversation as resolved.
Show resolved Hide resolved
#! /usr/bin/env python3

"""Keywords (from "Grammar/Grammar")

This file is automatically generated; please don't muck it up!

To update the symbols in this file, 'cd' to the top directory of
the python source tree and run:

python -m Parser.pgen.keywordgen ./Grammar/Grammar \
pablogsal marked this conversation as resolved.
Show resolved Hide resolved
./Grammar/Tokens \
./Lib/keyword.py
pablogsal marked this conversation as resolved.
Show resolved Hide resolved

Alternatively, you can run 'make regen-keyword'.
vstinner marked this conversation as resolved.
Show resolved Hide resolved
"""

__all__ = ["iskeyword", "kwlist"]

kwlist = [
{keywords}]

iskeyword = frozenset(kwlist).__contains__
'''
pablogsal marked this conversation as resolved.
Show resolved Hide resolved

EXTRA_KEYWORDS = ["async", "await"]


def main():
    """Command-line entry point: regenerate Lib/keyword.py.

    Parses the grammar and token files with pgen, collects the grammar's
    keywords together with EXTRA_KEYWORDS, and writes the sorted result
    into the kwlist of the TEMPLATE skeleton.
    """
    # The description previously read "Parser generator main program.",
    # which was copied from pgen's own CLI and was misleading here.
    parser = argparse.ArgumentParser(
        description="Generate the Lib/keyword.py file from the grammar."
    )
    parser.add_argument(
        "grammar", type=str,
        help="The file with the grammar definition in EBNF format"
    )
    parser.add_argument(
        "tokens", type=str,
        help="The file with the token definitions"
    )
    parser.add_argument(
        "keyword_file",
        type=argparse.FileType('w'),
        help="The path to write the keyword definitions",
    )
    args = parser.parse_args()
    p = ParserGenerator(args.grammar, args.tokens)
    grammar = p.make_grammar()

    with args.keyword_file as thefile:
        # 'async' and 'await' are absent from the grammar's keyword table,
        # so merge them in before sorting.
        all_keywords = sorted(list(grammar.keywords) + EXTRA_KEYWORDS)

        # One quoted keyword per line, indented to line up inside kwlist.
        keywords = ",\n    ".join("'{}'".format(keyword)
                                  for keyword in all_keywords)
        thefile.write(TEMPLATE.lstrip().format(keywords=keywords))


if __name__ == "__main__":
    main()