Skip to content

Commit

Permalink
bpo-35808: Retire pgen and use pgen2 to generate the parser (GH-11814)
Browse files Browse the repository at this point in the history
Pgen is the oldest piece of technology in the CPython repository; building it requires various #if[n]def PGEN hacks in other parts of the code, and it also depends more and more on CPython internals. This commit removes the old pgen C code and replaces it with a new version implemented in pure Python. This is a modified and adapted version of lib2to3/pgen2 that can generate grammar files compatible with the current parser.

This commit also eliminates all the #ifdef and code branches related to pgen, simplifying the code and making it more maintainable. The regen-grammar step now uses $(PYTHON_FOR_REGEN), which can be any version of the interpreter, so the new pgen code maintains compatibility with older versions of the interpreter (this also allows regenerating the grammar with the current CI solution, which uses Python 3.5). The new pgen Python module also makes use of the Grammar/Tokens file that holds the token specification, so it is always kept in sync and avoids having to maintain duplicate token definitions.
  • Loading branch information
pablogsal committed Mar 1, 2019
1 parent 7eebbbd commit 1f24a71
Show file tree
Hide file tree
Showing 27 changed files with 1,480 additions and 2,684 deletions.
2 changes: 0 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,6 @@ PCbuild/arm32/
PCbuild/obj/
PCbuild/win32/
.purify
Parser/pgen
Parser/pgen.exe
__pycache__
autom4te.cache
build/
Expand Down
18 changes: 0 additions & 18 deletions Include/metagrammar.h

This file was deleted.

3 changes: 0 additions & 3 deletions Include/parsetok.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,7 @@ extern "C" {

typedef struct {
int error;
#ifndef PGEN
/* The filename is useless for pgen, see comment in tok_state structure */
PyObject *filename;
#endif
int lineno;
int offset;
char *text; /* UTF-8-encoded string */
Expand Down
18 changes: 0 additions & 18 deletions Include/pgen.h

This file was deleted.

42 changes: 4 additions & 38 deletions Makefile.pre.in
Original file line number Diff line number Diff line change
Expand Up @@ -290,40 +290,21 @@ LIBFFI_INCLUDEDIR= @LIBFFI_INCLUDEDIR@

##########################################################################
# Parser
PGEN= Parser/pgen$(EXE)

POBJS= \
Parser/acceler.o \
Parser/grammar1.o \
Parser/listnode.o \
Parser/node.o \
Parser/parser.o \
Parser/bitset.o \
Parser/metagrammar.o \
Parser/firstsets.o \
Parser/grammar.o \
Parser/token.o \
Parser/pgen.o

PARSER_OBJS= $(POBJS) Parser/myreadline.o Parser/parsetok.o Parser/tokenizer.o

PGOBJS= \
Objects/obmalloc.o \
Python/dynamic_annotations.o \
Python/mysnprintf.o \
Python/pyctype.o \
Parser/tokenizer_pgen.o \
Parser/printgrammar.o \
Parser/parsetok_pgen.o \
Parser/pgenmain.o

PARSER_HEADERS= \
$(srcdir)/Parser/parser.h \
$(srcdir)/Include/parsetok.h \
$(srcdir)/Parser/tokenizer.h

PGENOBJS= $(POBJS) $(PGOBJS)

##########################################################################
# Python

Expand Down Expand Up @@ -802,31 +783,18 @@ Python/sysmodule.o: $(srcdir)/Python/sysmodule.c Makefile

$(IO_OBJS): $(IO_H)

$(PGEN): $(PGENOBJS)
$(CC) $(OPT) $(PY_CORE_LDFLAGS) $(PGENOBJS) $(LIBS) -o $(PGEN)

.PHONY: regen-grammar
regen-grammar: $(PGEN)
regen-grammar: regen-token
# Regenerate Include/graminit.h and Python/graminit.c
# from Grammar/Grammar using pgen
@$(MKDIR_P) Include
$(PGEN) $(srcdir)/Grammar/Grammar \
$(PYTHON_FOR_REGEN) -m Parser.pgen $(srcdir)/Grammar/Grammar \
$(srcdir)/Grammar/Tokens \
$(srcdir)/Include/graminit.h.new \
$(srcdir)/Python/graminit.c.new
$(UPDATE_FILE) $(srcdir)/Include/graminit.h $(srcdir)/Include/graminit.h.new
$(UPDATE_FILE) $(srcdir)/Python/graminit.c $(srcdir)/Python/graminit.c.new

Parser/grammar.o: $(srcdir)/Parser/grammar.c \
$(srcdir)/Include/token.h \
$(srcdir)/Include/grammar.h
Parser/metagrammar.o: $(srcdir)/Parser/metagrammar.c

Parser/tokenizer_pgen.o: $(srcdir)/Parser/tokenizer.c
Parser/parsetok_pgen.o: $(srcdir)/Parser/parsetok.c
Parser/printgrammar.o: $(srcdir)/Parser/printgrammar.c

Parser/pgenmain.o: $(srcdir)/Include/parsetok.h

.PHONY=regen-ast
regen-ast:
# Regenerate Include/Python-ast.h using Parser/asdl_c.py -h
Expand Down Expand Up @@ -1016,7 +984,6 @@ PYTHON_HEADERS= \
$(srcdir)/Include/longobject.h \
$(srcdir)/Include/marshal.h \
$(srcdir)/Include/memoryobject.h \
$(srcdir)/Include/metagrammar.h \
$(srcdir)/Include/methodobject.h \
$(srcdir)/Include/modsupport.h \
$(srcdir)/Include/moduleobject.h \
Expand All @@ -1028,7 +995,6 @@ PYTHON_HEADERS= \
$(srcdir)/Include/osdefs.h \
$(srcdir)/Include/osmodule.h \
$(srcdir)/Include/patchlevel.h \
$(srcdir)/Include/pgen.h \
$(srcdir)/Include/pgenheaders.h \
$(srcdir)/Include/pyarena.h \
$(srcdir)/Include/pycapsule.h \
Expand Down Expand Up @@ -1771,7 +1737,7 @@ profile-removal:
rm -f profile-run-stamp

clobber: clean profile-removal
-rm -f $(BUILDPYTHON) $(PGEN) $(LIBRARY) $(LDLIBRARY) $(DLLLIBRARY) \
-rm -f $(BUILDPYTHON) $(LIBRARY) $(LDLIBRARY) $(DLLLIBRARY) \
tags TAGS \
config.cache config.log pyconfig.h Modules/config.c
-rm -rf build platform
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Retire pgen and use a modified version of pgen2 to generate the parser.
Patch by Pablo Galindo.
8 changes: 0 additions & 8 deletions Misc/coverity_model.c
Original file line number Diff line number Diff line change
Expand Up @@ -92,14 +92,6 @@ wchar_t *Py_DecodeLocale(const char* arg, size_t *size)
return w;
}

/* Parser/pgenmain.c */
grammar *getgrammar(const char *filename)
{
grammar *g;
__coverity_tainted_data_sink__(filename);
return g;
}

/* Python/marshal.c */

static Py_ssize_t r_string(char *s, Py_ssize_t n, RFILE *p)
Expand Down
6 changes: 0 additions & 6 deletions PCbuild/pythoncore.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,6 @@
<ClInclude Include="..\Include\longobject.h" />
<ClInclude Include="..\Include\marshal.h" />
<ClInclude Include="..\Include\memoryobject.h" />
<ClInclude Include="..\Include\metagrammar.h" />
<ClInclude Include="..\Include\methodobject.h" />
<ClInclude Include="..\Include\modsupport.h" />
<ClInclude Include="..\Include\moduleobject.h" />
Expand All @@ -175,7 +174,6 @@
<ClInclude Include="..\Include\osmodule.h" />
<ClInclude Include="..\Include\parsetok.h" />
<ClInclude Include="..\Include\patchlevel.h" />
<ClInclude Include="..\Include\pgen.h" />
<ClInclude Include="..\Include\pgenheaders.h" />
<ClInclude Include="..\Include\pyhash.h" />
<ClInclude Include="..\Include\py_curses.h" />
Expand Down Expand Up @@ -372,12 +370,8 @@
<ClCompile Include="..\Objects\unicodeobject.c" />
<ClCompile Include="..\Objects\weakrefobject.c" />
<ClCompile Include="..\Parser\acceler.c" />
<ClCompile Include="..\Parser\bitset.c" />
<ClCompile Include="..\Parser\firstsets.c" />
<ClCompile Include="..\Parser\grammar.c" />
<ClCompile Include="..\Parser\grammar1.c" />
<ClCompile Include="..\Parser\listnode.c" />
<ClCompile Include="..\Parser\metagrammar.c" />
<ClCompile Include="..\Parser\myreadline.c" />
<ClCompile Include="..\Parser\node.c" />
<ClCompile Include="..\Parser\parser.c" />
Expand Down
18 changes: 0 additions & 18 deletions PCbuild/pythoncore.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -234,9 +234,6 @@
<ClInclude Include="..\Include\memoryobject.h">
<Filter>Include</Filter>
</ClInclude>
<ClInclude Include="..\Include\metagrammar.h">
<Filter>Include</Filter>
</ClInclude>
<ClInclude Include="..\Include\methodobject.h">
<Filter>Include</Filter>
</ClInclude>
Expand Down Expand Up @@ -270,9 +267,6 @@
<ClInclude Include="..\Include\patchlevel.h">
<Filter>Include</Filter>
</ClInclude>
<ClInclude Include="..\Include\pgen.h">
<Filter>Include</Filter>
</ClInclude>
<ClInclude Include="..\Include\pgenheaders.h">
<Filter>Include</Filter>
</ClInclude>
Expand Down Expand Up @@ -836,24 +830,12 @@
<ClCompile Include="..\Parser\acceler.c">
<Filter>Parser</Filter>
</ClCompile>
<ClCompile Include="..\Parser\bitset.c">
<Filter>Parser</Filter>
</ClCompile>
<ClCompile Include="..\Parser\firstsets.c">
<Filter>Parser</Filter>
</ClCompile>
<ClCompile Include="..\Parser\grammar.c">
<Filter>Parser</Filter>
</ClCompile>
<ClCompile Include="..\Parser\grammar1.c">
<Filter>Parser</Filter>
</ClCompile>
<ClCompile Include="..\Parser\listnode.c">
<Filter>Parser</Filter>
</ClCompile>
<ClCompile Include="..\Parser\metagrammar.c">
<Filter>Parser</Filter>
</ClCompile>
<ClCompile Include="..\Parser\myreadline.c">
<Filter>Parser</Filter>
</ClCompile>
Expand Down
66 changes: 0 additions & 66 deletions Parser/bitset.c

This file was deleted.

Loading

0 comments on commit 1f24a71

Please sign in to comment.