Permalink
Browse files

This pure Python compiler can now compile print("Hello World")!

I glued together the pure Python pgen2 parser (from lib2to3) and the
Python2.7 compiler module (deprecated in Python 3).

parse.py is a front end to them both.  The shell functions in run.sh
use it to generate a .pyc file that can be executed by the Python 3.4
interpreter.

Details:

- Copy in the Python 2.7 grammar and use it.  The 2to3 grammar from
  Python 3 has some productions that the compiler module can't handle.
  Note that deleting the print production at runtime allows us to parse
  at least some Python 3 code.

- Make the grammar pure input data, rather than being baked in:
  - Remove pygram.py from lib2to3.  Choose python2 vs python3 grammar in
    the parse.py tool.
  - Move the Symbols class to parse.py to remove host dependency on the
    stdlib 'symbol' module.
  - Initialize pytree.py from Symbols, i.e. the production IDs extracted
    by pgen2 from the grammar
  - Initialize compiler/transformer.py from Symbols

- Create a Pgen2Transformer class that uses the parse tree created by
  pgen2 rather than by the stdlib parsermodule.c.  The compiler module
  had always used the latter interface ('import parser').
  - The small py2st() function in parse.py adapts the pgen2 tree to tuples.

- Implement pretty-printing of the tuple-based parse tree.

- Add start_symbol parameter to ensure we're parsing the 'file_input'
  rule (not just the first rule in the grammar file!)

- Fix bad 2to3 conversion: self.next is a data attribute (a list of
  blocks), not an iterator method, so it must not become self.__next__.

- Fix string/unicode problems in types.CodeType instantiation.

- Update the .pyc header to include the source file size field added in
  Python 3 (currently written as 0).

- Shell script automation and tests.

- Cleanup: Remove compileFile.

Status: Hello world runs, but ./parse.py doesn't yet: the LOAD_LOCALS
bytecode is not found.  There's still a host dependency on the 'dis' module.
  • Loading branch information...
Andy Chu
Andy Chu committed Apr 1, 2017
1 parent d326616 commit 31c63a72f9a1005d9432bd216ad69525f8d8f26c
File renamed without changes.
View
@@ -28,4 +28,4 @@
from compiler.transformer import parse, parseFile
from compiler.visitor import walk
from compiler.pycodegen import compile, compileFile
from compiler.pycodegen import compile
View
@@ -20,7 +20,7 @@ def startBlock(self, block):
if self._debug:
if self.current:
print("end", repr(self.current))
print(" next", self.current.__next__)
print(" next", self.current.next)
print(" prev", self.current.prev)
print(" ", self.current.get_children())
print(repr(block))
@@ -120,8 +120,8 @@ def order_blocks(start_block, exit_block):
# before it.
dominators = {}
for b in remaining:
if __debug__ and b.__next__:
assert b is b.next[0].prev[0], (b, b.__next__)
if __debug__ and b.next:
assert b is b.next[0].prev[0], (b, b.next)
# Make sure every block appears in dominators, even if no
# other block must precede it.
dominators.setdefault(b, set())
@@ -151,7 +151,7 @@ def find_next():
while 1:
order.append(b)
remaining.discard(b)
if b.__next__:
if b.next:
b = b.next[0]
continue
elif b is not exit_block and not b.has_unconditional_transfer():
@@ -197,7 +197,7 @@ def addOutEdge(self, block):
def addNext(self, block):
self.next.append(block)
assert len(self.__next__) == 1, list(map(str, self.__next__))
assert len(self.next) == 1, list(map(str, self.next))
block.prev.append(self)
assert len(block.prev) == 1, list(map(str, block.prev))
@@ -216,11 +216,11 @@ def has_unconditional_transfer(self):
return op in self._uncond_transfer
def get_children(self):
return list(self.outEdges) + self.__next__
return list(self.outEdges) + self.next
def get_followers(self):
"""Get the whole list of followers, including the next block."""
followers = set(self.__next__)
followers = set(self.next)
# Blocks that must be emitted *after* this one, because of
# bytecode offsets (e.g. relative jumps) pointing to them.
for inst in self.insts:
@@ -536,12 +536,47 @@ def newCodeObject(self):
argcount = self.argcount
if self.flags & CO_VARKEYWORDS:
argcount = argcount - 1
return types.CodeType(argcount, nlocals, self.stacksize, self.flags,
self.lnotab.getCode(), self.getConsts(),
tuple(self.names), tuple(self.varnames),
self.filename, self.name, self.lnotab.firstline,
self.lnotab.getTable(), tuple(self.freevars),
tuple(self.cellvars))
# NOTE: This introduces a dependency on the host VM. For example, if
# we're running on Python 3.4, we'll get the Python 3.4 CodeType, which
# is _f.__code__.
#
# Patch for Python 3.0 kwonlyargs:
# https://www.python.org/dev/peps/pep-3102/
#
# This is defined in Include/code.h PyCodeObject.
kwonlyargcount = 0
bytecode = self.lnotab.getCode().encode('latin-1')
# addr <-> lineno mapping.
# See Objects/lnotab_notes.txt
lnotab = self.lnotab.getTable().encode('latin-1')
code_args = (
argcount, kwonlyargcount, nlocals, self.stacksize, self.flags, # int
bytecode, # bytes
self.getConsts(),
tuple(self.names),
tuple(self.varnames),
self.filename,
self.name,
self.lnotab.firstline,
lnotab, # bytes
tuple(self.freevars),
tuple(self.cellvars))
if 0:
for a in code_args:
print(type(a))
if isinstance(a, str):
print(repr(a))
# bytes/str confusion! We expect bytes.
#new_args.append(a.encode('latin-1')) # bytes type
elif isinstance(a, tuple):
print(repr(a))
return types.CodeType(*code_args)
def getConsts(self):
"""Return a tuple for the const slot of the code object
View
@@ -34,21 +34,31 @@
TRY_FINALLY = 3
END_FINALLY = 4
def compileFile(filename, display=0):
f = open(filename, 'U')
buf = f.read()
f.close()
mod = Module(buf, filename)
try:
mod.compile(display)
except SyntaxError:
raise
else:
f = open(filename + "c", "wb")
mod.dump(f)
f.close()
def compile(source, filename, mode, flags=None, dont_inherit=None):
MAGIC = imp.get_magic()
def getPycHeader(filename):
# compile.c uses marshal to write a long directly, without
# calling the interface that would also generate a 1-byte code
# to indicate the type of the value. The simplest way to get the
# same effect is to call marshal and then skip the code.
mtime = os.path.getmtime(filename)
mtime = struct.pack('<i', int(mtime))
# Update for Python 3:
# https://nedbatchelder.com/blog/200804/the_structure_of_pyc_files.html
# https://gist.github.com/anonymous/35c08092a6eb70cdd723
# It has the file size. Is this just for signature purposes?
file_size = struct.pack('<i', 0)
return MAGIC + mtime + file_size
def WritePyc(code, orig_filename, f):
f.write(getPycHeader(orig_filename))
marshal.dump(code, f)
def compile(source, filename, mode, flags=None, dont_inherit=None, transformer=None):
"""Replacement for builtin compile() function"""
if flags is not None or dont_inherit is not None:
raise RuntimeError("not implemented yet")
@@ -62,7 +72,7 @@ def compile(source, filename, mode, flags=None, dont_inherit=None):
else:
raise ValueError("compile() 3rd arg must be 'exec' or "
"'eval' or 'single'")
gen.compile()
gen.compile(transformer=transformer)
return gen.code
class AbstractCompileMode:
@@ -74,8 +84,8 @@ def __init__(self, source, filename):
self.filename = filename
self.code = None
def _get_tree(self):
tree = parse(self.source, self.mode)
def _get_tree(self, transformer=None):
tree = parse(self.source, self.mode, transformer=transformer)
misc.set_filename(self.filename, tree)
syntax.check(tree)
return tree
@@ -108,28 +118,14 @@ class Module(AbstractCompileMode):
mode = "exec"
def compile(self, display=0):
tree = self._get_tree()
def compile(self, display=0, transformer=None):
tree = self._get_tree(transformer=transformer)
gen = ModuleCodeGenerator(tree)
if display:
import pprint
print(pprint.pprint(tree))
self.code = gen.getCode()
def dump(self, f):
f.write(self.getPycHeader())
marshal.dump(self.code, f)
MAGIC = imp.get_magic()
def getPycHeader(self):
# compile.c uses marshal to write a long directly, with
# calling the interface that would also generate a 1-byte code
# to indicate the type of the value. simplest way to get the
# same effect is to call marshal and then skip the code.
mtime = os.path.getmtime(self.filename)
mtime = struct.pack('<i', mtime)
return self.MAGIC + mtime
class LocalNameFinder:
"""Find local names in scope"""
@@ -1549,7 +1545,3 @@ class AugSubscript(Delegator):
def wrap_aug(node):
return wrapper[node.__class__](node)
if __name__ == "__main__":
for file in sys.argv[1:]:
compileFile(file)
Oops, something went wrong.

0 comments on commit 31c63a7

Please sign in to comment.