Permalink
Browse files

More simplification of the compilation pipeline.

It's now written in a linear fashion:

tokenize()
parse()
transform() - parse tree to AST
compile() - AST to bytecode.

TODO: separate out the CFG.

In other words, we separate nouns and verbs.  We don't do work in
constructors.

walk() is now called in compile(), not in the constructor of the
CodeGenerator!
  • Loading branch information...
Andy Chu
Andy Chu committed Mar 18, 2018
1 parent 6c33e9d commit 8cf2341a8a621923055d7d647e0c4723196d8b8f
Showing with 49 additions and 31 deletions.
  1. +1 −1 opy/compiler2/misc.py
  2. +20 −22 opy/compiler2/pycodegen.py
  3. +13 −8 opy/opy_main.py
  4. +15 −0 opy/regtest.sh
View
@@ -1,6 +1,6 @@
# mangle() is used by both symbols and pycodegen.
MANGLE_LEN = 256 # magic constant from compile.c
MANGLE_LEN = 256 # magic constant from compile.c
def mangle(name, klass):
if not name.startswith('__'):
View
@@ -55,26 +55,29 @@ def set_filename(filename, tree):
worklist.extend(node.getChildNodes())
def compile(parse_tree, filename, mode, flags=None, dont_inherit=None,
transformer=None):
def compile(as_tree, filename, mode):
"""Replacement for builtin compile() function"""
if flags is not None or dont_inherit is not None:
raise RuntimeError("not implemented yet")
as_tree = transformer.transform(parse_tree)
set_filename(filename, as_tree)
syntax.check(as_tree)
if mode == "single":
gen = InteractiveCodeGenerator(as_tree)
gen = InteractiveCodeGenerator(as_tree.filename)
gen.set_lineno(as_tree)
elif mode == "exec":
gen = ModuleCodeGenerator(as_tree)
futures = future.find_futures(as_tree)
gen = ModuleCodeGenerator(as_tree.filename, futures)
elif mode == "eval":
gen = ExpressionCodeGenerator(as_tree)
gen = ExpressionCodeGenerator(as_tree.filename)
else:
raise ValueError("compile() 3rd arg must be 'exec' or "
"'eval' or 'single'")
walk(as_tree, gen)
# Not sure why I need this, copied from InteractiveCodeGenerator
if mode == "single":
gen.emit('RETURN_VALUE')
return gen.getCode()
@@ -1243,11 +1246,10 @@ class ModuleCodeGenerator(NestedScopeMixin, CodeGenerator):
scopes = None
def __init__(self, tree):
self.graph = pyassem.PyFlowGraph("<module>", tree.filename)
self.futures = future.find_futures(tree)
def __init__(self, filename, futures):
self.graph = pyassem.PyFlowGraph("<module>", filename)
self.futures = futures
self.__super_init()
walk(tree, self)
def get_module(self):
return self
@@ -1258,10 +1260,9 @@ class ExpressionCodeGenerator(NestedScopeMixin, CodeGenerator):
scopes = None
futures = ()
def __init__(self, tree):
self.graph = pyassem.PyFlowGraph("<expression>", tree.filename)
def __init__(self, filename):
self.graph = pyassem.PyFlowGraph("<expression>", filename)
self.__super_init()
walk(tree, self)
def get_module(self):
return self
@@ -1273,12 +1274,9 @@ class InteractiveCodeGenerator(NestedScopeMixin, CodeGenerator):
scopes = None
futures = ()
def __init__(self, tree):
self.graph = pyassem.PyFlowGraph("<interactive>", tree.filename)
def __init__(self, filename):
self.graph = pyassem.PyFlowGraph("<interactive>", filename)
self.__super_init()
self.set_lineno(tree)
walk(tree, self)
self.emit('RETURN_VALUE')
def get_module(self):
return self
View
@@ -175,12 +175,14 @@ def OpyCommandMain(argv):
symbols = Symbols(gr)
pytree.Init(symbols) # for type_repr() pretty printing
transformer.Init(symbols) # for _names and other dicts
tr = transformer.Transformer()
else:
# e.g. pgen2 doesn't use any of these. Maybe we should make a different
# tool.
gr = None
FILE_INPUT = None
symbols = None
tr = None
dr = driver.Driver(gr, convert=py2st)
@@ -234,8 +236,8 @@ def OpyCommandMain(argv):
with open(py_path) as f:
tokens = tokenize.generate_tokens(f.readline)
parse_tree = dr.parse_tokens(tokens, start_symbol=FILE_INPUT)
tr = transformer.Transformer()
co = pycodegen.compile(parse_tree, py_path, 'exec', transformer=tr)
as_tree = tr.transform(parse_tree)
co = pycodegen.compile(as_tree, py_path, 'exec')
log("Compiled to %d bytes of bytecode", len(co.co_code))
# Write the .pyc file
@@ -248,9 +250,11 @@ def OpyCommandMain(argv):
py_expr = argv[1]
f = cStringIO.StringIO(py_expr)
tokens = tokenize.generate_tokens(f.readline)
parse_tree = dr.parse_tokens(tokens, start_symbol=gr.symbol2number['eval_input'])
tr = transformer.Transformer()
co = pycodegen.compile(parse_tree, '<eval input>', 'eval', transformer=tr)
parse_tree = dr.parse_tokens(tokens,
start_symbol=gr.symbol2number['eval_input'])
as_tree = tr.transform(parse_tree)
co = pycodegen.compile(as_tree, '<eval input>', 'eval')
v = dis_tool.Visitor()
v.show_code(co)
@@ -264,9 +268,10 @@ def OpyCommandMain(argv):
f = cStringIO.StringIO(py_expr)
tokens = tokenize.generate_tokens(f.readline)
# TODO: change this to 'single input'? Why doesn't this work?
parse_tree = dr.parse_tokens(tokens, start_symbol=gr.symbol2number['eval_input'])
tr = transformer.Transformer()
co = pycodegen.compile(parse_tree, '<REPL input>', 'single', transformer=tr)
parse_tree = dr.parse_tokens(tokens,
start_symbol=gr.symbol2number['eval_input'])
as_tree = tr.transform(parse_tree)
co = pycodegen.compile(as_tree, '<REPL input>', 'single')
v = dis_tool.Visitor()
v.show_code(co)
View
@@ -85,4 +85,19 @@ lines() {
find _regtest/src -type f | xargs wc -l | sort -n
}
compare-one() {
  # List one .pyc under _tmp/regtest, then dump the entry with the same
  # relative path from the reference zip as characters for manual comparison.
  # NOTE(review): assumes the zip stores members under this relative path --
  # confirm against how bytecode-opy.zip is built.
  local rel_path='opy/compiler2/transformer.pyc'

  ls -l "_tmp/regtest/$rel_path"

  # TODO: Copy zip from flanders?
  # unzip expects the archive first and the member name second; -p streams
  # the member to stdout.  (The arguments were previously swapped, which made
  # unzip try to open the .pyc itself as an archive.)
  unzip -p _tmp/flanders/bytecode-opy.zip "$rel_path" | od -c
}
smoke-three-modes() {
  # Smoke test: run the 'compile' step on oil, then exercise the opyc
  # 'eval' and 'repl' subcommands with a small arithmetic expression each.
  compile oil

  # Declared after 'compile' so the helper variable is never visible to it.
  local opyc_bin=$THIS_DIR/../bin/opyc

  $opyc_bin eval '1+2*3'
  printf '%s\n' '4+5*6' | $opyc_bin repl
}
"$@"

0 comments on commit 8cf2341

Please sign in to comment.