Permalink
Browse files

Generate Python source code from an ASDL schema.

This is in contrast to using metaprogramming at runtime.

OSH works with this new code, and it's a lot faster!  It's not enabled
yet because a few unit tests still fail, due to lack of __eq__.  But the
shell runs and passes spec tests.

Also:

- Decode the output of everything we encoded in the oheap benchmark.

Document an idea to both measure decoding speed and collect data about
Array/Str lengths.  This could help with record sizing, inlining, and
other decisions that affect locality.

- In ASDL constructors, raise TypeError instead of AssertionError.  This
  is more consistent with Python itself.
  • Loading branch information...
Andy Chu
Andy Chu committed Nov 26, 2017
1 parent de19b3d commit 00b36fad5add0e612d0742c7c4e355bd77534b32
Showing with 267 additions and 18 deletions.
  1. +5 −2 asdl/arith_ast_test.py
  2. +4 −2 asdl/asdl_demo.py
  3. +2 −0 asdl/encode.py
  4. +3 −6 asdl/gen_cpp.py
  5. +159 −0 asdl/gen_python.py
  6. +13 −2 asdl/py_meta.py
  7. +21 −0 asdl/run.sh
  8. +46 −0 benchmarks/oheap.sh
  9. +12 −4 osh/ast_.py
  10. +2 −2 scripts/count.sh
View
@@ -14,6 +14,10 @@
from asdl import arith_ast # module under test
# Sanity check. Doesn't pass because this unit test exposes implementation
# details, like the concrete classes.
#from _tmp import arith_ast_asdl as arith_ast
ArithVar = arith_ast.ArithVar
ArithUnary = arith_ast.ArithUnary
@@ -96,7 +100,6 @@ def testEncode(self):
self.assertEqual(b'\x63\x00\x00', e[9:12]) # 0x63 = 99
def testConstructorType(self):
print(ArithVar)
print('FIELDS', ArithVar.FIELDS)
print('DESCRIPTOR_LOOKUP', ArithVar.DESCRIPTOR_LOOKUP)
@@ -124,7 +127,7 @@ def testConstructorType(self):
n3 = ArithVar()
try:
n4 = ArithVar('x', name='X')
except AssertionError as e:
except TypeError as e:
pass
else:
raise AssertionError("Should have failed")
View
@@ -20,7 +20,7 @@ def main(argv):
except IndexError:
raise RuntimeError('Action required')
if action == 'py':
if action == 'py': # Prints the module
schema_path = argv[2]
with open(schema_path) as f:
@@ -29,7 +29,9 @@ def main(argv):
# NOTE: We shouldn't pass in app_types for arith.asdl, but this is just a
# demo.
py_meta.MakeTypes(module, root, app_types={'id': asdl.UserType(Id)})
print(dir(root))
print('Dynamically created a Python module with these types:')
for name in dir(root):
print('\t' + name)
elif action == 'arith-encode':
expr = argv[2]
View
@@ -68,6 +68,8 @@ def __init__(self, alignment=_DEFAULT_ALIGNMENT,
self.tag_width = 1 # for ArithVar vs ArithWord.
self.int_width = int_width
self.ref_width = int_width # Constant 3, used by gen_cpp
# used for fd, line/col
# also I guess stuff like SimpleCommand
self.index_width = 2 # 16 bits, e.g. max 64K entries in an array
View
@@ -154,9 +154,6 @@ def VisitType(self, typ, depth=0):
else:
raise AssertionError(typ)
def VisitSimpleSum(self, sum, name, depth):
pass
def VisitSum(self, sum, name, depth):
if asdl.is_simple(sum):
self.VisitSimpleSum(sum, name, depth)
@@ -192,11 +189,11 @@ def EmitFooter(self):
class ClassDefVisitor(AsdlVisitor):
"""Generate C++ classes and type-safe enums."""
def __init__(self, f, enc, enum_types=None):
def __init__(self, f, enc_params, enum_types=None):
AsdlVisitor.__init__(self, f)
self.ref_width = enc.ref_width
self.ref_width = enc_params.ref_width
self.enum_types = enum_types or {}
self.pointer_type = enc.pointer_type
self.pointer_type = enc_params.pointer_type
self.footer = [] # lines
def EmitFooter(self):
View
@@ -0,0 +1,159 @@
#!/usr/bin/python
"""
gen_python.py
"""
import sys
from asdl import gen_cpp
from asdl import asdl_ as asdl
class GenClassesVisitor(gen_cpp.AsdlVisitor):
  """Emit Python class definitions for the types in an ASDL schema.

  Every Visit* method writes source text through self.Emit(line, depth).
  The emitted classes refer to SimpleObj and const, which the generated
  module's prelude is expected to provide.
  """
  # TODO:
  # - __eq__ isn't the same
  # - DESCRIPTOR and FIELDS are dummies right now.
  #   - I think FIELDS is used for encoding.
  #
  # - Debug mode:
  #   - _CheckType(value, desc) on initialization and __setattr__.
  #   - check unassigned.  Why is it done with unit tests with CheckUnassigned,
  #     but also in _Init?

  def VisitSimpleSum(self, sum, name, depth):
    """Emit class <name>_e plus one SimpleObj instance per variant.

    Args:
      sum: sum type whose .types are variants with a .name
      name: name of the ASDL type
      depth: indentation depth passed through to Emit
    """
    self.Emit('class %s_e(SimpleObj):' % name, depth)
    self.Emit('  pass', depth)
    self.Emit('', depth)

    # Each variant becomes a class attribute holding an instance of the enum
    # class, numbered from 1 in declaration order.
    for i, variant in enumerate(sum.types):
      attr = '%s_e.%s = %s_e(%d, %r)' % (
          name, variant.name, name, i + 1, variant.name)
      self.Emit(attr, depth)
    self.Emit('', depth)

  def _GenClass(self, desc, name, super_name, depth, tag_num=None,
                add_spids=True):
    """Emit a class with FIELDS, __slots__, __init__ and CheckUnassigned.

    Args:
      desc: product or constructor; desc.fields are read for .name, .type,
        .opt and .seq
      name: name of the emitted class
      super_name: name of the emitted base class
      depth: indentation depth passed through to Emit
      tag_num: if not None, emitted as the class attribute 'tag'
      add_spids: if true, add a 'spids' slot initialized to []
    """
    self.Emit('class %s(%s):' % (name, super_name), depth)
    if tag_num is not None:
      self.Emit('  tag = %d' % tag_num, depth)

    field_names = [f.name for f in desc.fields]
    if add_spids:
      field_names.append('spids')
    quoted_fields = repr(tuple(field_names))

    # NOTE: FIELDS is a duplicate of __slots__, used for pretty printing and
    # oheap serialization.  TODO: measure the effect of __slots__, and then get
    # rid of FIELDS?  Or you can just make it an alias.
    # FIELDS = self.__slots__.
    self.Emit('  FIELDS = %s' % quoted_fields, depth)
    # Dummy values.
    self.Emit('  DESCRIPTOR_LOOKUP = {}', depth)
    self.Emit('  DESCRIPTOR = None', depth)
    self.Emit('  __slots__ = %s' % quoted_fields, depth)
    self.Emit('', depth)

    params = ['self'] + ['%s=None' % f.name for f in desc.fields]
    self.Emit('  def __init__(%s):' % ', '.join(params), depth)

    body_emitted = False
    for f in desc.fields:
      # This logic is like _MakeFieldDescriptors
      default = None
      if f.opt:  # Maybe
        if f.type == 'int':
          default = 'const.NO_INTEGER'
        elif f.type == 'string':
          default = "''"
        # Any other optional type defaults to None, which the '=None'
        # parameter default already supplies.
      elif f.seq:  # Array
        default = '[]'

      if default is not None:
        # Apply the default only when the argument was omitted (None).  The
        # previous 'x or default' form also replaced legitimate falsy
        # arguments, e.g. an optional int of 0 became const.NO_INTEGER.
        # Emitted as short statements so no line is longer than before.
        self.Emit('    if %s is None:' % f.name, depth)
        self.Emit('      %s = %s' % (f.name, default), depth)
      self.Emit('    self.%s = %s' % (f.name, f.name), depth)
      body_emitted = True

    # Like add_spids in _MakeFieldDescriptors.  TODO: This should be optional
    # for token and span!  Also for runtime.asdl.  We need to make it optional.
    if add_spids:
      self.Emit('    self.spids = []', depth)
      body_emitted = True

    if not body_emitted:
      # A 'def' with no statements is a syntax error in the generated module.
      self.Emit('    pass', depth)

    self.Emit('', depth)
    self.Emit('  def CheckUnassigned(self):', depth)
    self.Emit('    pass', depth)
    self.Emit('', depth)

  def VisitConstructor(self, cons, def_name, tag_num, depth):
    """Emit the class for one variant of a compound sum.

    Args:
      cons: constructor with .name and .fields
      def_name: name of the emitted base class
      tag_num: integer emitted as the class attribute 'tag'
      depth: indentation depth passed through to Emit
    """
    if cons.fields:
      self._GenClass(cons, cons.name, def_name, depth, tag_num=tag_num)
    else:
      # No fields: only the tag is emitted, without __init__ or __slots__.
      self.Emit("class %s(%s):" % (cons.name, def_name), depth)
      self.Emit('  tag = %d' % tag_num, depth)
      self.Emit('', depth)

  def VisitCompoundSum(self, sum, name, depth):
    """Emit <name>_e (integer tags), base class <name>, and variant classes.

    Args:
      sum: sum type whose .types are the constructors
      name: name of the ASDL type
      depth: indentation depth passed through to Emit
    """
    # define command_e
    self.Emit('class %s_e(object):' % name, depth)
    for i, variant in enumerate(sum.types):
      self.Emit('  %s = %d' % (variant.name, i + 1), depth)
    self.Emit('', depth)

    self.Emit('class %s(object):' % name, depth)
    self.Emit('  pass', depth)
    self.Emit('', depth)

    # define command_t, and then make subclasses
    super_name = '%s' % name
    for i, t in enumerate(sum.types):
      tag_num = i + 1  # same 1-based numbering as <name>_e above
      self.VisitConstructor(t, super_name, tag_num, depth)

  def VisitProduct(self, product, name, depth):
    """Emit a class deriving from object for a product type."""
    self._GenClass(product, name, 'object', depth)

  def EmitFooter(self):
    """Emit nothing; the generated Python module has no footer."""
    pass
def main(argv):
  """Write a Python module for the ASDL schema at argv[1] to stdout.

  The output is a fixed prelude (Obj and SimpleObj base classes and the
  'const' import) followed by the classes emitted by GenClassesVisitor.

  Args:
    argv: command line; argv[1] is the path of the schema file

  Raises:
    RuntimeError: if no schema path was given
  """
  try:
    schema_path = argv[1]
  except IndexError:
    # The entry point reports RuntimeError as a FATAL message.
    raise RuntimeError('Schema path required')

  with open(schema_path) as input_f:
    module = asdl.parse(input_f)

  f = sys.stdout

  # For const.NO_INTEGER
  # The prelude must keep its class and method bodies indented, otherwise the
  # generated module is not valid Python.
  f.write("""\
from asdl import const

# Copied from py_meta
class Obj(object):
  # NOTE: We're using CAPS for these static fields, since they are constant at
  # runtime after metaprogramming.
  DESCRIPTOR = None  # Used for type checking

class SimpleObj(Obj):
  def __init__(self, enum_id, name):
    self.enum_id = enum_id
    self.name = name

  def __repr__(self):
    return '<%s %s %s>' % (self.__class__.__name__, self.name, self.enum_id)

""")

  v = GenClassesVisitor(f)
  v.VisitModule(module)
if __name__ == '__main__':
  try:
    main(sys.argv)
  except RuntimeError as e:
    # sys.stderr.write produces the same output as the Python 2 statement
    # 'print >>sys.stderr, ...' and also works on Python 3, where the chevron
    # form raises TypeError while the error is being reported.
    sys.stderr.write('FATAL: %s\n' % e)
    sys.exit(1)
View
@@ -184,7 +184,7 @@ def _Init(self, args, kwargs):
for name, val in kwargs.items():
if self._assigned[name]:
raise AssertionError('Duplicate assignment of field %r' % name)
raise TypeError('Duplicate assignment of field %r' % name)
self.__setattr__(name, val)
# Disable type checking here
@@ -195,7 +195,10 @@ def _Init(self, args, kwargs):
raise ValueError("Field %r is required and wasn't initialized" % name)
def CheckUnassigned(self):
"""See if there are unassigned fields, for later encoding."""
"""See if there are unassigned fields, for later encoding.
This is currently only used in unit tests.
"""
unassigned = []
for name in self.FIELDS:
if not self._assigned[name]:
@@ -343,3 +346,11 @@ def MakeTypes(module, root, app_types=None):
else:
raise AssertionError(typ)
def AssignTypes(src_module, dest_module):
  """Copy attributes of src_module onto dest_module.

  Names that start with a double underscore are skipped; every other name
  reported by dir(src_module) is set on dest_module with the same value.
  """
  wanted = [n for n in dir(src_module) if not n.startswith('__')]
  for attr_name in wanted:
    setattr(dest_module, attr_name, getattr(src_module, attr_name))
View
@@ -47,6 +47,27 @@ py-cpp() {
asdl-cpp $schema _tmp/$(basename $schema).h
}
# Run the Python code generator on an ASDL schema (default: asdl/arith.asdl).
gen-python() {
  local schema=${1:-asdl/arith.asdl}
  # Quoted so a path containing spaces or glob characters stays one argument.
  asdl/gen_python.py "$schema"
}
# Generate _devbuild/osh_asdl.py from osh/osh.asdl and print its line count.
gen-osh-python() {
  # NOTE(review): this touches _tmp/__init__.py although the output goes to
  # _devbuild -- confirm which package marker is intended.
  touch _tmp/__init__.py
  local out=_devbuild/osh_asdl.py
  gen-python osh/osh.asdl > "$out"
  wc -l "$out"
}
# Generate _tmp/arith_ast_asdl.py from asdl/arith.asdl, print its line count,
# then run the arith_ast unit test.
gen-arith-python() {
  local out=_tmp/arith_ast_asdl.py
  touch _tmp/__init__.py
  gen-python asdl/arith.asdl > "$out"
  wc -l "$out"
  test/unit.sh unit asdl/arith_ast_test.py
}
#
# Test specific schemas
#
View
@@ -32,6 +32,52 @@ run() {
$0 encode-one
}
# TODO: instead of running osh_demo, we should generate a C++ program that
# visits every node and counts it. The output might look like:
#
# - It can also print out the depth of the tree.
# - Summary: number of different types used
# - another option: decode/validate utf-8. See Visitor Use Cases.
#
# # 500 instances
# line_span = (...)
# # 455 instances
# token = (
# id id,
# string val, # lengths: min 0, max 20, avg 30
# int? span_id,
# )
#
# command =
# # 20 instances
# NoOp
# -- TODO: respect order
# # 20 instances
# | SimpleCommand(
# word* words, # min length: 0, max: 10, mean: 3.3 ?
# redir* redirects, # min length 0, max: 2, mean: 4.4
# env_pair* more_env)
# | Sentence(command child, token terminator)
#
# This might help with encoding things inline?
# You will definitely need to append to ASDL arrays. I don't think you'll need
# to append to strings. But you might want to store strings inline with
# structs.
# I guess it wouldn't hurt to print out a table of EVERY node and array, along
# with the type.
# parent_type,field_name,type,subtype,length
# token,val,Str,-,5
# SimpleCommand,redirects,Array,redirect,10
#
# This lets you figure out what the common types are, as well as the common
# lengths.
# Time _tmp/osh_demo on each .oheap file in _tmp/oheap and print the number
# of lines it outputs.
decode() {
  local bin  # keep the loop variable out of the global scope
  for bin in _tmp/oheap/*.oheap; do
    time _tmp/osh_demo "$bin" | wc -l
  done
}
stats() {
ls -l -h _tmp/oheap
echo
View
@@ -135,8 +135,16 @@ def LoadSchema(f):
return asdl_module, app_types
f = util.GetResourceLoader().open('osh/osh.asdl')
asdl_module, app_types = LoadSchema(f)
# TODO: This should be the only lines in this module?
# PrettyPrint can go in osh/ast_lib ? or ast_util?
root = sys.modules[__name__]
py_meta.MakeTypes(asdl_module, root, app_types)
f.close()
if 1:
f = util.GetResourceLoader().open('osh/osh.asdl')
asdl_module, app_types = LoadSchema(f)
py_meta.MakeTypes(asdl_module, root, app_types)
f.close()
else:
# Get the types from elsewhere
from _devbuild import osh_asdl
py_meta.AssignTypes(osh_asdl, root)
View
@@ -41,11 +41,11 @@ all() {
echo
echo 'ASDL'
wc -l asdl/{asdl_,py_meta,gen_cpp,encode,format}.py | sort --numeric
wc -l asdl/{asdl_,py_meta,encode,format}.py | sort --numeric
echo
echo 'CODE GENERATORS'
wc -l */*_gen.py | sort --numeric
wc -l asdl/gen_*.py */*_gen.py | sort --numeric
echo
echo 'GENERATED CODE'

0 comments on commit 00b36fa

Please sign in to comment.