Permalink
Browse files

Replace the core of the interpreter with ASDL classes.

- Instead of core/{expr,word,cmd}_node.py, we have osh/osh.asdl.  The
  osh/ast.py module uses the asdl/py_meta.py library to dynamically
  generate node types (classes in Python).
- All the executors/evaluators now dispatch using the generic '.tag' attribute
  on ASDL node classes.
- All the methods on Words in word_node.py are moved to free functions in
  word.py.  (TODO: write a blog post about functional vs. object-oriented
  style.)

- ASDL enhancements:
  - Add generic code to pretty print ASTs.
  - generate Python "enums" from ASDL

- Test enhancements
  - smoke test for empty programs
  - unit tests can print error locations now
  - add some tests for source locations -- not complete yet

- refactoring:
  - TildeDetect moved into word.py
- brainstorming about location info -- not done yet

Testing on this change:

- all unit tests pass
- ./spec.sh {smoke,arith,dbracket} didn't regress (there are unrelated
  failures)
- all wild.sh tests I tried pass (git, aboriginal, debootstrap, and 5 or
  so others)
~
  • Loading branch information...
Andy Chu
Andy Chu committed Jan 3, 2017
1 parent 58e41e5 commit 6bfc6cac1361cc1d3bf6878af8a56d6aedaac19d
View
@@ -6,11 +6,23 @@
import unittest
from asdl import arith_ast # module under test
from asdl import format as fmt
from asdl import py_meta
class ArithAstTest(unittest.TestCase):
def testFieldDefaults(self):
  """Check the field values of freshly constructed Slice and FuncCall nodes.

  A Slice() built with no arguments is expected to have begin, end and stride
  equal to None; a FuncCall() built with no arguments is expected to have
  args equal to [].
  """
  slice_node = arith_ast.Slice()
  print(slice_node)
  for field_name in ('begin', 'end', 'stride'):
    self.assertEqual(None, getattr(slice_node, field_name))

  call_node = arith_ast.FuncCall()
  print(call_node)
  self.assertEqual([], call_node.args)
def testTypes(self):
ArithVar = arith_ast.ArithVar
ArithUnary = arith_ast.ArithUnary
@@ -55,6 +67,14 @@ def testTypes(self):
#n5 = ArithVar(None)
s = source_location()
s.path = 'hi'
s.line = 1
s.col = 2
s.length = 3
print(s)
# Doesn't work now
print(source_location())
c = Const(66)
@@ -84,6 +104,10 @@ def testTypes(self):
n.right = Const(6)
n.CheckUnassigned()
arith_expr_e = arith_ast.arith_expr_e
self.assertEqual(arith_expr_e.Const, c.tag)
self.assertEqual(arith_expr_e.ArithBinary, n.tag)
if __name__ == '__main__':
unittest.main()
View
@@ -222,7 +222,7 @@ def ParseShell(s, expected=None):
if expected is not None:
assert sexpr == expected, '%r != %r' % (sexpr, expected)
print('%-40s %s' % (s, sexpr))
#print('%-40s %s' % (s, sexpr))
return tree
View
@@ -8,6 +8,7 @@
from asdl import arith_parse
from asdl import py_meta
from asdl import encode
from asdl import format as fmt
def main(argv):
@@ -36,6 +37,17 @@ def main(argv):
out = encode.BinOutput(f)
encode.EncodeRoot(obj, enc, out)
elif action == 'arith-format':
expr = argv[2]
obj = arith_parse.ParseShell(expr)
#out = fmt.TextOutput(sys.stdout)
tree = fmt.MakeTree(obj)
#treee= ['hi', 'there', ['a', 'b'], 'c']
fmt.PrintTree(tree, sys.stdout)
# Might need to print the output?
# out.WriteToFile?
else:
raise RuntimeError('Invalid action %r' % action)
View
@@ -1,4 +1,4 @@
#!/usr/bin/python -S
#!/usr/bin/env python3
"""
encode_test.py: Tests for encode.py
"""
View
@@ -0,0 +1,297 @@
#!/usr/bin/python
"""
format.py
Like encode.py, but uses text instead of binary.
For pretty-printing.
"""
import io
import sys
from asdl import asdl_parse as asdl
class ColorOutput:
  """Base class for the output "color interface".

  Every value passed to Write() is collected in self.lines.  The file object
  given to the constructor is only stored; this class never writes to it.

  Possible API (undecided):
    PushColor() ?
    PopColor() ?

  Things that should be colored: raw text, like "ls" and "/foo/bar", and
  certain kinds of nodes.

  Should we have both a background color and a foreground color?
  """

  def __init__(self, f):
    """Remember the output file 'f' and start with no recorded lines."""
    self.lines = []
    self.f = f

  def Write(self, line):
    """Record 'line' at the end of self.lines."""
    self.lines += [line]
class TextOutput(ColorOutput):
  """TextOutput obeys the color interface, but outputs no color."""

  def __init__(self, f):
    """Initialize the base class with the output file 'f'."""
    super().__init__(f)
class HtmlOutput(ColorOutput):
  """Output for HTML.

  In HTML one can have wider columns.  Maybe not even a fixed-width font.
  Hm, yeah, indentation should be logical then?

  Color: HTML spans
  """

  def __init__(self, f):
    """Initialize the base class with the output file 'f'."""
    super().__init__(f)
class AnsiOutput(ColorOutput):
  """Output for a terminal.

  Generally 80 column output.

  Color: emit a color code, then restore.  (The original note said "html
  code"; presumably ANSI escape codes are meant -- TODO confirm.)
  """

  def __init__(self, f):
    """Initialize the base class with the output file 'f'."""
    super().__init__(f)
# Number of spaces PrintTree adds per nesting level in its multi-line output.
INDENT = 2
# TODO: Change algorithm
# - MakeTree makes it homogeneous:
# - strings for primitives, or ? for unset
# - (field, value) tuple
# - [] for arrays
# - _Obj(name, fields)
#
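# And then PrintTree(max_col) first tries to print each child on a single
# line, into a temporary buffer.
#
# If that fails (the child doesn't fit in max_col), then print the tree
# indented:
#   ok = TryPrintLine(child, max_col)
#   if (not ok):
#     indent
#     PrintTree()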
#
# And PrintTree should take a list of Substitutions on node_type to make it
# shorter?
# - CompoundWord
# - SimpleCommand
# - Lit_Chars for tokens
class _Obj:
  """A named node of the homogeneous tree that MakeTree builds.

  Attributes:
    node_type: name of the node, as given to the constructor.
    fields: list of (field name, value) 2-tuples; starts out empty and is
      appended to by whoever builds the tree.
  """

  def __init__(self, node_type):
    self.fields = list()  # list of 2-tuples
    self.node_type = node_type
def MakeTree(obj, omit_empty=True):
  """Convert an ASDL object into a homogeneous tree for pretty printing.

  Args:
    obj: py_meta.CompoundObj.  Any other value is returned as repr(obj).
    omit_empty: Whether to omit empty lists.  This also applies to every
      nested object.

  Returns:
    A tree of strings and lists:
      - a string for a primitive value, or '?' for a field that is unset
      - a list for an array field
      - an _Obj(node_type) whose .fields is a list of (field name, value)
        pairs, for a compound object

  NOTES:

  {} for words, [] for wordpart?  What about tokens?  I think each node has to
  be able to override the behavior.  How to do this though?  Free functions?

  Common case:
  ls /foo /bar -> (Com {[ls]} {[/foo]} {[/bar]})
  Or use color for this?

  (ArithBinary Plus (ArithBinary Plus (Const 1) (Const 2)) (Const 3))
  vs.
  ArithBinary
    Plus
    ArithBinary
      Plus
      Const 1
      Const 2
    Const 3

  What about field names?

  Inline:
  (Node children:[() () ()])

  Indented
  (Node
    children:[
      ()
      ()
      ()
    ]
  )
  """
  # HACK to incorporate old AST nodes.  Remove when the whole thing is
  # converted.
  from asdl import py_meta
  if not isinstance(obj, py_meta.CompoundObj):
    #raise AssertionError(obj)
    return repr(obj)

  # These lines can be possibly COMBINED all into one.  () can replace
  # indentation?
  out_node = _Obj(obj.__class__.__name__)

  for field_name in obj.FIELDS:
    # Need a different data model.  Pairs?
    try:
      field_val = getattr(obj, field_name)
    except AttributeError:
      # The field was never assigned.  Show a placeholder instead of silently
      # dropping it; previously the '?' was assigned and then thrown away by
      # the 'continue'.
      out_node.fields.append((field_name, '?'))
      continue

    show_field = True
    out_val = ''
    desc = obj.DESCRIPTOR_LOOKUP[field_name]

    if isinstance(desc, asdl.IntType):
      # TODO: How to check for overflow?
      out_val = str(field_val)

    elif isinstance(desc, asdl.Sum) and asdl.is_simple(desc):
      # HACK for now to reflect that Id is an integer.
      if isinstance(field_val, int):
        out_val = str(field_val)
      else:
        out_val = field_val.name

    elif isinstance(desc, asdl.StrType):
      out_val = field_val

    elif isinstance(desc, asdl.ArrayType):
      # Hm does an array need the field name?  I can have multiple arrays like
      # redirects, more_env, and words.  Is there a way to make "words"
      # special?
      out_val = [
          MakeTree(child_obj, omit_empty=omit_empty) for child_obj in field_val
      ]
      if omit_empty and not field_val:
        show_field = False

    elif isinstance(desc, asdl.MaybeType):
      if field_val is None:
        show_field = False
      else:
        out_val = MakeTree(field_val, omit_empty=omit_empty)

    else:
      # Recursive call for child records.  Write children before parents.
      # Children can't be written directly to 'out'.  We have to know if they
      # will fit first.
      out_val = MakeTree(field_val, omit_empty=omit_empty)

    if show_field:
      out_node.fields.append((field_name, out_val))

  return out_node
def PrintTree(node, f, indent=0, max_col=100):
  """Pretty print a homogeneous tree, wrapping nodes that don't fit on a line.

  The node is first rendered on a single line into a temporary buffer.  If
  that fits in the columns left after the indent, it is written as is.
  Otherwise an _Obj is printed with one field per line, each field value
  printed recursively two indent levels deeper; a string is printed as is.

  Args:
    node: homogeneous tree node, i.e. a string or an _Obj as returned by
      MakeTree
    f: output file.  TODO: Should take ColorOutput?
    indent: number of spaces to print before the node
    max_col: maximum length of an output line, including the indent

  Raises:
    AssertionError: if a node that doesn't fit on one line is neither a string
      nor an _Obj.
  """
  ind = ' ' * indent

  # Try printing on a single line.  The indent is NOT written into the
  # temporary buffer, because it's already subtracted from the available
  # width; doing both would count it twice.
  single_f = io.StringIO()
  if TrySingleLine(node, single_f, max_col=max_col-indent):
    f.write(ind)
    f.write(single_f.getvalue())
    return

  if isinstance(node, str):
    f.write(ind + node)

  elif isinstance(node, _Obj):
    f.write(ind + '(')
    f.write(node.node_type)
    f.write('\n')

    ind1 = ' ' * (indent+INDENT)
    child_indent = indent + INDENT + INDENT
    for name, val in node.fields:
      if isinstance(val, list):
        f.write('%s%s: [\n' % (ind1, name))
        for child in val:
          PrintTree(child, f, indent=child_indent, max_col=max_col)
          f.write('\n')
        f.write('%s]' % ind1)
      else:
        # The field name goes on its own line, so the value has the full
        # width after its indent.
        f.write('%s%s:\n' % (ind1, name))
        PrintTree(val, f, indent=child_indent, max_col=max_col)
      f.write('\n')  # separate fields

    f.write(ind + ')')

  else:
    raise AssertionError(node)
def TrySingleLine(node, f, max_col=80):
  """Try printing on a single line.

  Args:
    node: homogeneous tree node: a string, a list of nodes, or an _Obj
    f: output buffer; it must support getvalue(), like io.StringIO.
      TODO: Should take ColorOutput?
    max_col: maximum length of the line.  The whole content of f counts
      toward it, and nested nodes are checked against the same limit.

  Returns:
    ok: whether it fit on the line of the given size.
    If False, you can't use the value of f.

  Raises:
    AssertionError: if node is not a string, a list or an _Obj.
  """
  if isinstance(node, str):
    f.write(node)

  elif isinstance(node, list):
    f.write('[')
    for item in node:
      if not TrySingleLine(item, f, max_col=max_col):
        return False
    f.write(']')

  elif isinstance(node, _Obj):
    f.write('(')
    f.write(node.node_type)
    for name, val in node.fields:
      f.write(' %s:' % name)
      if not TrySingleLine(val, f, max_col=max_col):
        return False
    f.write(')')

  else:
    raise AssertionError(node)

  # Take into account the last char.
  return len(f.getvalue()) <= max_col
Oops, something went wrong.

0 comments on commit 6bfc6ca

Please sign in to comment.