| @@ -0,0 +1,242 @@ | ||
| #!/usr/bin/python3 | ||
| """ | ||
| arith_parse.py: Parse shell-like and C-like arithmetic. | ||
| """ | ||
| import sys | ||
| import tdop | ||
| from tdop import Node, CompositeNode | ||
| import arith_ast | ||
| # | ||
| # Null Denotation -- token that takes nothing on the left | ||
| # | ||
def NullConstant(p, token, bp):
    """Build the leaf node for a number literal or a variable name.

    Args:
      p: the parser (not used by this handler).
      token: the current token; only the 'number' and 'name' types are handled.
      bp: binding power (not used by this handler).

    Returns:
      arith_ast.Const for a 'number' token, arith_ast.ArithVar for a 'name'.

    Raises:
      AssertionError: if the token has any other type.
    """
    kind = token.type
    if kind == 'number':
        return arith_ast.Const(token.val)
    if kind == 'name':
        # A bare string has to be wrapped in some kind of variant.
        return arith_ast.ArithVar(token.val)
    raise AssertionError(kind)
def NullParen(p, token, bp):
    """Arithmetic grouping: handle '(' in prefix position.

    Parses the enclosed expression with binding power `bp`, then requires a
    closing ')'.  The inner expression is returned as-is; no node is created
    for the parentheses themselves.
    """
    inner = p.ParseUntil(bp)
    p.Eat(')')
    return inner
def NullPrefixOp(p, token, bp):
    """Prefix operator: wrap the operand parsed at binding power `bp`.

    Low precedence:  return, raise, etc.
      return x+y is return (x+y), not (return x) + y

    High precedence: logical negation, bitwise complement, etc.
      !x && y is (!x) && y, not !(x && y)

    Returns:
      A CompositeNode for `token` with the operand as its only child.
    """
    operand = p.ParseUntil(bp)
    return CompositeNode(token, [operand])
def NullIncDec(p, token, bp):
    """Prefix increment / decrement, e.g. ++x or ++x[1].

    Raises:
      tdop.ParseError: if the operand's token type is neither 'name' nor
        'get', i.e. it is not something that can be assigned to.
    """
    target = p.ParseUntil(bp)
    if target.token.type in ('name', 'get'):
        return CompositeNode(token, [target])
    raise tdop.ParseError("Can't assign to %r (%s)" % (target, target.token))
| # | ||
| # Left Denotation -- token that takes an expression on the left | ||
| # | ||
def LeftIncDec(p, token, left, rbp):
    """Postfix increment / decrement, i.e. i++ and i--.

    The token's type is rewritten in place with a 'post' prefix so the
    resulting node differs from the prefix form.

    Raises:
      tdop.ParseError: if `left` is not a 'name' or 'get' node.
    """
    if left.token.type in ('name', 'get'):
        token.type = 'post' + token.type
        return CompositeNode(token, [left])
    raise tdop.ParseError("Can't assign to %r (%s)" % (left, left.token))
def LeftIndex(p, token, left, unused_bp):
    """Indexing or slicing of a variable: f[x+1] or f[a:b].

    Returns:
      arith_ast.Slice(left, index, end, None) when a ':' and an end expression
      were parsed, otherwise arith_ast.Index(left, index).

    Raises:
      tdop.ParseError: if `left` is not an arith_ast.ArithVar.
    """
    # Only plain variables can be indexed here (f[x], not f[x][y]).
    if not isinstance(left, arith_ast.ArithVar):
        raise tdop.ParseError("%s can't be indexed" % left)

    index = p.ParseUntil(0)
    end = None
    if p.AtToken(':'):
        p.Next()
        end = p.ParseUntil(0)
    p.Eat(']')

    # TODO: support the full slice syntax, where both end and step are
    # optional:
    #   1:4
    #   1:4:2
    if not end:
        return arith_ast.Index(left, index)
    return arith_ast.Slice(left, index, end, None)
def LeftTernary(p, token, left, bp):
    """Ternary conditional, e.g. a > 1 ? x : y.

    `left` is the condition.  Both branches are parsed at binding power `bp`,
    separated by a mandatory ':'.

    Returns:
      A CompositeNode with children [condition, true branch, false branch].
    """
    if_true = p.ParseUntil(bp)
    p.Eat(':')
    if_false = p.ParseUntil(bp)
    return CompositeNode(token, [left, if_true, if_false])
def LeftBinaryOp(p, token, left, rbp):
    """Normal binary operator like 1+2 or 2*3.

    Only '+', '-' and '*' are mapped to an arith_ast.op_id member here.

    Returns:
      arith_ast.ArithBinary(op_id, left, right), with the right operand parsed
      at binding power `rbp`.

    Raises:
      AssertionError: for any other operator text.
    """
    # Operator text -> attribute name on arith_ast.op_id.
    op_names = {'+': 'Plus', '-': 'Minus', '*': 'Star'}
    op_text = token.val
    if op_text not in op_names:
        raise AssertionError(op_text)
    op_id = getattr(arith_ast.op_id, op_names[op_text])
    right = p.ParseUntil(rbp)
    return arith_ast.ArithBinary(op_id, left, right)
def LeftAssign(p, token, left, rbp):
    """Assignment operator, e.g. x += 1 or a[i] += 1.

    Returns:
      A CompositeNode with children [target, value].

    Raises:
      tdop.ParseError: if the left-hand side is not a 'name' or 'get' node.
    """
    if left.token.type in ('name', 'get'):
        value = p.ParseUntil(rbp)
        return CompositeNode(token, [left, value])
    raise tdop.ParseError("Can't assign to %r (%s)" % (left, left.token))
def LeftComma(p, token, left, rbp):
    """Comma operator: foo, bar, baz.

    Could be the sequencing operator, or a tuple without parens.  A chain of
    commas is flattened: when `left` is already a ',' node, the new operand is
    appended to its children instead of creating a nested node.
    """
    operand = p.ParseUntil(rbp)
    if left.token.type != ',':
        return CompositeNode(token, [left, operand])
    # Keep adding more children to the existing comma node.
    left.children.append(operand)
    return left
# Binding power of the comma operator.  Function calls parse each argument at
# this power so that ',' separates arguments instead of acting as an operator.
COMMA_PREC = 1


def LeftFuncCall(p, token, left, unused_bp):
    """Function call, e.g. f(a, b).

    Returns:
      arith_ast.FuncCall(name, args), where name is the callee's `name` string
      and args is the list of parsed argument expressions.

    Raises:
      tdop.ParseError: if `left` is not an arith_ast.ArithVar (so f(x) is
        accepted but f[i](x) is not).
    """
    if not isinstance(left, arith_ast.ArithVar):
        raise tdop.ParseError("%s can't be called" % left)
    func_name = left.name  # a plain string

    args = []
    while not p.AtToken(')'):
        # Parse at COMMA_PREC so the comma is NOT grabbed as a sequence
        # operator; it is consumed below as an argument separator.
        args.append(p.ParseUntil(COMMA_PREC))
        if p.AtToken(','):
            p.Next()
    p.Eat(")")
    return arith_ast.FuncCall(func_name, args)
def MakeShellParserSpec():
    """Build the tdop.ParserSpec for shell-like / C-like arithmetic.

    Compare the registrations below with this table of C operator precedence:
    http://en.cppreference.com/w/c/language/operator_precedence

    Returns:
      The populated tdop.ParserSpec.
    """
    spec = tdop.ParserSpec()

    # 31 -- postfix operators, function call, indexing
    for handler, tokens in [
            (LeftIncDec, ['++', '--']),
            (LeftFuncCall, ['(']),
            (LeftIndex, ['['])]:
        spec.Left(31, handler, tokens)

    # 29 -- binds to everything except function call, indexing, postfix ops
    spec.Null(29, NullIncDec, ['++', '--'])
    spec.Null(29, NullPrefixOp, ['+', '!', '~', '-'])

    # Right associative: 2 ** 3 ** 2 == 2 ** (3 ** 2)
    spec.LeftRightAssoc(27, LeftBinaryOp, ['**'])

    # Left-associative binary operators, from tightest to loosest binding.
    for bp, tokens in [
            (25, ['*', '/', '%']),
            (23, ['+', '-']),
            (21, ['<<', '>>']),
            (19, ['<', '>', '<=', '>=']),
            (17, ['!=', '==']),
            (15, ['&']),
            (13, ['^']),
            (11, ['|']),
            (9, ['&&']),
            (7, ['||'])]:
        spec.Left(bp, LeftBinaryOp, tokens)

    spec.LeftRightAssoc(5, LeftTernary, ['?'])

    # Right associative: a = b = 2 is a = (b = 2)
    assign_ops = [
        '=',
        '+=', '-=', '*=', '/=', '%=',
        '<<=', '>>=', '&=', '^=', '|=',
    ]
    spec.LeftRightAssoc(3, LeftAssign, assign_ops)

    spec.Left(COMMA_PREC, LeftComma, [','])

    # 0 precedence -- doesn't bind until )
    spec.Null(0, NullParen, ['('])  # for grouping

    # -1 precedence -- never used
    spec.Null(-1, NullConstant, ['name', 'number'])
    spec.Null(-1, tdop.NullError, [')', ']', ':', 'eof'])

    return spec
def MakeParser(s):
    """Return a tdop.Parser for the expression string `s`.  Used by tests."""
    parser_spec = MakeShellParserSpec()
    tokens = tdop.Tokenize(s)
    return tdop.Parser(parser_spec, tokens)
def ParseShell(s, expected=None):
    """Parse `s`, print it next to the repr of its tree, and return the tree.

    Used by tests.

    Args:
      s: expression source text.
      expected: if not None, the repr() of the parsed tree must equal this
        string, otherwise the assertion fails.
    """
    tree = MakeParser(s).Parse()
    actual = repr(tree)
    assert expected is None or actual == expected, \
        '%r != %r' % (actual, expected)
    print('%-40s %s' % (s, actual))
    return tree
def main(argv):
    """Parse the expression given on the command line and print its tree.

    Args:
      argv: the full argument vector; argv[1] is the expression to parse.

    Returns:
      The process exit status: 0 on success, 1 if the expression fails to
      parse, 2 if no expression was given.
    """
    if len(argv) < 2:
        # Usage errors go to stderr and must not look like success.
        print('Usage: ./arith_parse.py EXPRESSION', file=sys.stderr)
        return 2

    s = argv[1]
    try:
        ParseShell(s)
    except tdop.ParseError as e:
        print('Error parsing %r: %s' % (s, e), file=sys.stderr)
        return 1
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
| @@ -0,0 +1,173 @@ | ||
| #!/usr/bin/python3 | ||
| import tdop | ||
| import arith_parse | ||
def _assertParseError(make_parser, s, error_substring=''):
    """Assert that parsing `s` raises tdop.ParseError mentioning a substring.

    Args:
      make_parser: callable that takes the source string and returns an object
        with a Parse() method.
      s: source text that is expected to be rejected.
      error_substring: text that must occur in the error message.  The default
        empty string matches any message.

    Raises:
      AssertionError: if parsing succeeds, or if the message lacks the
        expected substring.
    """
    parser = make_parser(s)
    try:
        parser.Parse()
    except tdop.ParseError as e:
        err = str(e)
        if error_substring not in err:
            raise AssertionError(
                'Expected %r to be in %r' % (error_substring, err))
        print('got expected error for %s: %s' % (s, err))
    else:
        raise AssertionError('%r should have failed' % s)
def TestArith(t_parse):
    """Arithmetic, assignment and unary-sign cases.

    Args:
      t_parse: callable invoked as t_parse(source, expected_sexpr) for every
        case, in order.
    """
    cases = [
        ('1+2+3', '(+ (+ 1 2) 3)'),
        ('1+2*3', '(+ 1 (* 2 3))'),
        ('4*(2+3)', '(* 4 (+ 2 3))'),
        ('(2+3)*4', '(* (+ 2 3) 4)'),
        ('1<2', '(< 1 2)'),
        ('x=3', '(= x 3)'),
        ('x = 2*3', '(= x (* 2 3))'),
        ('x = y', '(= x y)'),
        ('x*y - y*z', '(- (* x y) (* y z))'),
        ('x/y - y%z', '(- (/ x y) (% y z))'),
        ("x = y", "(= x y)"),
        ('2 ** 3 ** 2', '(** 2 (** 3 2))'),
        ('a = b = 10', '(= a (= b 10))'),
        ('x = ((y*4)-2)', '(= x (- (* y 4) 2))'),
        ('x - -y', '(- x (- y))'),
        ("-1 * -2", "(* (- 1) (- 2))"),
        ("-x * -y", "(* (- x) (- y))"),
        ('x - -234', '(- x (- 234))'),
        # Python doesn't allow this
        ('x += y += 3', '(+= x (+= y 3))'),
        # This is sort of nonsensical, but bash allows it.  The 1 is discarded
        # as the first element of the comma operator.
        ('x[1,2]', '(get x (, 1 2))'),
        # Python doesn't have unary +
        ('+1 - +2', '(- (+ 1) (+ 2))'),
        # LHS
        ('f[x] += 1', '(+= (get f x) 1)'),
    ]
    for source, expected in cases:
        t_parse(source, expected)
def TestBitwise(t_parse):
    """Bitwise operator cases; calls t_parse(source, expected_sexpr) for each."""
    cases = (
        ("~1 | ~2", "(| (~ 1) (~ 2))"),
        ("x & y | a & b", "(| (& x y) (& a b))"),
        ("~x ^ y", "(^ (~ x) y)"),
        ("x << y | y << z", "(| (<< x y) (<< y z))"),
        ("a ^= b-1", "(^= a (- b 1))"),
    )
    for source, expected in cases:
        t_parse(source, expected)
def TestLogical(t_parse):
    """Logical and ternary cases; calls t_parse(source, expected_sexpr)."""
    cases = (
        ("a && b || c && d", "(|| (&& a b) (&& c d))"),
        ("!a && !b", "(&& (! a) (! b))"),
        ("a != b && c == d", "(&& (!= a b) (== c d))"),
        ("a > b ? 0 : 1", "(? (> a b) 0 1)"),
        ("a > b ? x+1 : y+1", "(? (> a b) (+ x 1) (+ y 1))"),
        ("1 ? true1 : 2 ? true2 : false", "(? 1 true1 (? 2 true2 false))"),
        ("1 ? true1 : (2 ? true2 : false)", "(? 1 true1 (? 2 true2 false))"),
        ("1 ? (2 ? true : false1) : false2", "(? 1 (? 2 true false1) false2)"),
        ("1 ? 2 ? true : false1 : false2", "(? 1 (? 2 true false1) false2)"),
        # Should have higher precedence than comma
        ("x ? 1 : 2, y ? 3 : 4", "(, (? x 1 2) (? y 3 4))"),
    )
    for source, expected in cases:
        t_parse(source, expected)
def TestUnary(t_parse):
    """Unary, prefix and postfix cases; calls t_parse(source, expected_sexpr)."""
    cases = (
        ("!x", "(! x)"),
        ("x--", "(post-- x)"),
        ("x[1]--", "(post-- (get x 1))"),
        ("--x", "(-- x)"),
        ("++x[1]", "(++ (get x 1))"),
        ("!x--", "(! (post-- x))"),
        ("~x++", "(~ (post++ x))"),
        ("x++ - y++", "(- (post++ x) (post++ y))"),
        ("++x - ++y", "(- (++ x) (++ y))"),
    )
    for source, expected in cases:
        t_parse(source, expected)

    #
    # 1. x++  f()  x[]  left associative
    #    f(x)[1]++  means
    #    (++ (get (call f x) 1))
    # 2. ++x  + - ! ~   right associative
    #    -++x means (- (++ x))
def TestArrays(t_parse):
    """Array indexing cases.  Shared between shell, oil, and Python."""
    for source, expected in (
            ('x[1]', '(get x 1)'),
            ('x[a+b]', '(get x (+ a b))')):
        t_parse(source, expected)
def TestComma(t_parse):
    """A chain of comma-separated assignments parses to one flat ',' node."""
    source = 'x=1,y=2,z=3'
    expected = '(, (= x 1) (= y 2) (= z 3))'
    t_parse(source, expected)
def TestFuncCalls(t_parse):
    """Function call cases; calls t_parse(source, expected_sexpr) for each."""
    cases = [
        ('x = y(2)*3 + y(4)*5', '(= x (+ (* (call y 2) 3) (* (call y 4) 5)))'),
        ('x(1,2)+y(3,4)', '(+ (call x 1 2) (call y 3 4))'),
        ('x(a,b,c[d])', '(call x a b (get c d))'),
        ('x(1,2)*j+y(3,4)*k+z(5,6)*l',
         '(+ (+ (* (call x 1 2) j) (* (call y 3 4) k)) (* (call z 5 6) l))'),
        ('print(test(2,3))', '(call print (call test 2 3))'),
        ('print("x")', '(call print x)'),
        ('min(255,n*2)', '(call min 255 (* n 2))'),
        ('c = pal[i*8]', '(= c (get pal (* i 8)))'),
    ]
    for source, expected in cases:
        t_parse(source, expected)
def TestErrors(p):
    """Inputs that must be rejected with a ParseError.

    Args:
      p: callable that builds a parser from a source string; it is passed
        straight through to _assertParseError.
    """
    error_str = "can't be used in prefix"

    # Each entry holds the arguments that follow `p` in the call to
    # _assertParseError: the source, and optionally a required substring of
    # the error message.
    cases = [
        ('}',),
        (']',),
        ('{',),  # depends on language
        ("x+1 = y", "Can't assign"),
        ("(x+1)++", "Can't assign"),
        # Should be an EOF error
        ('foo ? 1 :', 'Unexpected end'),
        ('foo ? 1 ', 'expected :'),
        ('%', "can't be used in prefix position"),
        ('}',),
        ('{',),
        (']', error_str),
        ('1 ( 2', "can't be called"),
        ('(x+1) ( 2 )', "can't be called"),
        # Not checked yet: '1 ) 2'
        ('1 [ 2 ]', "can't be indexed"),
    ]
    for case in cases:
        _assertParseError(p, *case)
def main():
    """Run every test group against the shell arithmetic parser."""
    t_parse = arith_parse.ParseShell
    make_parser = arith_parse.MakeParser

    parse_suites = (
        TestArith,
        TestBitwise,
        TestLogical,
        TestUnary,
        TestArrays,
        TestFuncCalls,
        TestComma,
    )
    for suite in parse_suites:
        suite(t_parse)

    TestErrors(make_parser)


if __name__ == '__main__':
    main()
| @@ -0,0 +1,48 @@ | ||
| #!/usr/bin/env python3 | ||
| """ | ||
| asdl_tool.py | ||
| """ | ||
| import sys | ||
| import asdl | ||
| import arith_parse | ||
| import py_meta | ||
| import encode | ||
def main(argv):
    """Dispatch on the action named in argv[1].

    Actions:
      py SCHEMA_PATH
        Parse the ASDL schema, generate Python types onto this module, and
        print the module's names.
      arith-encode EXPRESSION OUT_PATH
        Parse the arithmetic expression and write its binary encoding to
        OUT_PATH.

    Args:
      argv: the full argument vector, including the program name.

    Raises:
      RuntimeError: if the action or one of its arguments is missing, or the
        action is unknown.  The __main__ guard reports these as FATAL.
    """
    if len(argv) < 2:
        raise RuntimeError('Action required')
    action = argv[1]

    if action == 'py':
        if len(argv) < 3:
            raise RuntimeError('Usage: asdl_tool.py py SCHEMA_PATH')
        schema_path = argv[2]

        module = asdl.parse(schema_path)
        root = sys.modules[__name__]
        py_meta.MakeTypes(module, root)
        print(dir(root))

    elif action == 'arith-encode':
        if len(argv) < 4:
            raise RuntimeError(
                'Usage: asdl_tool.py arith-encode EXPRESSION OUT_PATH')
        expr = argv[2]
        out_path = argv[3]

        obj = arith_parse.ParseShell(expr)
        enc = encode.Params()
        with open(out_path, 'wb') as f:
            out = encode.BinOutput(f)
            encode.EncodeRoot(obj, enc, out)

    else:
        raise RuntimeError('Invalid action %r' % action)


if __name__ == '__main__':
    try:
        main(sys.argv)
    except RuntimeError as e:
        print('FATAL: %s' % e, file=sys.stderr)
        sys.exit(1)
| @@ -0,0 +1,268 @@ | ||
| #!/usr/bin/python | ||
| """ | ||
| encode.py | ||
| """ | ||
| import sys | ||
| import asdl | ||
| import py_meta | ||
_DEFAULT_ALIGNMENT = 4


class BinOutput:
    """Write aligned blocks here.  Keeps track of block indexes for refs."""

    def __init__(self, f, alignment=_DEFAULT_ALIGNMENT):
        """
        Args:
          f: binary file object; it must support write(), seek() and tell().
          alignment: block size in bytes.  Every chunk passed to Write() must
            be a multiple of it.
        """
        self.f = f
        # Index of the next block to be written; Write() returns it as the
        # ref of the chunk it writes.
        self.last_block = 0
        self.alignment = alignment

    def WriteRootRef(self, chunk):
        """Patch the 3-byte root ref into the header.

        The file position is restored afterwards, so a later Write() still
        appends at the end instead of overwriting the blocks that follow the
        header.
        """
        assert len(chunk) == 3
        saved_pos = self.f.tell()
        self.f.seek(5)  # seek past 'OHP\x01\x04'
        self.f.write(chunk)
        self.f.seek(saved_pos)

    def Write(self, chunk):
        """Write an already padded chunk.

        Returns:
          The block index of the beginning of the chunk, to be used as a ref.
        """
        # Input should be padded
        assert len(chunk) % self.alignment == 0
        self.f.write(chunk)

        ref = self.last_block
        self.last_block += len(chunk) // self.alignment  # int division
        return ref


class Params:
    """Encoding parameters.

    Hm most of these settings should be per-field, expressed in the schema.
    The only global one is the ref/pointer alignment.  4 and 8 are the most
    likely choices, and 4 is probably fine, because you have 64 MB of
    addressable memory with 24 bit pointers.
    """

    def __init__(self, alignment=_DEFAULT_ALIGNMENT):
        self.alignment = alignment
        self.pointer_type = 'uint32_t'

        self.tag_width = 1  # for ArithVar vs ArithWord.
        self.ref_width = 3  # 24 bits
        self.int_width = 3  # 24 bits
        # used for fd, line/col
        # also I guess stuff like SimpleCommand
        self.index_width = 2  # 16 bits, e.g. max 64K entries in an array

        # Exclusive upper bounds: a value fits in `width` bytes only if it is
        # strictly smaller than 1 << (width * 8).
        self.max_int = 1 << (self.int_width * 8)
        self.max_ref = 1 << (self.ref_width * 8)
        self.max_index = 1 << (self.index_width * 8)
        self.max_tag = 1 << (self.tag_width * 8)

    def _AppendLittleEndian(self, n, width, chunk):
        """Append the low `width` bytes of n, least significant byte first."""
        for _ in range(width):
            chunk.append(n & 0xFF)
            n >>= 8

    def Tag(self, i, chunk):
        """Append a constructor tag as a single byte.

        Raises:
          AssertionError: if i does not fit in tag_width bytes.
        """
        if not 0 <= i < self.max_tag:
            raise AssertionError('Invalid id %r' % i)
        chunk.append(i & 0xFF)

    def Int(self, n, chunk):
        """Append n as int_width little-endian bytes.

        Negative values are not rejected; their low bytes are written as-is.

        Raises:
          RuntimeError: if n is too big to fit in int_width bytes.
        """
        if n >= self.max_int:
            raise RuntimeError(
                '%d is too big to fit in %d bytes' % (n, self.int_width))
        self._AppendLittleEndian(n, self.int_width, chunk)

    def Ref(self, n, chunk):
        """Append the block ref n as ref_width little-endian bytes.

        Raises:
          RuntimeError: if n is too big to fit in ref_width bytes.
        """
        if n >= self.max_ref:
            raise RuntimeError(
                '%d is too big to fit in %d bytes' % (n, self.ref_width))
        self._AppendLittleEndian(n, self.ref_width, chunk)

    def _Pad(self, chunk):
        """Zero-pad chunk in place to a multiple of the alignment; return it."""
        remainder = len(chunk) % self.alignment
        if remainder:
            chunk.extend(b'\x00' * (self.alignment - remainder))
        return chunk

    # Right now all strings are references.  Later they could be inline.
    def Str(self, s, chunk):
        """Append s as UTF-8 followed by a NUL terminator."""
        # NOTE: For variable, proc, and function names, it could make sense to
        # pre-compute and store a hash value.  They will be looked up in the
        # stack and so forth.
        # - You could also return a obj number or object ID.
        chunk.extend(s.encode('utf-8'))
        chunk.append(0)  # NUL terminator

    def PaddedStr(self, s):
        """Return a new padded chunk holding s as a NUL-terminated string."""
        # NOTE:
        # - The encoder could also have an intern table to save space.
        # - Str and PaddedStr will both return char* ?  Should we allow them
        #   to VARY with the same schema, is a value/ref type PART of the
        #   schema?  It's basically small size optimization and "flexible
        #   array" optimization.  I think you want that possibility.
        chunk = bytearray()
        self.Str(s, chunk)
        return self._Pad(chunk)

    def Bytes(self, buf, chunk):
        """Append buf with an index_width-byte little-endian length prefix.

        Args:
          buf: a str (encoded as UTF-8) or a bytes-like object.
          chunk: bytearray to append to.

        Raises:
          RuntimeError: if the encoded data is too long for the prefix.
        """
        data = buf.encode('utf-8') if isinstance(buf, str) else bytes(buf)
        # The prefix counts encoded bytes, not characters.
        n = len(data)
        if n >= self.max_index:
            raise RuntimeError("bytes object is too long (%d)" % n)
        self._AppendLittleEndian(n, self.index_width, chunk)
        chunk.extend(data)

    def PaddedBytes(self, buf):
        """Return a new padded chunk holding the length-prefixed bytes."""
        chunk = bytearray()
        self.Bytes(buf, chunk)
        return self._Pad(chunk)

    def PaddedBlock(self, chunk):
        """Pad an already built chunk in place and return it."""
        return self._Pad(chunk)
def EncodeArray(obj_list, item_desc, enc, out):
    """Encode a homogeneous array as one block and write it.

    The block starts with a length prefix, followed by one entry per item: the
    integer itself for ints, the enum_id for simple sum types, and a ref to a
    separately written object for everything else.

    Args:
      obj_list: List of Obj values
      item_desc: type descriptor shared by all items
      enc: encoding params
      out: output file

    Returns:
      ref of the array block
    """
    array_chunk = bytearray()
    enc.Int(len(obj_list), array_chunk)  # Length prefix

    if isinstance(item_desc, asdl.IntType):
        for value in obj_list:
            enc.Int(value, array_chunk)

    elif isinstance(item_desc, asdl.Sum) and asdl.is_simple(item_desc):
        for enum_value in obj_list:
            enc.Int(enum_value.enum_id, array_chunk)

    else:
        # A simple value is either an int, enum, or pointer.  (Later:
        # Iter<Str> might be possible for locality.)
        assert isinstance(item_desc, (asdl.Sum, asdl.Product)), item_desc

        # This is like vector<T*>
        # Later:
        # - Product types can be put in line
        # - Sum types can even be put in line, if you have List<T> rather than
        #   Array<T>.  Array implies O(1) random access; List doesn't.
        for child in obj_list:
            # Recursive call: the child is written before this array.
            child_ref = EncodeObj(child, enc, out)
            enc.Ref(child_ref, array_chunk)

    return out.Write(enc.PaddedBlock(array_chunk))
def EncodeObj(obj, enc, out):
    """Encode a compound object and everything it refers to.

    Algorithm: Depth first, post-order traversal.  Children are written before
    their parents, so the first block written is the first leaf and the last
    one is the root.

    Args:
      obj: Obj to encode
      enc: encoding params
      out: output file

    Returns:
      ref: Reference to the block written for obj itself.

    Raises:
      AssertionError: if obj is not a py_meta.CompoundObj, or if it has an
        optional field whose type is neither a product nor a non-simple sum.
    """
    this_chunk = bytearray()
    assert isinstance(obj, py_meta.CompoundObj), \
        '%s is not a compound obj (%r)' % (obj, obj.__class__)

    # Constructors of a sum type are distinguished by a leading tag.
    if isinstance(obj.DESCRIPTOR, asdl.Constructor):
        enc.Tag(obj.tag, this_chunk)

    for name in obj.FIELDS:  # encode in order
        desc = obj.DESCRIPTOR_LOOKUP[name]
        field_val = getattr(obj, name)

        # TODO:
        # - Float would be inline, etc.
        # - Optional value: write all enc.Ref(0)?  This is 'nullptr'.
        # - Repeated value: write them all adjacent to each other?

        # INLINE
        if isinstance(desc, asdl.IntType):
            enc.Int(field_val, this_chunk)

        elif isinstance(desc, asdl.Sum) and asdl.is_simple(desc):
            # Encode enums as integers.  TODO later: Don't use 3 bytes!  Can
            # use 1 byte for most enums.
            enc.Int(field_val.enum_id, this_chunk)

        # Write variable length field first, assuming that it's a ref/pointer.
        # TODO: allow one inline, hanging string or array per record.
        elif isinstance(desc, asdl.StrType):
            ref = out.Write(enc.PaddedStr(field_val))
            enc.Ref(ref, this_chunk)

        elif isinstance(desc, asdl.ArrayType):
            ref = EncodeArray(field_val, desc.desc, enc, out)
            enc.Ref(ref, this_chunk)

        elif isinstance(desc, asdl.MaybeType):
            item_desc = desc.desc
            # Only ref-encoded types can be optional: a missing value is
            # written as ref 0.
            if isinstance(item_desc, asdl.Sum):
                is_ref_type = not asdl.is_simple(item_desc)
            else:
                is_ref_type = isinstance(item_desc, asdl.Product)
            if not is_ref_type:
                # Format explicitly: AssertionError does not interpolate its
                # arguments the way logging calls do.
                raise AssertionError(
                    "Currently not encoding simple optional types: %s" %
                    (field_val,))

            if field_val is None:
                enc.Ref(0, this_chunk)
            else:
                ref = EncodeObj(field_val, enc, out)
                enc.Ref(ref, this_chunk)

        else:
            # Recursive call for child records.  Write children before
            # parents.
            ref = EncodeObj(field_val, enc, out)
            enc.Ref(ref, this_chunk)

    # Write the parent record
    return out.Write(enc.PaddedBlock(this_chunk))
def EncodeRoot(obj, enc, out):
    """Write the file header, encode obj, then patch in the root ref.

    The header takes two blocks: the magic plus version, then the alignment
    byte followed by a 3 byte placeholder that WriteRootRef overwrites.
    """
    header_ref = out.Write(b'OHP\x01')  # header, version 1
    assert header_ref == 0

    # 4-byte alignment, then 3 byte placeholder for the root ref.
    placeholder_ref = out.Write(b'\4\0\0\0')
    assert placeholder_ref == 1

    root_ref = EncodeObj(obj, enc, out)
    root_chunk = bytearray()
    enc.Ref(root_ref, root_chunk)
    out.WriteRootRef(root_chunk)
| @@ -0,0 +1,32 @@ | ||
| #!/usr/bin/python -S | ||
| """ | ||
| encode_test.py: Tests for encode.py | ||
| """ | ||
| import unittest | ||
| import encode # module under test | ||
class EncoderTest(unittest.TestCase):
    """Checks the byte layout produced by encode.Params."""

    def testEncoder(self):
        params = encode.Params(16)

        int_chunk = bytearray()
        params.Int(1, int_chunk)
        self.assertEqual(b'\x01\x00\x00', int_chunk)

        # 2 byte length -- max 64K entries -- then the payload, zero-padded
        # to the 16 byte alignment.
        bytes_chunk = params.PaddedBytes('0123456789')
        self.assertEqual(
            b'\x0A\x000123456789\x00\x00\x00\x00', bytes(bytes_chunk))

        # No length prefix here: the string is NUL-terminated, then padded.
        str_chunk = params.PaddedStr('0123456789')
        self.assertEqual(
            b'0123456789\x00\x00\x00\x00\x00\x00', bytes(str_chunk))

        #p.Block([b'a', b'bc'])


if __name__ == '__main__':
    unittest.main()
| @@ -0,0 +1,109 @@ | ||
| -- ASDL's six builtin types are identifier, int, string, bytes, object, singleton | ||
| -- NOTE we're not using identifier/object/singleton/bytes | ||
| -- Python only uses bytes/object/singleton once. | ||
-- identifier is used all over.  Why?  I have different rules
-- for functions and vars.  case/for/assign have vars.
| -- TODO: | ||
| -- How to encode position information? | ||
| -- FuncCall inside ${}, in addition to arithmetic context? | ||
| -- Well I guess you can do this: echo $(( f(x,y) ))a. It is a little more | ||
| -- annoying. | ||
module osh
{
  source_location = (string path, int line, int col, int length)
  token = (string value, source_location loc)

  id = Foo | Bar  -- TODO: you need a placeholder to say this is generated
                  -- by another tool.  Suppress the error.

  -- TODO: generate the following:
  -- arith_op_id, bool_op_id, vpre_op_id, array_op_id, vpost_op_id

  bracket_op =
    ArrayOp(id op_id)  -- * or @
  | ArrayIndex(arith_expr a)

  suffix_op =
    VarUnary(id op_id, word arg)
  | VarReplace(word pat, word? replace)
  | VarSlice(arith_expr start, arith_expr? len)

  word_part =
    ArrayLiteralPart(word* words)
  | LiteralPart(token t)
  | EscapedLiteralPart(token t)
  | DoubleQuotedPart(word_part* parts)
  | VarSubPart(string name,
               id prefix_op,
               bracket_op bracket_op,
               suffix_op suffix_op)
  | TildeSubPart(string prefix)
  | CommandSubPart(command c)
  | ArithSubPart(arith_expr a)

  -- NOTE: Could put | Token(token t) as an optimization.
  word = Word(word_part* parts)

  -- NOTE: Could put | Token(token t) as an optimization.
  arith_expr =
    ArithVar(string name)  -- eval variable
  | ArithWord(word w)  -- word to be evaluated as a constant
  | ArithUnary(id op_id, arith_expr a)
  | ArithBinary(id op_id, arith_expr left, arith_expr right)
  | TernaryOp(arith_expr cond, arith_expr true_expr, arith_expr false_expr)
  | FuncCall(arith_expr func, arith_expr* args)

  bool_expr =
    BoolBinary(word left, word right)
  | BoolUnary(word child)
  | LogicalNot(bool_expr b)
  | LogicalAnd(bool_expr left, bool_expr right)
  | LogicalOr(bool_expr left, bool_expr right)

  -- NOTE: To reprint the here doc, I guess we need the whole delimiter?  And
  -- then do_expansion is calculated from that.
  redir =
    HereDoc(id op_id, word arg_word, int fd, int do_expansion)
  | Redirect(id op_id, word arg_word, int fd)

  lvalue =
    LeftVar(string name)
  | LeftIndex(string name, arith_expr index)

  scope = Global | Local
  var_flags = Export | Readonly
  binding = (lvalue lhs, word rhs)

  and_or = DAmp | DPipe

  -- |& in osh; |- in oil.
  pipe_op = Pipe | PipeAndStderr

  case_arm = (word* pat, command* action)

  command =
    NoOp
  | SimpleCommand(word* words, redir* redirects, binding* more_env)
  | Assignment(scope scope,
               var_flags flags,
               word* names,  -- names mentioned without a binding
               binding* bindings)
  | DParen(arith_expr a)
  | DBracket(bool_expr b)
  | Block(command* commands)
  | Subshell(command* commands)
  | Fork(command* commands)  -- shell only allows one command
  | Pipeline(command* commands, int negated, pipe_op* op)
  | AndOr(command* commands, and_or ops)
  -- NOTE: Can't have multiple var length?  Maybe it's just a single command.
  | ForEach(string var, word* words, command* body)
  | ForExpr(arith_expr init, arith_expr test, arith_expr update, command* body)
  -- NOTE: in oil, we will have expression variants?
  | While(command cond, command* body)
  | Until(command cond, command* body)
  | If(command test, command* body, command* orelse)
  | Case(string var_name, case_arm* cases)
  | FuncDef(string name, command* body)
}
| @@ -0,0 +1,52 @@ | ||
| #include <string> | ||
| #include <stdio.h> | ||
| #include <stdlib.h> | ||
| #include "osh.asdl.h" | ||
// Returns the root ref (a block index), or -1 if the header is invalid.
//
// Header layout, as written by EncodeRoot() / WriteRootRef() in encode.py:
//   bytes 0-2: 'O' 'H' 'P'
//   byte  3:   format version (1)
//   byte  4:   alignment (4)
//   bytes 5-7: root ref, 24 bits, little endian
//
// The caller must make sure that image holds at least 8 bytes.
int GetRootRef(uint8_t* image) {
  if (image[0] != 'O') return -1;
  if (image[1] != 'H') return -1;
  if (image[2] != 'P') return -1;
  if (image[3] != 1) return -1;  // version
  if (image[4] != 4) return -1;  // alignment

  return image[5] + (image[6] << 8) + (image[7] << 16);
}
| int main(int argc, char **argv) { | ||
| if (argc == 0) { | ||
| printf("Expected filename\n"); | ||
| return 1; | ||
| } | ||
| FILE *f = fopen(argv[1], "rb"); | ||
| if (!f) { | ||
| printf("Error opening %s", argv[1]); | ||
| return 1; | ||
| } | ||
| fseek(f, 0, SEEK_END); | ||
| size_t num_bytes = ftell(f); | ||
| fseek(f, 0, SEEK_SET); //same as rewind(f); | ||
| uint8_t* image = static_cast<uint8_t*>(malloc(num_bytes + 1)); | ||
| fread(image, num_bytes, 1, f); | ||
| fclose(f); | ||
| image[num_bytes] = 0; | ||
| printf("Read %zu bytes\n", num_bytes); | ||
| int root_ref = GetRootRef(image); | ||
| if (root_ref == -1) { | ||
| printf("Invalid image\n"); | ||
| return 1; | ||
| } | ||
| // Hm we could make the root ref be a BYTE offset? | ||
| int alignment = 4; | ||
| printf("alignment: %d root: %d\n", alignment, root_ref); | ||
| auto base = reinterpret_cast<uint32_t*>(image); | ||
| size_t offset = alignment * root_ref; | ||
| auto expr = reinterpret_cast<arith_expr_t*>(image + offset); | ||
| //PrintExpr(base, *expr, 0); | ||
| } |
| @@ -0,0 +1,249 @@ | ||
| #!/usr/bin/env python3 | ||
| """ | ||
| py_meta.py | ||
| Parse an ASDL file, and generate Python classes using metaprogramming. | ||
| All objects descends from Obj, which allows them to be dynamically type-checked | ||
| and serialized. Objects hold type descriptors, which are defined in asdl.py. | ||
| Usage: | ||
| from osh import ast | ||
| n1 = ast.ArithVar() | ||
| n2 = ast.ArrayLiteralPart() | ||
| API Notes: | ||
| The Python AST module doesn't make any distinction between simple and compound | ||
| sum types. (Simple types have no constructors with fields.) | ||
| C++ has to make this distinction for reasons of representation. It's more | ||
| efficient to hold an enum value than a pointer to a class with an enum value. | ||
| In Python I guess that's not quite true. | ||
| So in order to serialize the correct bytes for C++, our Python metaclass | ||
| implementation has to differ from what's generated by asdl_c.py. More simply | ||
| put: an op is Add() and not Add, an instance of a class, not an integer value. | ||
| """ | ||
| import sys | ||
| import pprint | ||
| import asdl | ||
def _CheckType(value, expected_desc):
    """Is value of type expected_desc?

    Args:
      value: Obj or primitive type
      expected_desc: instance of asdl.Product, asdl.Sum, asdl.StrType,
        asdl.IntType, ArrayType, MaybeType, etc.

    Returns:
      True if value matches the descriptor, False otherwise.
    """
    # Optional: None always matches, anything else is checked against the
    # wrapped descriptor.
    if isinstance(expected_desc, asdl.MaybeType):
        return value is None or _CheckType(value, expected_desc.desc)

    # Array: must be a list whose entries all match the item descriptor.
    if isinstance(expected_desc, asdl.ArrayType):
        if not isinstance(value, list):
            return False
        return all(_CheckType(item, expected_desc.desc) for item in value)

    if isinstance(expected_desc, asdl.StrType):
        return isinstance(value, str)

    if isinstance(expected_desc, asdl.IntType):
        return isinstance(value, int)

    # Everything else is identified by the DESCRIPTOR on the value's class.
    missing = object()
    actual_desc = getattr(value.__class__, 'DESCRIPTOR', missing)
    if actual_desc is missing:
        return False  # it's not of the right type

    if isinstance(expected_desc, asdl.Product):
        return actual_desc is expected_desc

    if isinstance(expected_desc, asdl.Sum):
        if asdl.is_simple(expected_desc):
            return actual_desc is expected_desc
        # It has to be one of the alternatives
        return any(actual_desc is cons for cons in expected_desc.types)

    return False
class Obj:
    """Base class of the generated objects; holds the type descriptor slot."""

    # NOTE: We're using CAPS for these static fields, since they are constant
    # at runtime after metaprogramming.

    # Used for type checking
    DESCRIPTOR = None
class SimpleObj(Obj):
    """An enum value, identified by an integer id and a name.

    Other simple objects: int, str, maybe later a float.
    """

    def __init__(self, enum_id, name):
        self.enum_id = enum_id
        self.name = name

    def __repr__(self):
        class_name = self.__class__.__name__
        return '<%s %s %s>' % (class_name, self.name, self.enum_id)
class CompoundObj(Obj):
    """A compound object with fields, e.g. a Product or Constructor.

    Uses some metaprogramming.  Every attribute assignment is type-checked
    against the field's descriptor, and the object remembers which fields have
    been assigned so that this can be verified before encoding.
    """
    FIELDS = []  # ordered list of field names
    DESCRIPTOR_LOOKUP = {}  # field name: (asdl.Type | int | str)

    def __init__(self, *args, **kwargs):
        """Initialize the fields positionally and/or by keyword.

        The user must specify ALL required fields or NONE.  With no arguments
        at all, nothing is assigned and the fields can be set later.
        """
        self._assigned = {f: False for f in self.FIELDS}
        if args or kwargs:
            self._Init(args, kwargs)

    def _Init(self, args, kwargs):
        """Assign fields from the constructor arguments.

        Optional fields that were not given are set to None.

        Raises:
          TypeError: too many positional arguments, or an unknown field name.
          AssertionError: a field was given twice, or a value has the wrong
            type.
          ValueError: a required field was not given.
        """
        class_name = self.__class__.__name__
        if len(args) > len(self.FIELDS):
            raise TypeError(
                '%s takes at most %d positional arguments (%d given)' %
                (class_name, len(self.FIELDS), len(args)))

        # __setattr__ type-checks the value and records the assignment.
        for name, val in zip(self.FIELDS, args):
            setattr(self, name, val)

        for name, val in kwargs.items():
            if name not in self._assigned:
                raise TypeError('%s has no field %r' % (class_name, name))
            if self._assigned[name]:
                raise AssertionError(
                    'Duplicate assignment of field %r' % name)
            setattr(self, name, val)

        for name in self.FIELDS:
            if self._assigned[name]:
                continue
            desc = self.DESCRIPTOR_LOOKUP[name]
            if isinstance(desc, asdl.MaybeType):
                setattr(self, name, None)  # Maybe values can be None
            else:
                # If anything was set, then required fields raise an error.
                raise ValueError(
                    "Field %r is required and wasn't initialized" % name)

    def CheckUnassigned(self):
        """See if there are unassigned fields, for later encoding.

        Raises:
          ValueError: if any non-optional field has not been assigned.
        """
        unassigned = [
            name for name in self.FIELDS
            if not self._assigned[name] and
            not isinstance(self.DESCRIPTOR_LOOKUP[name], asdl.MaybeType)
        ]
        if unassigned:
            raise ValueError("Fields %r weren't assigned" % unassigned)

    def __setattr__(self, name, value):
        """Type-check the value, store it, and mark the field as assigned.

        Raises:
          AttributeError: if name is not one of this type's fields.
          AssertionError: if value doesn't match the field's descriptor.
        """
        # The bookkeeping dict itself is not a schema field.
        if name == '_assigned':
            self.__dict__[name] = value
            return
        if name not in self.DESCRIPTOR_LOOKUP:
            raise AttributeError(
                '%s has no field %r' % (self.__class__.__name__, name))
        desc = self.DESCRIPTOR_LOOKUP[name]
        if not _CheckType(value, desc):
            raise AssertionError("Field %r should be of type %s, got %r" %
                                 (name, desc, value))
        self._assigned[name] = True  # check this later when encoding
        self.__dict__[name] = value

    def __repr__(self):
        return '<%s %s>' % (self.__class__.__name__,
                            pprint.pformat(self.__dict__))
| def _MakeFieldDescriptors(module, fields): | ||
| desc_lookup = {} | ||
| for f in fields: | ||
| # look up type by name | ||
| primitive_desc = asdl.DESCRIPTORS_BY_NAME.get(f.type) | ||
| desc = primitive_desc or module.types[f.type] | ||
| # It's either a primitive type or sum type | ||
| if primitive_desc is None: | ||
| assert (isinstance(desc, asdl.Sum) or | ||
| isinstance(desc, asdl.Product)), desc | ||
| # Wrap descriptor here. Then we can type check. | ||
| # And then encode too. | ||
| assert not (f.opt and f.seq), f | ||
| if f.opt: | ||
| desc = asdl.MaybeType(desc) | ||
| if f.seq: | ||
| desc = asdl.ArrayType(desc) | ||
| desc_lookup[f.name] = desc | ||
| class_attr = { | ||
| 'FIELDS': [f.name for f in fields], | ||
| 'DESCRIPTOR_LOOKUP': desc_lookup, | ||
| } | ||
| return class_attr | ||
def MakeTypes(module, root):
    """Create a Python class for each definition in `module` and attach it to `root`.

    Args:
      module: asdl.Module
      root: an object/package to add types to
    """
    for definition in module.dfns:
        type_name = definition.name
        schema = definition.value

        if isinstance(schema, asdl.Sum):
            if asdl.is_simple(schema):
                # An object without fields, which can be stored inline.
                enum_cls = type(type_name, (SimpleObj,), {'DESCRIPTOR': schema})
                setattr(root, type_name, enum_cls)

                # One instance per alternative, exposed as a static attribute
                # like op_id.Plus, op_id.Minus.  Ids start at 1.
                for enum_id, cons in enumerate(schema.types, 1):
                    setattr(cls_ := enum_cls, cons.name, cls_(enum_id, cons.name)) \
                        if False else setattr(
                            enum_cls, cons.name, enum_cls(enum_id, cons.name))
            else:
                # e.g. for arith_expr: a shared base class, then one subclass
                # per alternative.
                base_class = type(type_name, (CompoundObj,), {})
                setattr(root, type_name, base_class)

                for tag, cons in enumerate(schema.types, 1):
                    attrs = _MakeFieldDescriptors(module, cons.fields)
                    attrs['DESCRIPTOR'] = cons
                    # TODO: Allow setting these integers.  Tags restart for
                    # every sum type, but that's OK because fields are
                    # strongly typed.
                    attrs['tag'] = tag  # zero reserved?
                    setattr(root, cons.name,
                            type(cons.name, (base_class,), attrs))

        elif isinstance(schema, asdl.Product):
            attrs = _MakeFieldDescriptors(module, schema.fields)
            attrs['DESCRIPTOR'] = schema
            setattr(root, type_name, type(type_name, (CompoundObj,), attrs))

        else:
            raise AssertionError(schema)
| @@ -0,0 +1,16 @@ | ||
| #!/usr/bin/env python3 | ||
| """ | ||
| py_meta_test.py: Tests for py_meta.py | ||
| """ | ||
| import re | ||
| import unittest | ||
| import py_meta # module under test | ||
class AsdlTest(unittest.TestCase):
    """Placeholder test case for py_meta; no tests are defined yet."""
# Run the test suite when this file is executed directly.
if __name__ == '__main__':
    unittest.main()
| @@ -0,0 +1,219 @@ | ||
| #!/bin/bash | ||
| # | ||
| # Automation for ASDL. | ||
| # | ||
| # Usage: | ||
| # ./run.sh <function name> | ||
# Strict mode: expanding an unset variable is an error, a pipeline fails if
# any stage fails, and the script exits on the first failing command.
set -o nounset
set -o pipefail
set -o errexit
| # Run unit tests. | ||
unit() {
  # Usage: unit [all]
  #
  # With no argument, run the fixed list of test scripts below.  With 'all',
  # run every asdl/*_test.py with a banner before each one.  (The glob loop
  # used to sit after an unconditional 'return' and could never run.)
  local mode=${1:-}

  if [[ $mode == all ]]; then
    local t
    for t in asdl/*_test.py; do
      echo -----
      echo "$t"
      echo -----
      "$t"
    done
    return
  fi

  asdl/arith_ast_test.py
  asdl/py_meta_test.py
  asdl/encode_test.py
}
asdl-arith-encode() {
  # Usage: asdl-arith-encode EXPR [OUT]
  #
  # Run asdl_demo.py's arith-encode command on EXPR with OUT (default
  # _tmp/arith.bin) as its output argument, then list and hexdump OUT.
  local expr="$1"
  local out=${2:-_tmp/arith.bin}

  # Quoted so an output path containing spaces or glob characters stays one
  # argument.
  asdl/asdl_demo.py arith-encode "$expr" "$out"
  ls -l "$out"
  hexdump "$out"
}
asdl-py() {
  # Usage: asdl-py SCHEMA
  #
  # Run asdl_demo.py's py command on the given schema file.
  local schema=$1
  # Quoted so a schema path with spaces is passed as a single argument.
  asdl/asdl_demo.py py "$schema"
}
asdl-cpp() {
  # Usage: asdl-cpp [SCHEMA [OUT_HEADER]]
  #
  # Write the output of 'gen_cpp.py cpp SCHEMA' to OUT_HEADER, then print the
  # file's listing and line count.
  local schema=${1:-asdl/arith.asdl}
  local src=${2:-_tmp/arith.asdl.h}

  # Quoted so paths with spaces or glob characters are not split or expanded.
  asdl/gen_cpp.py cpp "$schema" > "$src"
  ls -l "$src"
  wc -l "$src"
}
py-cpp() {
  # Usage: py-cpp [SCHEMA]
  #
  # Run both asdl-py and asdl-cpp on SCHEMA; the C++ header is written to
  # _tmp/<schema file name>.h.
  local schema=${1:-asdl/arith.asdl}

  asdl-py "$schema"
  # Inner and outer quotes keep a schema path with spaces intact.
  asdl-cpp "$schema" "_tmp/$(basename "$schema").h"
}
| # | ||
| # Test specific schemas | ||
| # | ||
arith-both() {
  # Run py-cpp on the arith schema.
  local schema=asdl/arith.asdl
  py-cpp $schema
}
osh-both() {
  # Run py-cpp on the osh schema.
  local schema=asdl/osh.asdl
  py-cpp $schema
}
| # | ||
| # Native Code | ||
| # | ||
# Path to the clang++ binary used by cxx, disassemble and llvm below.
readonly CLANG=~/install/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04/bin/clang++
cxx() {
  # Usage: cxx ARGS...
  #
  # Invoke the C++ compiler in C++11 mode with -Winline; every argument is
  # passed through unchanged.
  #local CXX=c++
  local CXX=$CLANG

  # Change to '-O2' for an optimized build.  (A first opt_flag assignment was
  # immediately overwritten by this one, so only -O0 ever took effect.)
  local opt_flag='-O0'

  # -Winline
  # http://stackoverflow.com/questions/10631283/how-will-i-know-whether-inline-function-is-actually-replaced-at-the-place-where

  # Quoted so a compiler path containing spaces still runs.
  "$CXX" -Winline "$opt_flag" -std=c++11 "$@"
}
| # http://www.commandlinefu.com/commands/view/6004/print-stack-trace-of-a-core-file-without-needing-to-enter-gdb-interactively | ||
| # http://stackoverflow.com/questions/4521015/how-to-pass-arguments-and-redirect-stdin-from-a-file-to-program-run-in-gdb | ||
gdb-trace() {
  # Run the given command line under gdb in batch mode and print a backtrace
  # afterwards; stderr is merged into stdout.
  # -args goes before the executable
  gdb -batch \
    -ex "run" \
    -ex "bt" \
    -args "$@" 2>&1
}
build-demo() {
  # Usage: build-demo NAME
  #
  # Generate _tmp/NAME.asdl.h from asdl/NAME.asdl, then compile
  # asdl/NAME_demo.cc into the executable _tmp/NAME_demo.
  local name=$1
  local schema=asdl/${name}.asdl

  # Generate C++ code.  Expansions are quoted so an unusual NAME cannot be
  # split into several arguments.
  asdl-cpp "$schema" "_tmp/${name}.asdl.h"

  local bin=_tmp/${name}_demo
  cxx -I _tmp -o "$bin" "asdl/${name}_demo.cc"
  chmod +x "$bin"
}
arith-demo() {
  # Encode '7 + 9' to _tmp/arith.bin, build the arith demo binary, and run it
  # on that file under gdb-trace.
  local name=arith
  local data=_tmp/${name}.bin

  # Write a binary
  asdl-arith-encode '7 + 9' "$data"

  local bin=_tmp/${name}_demo
  # build-demo reads only the schema name and derives the binary path itself,
  # so the extra $bin argument that used to be passed here was ignored.
  build-demo "$name"

  set -x
  gdb-trace "$bin" "$data"
  #$bin $data
}
osh-demo() {
  # Generate and compile the demo for the osh schema.
  local name=osh
  build-demo $name
}
a2() {
  # Encode a larger expression and run the already-built demo on it directly.
  local encoded=_tmp/a2.bin
  local expr='foo + 99 - f(1,2,3+4) * 123'

  asdl-arith-encode "$expr" $encoded
  _tmp/arith_demo $encoded
}
a3() {
  # Encode a function-call expression and run the demo on it under gdb-trace.
  local encoded=_tmp/a3.bin
  local expr='g(x,2)'

  asdl-arith-encode "$expr" $encoded
  gdb-trace _tmp/arith_demo $encoded
}
a4() {
  # Encode an index expression, then a slice expression, running the demo
  # under gdb-trace after each one.  Both use the same output file.
  local encoded=_tmp/a4.bin
  local demo=_tmp/arith_demo

  asdl-arith-encode 'array[99]' $encoded
  gdb-trace $demo $encoded

  asdl-arith-encode 'array[5:10] * 5' $encoded
  gdb-trace $demo $encoded
}
| # http://stackoverflow.com/questions/22769246/disassemble-one-function-using-objdump | ||
| # It would be nice to disassemble a single function. | ||
disassemble() {
  # Usage: disassemble [OPT_FLAG]
  #
  # Compile asdl/arith_demo.cc to Intel-syntax assembly at the given
  # optimization level (default -O0), writing _tmp/arith_demo<OPT_FLAG>.S.
  local level=${1:-'-O0'}
  local asm=_tmp/arith_demo$level.S

  $CLANG \
    -std='c++11' \
    $level \
    -I _tmp \
    -o $asm \
    -S \
    -mllvm --x86-asm-syntax=intel \
    asdl/arith_demo.cc
  #cat $asm
}
llvm() {
  # Usage: llvm [OPT_FLAG]
  #
  # Compile asdl/arith_demo.cc with -S -emit-llvm at the given optimization
  # level (default -O0), writing _tmp/arith_demo<OPT_FLAG>.ll.
  local level=${1:-'-O0'}
  local ir=_tmp/arith_demo$level.ll

  $CLANG \
    -std='c++11' \
    $level \
    -I _tmp \
    -o $ir \
    -S \
    -emit-llvm \
    asdl/arith_demo.cc
  #cat $ir
}
| # With -O0, you can see all the functions. With -O2, they ARE inlined. | ||
objdump-arith() {
  # Disassemble the demo binary and keep only the lines that start with '0'.
  # NOTE: This doesn't take into account different optimization levels
  local bin=_tmp/arith_demo
  objdump -d $bin \
    | grep '^0'
}
| # https://sourceware.org/ml/binutils/2010-04/msg00447.html | ||
| # http://stackoverflow.com/questions/4274804/query-on-ffunction-section-fdata-sections-options-of-gcc | ||
| # Hm you can force a function. Write it inline with arith_demo.cc then. | ||
| # TODO: Is there a pattern we can grep for to test if ANY accessor was NOT | ||
| # inlined? Demangle names I guess. | ||
nm-arith() {
  # List the symbols of the demo binary.
  local bin=_tmp/arith_demo
  nm $bin
}
opt-stats() {
  # Print line counts, then MD5 checksums, of the generated .S and .ll files
  # in _tmp, with a blank line between the four listings.
  wc -l _tmp/*.S
  printf '\n'
  wc -l _tmp/*.ll
  printf '\n'
  md5sum _tmp/*.S
  printf '\n'
  md5sum _tmp/*.ll
}
compare-opts() {
  # Generate assembly and LLVM output for each optimization level, then print
  # size and checksum statistics so the levels can be compared.
  #
  # http://stackoverflow.com/questions/15548023/clang-optimization-levels
  # Says -Os is identical to -O2?  But not according to my test!

  # Declared local so the loop variable does not leak into the global scope.
  local opt
  for opt in -Os -O0 -O1 -O2 -O3 -O4; do
    echo "$opt"
    disassemble "$opt"
    llvm "$opt"
  done
  opt-stats
}
count() {
  # Print line counts for several groups of files, each group followed by a
  # blank line.

  # Core modules
  wc -l asdl/{asdl,py_meta,gen_cpp,encode}.py
  printf '\n'

  # Tests
  wc -l asdl/{py_meta,encode}_test.py
  printf '\n'

  # Arithmetic parser and demo driver
  wc -l asdl/arith_parse*.py asdl/tdop.py asdl/arith_ast.py asdl/asdl_demo.py
  printf '\n'

  # C++ sources
  wc -l asdl/*.cc
  printf '\n'

  # Schemas
  wc -l asdl/*.asdl
  printf '\n'
}
# Dispatch: run the command line given to this script, i.e. the function named
# by the first argument with the remaining arguments passed to it.
"$@"
| @@ -0,0 +1,230 @@ | ||
| #!/usr/bin/python3 | ||
| """ | ||
| tdop.py | ||
| """ | ||
| import re | ||
class ParseError(Exception):
    """Raised by the tokenizer-driven parser when the input can't be parsed."""
| # | ||
| # Default parsing functions give errors | ||
| # | ||
def NullError(p, token, bp):
    """Default null denotation: always rejects the token.

    Raises:
      ParseError: unconditionally, naming the offending token.
    """
    message = "%s can't be used in prefix position" % token
    raise ParseError(message)
def LeftError(p, token, left, rbp):
    """Default left denotation: always rejects the token.

    Raises:
      ParseError: unconditionally, naming the offending token.
    """
    # Hm is this not called because of binding power?
    message = "%s can't be used in infix position" % token
    raise ParseError(message)
| # | ||
| # Input | ||
| # | ||
class Token:
    """A lexical token: a type tag, a value, and an optional location."""

    def __init__(self, type, val, loc=None):
        """
        Args:
          type: token type, e.g. 'number', 'name', or the operator text
          val: token value
          loc: optional source location; stored as given (it used to be
            accepted and then discarded)
        """
        self.type = type
        self.val = val
        self.loc = loc

    def __repr__(self):
        """Render as '<Token type val>'; the location is not included."""
        return '<Token %s %s>' % (self.type, self.val)
| # | ||
| # Using the pattern here: http://effbot.org/zone/xml-scanner.htm | ||
| # | ||
| # NOTE: () and [] need to be on their own so (-1+2) works | ||
# Written as a raw string so the regex escapes (\s, \d, \w, \-, ...) reach the
# regex engine as-is instead of being unrecognized string escapes, which
# newer Python versions warn about.
# Groups: 1 = number, 2 = name, 3 = run of operator characters, 4 = one bracket.
TOKEN_RE = re.compile(r"""
\s* (?: (\d+) | (\w+) | ( [\-\+\*/%!~<>=&^|?:,]+ ) | ([\(\)\[\]]) )
""", re.VERBOSE)
def Tokenize(s):
    """Yield a Token for each match of TOKEN_RE in the string `s`.

    Numbers become 'number' tokens with an int value, words become 'name'
    tokens, and operators and brackets use their own text as both type and
    value.  Every token is given the placeholder location (0, 0).
    """
    for number, name, operator, bracket in TOKEN_RE.findall(s):
        if number:
            typ, val = 'number', int(number)
        elif name:
            typ, val = 'name', name
        elif operator:
            typ = val = operator
        elif bracket:
            typ = val = bracket
        yield Token(typ, val, loc=(0, 0))
| # | ||
| # Simple and Composite AST nodes | ||
| # | ||
class Node(object):
    """A simple AST node that wraps a single token."""

    def __init__(self, token):
        """
        Args:
          token: the token this node was built from; its val is what
            repr() shows
        """
        self.token = token

    def __repr__(self):
        """Return the token's value as a string."""
        value = self.token.val
        return str(value)
class CompositeNode(Node):
    """An AST node with an operator token and a list of child nodes."""

    def __init__(self, token, children):
        """
        Args:
          token: the operator token; its type is what repr() shows
          children: list of child nodes
        """
        super(CompositeNode, self).__init__(token)
        self.children = children

    def __repr__(self):
        """Render as an s-expression: '(type child1 child2 ...)'."""
        parts = [self.token.type]
        parts.extend(repr(child) for child in self.children)
        return "(" + " ".join(parts) + ")"
| # | ||
| # Parser definition | ||
| # | ||
class LeftInfo(object):
    """Table row for a token in left (infix/postfix) position.

    In C++ this should be a big array.
    """

    def __init__(self, led=None, lbp=0, rbp=0):
        """
        Args:
          led: left denotation function; LeftError is used when omitted
          lbp: left binding power
          rbp: right binding power
        """
        self.led = led if led else LeftError
        self.lbp, self.rbp = lbp, rbp
class NullInfo(object):
    """Table row for a token in null (prefix) position.

    In C++ this should be a big array.
    """

    def __init__(self, nud=None, bp=0):
        """
        Args:
          nud: null denotation function; NullError is used when omitted
          bp: binding power handed to nud
        """
        self.nud = nud if nud else NullError
        self.bp = bp
class ParserSpec(object):
    """Specification for a TDOP parser.

    Holds two tables keyed by token type: null_lookup (NullInfo rows) and
    left_lookup (LeftInfo rows).
    """

    def __init__(self):
        self.null_lookup = {}
        self.left_lookup = {}

    def Null(self, bp, nud, tokens):
        """Register a token that doesn't take anything on the left.

        Examples: constant, prefix operator, error.
        """
        for tok_type in tokens:
            self.null_lookup[tok_type] = NullInfo(nud=nud, bp=bp)
            # Keep an existing left entry; otherwise install the error row.
            if tok_type in self.left_lookup:
                continue
            self.left_lookup[tok_type] = LeftInfo()  # error

    def _RegisterLed(self, lbp, rbp, led, tokens):
        """Install a left-position row for each token type in `tokens`."""
        for tok_type in tokens:
            # Keep an existing null entry; otherwise install the error row.
            if tok_type not in self.null_lookup:
                self.null_lookup[tok_type] = NullInfo(NullError)
            self.left_lookup[tok_type] = LeftInfo(led=led, lbp=lbp, rbp=rbp)

    def Left(self, bp, led, tokens):
        """Register a token that takes an expression on the left."""
        self._RegisterLed(bp, bp, led, tokens)

    def LeftRightAssoc(self, bp, led, tokens):
        """Register a right associative operator."""
        # A right binding power one below the left one makes it right
        # associative.
        self._RegisterLed(bp, bp - 1, led, tokens)

    def LookupNull(self, token):
        """Get the parsing function and precedence for a null position token.

        Raises:
          ParseError: the token type was never registered.
        """
        try:
            return self.null_lookup[token]
        except KeyError:
            raise ParseError('Unexpected token %r' % token)

    def LookupLeft(self, token):
        """Get the parsing function and precedence for a left position token.

        Raises:
          ParseError: the token type was never registered.
        """
        try:
            return self.left_lookup[token]
        except KeyError:
            raise ParseError('Unexpected token %r' % token)
# Sentinel token that Parser.Next() installs once the lexer is exhausted.
EOF_TOKEN = Token('eof', 'eof')
class Parser(object):
    """Recursive TDOP parser."""

    def __init__(self, spec, lexer):
        """
        Args:
          spec: ParserSpec with the null/left lookup tables
          lexer: any iterable (or iterator) of tokens, e.g. a generator or a
            list
        """
        self.spec = spec
        # Normalize to an iterator so plain iterables such as lists work too;
        # objects that already implement __next__ are used unchanged.
        self.lexer = lexer if hasattr(lexer, '__next__') else iter(lexer)
        self.token = None  # current token; set by Next()

    def AtToken(self, token_type):
        """Test if we are looking at a token."""
        return self.token.type == token_type

    def Next(self):
        """Move to the next token; EOF_TOKEN once the lexer is exhausted."""
        try:
            t = next(self.lexer)
        except StopIteration:
            t = EOF_TOKEN
        self.token = t

    def Eat(self, val):
        """Assert the value of the current token, then move to the next token.

        Raises:
          ParseError: `val` is truthy and the current token's type differs.
        """
        if val and not self.AtToken(val):
            raise ParseError('expected %s, got %s' % (val, self.token))
        self.Next()

    def ParseUntil(self, rbp):
        """
        Parse to the right, eating tokens until we encounter a token with
        binding power LESS THAN OR EQUAL TO rbp.

        Raises:
          ParseError: at end of input, or from the spec lookups and the
            nud/led functions they return.
        """
        if self.AtToken('eof'):
            raise ParseError('Unexpected end of input')

        t = self.token
        self.Next()  # skip over the token, e.g. ! ~ + -

        null_info = self.spec.LookupNull(t.type)
        node = null_info.nud(self, t, null_info.bp)

        while True:
            t = self.token
            left_info = self.spec.LookupLeft(t.type)

            # Examples:
            # If we see 1*2+  , rbp = 27 and lbp = 25, so stop.
            # If we see 1+2+  , rbp = 25 and lbp = 25, so stop.
            # If we see 1**2**, rbp = 26 and lbp = 27, so keep going.
            if rbp >= left_info.lbp:
                break
            self.Next()  # skip over the token, e.g. / *

            node = left_info.led(self, t, node, left_info.rbp)

        return node

    def Parse(self):
        """Load the first token and parse one complete expression."""
        self.Next()
        return self.ParseUntil(0)