Skip to content

Commit

Permalink
[demo] Got Shed Skin to generate C++ code, but it doesn't compile.
Browse files Browse the repository at this point in the history
We probably need to put everything in the same Python module.

One problem is that we use class attributes to simulate namespaces in
ASDL, which I think confuses the Shed Skin compiler.
  • Loading branch information
Andy Chu committed Jan 3, 2019
1 parent e20b69f commit 8786934
Show file tree
Hide file tree
Showing 6 changed files with 1,012 additions and 0 deletions.
71 changes: 71 additions & 0 deletions demo/shedskin.sh
@@ -0,0 +1,71 @@
#!/bin/bash
#
# Usage:
# ./shedskin.sh <function name>

# Strict mode: abort on unset variables, on a failure anywhere in a pipeline,
# and on any command that exits non-zero.
set -o nounset -o pipefail -o errexit

# Problems
# - loading pickle for metadata. It has to dynamically look up classes.
# - it won't compile the pickle module due to its use of marshal!
# - TODO: we don't need metadata at all?

# Fixed
# - import posix removed in runtime.py
# - _CheckType uses AttributeError: Shed Skin doesn't like it

# Unfortunately the ShedSkin compiler crashes after 17 seconds with this error!
#
# ts = typestrnew(gx, types, cplusplus, node, check_extmod, depth, check_ret, var, tuple_check, mv=mv)
# File "/usr/lib/python2.7/dist-packages/shedskin/typestr.py", line 193, in typestrnew
# elif not node or infer.inode(gx, node).mv.module.builtin:
# AttributeError: 'NoneType' object has no attribute 'module'
#
# real 0m17.210s
# user 0m17.083s
# sys 0m0.084s


# 0.9.4 was released in 2015. Supposedly fixed in git!
#
# https://github.com/shedskin/shedskin/issues/203

install-latest() {
  # Build Shed Skin from a local git checkout and install it system-wide.
  # NOTE: I manually transcribed what I did.  Could use virtualenv?
  local checkout=~/git/languages/shedskin

  pushd "$checkout"
  python setup.py build
  sudo python setup.py install
}

make-tree() {
  # Collect the Python sources to feed to Shed Skin under _tmp/shedskin.
  local out=_tmp/shedskin
  mkdir -p "$out"
  #cp -v asdl/{arith_parse.py,tdop.py} _devbuild/gen/demo_asdl.py $out

  # Dependencies of the generated code.  Unpickling probably won't work.
  cp -v asdl/const.py asdl/runtime.py "$out"
}

run-python() {
  # Run the demo parser on a sample expression from inside demo/shedskin.
  local dir=demo/shedskin

  pushd "$dir"
  ./arith_parse.py '1+2'
}

# With latest, this generates C++ code, but it doesn't compile.
#
# TODO: Try something based on tdop.py that is a single module? There are too
# many modules here.

compile() {
  # Run Shed Skin on the arith_parse module from inside demo/shedskin and
  # report how long the translation takes.
  local dir=demo/shedskin

  pushd "$dir"
  time shedskin arith_parse
}

count-output() {
  # Count the lines of the files produced by 'compile'.
  #
  # 'compile' runs shedskin from inside demo/shedskin, so the generated
  # Makefile sits next to the .cpp/.hpp files rather than in the directory
  # this script is invoked from.
  wc -l demo/shedskin/*.{cpp,hpp} demo/shedskin/Makefile
}

# Dispatch: run the function named by the first argument (see Usage above),
# passing any remaining arguments on to it.
"$@"
249 changes: 249 additions & 0 deletions demo/shedskin/arith_parse.py
@@ -0,0 +1,249 @@
#!/usr/bin/env python
"""
arith_parse.py: Parse shell-like and C-like arithmetic.
"""
from __future__ import print_function

import sys

import tdop
from tdop import CompositeNode

import demo_asdl

# Module-level shorthand for the demo_asdl attributes used by the parsing
# functions below.
arith_expr = demo_asdl.arith_expr
op_id_e = demo_asdl.op_id_e


#
# Null Denotation -- token that takes nothing on the left
#

def NullConstant(p, token, bp):
    """Build a leaf node from a 'number' or 'name' token.

    Returns arith_expr.Const for a number and arith_expr.ArithVar for a name.
    Raises AssertionError (carrying the token type) for anything else.
    """
    kind = token.type
    if kind == 'number':
        return arith_expr.Const(token.val)
    if kind == 'name':
        # We have to wrap a string in some kind of variant.
        return arith_expr.ArithVar(token.val)

    raise AssertionError(kind)


def NullParen(p, token, bp):
    """Arithmetic grouping: parse the inner expression, then consume ')'."""
    inner = p.ParseUntil(bp)
    p.Eat(')')
    return inner


def NullPrefixOp(p, token, bp):
    """Prefix operator: parse one operand and wrap it under the operator.

    The binding power decides how much the operator swallows:

    Low precedence:  return, raise, etc.
      return x+y is return (x+y), not (return x) + y

    High precedence: logical negation, bitwise complement, etc.
      !x && y is (!x) && y, not !(x && y)
    """
    operand = p.ParseUntil(bp)
    children = [operand]
    return CompositeNode(token, children)


def NullIncDec(p, token, bp):
    """Prefix increment/decrement: ++x or ++x[1].

    Raises tdop.ParseError unless the operand's token type is 'name' or 'get'.
    """
    operand = p.ParseUntil(bp)
    operand_kind = operand.token.type
    if operand_kind != 'name' and operand_kind != 'get':
        raise tdop.ParseError(
            "Can't assign to %r (%s)" % (operand, operand.token))
    return CompositeNode(token, [operand])


#
# Left Denotation -- token that takes an expression on the left
#

def LeftIncDec(p, token, left, rbp):
    """Postfix increment/decrement: i++ and i--.

    Raises tdop.ParseError unless the left operand's token type is 'name' or
    'get'.  The operator token is modified in place: its type gets a 'post'
    prefix.
    """
    target_kind = left.token.type
    if target_kind != 'name' and target_kind != 'get':
        raise tdop.ParseError("Can't assign to %r (%s)" % (left, left.token))

    token.type = 'post' + token.type
    children = [left]
    return CompositeNode(token, children)


def LeftIndex(p, token, left, unused_bp):
    """Indexing or slicing: f[x+1] or f[1:4].

    Returns demo_asdl.Index, or demo_asdl.Slice when a ':' and an end
    expression follow the first index.  Raises tdop.ParseError when the thing
    being indexed is not a demo_asdl.ArithVar.
    """
    # f[x] or f[x][y]
    if not isinstance(left, demo_asdl.ArithVar):
        raise tdop.ParseError("%s can't be indexed" % left)

    index = p.ParseUntil(0)
    end = None
    if p.AtToken(':'):
        p.Next()
        end = p.ParseUntil(0)

    p.Eat(']')

    # TODO: If you see ], then
    # 1:4
    # 1:4:2
    # Both end and step are optional

    if not end:
        return demo_asdl.Index(left, index)
    return demo_asdl.Slice(left, index, end, None)


def LeftTernary(p, token, left, bp):
    """Ternary conditional, e.g. a > 1 ? x : y.

    'left' is the condition; the two branches are parsed around the ':'.
    """
    if_true = p.ParseUntil(bp)
    p.Eat(':')
    if_false = p.ParseUntil(bp)
    return CompositeNode(token, [left, if_true, if_false])


def LeftBinaryOp(p, token, left, rbp):
    """Binary operator such as 1+2 or 2*3.

    Only '+', '-' and '*' are mapped to an op_id_e value here; any other
    operator raises AssertionError before the right operand is parsed.
    """
    op = token.val
    if op == '+':
        op_id_ = op_id_e.Plus
    elif op == '-':
        op_id_ = op_id_e.Minus
    elif op == '*':
        op_id_ = op_id_e.Star
    else:
        raise AssertionError(op)

    right = p.ParseUntil(rbp)
    return arith_expr.ArithBinary(op_id_, left, right)


def LeftAssign(p, token, left, rbp):
    """Assignment operator, e.g. x += 1 or a[i] += 1.

    Raises tdop.ParseError unless the left-hand side's token type is 'name'
    or 'get'.
    """
    target_kind = left.token.type
    if target_kind != 'name' and target_kind != 'get':
        raise tdop.ParseError("Can't assign to %r (%s)" % (left, left.token))

    value = p.ParseUntil(rbp)
    return CompositeNode(token, [left, value])


def LeftComma(p, token, left, rbp):
    """Comma list: foo, bar, baz.

    Could be the sequencing operator, or a tuple without parens.  When the
    left side is already a ',' node, the new item is appended to it so the
    list stays flat instead of nesting.
    """
    item = p.ParseUntil(rbp)
    if left.token.type != ',':
        # First comma seen: start a new node with both operands.
        return CompositeNode(token, [left, item])

    # Keep adding more children
    left.children.append(item)
    return left


# Binding power of ','.  Named so that function-call parsing can reuse it to
# overload the meaning of ',' inside an argument list.
COMMA_PREC = 1


def LeftFuncCall(p, token, left, unused_bp):
    """Function call f(a, b).

    Returns demo_asdl.FuncCall(name, args).  Raises tdop.ParseError when the
    callee is not a demo_asdl.ArithVar.
    """
    # f(x) or f[i](x)
    if not isinstance(left, demo_asdl.ArithVar):
        raise tdop.ParseError("%s can't be called" % left)
    func_name = left.name  # get a string

    args = []
    while not p.AtToken(')'):
        # We don't want to grab the comma, e.g. it is NOT a sequence operator
        # here.  So parse each argument at COMMA_PREC.
        arg = p.ParseUntil(COMMA_PREC)
        args.append(arg)
        if p.AtToken(','):
            p.Next()
    p.Eat(")")
    return demo_asdl.FuncCall(func_name, args)


def MakeShellParserSpec():
    """Build the tdop.ParserSpec for shell-like / C-like arithmetic.

    Compare the binding powers below with this table of C operator
    precedence:
    http://en.cppreference.com/w/c/language/operator_precedence
    """
    spec = tdop.ParserSpec()

    # 31 -- postfix ops, function call, indexing
    spec.Left(31, LeftIncDec, ['++', '--'])
    spec.Left(31, LeftFuncCall, ['('])
    spec.Left(31, LeftIndex, ['['])

    # 29 -- binds to everything except function call, indexing, postfix ops
    spec.Null(29, NullIncDec, ['++', '--'])
    spec.Null(29, NullPrefixOp, ['+', '!', '~', '-'])

    # Right associative: 2 ** 3 ** 2 == 2 ** (3 ** 2)
    spec.LeftRightAssoc(27, LeftBinaryOp, ['**'])

    # Binary operators registered with spec.Left, highest binding power first.
    # NOTE: LeftBinaryOp currently only maps '+', '-' and '*'.
    binary_levels = [
        (25, ['*', '/', '%']),
        (23, ['+', '-']),
        (21, ['<<', '>>']),
        (19, ['<', '>', '<=', '>=']),
        (17, ['!=', '==']),
        (15, ['&']),
        (13, ['^']),
        (11, ['|']),
        (9, ['&&']),
        (7, ['||']),
    ]
    for prec, ops in binary_levels:
        spec.Left(prec, LeftBinaryOp, ops)

    spec.LeftRightAssoc(5, LeftTernary, ['?'])

    # Right associative: a = b = 2 is a = (b = 2)
    assign_ops = [
        '=',
        '+=', '-=', '*=', '/=', '%=',
        '<<=', '>>=', '&=', '^=', '|=',
    ]
    spec.LeftRightAssoc(3, LeftAssign, assign_ops)

    spec.Left(COMMA_PREC, LeftComma, [','])

    # 0 precedence -- doesn't bind until )
    spec.Null(0, NullParen, ['('])  # for grouping

    # -1 precedence -- never used
    spec.Null(-1, NullConstant, ['name', 'number'])
    spec.Null(-1, tdop.NullError, [')', ']', ':', 'eof'])

    return spec


def MakeParser(s):
    """Return a tdop.Parser over the tokens of string s.  Used by tests."""
    return tdop.Parser(MakeShellParserSpec(), tdop.Tokenize(s))


def ParseShell(s, expected=None):
    """Parse string s and return the resulting tree.  Used by tests.

    When 'expected' is given, the repr() of the tree is asserted to equal it.
    """
    tree = MakeParser(s).Parse()

    actual = repr(tree)
    if expected is not None:
        assert actual == expected, '%r != %r' % (actual, expected)

    #print('%-40s %s' % (s, actual))
    return tree


def main(argv):
    """Parse the expression in argv[1] and print the resulting tree.

    Prints a usage message when no expression is given.  On a parse error the
    error is reported on stderr and nothing else is printed.

    Args:
      argv: command line, including the program name at index 0.
    """
    try:
        s = argv[1]
    except IndexError:
        print('Usage: ./arith_parse.py EXPRESSION')
        return

    try:
        tree = ParseShell(s)
    except tdop.ParseError as e:
        print('Error parsing %r: %s' % (s, e), file=sys.stderr)
    else:
        # Only print on success: 'tree' is unbound when parsing failed, so
        # printing it unconditionally would raise UnboundLocalError.
        print(tree)


# Script entry point: hand the full command line to main().
if __name__ == '__main__':
    main(sys.argv)
36 changes: 36 additions & 0 deletions demo/shedskin/const.py
@@ -0,0 +1,36 @@
#!/usr/bin/env python
"""
const.py
"""

DEFAULT_INT_WIDTH = 3  # bytes, i.e. 24 bits

# 2^24 - 1 is used as an invalid/uninitialized value for ASDL integers.
#
# Why?  We have a few use cases for invalid/sentinel values:
# - span_id, line_id.  Sometimes we don't have a span ID.
# - file descriptor: 'read x < f.txt' vs 'read x 0< f.txt'
#
# Other options for representation:
#
# 1. ASDL could use signed integers, then -1 is valid.
# 2. Use a type like fd = None | Some(int fd)
#
# I don't like #1 because ASDL is lazily-decoded, and then we have to do sign
# extension on demand (24 bits to 32 or 64).  As far as I can tell, sign
# extension requires a branch, at least in portable C (on the sign bit).
#
# The second option is semantically cleaner.  But it needlessly inflates the
# size of both the source code and the data.  Instead of having a single
# "inline" integer, we would need a reference to another value.
#
# We could also try to do some fancy thing like fd = None |
# Range<1..max_fd>(fd), with smart encoding.  But that is overkill for these
# use cases.
#
# Using a reserved invalid value instead of -1 seems like a good compromise.

# All bits set in a DEFAULT_INT_WIDTH-byte unsigned integer.
NO_INTEGER = (1 << (8 * DEFAULT_INT_WIDTH)) - 1

# NOTE: In Python, 1 << (n * 8) - 1 is wrong!  Subtraction binds tighter than
# the shift, so it means 1 << ((n * 8) - 1).  I thought that bit shift would
# have higher precedence.

0 comments on commit 8786934

Please sign in to comment.