[demo] Got Shed Skin to generate C++ code, but it doesn't compile.

We probably need to put everything in the same Python module. One problem is that we use class attributes to simulate namespaces in ASDL, which I think confuses the Shed Skin compiler.
oilshell · Jan 3, 2019 · 8786934 · 8786934
1 parent e20b69f
commit 8786934
Show file tree

Hide file tree

Showing 6 changed files with 1,012 additions and 0 deletions.
diff --git a/demo/shedskin.sh b/demo/shedskin.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+#
+# Usage:
+#   ./shedskin.sh <function name>
+
+set -o nounset
+set -o pipefail
+set -o errexit
+
+# Problems
+# - loading pickle for metadata.  It has to dynamically look up classes.
+# - it won't compile the pickle module due to its use of marshal!
+# - TODO: we don't need metadata at all?
+
+# Fixed
+# - import posix removed in runtime.py
+# - _CheckType uses AttributeError: Shed Skin doesn't like it
+
+# Unfortunately the ShedSkin compiler crashes after 17 seconds with this error!
+#
+#     ts = typestrnew(gx, types, cplusplus, node, check_extmod, depth, check_ret, var, tuple_check, mv=mv)
+#   File "/usr/lib/python2.7/dist-packages/shedskin/typestr.py", line 193, in typestrnew
+#     elif not node or infer.inode(gx, node).mv.module.builtin:
+# AttributeError: 'NoneType' object has no attribute 'module'
+# 
+# real    0m17.210s
+# user    0m17.083s
+# sys     0m0.084s
+
+
+# 0.9.4 was released in 2015.  Supposedly fixed in git!
+#
+# https://github.com/shedskin/shedskin/issues/203
+
+install-latest() {
+  # NOTE: I manually transcribed what I did.  Could use virtualenv?
+  pushd ~/git/languages/shedskin
+  python setup.py build
+  sudo python setup.py install
+}
+
+make-tree() {
+  local out=_tmp/shedskin
+  mkdir -p $out
+  #cp -v asdl/{arith_parse.py,tdop.py} _devbuild/gen/demo_asdl.py $out
+
+  # dependencies of generated code
+  # unpickle probably won't work
+  cp -v asdl/{const.py,runtime.py} $out
+}
+
+run-python() {
+  pushd demo/shedskin
+  ./arith_parse.py '1+2'
+}
+
+# With latest, this generates C++ code, but it doesn't compile.
+#
+# TODO: Try something based on tdop.py that is a single module?  There are too
+# many modules here.
+
+compile() {
+  pushd demo/shedskin
+  time shedskin arith_parse
+}
+
+count-output() {
+  wc -l demo/shedskin/*.{cpp,hpp} Makefile
+}
+
+"$@"
diff --git a/demo/shedskin/arith_parse.py b/demo/shedskin/arith_parse.py
@@ -0,0 +1,249 @@
+#!/usr/bin/env python
+"""
+arith_parse.py: Parse shell-like and C-like arithmetic.
+"""
+from __future__ import print_function
+
+import sys
+
+import tdop
+from tdop import CompositeNode
+
+import demo_asdl
+
+arith_expr = demo_asdl.arith_expr
+op_id_e = demo_asdl.op_id_e
+
+
+#
+# Null Denotation -- token that takes nothing on the left
+#
+
+def NullConstant(p, token, bp):
+  if token.type == 'number':
+    return arith_expr.Const(token.val)
+  # We have to wrap a string in some kind of variant.
+  if token.type == 'name':
+    return arith_expr.ArithVar(token.val)
+
+  raise AssertionError(token.type)
+
+
+def NullParen(p, token, bp):
+  """ Arithmetic grouping """
+  r = p.ParseUntil(bp)
+  p.Eat(')')
+  return r
+
+
+def NullPrefixOp(p, token, bp):
+  """Prefix operator.
+
+  Low precedence:  return, raise, etc.
+    return x+y is return (x+y), not (return x) + y
+
+  High precedence: logical negation, bitwise complement, etc.
+    !x && y is (!x) && y, not !(x && y)
+  """
+  r = p.ParseUntil(bp)
+  return CompositeNode(token, [r])
+
+
+def NullIncDec(p, token, bp):
+  """ ++x or ++x[1] """
+  right = p.ParseUntil(bp)
+  if right.token.type not in ('name', 'get'):
+    raise tdop.ParseError("Can't assign to %r (%s)" % (right, right.token))
+  return CompositeNode(token, [right])
+
+
+#
+# Left Denotation -- token that takes an expression on the left
+#
+
+def LeftIncDec(p, token, left, rbp):
+  """ For i++ and i--
+  """
+  if left.token.type not in ('name', 'get'):
+    raise tdop.ParseError("Can't assign to %r (%s)" % (left, left.token))
+  token.type = 'post' + token.type
+  return CompositeNode(token, [left])
+
+
+def LeftIndex(p, token, left, unused_bp):
+  """ index f[x+1] """
+  # f[x] or f[x][y]
+  if not isinstance(left, demo_asdl.ArithVar):
+    raise tdop.ParseError("%s can't be indexed" % left)
+  index = p.ParseUntil(0)
+  if p.AtToken(':'):
+    p.Next()
+    end = p.ParseUntil(0)
+  else:
+    end = None
+
+  p.Eat(']')
+
+  # TODO: If you see ], then
+  # 1:4
+  # 1:4:2
+  # Both end and step are optional
+
+  if end:
+    return demo_asdl.Slice(left, index, end, None)
+  else:
+    return demo_asdl.Index(left, index)
+
+
+def LeftTernary(p, token, left, bp):
+  """ e.g. a > 1 ? x : y """
+  true_expr = p.ParseUntil(bp)
+  p.Eat(':')
+  false_expr = p.ParseUntil(bp)
+  children = [left, true_expr, false_expr]
+  return CompositeNode(token, children)
+
+
+def LeftBinaryOp(p, token, left, rbp):
+  """ Normal binary operator like 1+2 or 2*3, etc. """
+  if token.val == '+':
+    op_id_ = op_id_e.Plus
+  elif token.val == '-':
+    op_id_ = op_id_e.Minus
+  elif token.val == '*':
+    op_id_ = op_id_e.Star
+  else:
+    raise AssertionError(token.val)
+  return arith_expr.ArithBinary(op_id_, left, p.ParseUntil(rbp))
+
+
+def LeftAssign(p, token, left, rbp):
+  """ Normal binary operator like 1+2 or 2*3, etc. """
+  # x += 1, or a[i] += 1
+  if left.token.type not in ('name', 'get'):
+    raise tdop.ParseError("Can't assign to %r (%s)" % (left, left.token))
+  return CompositeNode(token, [left, p.ParseUntil(rbp)])
+
+
+def LeftComma(p, token, left, rbp):
+  """ foo, bar, baz
+
+  Could be sequencing operator, or tuple without parens
+  """
+  r = p.ParseUntil(rbp)
+  if left.token.type == ',':  # Keep adding more children
+    left.children.append(r)
+    return left
+  children = [left, r]
+  return CompositeNode(token, children)
+
+
+# For overloading of , inside function calls
+COMMA_PREC = 1
+
+def LeftFuncCall(p, token, left, unused_bp):
+  """ Function call f(a, b). """
+  args = []
+  # f(x) or f[i](x)
+  if not isinstance(left, demo_asdl.ArithVar):
+    raise tdop.ParseError("%s can't be called" % left)
+  func_name = left.name  # get a string
+
+  while not p.AtToken(')'):
+    # We don't want to grab the comma, e.g. it is NOT a sequence operator.  So
+    # set the precedence to 5.
+    args.append(p.ParseUntil(COMMA_PREC))
+    if p.AtToken(','):
+      p.Next()
+  p.Eat(")")
+  return demo_asdl.FuncCall(func_name, args)
+
+
+def MakeShellParserSpec():
+  """
+  Create a parser.
+
+  Compare the code below with this table of C operator precedence:
+  http://en.cppreference.com/w/c/language/operator_precedence
+  """
+  spec = tdop.ParserSpec()
+
+  spec.Left(31, LeftIncDec, ['++', '--'])
+  spec.Left(31, LeftFuncCall, ['('])
+  spec.Left(31, LeftIndex, ['['])
+
+  # 29 -- binds to everything except function call, indexing, postfix ops
+  spec.Null(29, NullIncDec, ['++', '--'])
+  spec.Null(29, NullPrefixOp, ['+', '!', '~', '-'])
+
+  # Right associative: 2 ** 3 ** 2 == 2 ** (3 ** 2)
+  spec.LeftRightAssoc(27, LeftBinaryOp, ['**'])
+  spec.Left(25, LeftBinaryOp, ['*', '/', '%'])
+
+  spec.Left(23, LeftBinaryOp, ['+', '-'])
+  spec.Left(21, LeftBinaryOp, ['<<', '>>'])
+  spec.Left(19, LeftBinaryOp, ['<', '>', '<=', '>='])
+  spec.Left(17, LeftBinaryOp, ['!=', '=='])
+
+  spec.Left(15, LeftBinaryOp, ['&'])
+  spec.Left(13, LeftBinaryOp, ['^'])
+  spec.Left(11, LeftBinaryOp, ['|'])
+  spec.Left(9, LeftBinaryOp, ['&&'])
+  spec.Left(7, LeftBinaryOp, ['||'])
+
+  spec.LeftRightAssoc(5, LeftTernary, ['?'])
+
+  # Right associative: a = b = 2 is a = (b = 2)
+  spec.LeftRightAssoc(3, LeftAssign, [
+      '=',
+      '+=', '-=', '*=', '/=', '%=',
+      '<<=', '>>=', '&=', '^=', '|='])
+
+  spec.Left(COMMA_PREC, LeftComma, [','])
+
+  # 0 precedence -- doesn't bind until )
+  spec.Null(0, NullParen, ['('])  # for grouping
+
+  # -1 precedence -- never used
+  spec.Null(-1, NullConstant, ['name', 'number'])
+  spec.Null(-1, tdop.NullError, [')', ']', ':', 'eof'])
+
+  return spec
+
+
+def MakeParser(s):
+  """Used by tests."""
+  spec = MakeShellParserSpec()
+  lexer = tdop.Tokenize(s)
+  p = tdop.Parser(spec, lexer)
+  return p
+
+
+def ParseShell(s, expected=None):
+  """Used by tests."""
+  p = MakeParser(s)
+  tree = p.Parse()
+
+  sexpr = repr(tree)
+  if expected is not None:
+    assert sexpr == expected, '%r != %r' % (sexpr, expected)
+
+  #print('%-40s %s' % (s, sexpr))
+  return tree
+
+
+def main(argv):
+  try:
+    s = argv[1]
+  except IndexError:
+    print('Usage: ./arith_parse.py EXPRESSION')
+  else:
+    try:
+      tree = ParseShell(s)
+    except tdop.ParseError as e:
+      print('Error parsing %r: %s' % (s, e), file=sys.stderr)
+    print(tree)
+
+
+if __name__ == '__main__':
+  main(sys.argv)
diff --git a/demo/shedskin/const.py b/demo/shedskin/const.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python
+"""
+const.py
+"""
+
+DEFAULT_INT_WIDTH = 3  # 24 bits
+
+# 2^24 - 1 is used as an invalid/uninitialized value for ASDL integers.
+
+# Why?  We have a few use cases for invalid/sentinel values:
+# - span_id, line_id.  Sometimes we don't have a span ID.
+# - file descriptor: 'read x < f.txt' vs 'read x 0< f.txt'
+#
+# Other options for representation:
+#
+# 1. ADSL could use signed integers, then -1 is valid.
+# 2. Use a type like fd = None | Some(int fd)
+#
+# I don't like #1 because ASDL is lazily-decoded, and then we have to do sign
+# extension on demand.  (24 bits to 32 or 64).  As far as I can tell, sign
+# extension requires a branch, at least in portable C (on the sign bit).
+#
+# Thes second option is semantically cleaner.  But it needlessly
+# inflates the size of both the source code and the data.  Instead of having a
+# single "inline" integer, we would need a reference to another value.
+#
+# We could also try to do some fancy thing like fd = None |
+# Range<1..max_fd>(fd), with smart encoding.  But that is overkill for these
+# use cases.
+#
+# Using InvalidInt instead of -1 seems like a good compromise.
+
+NO_INTEGER = (1 << (DEFAULT_INT_WIDTH * 8)) - 1
+
+# NOTE: In Python: 1 << (n * 8) - 1 is wrong!  I thought that bit shift would
+# have higher precedence.