Permalink
Browse files

Demo of Python/OPy bytecode decoding in C++.

    opy$ ./test.sh compile-fib

This compiles bytecode with OPy and dumps it to a file.  Then it builds
ovm/ovm_main.cc, which decodes the bytecode and prints the instruction
names.  The instruction names are taken from a header file generated
by the new script lib/opcode_gen.py.

Also:

- Document OHeap a little bit.  (The OSH demo has a compile error now.)
- Change to Python 3 print() in some files.
  • Loading branch information...
Andy Chu
Andy Chu committed Jun 27, 2018
1 parent 5b6336b commit 1afd2ddbce741235ae18703767144b1567e73118
Showing with 159 additions and 3 deletions.
  1. +29 −0 asdl/README.md
  2. +2 −1 opy/gold/class_vs_closure.py
  3. +1 −1 opy/gold/generator_exception.py
  4. +48 −0 opy/lib/opcode_gen.py
  5. +14 −1 opy/opy_main.py
  6. +19 −0 opy/test.sh
  7. +46 −0 ovm/ovm_main.cc
View
@@ -18,3 +18,32 @@ are an end-to-end demo, driven by `run.sh`.
For more on Zephyr ASDL, see [this blog post](http://www.oilshell.org/blog/2016/12/11.html).
OHeap
-----
This is an experimental serialization of ASDL data structures. See [What is
OHeap?](http://www.oilshell.org/blog/2017/01/09.html)
On Ubuntu:
build/codegen.sh download-clang
build/codegen.sh extract-clang
# encodes and decodes arithmetic AST
asdl/run.sh asdl-arith-oheap
# encodes and decodes the OSH "lossless syntax tree"
asdl/run.sh osh-demo
(NOTE: We probably shouldn't require Clang for this. It's only necessary for
ASAN, clang-format, build time benchmarking, runtime benchmarking vs. GCC,
etc.)
### OHeap Use Cases:
- To freeze OSH LSTs (instances of types in `osh.asdl`)
- This isn't necessary if the parser is fast enough (which is desirable)
- To freeze Python / OPy bytecode, and associated constants
- Special case: ASDL reflection data for `osh.asdl`, so we can pretty
print them
@@ -1,4 +1,5 @@
#!/usr/bin/python
from __future__ import print_function
"""
class_vs_closure.py
@@ -71,5 +72,5 @@ def main(argv):
try:
main(sys.argv)
except RuntimeError as e:
print >>sys.stderr, 'FATAL: %s' % e
print('FATAL: %s' % e, file=sys.stderr)
sys.exit(1)
@@ -56,5 +56,5 @@ def main(argv):
try:
main(sys.argv)
except RuntimeError as e:
print >>sys.stderr, 'FATAL: %s' % e
print('FATAL: %s' % e, file=sys.stderr)
sys.exit(1)
View
@@ -0,0 +1,48 @@
#!/usr/bin/python
"""
opcode_gen.py

Print a C header to stdout: a #define for each opcode in the `opcode` module,
a #define for HAVE_ARGUMENT, and the kOpcodeNames table of instruction names.
"""
# The docstring must be the first statement to become the module's __doc__;
# a __future__ import is still allowed directly after it.
from __future__ import print_function
import sys
from lib import opcode
def log(msg, *args):
    """Write msg to stderr, %-formatting it with args when any are given."""
    text = msg % args if args else msg
    print(text, file=sys.stderr)
def main(argv):
    """Print a C header describing the opcodes in the `opcode` module.

    Writes three sections to stdout:

    - one '#define NAME NUM' per opcode, in numerical order
    - '#define HAVE_ARGUMENT NUM'
    - the kOpcodeNames array, indexed by opcode number, with an empty
      string for every number that isn't an opcode

    Args:
      argv: command line arguments; currently unused.
    """
    # .values() and range() (rather than itervalues() and xrange()) work on
    # both Python 2 and Python 3.
    opcode_nums = set(opcode.opmap.values())

    # Print opcodes in numerical order.  They're not contiguous integers.
    for num in sorted(opcode_nums):
        # '+' isn't valid in a C identifier: SLICE+1 -> SLICE_1
        name = opcode.opname[num].replace('+', '_')
        print('#define %s %d' % (name, num))

    print('')
    print('#define HAVE_ARGUMENT %d' % opcode.HAVE_ARGUMENT)

    print('')
    print('const char* const kOpcodeNames[] = {')
    n = max(opcode_nums)
    for i in range(n + 1):
        if i in opcode_nums:
            print('"%s",' % opcode.opname[i])
        else:
            print('"",')  # empty value keeps the array index == opcode number
    print('};')
if __name__ == '__main__':
    # Script entry point: a RuntimeError from main() is reported on stderr
    # as a fatal message and turned into exit status 1.
    try:
        main(sys.argv)
    except RuntimeError as err:
        fatal_msg = 'FATAL: %s' % err
        print(fatal_msg, file=sys.stderr)
        sys.exit(1)
View
@@ -137,7 +137,7 @@ def OpyCommandMain(argv):
except IndexError:
raise args.UsageError('opy: Missing required subcommand.')
if action in ('parse', 'compile', 'eval', 'repl', 'run'):
if action in ('parse', 'compile', 'compile-fib', 'eval', 'repl', 'run'):
loader = util.GetResourceLoader()
f = loader.open(PICKLE_REL_PATH)
gr = grammar.Grammar()
@@ -224,6 +224,19 @@ def OpyCommandMain(argv):
out_f.write(h)
marshal.dump(co, out_f)
elif action == 'compile-fib':
py_path = argv[1]
out_path = argv[2]
with open(py_path) as f:
co = skeleton.Compile(f, py_path, gr, 'file_input', 'exec')
log("Compiled to %d bytes of bytecode", len(co.co_code))
# Write the .pyc file
with open(out_path, 'wb') as out_f:
out_f.write(co.co_code)
log('Wrote only the bytecode to %r', out_path)
elif action == 'eval': # Like compile, but parses to a code object and prints it
py_expr = argv[1]
f = cStringIO.StringIO(py_expr)
View
@@ -239,4 +239,23 @@ fib-dis() {
../bin/opyc dis $pyc
}
# TODO: Move this to the OVM dir
# Build the bytecode decoder and run it with the given arguments.
#
# Regenerates _tmp/opcode.h with lib/opcode_gen.py, compiles
# ../ovm/ovm_main.cc against it into _tmp/ovm_main, then runs that binary
# with "$@".
run-ovm() {
  local bin=_tmp/ovm_main

  # generate code
  PYTHONPATH=. lib/opcode_gen.py > _tmp/opcode.h

  # ovm_main.cc is C++, so use the C++ driver: 'cc' compiles it by file
  # extension but does not link the C++ standard library.
  c++ -I _tmp -o "$bin" ../ovm/ovm_main.cc
  #c++ -I ../Python-2.7.13/Include -o "$bin" ../ovm/ovm_main.cc

  # Trace the decoder invocation.
  set -x
  "$bin" "$@"
}
# Compile gold/fibonacci.py to a bytecode file with opyc, then decode that
# file with run-ovm.
compile-fib() {
  local out=_tmp/fibonacci.bytecode

  ../bin/opyc compile-fib gold/fibonacci.py "$out"
  run-ovm "$out"
}

# Run the function named by the first argument, passing it the remaining
# arguments, e.g. './test.sh compile-fib'.
"$@"
View
@@ -0,0 +1,46 @@
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#include <string>

#include "opcode.h"
// Decode a file of raw bytecode and print each instruction.
//
// Usage: ovm_main BYTECODE_FILE
//
// Prints the name of every opcode (looked up in kOpcodeNames from opcode.h),
// followed by its 16-bit little-endian argument when the opcode takes one,
// and finally the number of instructions read.
//
// Returns 0 on success, or 1 on a usage error, an I/O error, or a truncated
// instruction.
int main(int argc, char **argv) {
  // argv[0] is the program name, so a filename requires argc >= 2.
  if (argc < 2) {
    fprintf(stderr, "Expected filename\n");
    return 1;
  }
  const char* path = argv[1];

  FILE *f = fopen(path, "rb");
  if (!f) {
    fprintf(stderr, "Error opening %s\n", path);
    return 1;
  }

  // Find the file size by seeking to the end, then rewind.
  long file_size = -1;
  if (fseek(f, 0, SEEK_END) == 0) {
    file_size = ftell(f);
  }
  if (file_size < 0 || fseek(f, 0, SEEK_SET) != 0) {
    fprintf(stderr, "Error getting size of %s\n", path);
    fclose(f);
    return 1;
  }
  size_t num_bytes = static_cast<size_t>(file_size);

  // malloc(0) may return NULL, so always request at least one byte.
  uint8_t* bytecode =
      static_cast<uint8_t*>(malloc(num_bytes > 0 ? num_bytes : 1));
  if (!bytecode) {
    fprintf(stderr, "Error allocating %zu bytes\n", num_bytes);
    fclose(f);
    return 1;
  }

  size_t num_read = fread(bytecode, 1, num_bytes, f);
  fclose(f);
  if (num_read != num_bytes) {
    fprintf(stderr, "Error reading %s\n", path);
    free(bytecode);
    return 1;
  }

  printf("Read %zu bytes\n", num_bytes);

  // kOpcodeNames only covers opcodes up to the largest one defined, so a
  // byte beyond that must not be used as an index.
  const size_t num_names = sizeof(kOpcodeNames) / sizeof(kOpcodeNames[0]);

  size_t i = 0;
  // NOTE: extended_arg is never updated here, so an EXTENDED_ARG prefix is
  // not applied to the following instruction yet.
  int extended_arg = 0;
  int n = 0;

  while (i < num_bytes) {
    uint8_t op = bytecode[i];
    i++;

    const char* name = (op < num_names) ? kOpcodeNames[op] : "";
    printf("%s\n", name);

    // Opcodes greater than OR EQUAL TO HAVE_ARGUMENT take a 2-byte argument.
    if (op >= HAVE_ARGUMENT) {
      if (num_bytes - i < 2) {
        fprintf(stderr, "Truncated argument at offset %zu\n", i);
        free(bytecode);
        return 1;
      }
      int oparg = bytecode[i] + bytecode[i+1]*256 + extended_arg;
      printf(" arg %d\n", oparg);
      i += 2;
    }
    n++;
  }

  printf("Read %d instructions\n", n);

  free(bytecode);
  return 0;
}

0 comments on commit 1afd2dd

Please sign in to comment.