Permalink
Browse files

Fix a bug where CPython would execute code instead of byterun.

Functions from imported modules are created through the host __import__
rather than MAKE_FUNCTION, so they were being executed by the HOST
CPython interpreter.

This can be seen with the speed.py/speed_main.py scripts in smoke.sh.

See comments in pyvm2.py for details on the fix.

Also:
- Misc cleanup and comments.
- This reveals some OSH unit tests failing under byterun, which need to
  be fixed.
  • Loading branch information...
Andy Chu
Andy Chu committed Apr 17, 2017
1 parent 521c0f5 commit 62cd4928c0e71f9cd254062204227a8d8cf1cb23
Showing with 111 additions and 31 deletions.
  1. +1 −1 opy/byterun/pyobj.py
  2. +34 −11 opy/byterun/pyvm2.py
  3. +13 −10 opy/opy_main.py
  4. +63 −9 opy/smoke.sh
View
@@ -50,7 +50,7 @@ def __init__(self, name, code, globs, defaults, closure, vm):
self._func = types.FunctionType(code, globs, **kw)
def __repr__(self): # pragma: no cover
return '<Function %s at 0x%08x>' % (
return '<byterun Function %s at 0x%08x>' % (
self.func_name, id(self)
)
View
@@ -9,6 +9,7 @@
import logging
import operator
import sys
import types
import six
from six.moves import reprlib
@@ -19,11 +20,6 @@
log = logging.getLogger(__name__)
if six.PY3:
byteint = lambda b: b
else:
byteint = ord
# Create a repr that won't overflow.
repr_obj = reprlib.Repr()
repr_obj.maxother = 120
@@ -169,15 +165,15 @@ def parse_byte_and_args(self):
an instruction and optionally arguments."""
f = self.frame
opoffset = f.f_lasti
byteCode = byteint(f.f_code.co_code[opoffset])
byteCode = ord(f.f_code.co_code[opoffset])
f.f_lasti += 1
byteName = dis.opname[byteCode]
arg = None
arguments = []
if byteCode >= dis.HAVE_ARGUMENT:
arg = f.f_code.co_code[f.f_lasti:f.f_lasti+2]
f.f_lasti += 2
intArg = byteint(arg[0]) + (byteint(arg[1]) << 8)
intArg = ord(arg[0]) + (ord(arg[1]) << 8)
if byteCode in dis.hasconst:
arg = f.f_code.co_consts[intArg]
elif byteCode in dis.hasfree:
@@ -314,7 +310,9 @@ def run_frame(self, frame):
"""
self.push_frame(frame)
num_ticks = 0
while True:
num_ticks += 1
byteName, arguments, opoffset = self.parse_byte_and_args()
if log.isEnabledFor(logging.INFO):
self.log(byteName, arguments, opoffset)
@@ -342,8 +340,13 @@ def run_frame(self, frame):
self.pop_frame()
if why == 'exception':
# Hm there is no third traceback part of the tuple?
#print('GOT', self.last_exception)
six.reraise(*self.last_exception)
# How to raise with_traceback? Is that Python 3 only?
#raise self.last_exception
#print('num_ticks: %d' % num_ticks)
return self.return_value
## Stack manipulation
@@ -963,7 +966,27 @@ def call_function(self, arg, args, kwargs):
)
)
func = func.im_func
retval = func(*posargs, **namedargs)
# BUG FIX: The callable must be a pyobj.Function, not a native Python
# function (types.FunctionType). The latter will be executed using the
# HOST CPython interpreter rather than the byterun interpreter.
# Cases:
# 1. builtin functions like int(). We want to use the host here.
# 2. User-defined functions from this module. These are created with
# MAKE_FUNCTION, which properly turns them into pyobj.Function.
# 3. User-defined function from another module. These are created with
# __import__, which yields a native function.
if isinstance(func, types.FunctionType):
defaults = func.func_defaults or ()
byterun_func = Function(
func.func_name, func.func_code, func.func_globals,
defaults, func.func_closure, self)
else:
byterun_func = func
retval = byterun_func(*posargs, **namedargs)
self.push(retval)
def byte_RETURN_VALUE(self):
@@ -1003,9 +1026,9 @@ def byte_YIELD_FROM(self):
def byte_IMPORT_NAME(self, name):
level, fromlist = self.popn(2)
frame = self.frame
self.push(
__import__(name, frame.f_globals, frame.f_locals, fromlist, level)
)
mod = __import__(name, frame.f_globals, frame.f_locals, fromlist, level)
#print('-- IMPORTED %s -> %s' % (name, mod))
self.push(mod)
def byte_IMPORT_STAR(self):
# TODO: this doesn't use __all__ properly.
View
@@ -12,6 +12,12 @@
import marshal
import logging
# Like oil.py, set PYTHONPATH internally? So symlinks work?
# Actually '.' is implicitly in PYTHONPATH, so we don't need it.
# If we were in bin/oil.py, then we would need this.
#this_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
#sys.path.append(os.path.join(this_dir))
from pgen2 import driver, pgen, grammar
from pgen2 import token, tokenize
import pytree
@@ -25,11 +31,8 @@
from util_opy import log
# From lib2to3/pygram.py. This presumably takes the place of the 'symbol'
# module. Could we hook it up elsewhere?
#
# From lib2to3/pygram.py. This takes the place of the 'symbol' module.
# compiler/transformer module needs this.
# tools/compile.py runs compileFile, which runs parseFile.
class Symbols(object):
@@ -164,11 +167,10 @@ def main(argv):
transformer.Init(symbols) # for _names and other dicts
# In Python 2 code, always use from __future__ import print_function.
if 1: # TODO: re-enable after 2to3
try:
del gr.keywords["print"]
except KeyError:
pass
try:
del gr.keywords["print"]
except KeyError:
pass
#do_glue = False
do_glue = True
@@ -294,6 +296,7 @@ def py2st(gr, raw_node):
#level = logging.DEBUG if args.verbose else logging.WARNING
#logging.basicConfig(level=level)
#logging.basicConfig(level=logging.DEBUG)
# Compile and run, without writing pyc file
py_path = argv[3]
@@ -308,7 +311,7 @@ def py2st(gr, raw_node):
co = pycodegen.compile(contents, py_path, 'exec', transformer=tr)
execfile.run_code_object(co, opy_argv)
elif py_path.endswith('.pyc'):
elif py_path.endswith('.pyc') or py_path.endswith('.opyc'):
with open(py_path) as f:
f.seek(8) # past header. TODO: validate it!
co = marshal.load(f)
View
@@ -118,26 +118,80 @@ test-osh-tree() {
popd
}
byterun-speed-test() {
write-speed() {
cat >_tmp/speed.py <<EOF
def do_sum(n):
sum = 0
for i in xrange(n):
sum += i
print(sum)
if __name__ == '__main__':
import sys
n = int(sys.argv[1])
do_sum(n)
EOF
cat >_tmp/speed_main.py <<EOF
import sys
import speed
n = int(sys.argv[1])
sum = 0
for i in xrange(n):
sum += i
print(sum)
speed.do_sum(n)
EOF
}
opy-speed-test() {
write-speed
_compile-one _tmp/speed.py _tmp/speed.pyc
_compile-one _tmp/speed_main.py _tmp/speed_main.pyc
_compile2-one _tmp/speed.py _tmp/speed.pyc
cp _tmp/speed.pyc _tmp/speed.opyc
# For logging
local n=10000
#local n=10
# 7 ms
echo PYTHON
time python _tmp/speed.opyc $n
# 205 ms. So it's 30x slower. Makes sense.
echo OPY
time opy_ run _tmp/speed.opyc $n
# 7 ms
echo PYTHON
time python _tmp/speed.opyc 10000
time python _tmp/speed_main.pyc $n
# 205 ms. So it's 30x slower. Makes sense.
echo BYTERUN
time byterun -c _tmp/speed.opyc 10000
echo OPY
time opy_ run _tmp/speed_main.pyc $n
}
byterun-speed-test() {
write-speed
echo OLD BYTERUN
time _byterun $PWD/_tmp/speed_main.py 10000
time _byterun $PWD/_tmp/speed.py 10000
}
# Run the separate byterun checkout in ~/git/other/byterun (the one the caller
# labels "OLD BYTERUN") as a module, forwarding all arguments to it unchanged.
#
# The command executes from inside that checkout (pushd/popd), so a relative
# script path would be resolved against ~/git/other/byterun; the caller in this
# file passes absolute paths ($PWD/_tmp/...) for that reason.
_byterun() {
# Wow this is SO confusing.
# Not executable on master branch
# Earlier invocation attempts, kept for reference:
#python ~/git/other/byterun/byterun/__main__.py "$@"
#python ~/git/other/byterun/byterun "$@"
#python -m ~/git/other/byterun/byterun "$@"
#PYTHONPATH=~/git/other/byterun
# WHY is this the only way to make it work?
# NOTE(review): presumably `python -m` finds the byterun package only because
# the current directory is the checkout root -- confirm.
pushd ~/git/other/byterun
python -m byterun.__main__ "$@"
popd
}
#

0 comments on commit 62cd492

Please sign in to comment.