Permalink
Browse files

Fix a horrible bug in util.Enum that I introduced on Monday.

__hash__ and __eq__ have to match.  That was my fault.  However, I
believe this exposed a bug in Python 2.7.  It led
some reason this led to non-determinism that depended on whether there's
a .pyc file or not!!!

There is a repro in test/unit.sh.  Tested under multiple Python
interpreter, and under GDB.  (TODO later: investigate the root cause.)

Simplify lookups keyed by Id -- use enum_value instead.

Also:

- Automate the re2c dependency.  Upgrade to 1.0.3.
- Got uftrace working on a stock Python interpreter!
- Trace in Python too (benchmarks/pytrace.py).
  • Loading branch information...
Andy Chu
Andy Chu committed Nov 23, 2017
1 parent 80a1974 commit 3cb2205c30b0c4eb72073b6599f8bf2ed64e174e
Showing with 200 additions and 23 deletions.
  1. +1 −0 .gitignore
  2. +1 −0 README.md
  3. +16 −2 benchmarks/pytrace.py
  4. +34 −0 benchmarks/uftrace.sh
  5. +16 −2 build/codegen.sh
  6. +16 −4 build/prepare.sh
  7. +9 −8 core/id_kind.py
  8. +8 −0 core/id_kind_test.py
  9. +7 −4 core/lexer_gen.py
  10. +3 −3 core/util.py
  11. +36 −0 native/fastlex_test.py
  12. +53 −0 test/unit.sh
View
@@ -6,6 +6,7 @@ _tmp
_bin
_build
_devbuild
_deps
_release
libc.so
# Python build support
View
@@ -92,6 +92,7 @@ Directory Structure
_bin/ # Native executables are put here
_build/ # Temporary build files
_devbuild/ # Developer build files not deleted upon 'make clean'
_deps/ # build dependencies like re2c
_tmp/ # Temporary test files and the like
spec/
wild/
View
@@ -25,6 +25,7 @@ def __init__(self, max_events=10e6):
# After max_events we stop recording
self.max_events = max_events
self.num_events = 0
self.depth = 0
# Python VM callback
def OnEvent(self, frame, event_type, arg):
@@ -40,15 +41,28 @@ def OnEvent(self, frame, event_type, arg):
# cProfile output.
self.num_events += 1
name = frame.f_code.co_name
filename = frame.f_code.co_filename
if event_type in ('call', 'c_call'):
self.depth += 1
record = '%s%s\t%s\t%s\t%s\t%s\n' % (' ' * self.depth,
event_type, filename, frame.f_lineno, name, arg)
self.event_strs.write(record)
if event_type in ('return', 'c_return'):
self.depth -= 1
return
self.event_strs.write('') # struct.pack(s)
# TODO:
# NOTE: Do we want a struct.pack version eventually?
#self.event_strs.write('')
def Start(self):
sys.setprofile(self.OnEvent)
def Stop(self, path):
sys.setprofile(None)
# Only one process should write out the file!
if os.getpid() != self.pid:
return
View
@@ -0,0 +1,34 @@
#!/bin/bash
#
# Usage:
# ./uftrace.sh <function name>
set -o nounset
set -o pipefail
set -o errexit
#uftrace() {
# ~/src/uftrace-0.8.1/uftrace "$@"
#}
python-demo() {
uftrace _devbuild/cpython-instrumented/python -h
}
# https://github.com/namhyung/uftrace/wiki/Tutorial
hello-demo() {
cat >_tmp/hello.c <<EOF
#include <stdio.h>
int main(void) {
printf("Hello world\n");
return 0;
}
EOF
gcc -o _tmp/hello -pg _tmp/hello.c
uftrace _tmp/hello
}
"$@"
View
@@ -30,8 +30,22 @@ set -o errexit
# osh-lex.re2c.c
# osh-lex.c
download-re2c() {
mkdir -p _deps
wget --directory _deps \
https://github.com/skvadrik/re2c/releases/download/1.0.3/re2c-1.0.3.tar.gz
}
install-re2c() {
cd _deps
tar -x -z < re2c-1.0.3.tar.gz
cd re2c-1.0.3
./configure
make
}
re2c() {
~/src/re2c-0.16/re2c "$@"
_deps/re2c-1.0.3/re2c "$@"
}
ast-gen() {
@@ -79,7 +93,7 @@ bloaty() {
}
symbols() {
local obj=_devbuild/pylibc/x86_64/fastlex.so
local obj=_devbuild/py-ext/x86_64/fastlex.so
nm $obj
echo
View
@@ -14,15 +14,16 @@ set -o errexit
source build/common.sh
configure() {
local dir=$PREPARE_DIR
local dir=${1:-$PREPARE_DIR}
rm -r -f $dir
mkdir -p $dir
local conf=$PWD/$PY27/configure
cd $dir
pushd $dir
time $conf --without-threads
popd
}
# Clang makes this faster. We have to build all modules so that we can
@@ -37,13 +38,24 @@ readonly NPROC=$(nproc)
readonly JOBS=$(( NPROC == 1 ? NPROC : NPROC-1 ))
build-python() {
cd $PREPARE_DIR
local dir=${1:-$PREPARE_DIR}
local extra_cflags=${2:-'-O0'}
pushd $dir
make clean
# Speed it up with -O0.
# NOTE: CFLAGS clobbers some Python flags, so use EXTRA_CFLAGS.
time make -j $JOBS EXTRA_CFLAGS='-O0'
time make -j $JOBS EXTRA_CFLAGS="$extra_cflags"
#time make -j 7 CFLAGS='-O0'
popd
}
# For uftrace.
cpython-instrumented() {
configure _devbuild/cpython-instrumented
build-python _devbuild/cpython-instrumented '-O0 -pg'
}
"$@"
View
@@ -15,36 +15,37 @@
_ID_TO_KIND = {} # type: dict
def LookupKind(id_):
return _ID_TO_KIND[id_]
return _ID_TO_KIND[id_.enum_value]
_ID_NAMES = {} # type: dict
def IdName(id_):
return _ID_NAMES[id_]
return _ID_NAMES[id_.enum_value]
class Id(object):
"""Token and op type.
The evaluator must consider all Ids.
NOTE: We add a bunch of class attributes that are INSTANCES of this class,
e.g. Id.Lit_Chars.
"""
def __init__(self, enum_value):
self.enum_value = enum_value
def __eq__(self, other):
# NOTE: We need to compare to ints too because of the ID hash tables. They
# are keyed by index, and then an equality test happens.
if isinstance(other, int):
return self.enum_value == other
return self.enum_value == other.enum_value
def __ne__(self, other):
return self.enum_value != other.enum_value
def __hash__(self):
return hash(self.enum_value)
def __repr__(self):
return IdName(self.enum_value)
return IdName(self)
class Kind(object):
View
@@ -57,6 +57,14 @@ def testTokens(self):
t = ast.token(Id.BoolBinary_GlobDEqual, '==')
self.assertEqual(Kind.BoolBinary, LookupKind(t.id))
def testEquality(self):
# OK WTF!!!!
left = Id(198)
right = Id(198)
print(left, right)
print(left == right)
self.assertEqual(left, right)
def testLexerPairs(self):
def MakeLookup(p):
return dict((pat, tok) for _, pat, tok in p)
View
@@ -183,11 +183,11 @@ def TranslateLexer(lexer_def):
inline void MatchToken(int lex_mode, unsigned char* line, int line_len,
int start_pos, int* id, int* end_pos) {
// bounds checking
assert(start_pos < line_len);
unsigned char* p = line + start_pos; /* modified by re2c */
unsigned char* q = line + line_len; /* yylimit */
// bounds checking
assert(p < q);
//printf("p: %p q: %p\n", p, q);
unsigned char* YYMARKER; /* why do we need this? */
@@ -208,7 +208,10 @@ def TranslateLexer(lexer_def):
re2_pat = TranslateRegex(pat)
else:
re2_pat = TranslateConstant(pat)
print ' %-30s { *id = id__%s; break; }' % (re2_pat, token_id)
# TODO: Remove this after debugging Id problem
from core import id_kind
id_name = id_kind.IdName(token_id)
print ' %-30s { *id = id__%s; break; }' % (re2_pat, id_name)
print ' */'
print ' }'
print ' break;'
View
@@ -132,9 +132,9 @@ def __repr__(self):
return '<%s.%s %s>' % (self.namespace, self.name, self.value)
# I think this is not needed?
#def __hash__(self):
# # Needed for the LEXER_DEF dictionary
# return hash(self.name)
def __hash__(self):
# Needed for the LEXER_DEF dictionary
return hash(self.name)
# Why is this needed? For ASDL serialization? But we're not using it like
# that.
View
@@ -57,6 +57,42 @@ def testOutOfBounds(self):
print MatchToken(lex_mode_e.OUTER, 'line', 4)
print MatchToken(lex_mode_e.OUTER, 'line', 5)
def testBug(self):
import os
pid = os.getpid()
# This changes the results! WTF!
from benchmarks import pytrace
t = pytrace.Tracer()
t.Start()
# woah this makes no sense
# expected 194, got 197. So how is expected being corrupted?
code_str = '-z'
expected = Id.BoolUnary_z
# expected 197 got 195 (but it says that is -n, which is 198)
code_str = '-v'
expected = Id.BoolUnary_v
# -n confused with -R
# 195 198
code_str = '-n'
expected = Id.BoolUnary_n
try:
tok_type, end_pos = MatchToken(lex_mode_e.DBRACKET, code_str, 0)
#print tok_type, end_pos
#print tok_type
print '---', 'expected', expected.enum_value, 'got', tok_type.enum_value
#print 'L', id(Id.BoolUnary_z)
#print 'L', id(Id.BoolUnary_z.enum_value)
self.assertEqual(expected, tok_type)
#self.assertEqual(2, end_pos, Id.BoolUnary_z)
finally:
t.Stop('bug-%d.pytrace' % pid)
if __name__ == '__main__':
unittest.main()
View
@@ -24,9 +24,62 @@ one() {
# For auto-complete
unit() {
#$py "$@"
"$@"
}
delete-pyc() {
find . -name '*.pyc' | xargs --no-run-if-empty -- rm || true
}
readonly PY_273=~/src/languages/Python-2.7.3/python
readonly PY_272=~/src/languages/Python-2.7.2/python
readonly PY_27=~/src/languages/Python-2.7/python
# WTF, fixes native_test issue
#export PYTHONDONTWRITEBYTECODE=1
banner() {
echo -----
echo "$@"
echo ----
}
# geez wtf!
repro() {
rm -v *.pytrace || true
delete-pyc
#local t=osh/cmd_parse_test.py
#local t='native/fastlex_test.py LexTest.testBug'
local t='core/id_kind_test.py TokensTest.testEquality'
# with shebang
#py=''
local py='_devbuild/cpython-instrumented/python'
#local prefix='uftrace record -d one.uftrace'
local prefix=''
#local prefix='gdb --args'
set +o errexit
banner 'FIRST'
$prefix $py $t
local first=$?
banner 'SECOND'
# Fails the second time
$prefix $py $t
local second=$?
echo "first $first second $second"
#$PY_273 -V
}
_log-one() {
local name=$1
$name > _tmp/unit/${name}.log.txt 2>&1

0 comments on commit 3cb2205

Please sign in to comment.