Skip to content

Commit

Permalink
Fix a horrible bug in util.Enum that I introduced on Monday.
Browse files Browse the repository at this point in the history
__hash__ and __eq__ have to match.  That was my fault.  However, I
believe this exposed a bug in Python 2.7.  It led
some reason this led to non-determinism that depended on whether there's
a .pyc file or not!!!

There is a repro in test/unit.sh.  Tested under multiple Python
interpreter, and under GDB.  (TODO later: investigate the root cause.)

Simplify lookups keyed by Id -- use enum_value instead.

Also:

- Automate the re2c dependency.  Upgrade to 1.0.3.
- Got uftrace working on a stock Python interpreter!
- Trace in Python too (benchmarks/pytrace.py).
  • Loading branch information
Andy Chu committed Nov 25, 2017
1 parent 80a1974 commit 3cb2205
Show file tree
Hide file tree
Showing 12 changed files with 200 additions and 23 deletions.
1 change: 1 addition & 0 deletions .gitignore
Expand Up @@ -6,6 +6,7 @@ _tmp
_bin
_build
_devbuild
_deps
_release
libc.so
# Python build support
Expand Down
1 change: 1 addition & 0 deletions README.md
Expand Up @@ -92,6 +92,7 @@ Directory Structure
_bin/ # Native executables are put here
_build/ # Temporary build files
_devbuild/ # Developer build files not deleted upon 'make clean'
_deps/ # build dependencies like re2c
_tmp/ # Temporary test files and the like
spec/
wild/
Expand Down
18 changes: 16 additions & 2 deletions benchmarks/pytrace.py
Expand Up @@ -25,6 +25,7 @@ def __init__(self, max_events=10e6):
# After max_events we stop recording
self.max_events = max_events
self.num_events = 0
self.depth = 0

# Python VM callback
def OnEvent(self, frame, event_type, arg):
Expand All @@ -40,15 +41,28 @@ def OnEvent(self, frame, event_type, arg):
# cProfile output.

self.num_events += 1
name = frame.f_code.co_name
filename = frame.f_code.co_filename
if event_type in ('call', 'c_call'):
self.depth += 1

record = '%s%s\t%s\t%s\t%s\t%s\n' % (' ' * self.depth,
event_type, filename, frame.f_lineno, name, arg)
self.event_strs.write(record)

if event_type in ('return', 'c_return'):
self.depth -= 1

return

self.event_strs.write('') # struct.pack(s)
# TODO:
# NOTE: Do we want a struct.pack version eventually?
#self.event_strs.write('')

def Start(self):
sys.setprofile(self.OnEvent)

def Stop(self, path):
sys.setprofile(None)
# Only one process should write out the file!
if os.getpid() != self.pid:
return
Expand Down
34 changes: 34 additions & 0 deletions benchmarks/uftrace.sh
@@ -0,0 +1,34 @@
#!/bin/bash
#
# Usage:
# ./uftrace.sh <function name>

set -o nounset
set -o pipefail
set -o errexit

#uftrace() {
# ~/src/uftrace-0.8.1/uftrace "$@"
#}

python-demo() {
uftrace _devbuild/cpython-instrumented/python -h
}

# https://github.com/namhyung/uftrace/wiki/Tutorial
hello-demo() {
cat >_tmp/hello.c <<EOF
#include <stdio.h>
int main(void) {
printf("Hello world\n");
return 0;
}
EOF

gcc -o _tmp/hello -pg _tmp/hello.c

uftrace _tmp/hello
}

"$@"
18 changes: 16 additions & 2 deletions build/codegen.sh
Expand Up @@ -30,8 +30,22 @@ set -o errexit
# osh-lex.re2c.c
# osh-lex.c

download-re2c() {
mkdir -p _deps
wget --directory _deps \
https://github.com/skvadrik/re2c/releases/download/1.0.3/re2c-1.0.3.tar.gz
}

install-re2c() {
cd _deps
tar -x -z < re2c-1.0.3.tar.gz
cd re2c-1.0.3
./configure
make
}

re2c() {
~/src/re2c-0.16/re2c "$@"
_deps/re2c-1.0.3/re2c "$@"
}

ast-gen() {
Expand Down Expand Up @@ -79,7 +93,7 @@ bloaty() {
}

symbols() {
local obj=_devbuild/pylibc/x86_64/fastlex.so
local obj=_devbuild/py-ext/x86_64/fastlex.so
nm $obj
echo

Expand Down
20 changes: 16 additions & 4 deletions build/prepare.sh
Expand Up @@ -14,15 +14,16 @@ set -o errexit
source build/common.sh

configure() {
local dir=$PREPARE_DIR
local dir=${1:-$PREPARE_DIR}

rm -r -f $dir
mkdir -p $dir

local conf=$PWD/$PY27/configure

cd $dir
pushd $dir
time $conf --without-threads
popd
}

# Clang makes this faster. We have to build all modules so that we can
Expand All @@ -37,13 +38,24 @@ readonly NPROC=$(nproc)
readonly JOBS=$(( NPROC == 1 ? NPROC : NPROC-1 ))

build-python() {
cd $PREPARE_DIR
local dir=${1:-$PREPARE_DIR}
local extra_cflags=${2:-'-O0'}

pushd $dir
make clean
# Speed it up with -O0.
# NOTE: CFLAGS clobbers some Python flags, so use EXTRA_CFLAGS.

time make -j $JOBS EXTRA_CFLAGS='-O0'
time make -j $JOBS EXTRA_CFLAGS="$extra_cflags"
#time make -j 7 CFLAGS='-O0'
popd
}


# For uftrace.
cpython-instrumented() {
configure _devbuild/cpython-instrumented
build-python _devbuild/cpython-instrumented '-O0 -pg'
}

"$@"
17 changes: 9 additions & 8 deletions core/id_kind.py
Expand Up @@ -15,36 +15,37 @@
_ID_TO_KIND = {} # type: dict

def LookupKind(id_):
return _ID_TO_KIND[id_]
return _ID_TO_KIND[id_.enum_value]


_ID_NAMES = {} # type: dict

def IdName(id_):
return _ID_NAMES[id_]
return _ID_NAMES[id_.enum_value]


class Id(object):
"""Token and op type.
The evaluator must consider all Ids.
NOTE: We add a bunch of class attributes that are INSTANCES of this class,
e.g. Id.Lit_Chars.
"""
def __init__(self, enum_value):
self.enum_value = enum_value

def __eq__(self, other):
# NOTE: We need to compare to ints too because of the ID hash tables. They
# are keyed by index, and then an equality test happens.
if isinstance(other, int):
return self.enum_value == other

return self.enum_value == other.enum_value

def __ne__(self, other):
return self.enum_value != other.enum_value

def __hash__(self):
return hash(self.enum_value)

def __repr__(self):
return IdName(self.enum_value)
return IdName(self)


class Kind(object):
Expand Down
8 changes: 8 additions & 0 deletions core/id_kind_test.py
Expand Up @@ -57,6 +57,14 @@ def testTokens(self):
t = ast.token(Id.BoolBinary_GlobDEqual, '==')
self.assertEqual(Kind.BoolBinary, LookupKind(t.id))

def testEquality(self):
# OK WTF!!!!
left = Id(198)
right = Id(198)
print(left, right)
print(left == right)
self.assertEqual(left, right)

def testLexerPairs(self):
def MakeLookup(p):
return dict((pat, tok) for _, pat, tok in p)
Expand Down
11 changes: 7 additions & 4 deletions core/lexer_gen.py
Expand Up @@ -183,11 +183,11 @@ def TranslateLexer(lexer_def):
inline void MatchToken(int lex_mode, unsigned char* line, int line_len,
int start_pos, int* id, int* end_pos) {
// bounds checking
assert(start_pos < line_len);
unsigned char* p = line + start_pos; /* modified by re2c */
unsigned char* q = line + line_len; /* yylimit */
// bounds checking
assert(p < q);
//printf("p: %p q: %p\n", p, q);
unsigned char* YYMARKER; /* why do we need this? */
Expand All @@ -208,7 +208,10 @@ def TranslateLexer(lexer_def):
re2_pat = TranslateRegex(pat)
else:
re2_pat = TranslateConstant(pat)
print ' %-30s { *id = id__%s; break; }' % (re2_pat, token_id)
# TODO: Remove this after debugging Id problem
from core import id_kind
id_name = id_kind.IdName(token_id)
print ' %-30s { *id = id__%s; break; }' % (re2_pat, id_name)
print ' */'
print ' }'
print ' break;'
Expand Down
6 changes: 3 additions & 3 deletions core/util.py
Expand Up @@ -132,9 +132,9 @@ def __repr__(self):
return '<%s.%s %s>' % (self.namespace, self.name, self.value)

# I think this is not needed?
#def __hash__(self):
# # Needed for the LEXER_DEF dictionary
# return hash(self.name)
def __hash__(self):
# Needed for the LEXER_DEF dictionary
return hash(self.name)

# Why is this needed? For ASDL serialization? But we're not using it like
# that.
Expand Down
36 changes: 36 additions & 0 deletions native/fastlex_test.py
Expand Up @@ -57,6 +57,42 @@ def testOutOfBounds(self):
print MatchToken(lex_mode_e.OUTER, 'line', 4)
print MatchToken(lex_mode_e.OUTER, 'line', 5)

def testBug(self):
import os
pid = os.getpid()
# This changes the results! WTF!
from benchmarks import pytrace
t = pytrace.Tracer()
t.Start()

# woah this makes no sense

# expected 194, got 197. So how is expected being corrupted?
code_str = '-z'
expected = Id.BoolUnary_z

# expected 197 got 195 (but it says that is -n, which is 198)
code_str = '-v'
expected = Id.BoolUnary_v

# -n confused with -R
# 195 198
code_str = '-n'
expected = Id.BoolUnary_n

try:
tok_type, end_pos = MatchToken(lex_mode_e.DBRACKET, code_str, 0)
#print tok_type, end_pos
#print tok_type
print '---', 'expected', expected.enum_value, 'got', tok_type.enum_value
#print 'L', id(Id.BoolUnary_z)
#print 'L', id(Id.BoolUnary_z.enum_value)

self.assertEqual(expected, tok_type)
#self.assertEqual(2, end_pos, Id.BoolUnary_z)
finally:
t.Stop('bug-%d.pytrace' % pid)


if __name__ == '__main__':
unittest.main()
53 changes: 53 additions & 0 deletions test/unit.sh
Expand Up @@ -24,9 +24,62 @@ one() {

# For auto-complete
unit() {
#$py "$@"

"$@"
}

delete-pyc() {
find . -name '*.pyc' | xargs --no-run-if-empty -- rm || true
}

readonly PY_273=~/src/languages/Python-2.7.3/python
readonly PY_272=~/src/languages/Python-2.7.2/python
readonly PY_27=~/src/languages/Python-2.7/python

# WTF, fixes native_test issue
#export PYTHONDONTWRITEBYTECODE=1

banner() {
echo -----
echo "$@"
echo ----
}

# geez wtf!
repro() {
rm -v *.pytrace || true
delete-pyc

#local t=osh/cmd_parse_test.py
#local t='native/fastlex_test.py LexTest.testBug'
local t='core/id_kind_test.py TokensTest.testEquality'

# with shebang
#py=''
local py='_devbuild/cpython-instrumented/python'

#local prefix='uftrace record -d one.uftrace'
local prefix=''
#local prefix='gdb --args'

set +o errexit

banner 'FIRST'

$prefix $py $t
local first=$?

banner 'SECOND'

# Fails the second time
$prefix $py $t
local second=$?

echo "first $first second $second"
#$PY_273 -V
}

_log-one() {
local name=$1
$name > _tmp/unit/${name}.log.txt 2>&1
Expand Down

0 comments on commit 3cb2205

Please sign in to comment.