Permalink
Browse files

Package up the OSH parser benchmarks and run them on another machine.

- Import shell files into repo and make a new manifest
- start the concept of code/data/env ID for benchmarks

Also:

- focus on profiling the abuild parse in benchmarks/pytrace.sh
  • Loading branch information...
Andy Chu
Andy Chu committed Oct 23, 2017
1 parent 2f864e5 commit 6f83e3a5e9b9a8617f1e1f6ad249f9018ff1ee00
View
@@ -3,6 +3,7 @@
_tmp
_bin
_build
_devbuild
_release
libc.so
# Python build support
@@ -1,11 +1,10 @@
# These files were selected to be big with test/wild.sh count-lines
/home/andy/git/alpine/abuild/abuild
/home/andy/git/other/staticpython/build.sh
/home/andy/git/other/git/t/t9300-fast-import.sh
/home/andy/git/other/kubernetes/hack/make-rules/test-cmd-util.sh
/home/andy/git/other/kubernetes/cluster/gce/gci/configure-helper.sh
/home/andy/src/mksh/Build.sh
/home/andy/git/basis-build/_tmp/debootstrap/functions
/home/andy/git/other/git/t/t4014-format-patch.sh
/home/andy/git/other/kythe/third_party/proto/configure
/home/andy/git/other/kythe/third_party/proto/ltmain.sh
benchmarks/testdata/abuild
benchmarks/testdata/ltmain.sh
benchmarks/testdata/build.sh
benchmarks/testdata/functions
benchmarks/testdata/configure-helper.sh
benchmarks/testdata/t9300-fast-import.sh
benchmarks/testdata/t4014-format-patch.sh
benchmarks/testdata/configure
benchmarks/testdata/Build.sh
benchmarks/testdata/test-cmd-util.sh
@@ -0,0 +1,11 @@
# These files were selected to be big with test/wild.sh count-lines
/home/andy/git/alpine/abuild/abuild
/home/andy/git/other/staticpython/build.sh
/home/andy/git/other/git/t/t9300-fast-import.sh
/home/andy/git/other/kubernetes/hack/make-rules/test-cmd-util.sh
/home/andy/git/other/kubernetes/cluster/gce/gci/configure-helper.sh
/home/andy/src/mksh/Build.sh
/home/andy/git/basis-build/_tmp/debootstrap/functions
/home/andy/git/other/git/t/t4014-format-patch.sh
/home/andy/git/other/kythe/third_party/proto/configure
/home/andy/git/other/kythe/third_party/proto/ltmain.sh
View
@@ -1,5 +1,7 @@
#!/bin/bash
#
# Measure how fast the OSH parser is.
#
# Usage:
# ./osh-parser.sh <function name>
@@ -15,40 +17,42 @@ readonly LINES_CSV=$BASE_DIR/raw/line-counts.csv
# NOTE --ast-format none eliminates print time! That is more than half of it!
# ( 60 seconds with serialization, 29 seconds without.)
#
# TODO: Lines per second is about 1700
# Run each file twice and compare timing?
# TODO: Use the compiled version without our Python, not system Python!
# Compilation flags are different.
# - Well maybe we want both.
# TODO:
# - Have OSH --parse-and-dump-path
# - it can dump /proc/self/meminfo
# Parse one shell script with osh and append the timing row to a CSV.
#
# $1 - CSV file that benchmarks/time.py appends to
# $2 - path of the shell script to parse
#
# Uses --ast-format none so serialization time isn't measured (see note above).
osh-parse-one() {
  local append_out=$1
  local path=$2
  echo "--- $path ---"

  TIMEFORMAT="%R osh $path"  # elapsed time

  benchmarks/time.py \
    --output "$append_out" \
    --field osh --field "$path" -- \
    bin/osh -n --ast-format none "$path"
}
# Time one shell's parse (-n) of one script and append the result to a CSV.
#
# $1 - CSV file that benchmarks/time.py appends to
# $2 - shell to run (bash, dash, mksh, zsh, ...)
# $3 - path of the shell script to parse
sh-one() {
  local append_out=$1
  local sh=$2
  local path=$3
  echo "--- $sh -n $path ---"

  # Since we're running benchmarks serially, just append to the same file.
  TIMEFORMAT="%R $sh $path"  # elapsed time

  # Row is: exit code, time in seconds, sh, path.  \0 would have been nice here!
  benchmarks/time.py \
    --output "$append_out" \
    --field "$sh" --field "$path" -- \
    "$sh" -n "$path" || echo FAILED
}
# Copy the files listed in the originals manifest into benchmarks/testdata,
# skipping comment lines that start with '#'.
import-files() {
  grep -v '^#' benchmarks/osh-parser-originals.txt \
    | xargs --verbose -I {} -- cp {} benchmarks/testdata
}
write-sorted-manifest() {
local files=${1:-benchmarks/osh-parser-files.txt}
local counts=$BASE_DIR/raw/line-counts.txt
# Run the parser benchmark: every comparison shell, then (eventually) osh,
# over each file in the sorted manifest, appending rows to $TIMES_CSV.
run() {
  write-sorted-manifest
  local sorted=$SORTED

  # This file is appended to by sh-one / osh-parse-one.
  local out=$TIMES_CSV

  # CSV header
  echo 'status,elapsed_secs,shell,path' > $TIMES_CSV

  # 20ms for ltmain.sh; 34ms for configure
  cat $sorted | xargs -n 1 $0 sh-one $out bash || true

  # Wow dash is a lot faster, 5 ms / 6 ms.  It even gives one syntax error.
  cat $sorted | xargs -n 1 $0 sh-one $out dash || true

  # mksh is in between: 11 / 23 ms.
  cat $sorted | xargs -n 1 $0 sh-one $out mksh || true

  # zsh really slow: 45 ms and 124 ms.
  cat $sorted | xargs -n 1 $0 sh-one $out zsh || true

  # TODO:
  # - Run OSH under OVM
  # - Run OSH compiled with OPy
  # Maybe these are gradual release upgrades?

  # NOTE: early return — the osh timing below is intentionally disabled for now.
  return

  # 4 s and 15 s.  So 1000x speedup would be sufficient, not 10,000x!
  time cat $sorted | xargs -n 1 $0 osh-parse-one $out

  cat $TIMES_CSV
  echo $TIMES_CSV
}
summarize() {
@@ -168,17 +182,111 @@ report() {
echo "Wrote $out"
}
#
# Record Provenance: Code, Data, Env
#
# - code: We will run against different shells (bash, dash, OSH). The OSH
# code will improve over time
# - env: we test it on different machines (machine architecture, OS, distro,
# etc.)
# - data ID: (name, num_lines) is sufficient I think. Don't bother with hash.
# - or does (name, hash) make sense?
# TODO:
# - Parse the test file -> csv. Have to get rid of syntax errors?
# - I really want --output.
# - benchmarks/time.py is probably appropriate now.
# - reshape, total, and compute lines/sec
# - that is really a job for R
# - maybe you need awk to massage wc output into LINES_CSV
# - csv_to_html.py
# - Then a shell script here to put CSS and JS around it.
# - wild-static
# - Publish to release/0.2.0/benchmarks/MACHINE/wild/
# - add code_id to CSV (time.py), and code-id.txt?
# Identify the code being benchmarked.  Not implemented yet.
code-id() {
  # TODO: what to record?
  # - for osh: vm, compiler columns (running system python, or OVM?)
  # - for other shells: --version output
  # - osh --version?  git branch, etc.?
  printf '%s\n' TODO
}
# Identify the input data.  Just hash the files?  Not implemented yet.
data-id() {
  printf '%s\n' TODO
}
# Record the machine environment (kernel, CPU, mem, distro) as text files.
#
# $1 - output directory (default: _tmp/env-id-$(hostname))
#
# Events that will change the env for a given machine:
# - kernel upgrade
# - distro upgrade
env-id() {
  local out_dir=${1:-_tmp/env-id-$(hostname)}
  mkdir -p "$out_dir"

  hostname > "$out_dir/hostname.txt"

  # does it make sense to do individual fields like -m?  avoid parsing?
  # We care about the kernel and the CPU architecture.
  # There is a lot of redundant information there.
  uname -m > "$out_dir/machine.txt"
  { uname --kernel-release
    uname --kernel-version
  } > "$out_dir/kernel.txt"

  cat /proc/cpuinfo > "$out_dir/cpuinfo.txt"

  # mem info doesn't make a difference?  I guess it's just nice to check that
  # it's not swapping.  But shouldn't be part of the hash.
  cat /proc/meminfo > "$out_dir/meminfo.txt"

  # Not every distro has these files; skip them instead of aborting under
  # 'set -o errexit'.
  [[ -f /etc/lsb-release ]] && cat /etc/lsb-release > "$out_dir/lsb-release.txt"
  [[ -f /etc/debian_version ]] && cat /etc/debian_version > "$out_dir/debian_version.txt"

  head "$out_dir"/*

  # Now should I create a hash from this?
  # like x86_64__linux__distro?
  # There is already concept of the triple?
}
# Print the arguments framed by dashed lines.
_banner() {
  printf -- '-----\n%s\n-----\n' "$*"
}
# Run the whole benchmark from a clean git checkout: install prerequisites,
# build both the dev and production osh binaries, then run the benchmark.
#
# Similar to scripts/release.sh build-and-test.
auto() {
  # Install the comparison shells (presumably bash/dash/mksh/zsh, which run()
  # uses — confirm against test/spec.sh).
  test/spec.sh install-shells

  # Technically we need build-essential too?
  sudo apt install python-dev

  # Dev build, sanity-checked by running it once.
  build/dev.sh all
  _banner 'OSH dev build'
  bin/osh -c 'echo OSH dev build'

  # Production (OVM) build.
  build/prepare.sh configure
  build/prepare.sh build-python
  make _bin/oil.ovm
  # This does what 'install' does.
  scripts/run.sh make-bin-links

  _banner 'OSH production build'
  _bin/osh -c 'echo OSH production build'

  run  # make observations

  # Then summarize report can be done on a central machine?
}
time-test() {
benchmarks/time.py \
View
@@ -2,6 +2,8 @@
#
# Use sys.setprofile() and maybe sys.settrace() to trace Oil execution.
#
# Problem: Python callbacks for sys.setprofile() are too slow I think.
#
# Usage:
# ./pytrace.sh <function name>
@@ -11,22 +13,26 @@ set -o errexit
readonly ABUILD=~/git/alpine/abuild/abuild
readonly -a RUN_ABUILD=(bin/oil.py osh $ABUILD -h)
readonly -a PARSE_ABUILD=(bin/oil.py osh --ast-format none -n $ABUILD)
#
# Use Python's cProfile, which uses _lsprof. This is pretty fast.
#
# Baseline timings without any tracing, for comparison with the profiled runs.

# ~2.7 seconds (no tracing)
time-run-abuild() {
  time "${RUN_ABUILD[@]}"
}

# ~1.6 seconds (no tracing)
time-parse-abuild() {
  time "${PARSE_ABUILD[@]}"
}
# Profile the abuild parse with cProfile, writing stats to abuild.cprofile.
# 3.8 seconds.  So less than 2x overhead.
cprofile-parse-abuild() {
  local out=abuild.cprofile
  time python -m cProfile -o "$out" "${PARSE_ABUILD[@]}"
  ls -l "$out"
}
@@ -57,7 +63,7 @@ p.sort_stats("tottime").print_stats()
# 14 bytes * 14.9M is 209 MB.
# Run the abuild parse under the custom _PY_TRACE tracer (output: abuild.pytrace).
abuild-trace() {
  _PY_TRACE=abuild.pytrace time "${PARSE_ABUILD[@]}"
}
#
@@ -68,7 +74,7 @@ abuild-trace() {
# Parse a script (this one by default) with oil's osh, discarding output.
parse() {
  #local script=$ABUILD
  local script=$0
  time bin/oil.py osh --ast-format none -n "$script" >/dev/null
}
# Trace the execution
Oops, something went wrong.

0 comments on commit 6f83e3a

Please sign in to comment.