@@ -16,28 +16,45 @@ source test/common.sh # die
readonly BASE_DIR=_tmp/osh-parser
readonly SORTED=$BASE_DIR/tmp/sorted.txt
# Where we copied them from.
import-files() {
grep -v '^#' benchmarks/osh-parser-originals.txt |
xargs --verbose -I {} -- cp {} benchmarks/testdata
}
# NOTE: --ast-format none eliminates print time!  That is more than half of
# the total.  (60 seconds with serialization, 29 seconds without.)
# TODO: That is the only difference... hm.
#
# TODO:
# - Have OSH --parse-and-dump-path
# - it can dump /proc/self/meminfo
sh-one() {
local append_out=$1
local vm_out_dir=$2
local sh_path=$3
local host_name=$4
local host_hash=$5
write-sorted-manifest() {
local files=${1:-benchmarks/osh-parser-files.txt}
local counts=$BASE_DIR/tmp/line-counts.txt
local csv_out=$2
# Remove comments and sort by line count
grep -v '^#' $files | xargs wc -l | sort -n > $counts
# Raw list of paths
cat $counts | awk '$2 != "total" { print $2 }' > $SORTED
# Make a CSV file from wc output
cat $counts | awk '
BEGIN { print "num_lines,path" }
$2 != "total" { print $1 "," $2 }' \
> $csv_out
}
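# For reference, the manifest CSV written above has this shape (line counts
# illustrative):
#
#   num_lines,path
#   921,benchmarks/testdata/abuild
#   18279,benchmarks/testdata/configure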
# Called by xargs with a task row.
parser-task() {
local raw_dir=$1 # output
local job_id=$2
local host=$3
local host_hash=$4
local sh_path=$5
local shell_hash=$6
local path=$7
echo "--- $sh_path $path ---"
local script_path=$7
echo "--- $sh_path $script_path ---"
local times_out="$raw_dir/$host.$job_id.times.csv"
local vm_out_dir="$raw_dir/$host.$job_id.virtual-memory"
mkdir -p $vm_out_dir
local shell_name
shell_name=$(basename $sh_path)
@@ -49,110 +66,84 @@ sh-one() {
#extra_args='--ast-format none'
local script_name
local vm_out_path
script_name=$(basename $path)
script_name=$(basename $script_path)
vm_out_path="${vm_out_dir}/${shell_name}-${shell_hash}__${script_name}.txt"
extra_args="--dump-proc-status-to $vm_out_path"
# And then add that as --field?
# This adds 0.01 seconds?
# or shell_hash
# Then you need a Python or R script to make a CSV file out of VmPeak VmRSS
# etc.
# Should we add a field here to say it has VM stats?
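# A rough sketch of that extraction with awk (hypothetical, not wired in):
#
#   awk -F '[: \t]+' '$1 == "VmPeak" || $1 == "VmRSS" { print $1 "," $2 }' \
#     $vm_out_path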
fi
# Fields: exit code, time in seconds, host_hash, shell_hash, path.
# (NUL-delimited output would have been nice here!)
benchmarks/time.py \
--output $append_out \
--field "$host_name" --field "$host_hash" \
--output $times_out \
--field "$host" --field "$host_hash" \
--field "$shell_name" --field "$shell_hash" \
--field "$path" -- \
"$sh_path" -n $extra_args "$path" || echo FAILED
--field "$script_path" -- \
"$sh_path" -n $extra_args "$script_path" || echo FAILED
}
import-files() {
grep -v '^#' benchmarks/osh-parser-originals.txt |
xargs --verbose -I {} -- cp {} benchmarks/testdata
}
# For each shell, print 10 script paths.
print-tasks() {
local provenance=$1
write-sorted-manifest() {
local files=${1:-benchmarks/osh-parser-files.txt}
local counts=$BASE_DIR/raw/line-counts.txt
local csv=$2
# Remove comments and sort by line count
grep -v '^#' $files | xargs wc -l | sort -n > $counts
# Raw list of paths
cat $counts | awk '$2 != "total" { print $2 }' > $SORTED
# Make a LINES_CSV from wc output
cat $counts | awk '
BEGIN { print "num_lines,path" }
$2 != "total" { print $1 "," $2 }' \
> $csv
cat $SORTED
echo ---
cat $csv
# Add 1 field for each of 5 fields.
cat $provenance | while read fields; do
cat $SORTED | xargs -n 1 -- echo $fields
done
}
# runtime_id, host_hash, toolchain_id (which sometimes you don't know)
# 5 fields from the provenance row, plus 1 for the file path.
readonly NUM_COLUMNS=6
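# Each task row is the 5 provenance fields followed by one script path, e.g.
# (job ID, host, and hashes elided):
#
#   <job_id> <host> <host_hash> bin/osh <shell_hash> benchmarks/testdata/configure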
run() {
local preview=${1:-}
local host
host=$(hostname)
# Figure out all tasks to run, and run them.  When called from auto.sh, $2
# should be the ../benchmark-data repo.
all() {
local provenance=$1
local raw_dir=${2:-$BASE_DIR/raw}
local job_id
job_id="$host.$(date +%Y-%m-%d__%H-%M-%S)"
# The job ID is the filename with the .provenance.txt suffix stripped.
local name=$(basename $provenance)
local prefix=${name%.provenance.txt} # strip suffix
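# e.g. lisa.2017-12-22__10-15-30.provenance.txt -> lisa.2017-12-22__10-15-30
# (illustrative name)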
local out_dir='../benchmark-data/osh-parser'
local times_out="$out_dir/$job_id.times.csv"
local lines_out="$out_dir/$job_id.lines.csv"
local vm_out_dir="$out_dir/$job_id.virtual-memory"
local times_out="$raw_dir/$prefix.times.csv"
local lines_out="$raw_dir/$prefix.lines.csv"
mkdir -p \
$(dirname $times_out) \
$vm_out_dir \
$BASE_DIR/{tmp,raw,stage1,www}
mkdir -p $BASE_DIR/{tmp,raw,stage1,www}
write-sorted-manifest '' $lines_out
local sorted=$SORTED
# Write the header of the CSV file that is appended to.
echo 'status,elapsed_secs,host_name,host_hash,shell_name,shell_hash,path' \
> $times_out
local tmp_dir=_tmp/host-id/$host
benchmarks/id.sh dump-host-id $tmp_dir
echo 'status,elapsed_secs,host_name,host_hash,shell_name,shell_hash,path' > $times_out
local host_hash
host_hash=$(benchmarks/id.sh publish-host-id $tmp_dir)
echo $host $host_hash
local tasks=$raw_dir/tasks.txt
print-tasks $provenance > $tasks
local shell_hash
# Run them all
cat $tasks | xargs -n $NUM_COLUMNS -- $0 parser-task $raw_dir
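# (xargs slices the task list into rows of NUM_COLUMNS fields and re-invokes
# this script via $0 once per row, with $raw_dir prepended.)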
#for sh_path in bash dash mksh zsh; do
for sh_path in bash dash mksh zsh bin/osh _bin/osh; do
# There will be two different OSH builds (bin/osh and _bin/osh).
local name=$(basename $sh_path)
tmp_dir=_tmp/shell-id/$name
benchmarks/id.sh dump-shell-id $sh_path $tmp_dir
shell_hash=$(benchmarks/id.sh publish-shell-id $tmp_dir)
cp -v $provenance $raw_dir
}
echo "$sh_path ID: $shell_hash"
#
# Testing
#
if ! test -n "$preview"; then
# 20ms for ltmain.sh; 34ms for configure
cat $sorted | xargs -n 1 -- $0 \
sh-one $times_out $vm_out_dir $sh_path $host $host_hash $shell_hash || true
# Copy data so it looks like it's from another host
fake-other-host() {
local dir=${1:-_tmp/osh-parser/raw}
for entry in $dir/lisa*; do
local fake=${entry/lisa/flanders}
#echo $entry $fake
mv -v $entry $fake
# The host ID isn't changed, but that's OK.
# provenance.txt has host names.
if test -f $fake; then
sed -i 's/lisa/flanders/g' $fake
fi
done
cat $times_out
echo "Wrote $times_out, $lines_out, and $vm_out_dir/"
}
#
@@ -164,18 +155,21 @@ csv-concat() {
}
stage1() {
local raw_dir=${1:-_tmp/osh-parser/raw}
#local raw_dir=${1:-../benchmark-data/osh-parser}
local out=_tmp/osh-parser/stage1
mkdir -p $out
local vm_csv=$out/virtual-memory.csv
local -a x=(../benchmark-data/osh-parser/flanders.*.virtual-memory)
local -a y=(../benchmark-data/osh-parser/lisa.*.virtual-memory)
local -a x=($raw_dir/flanders.*.virtual-memory)
local -a y=($raw_dir/lisa.*.virtual-memory)
benchmarks/virtual_memory.py osh-parser ${x[-1]} ${y[-1]} > $vm_csv
local times_csv=$out/times.csv
# Globs are in lexicographical order, which works for our dates.
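# e.g. flanders.2017-12-01__10-15-30.times.csv sorts before
# flanders.2017-12-22__10-15-30.times.csv, so ${a[-1]} is the latest run
# (illustrative names).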
local -a a=(../benchmark-data/osh-parser/flanders.*.times.csv)
local -a b=(../benchmark-data/osh-parser/lisa.*.times.csv)
local -a a=($raw_dir/flanders.*.times.csv)
local -a b=($raw_dir/lisa.*.times.csv)
csv-concat ${a[-1]} ${b[-1]} > $times_csv
# Construct a one-column CSV file
@@ -187,8 +181,8 @@ stage1() {
# Verify that the files are equal, and pass one of them.
local lines_csv=$out/lines.csv
local -a c=(../benchmark-data/osh-parser/flanders.*.lines.csv)
local -a d=(../benchmark-data/osh-parser/lisa.*.lines.csv)
local -a c=($raw_dir/flanders.*.lines.csv)
local -a d=($raw_dir/lisa.*.lines.csv)
local left=${c[-1]}
local right=${d[-1]}
@@ -334,43 +328,6 @@ report() {
stage3
}
_banner() {
echo -----
echo "$@"
echo -----
}
# Run the whole benchmark from a clean git checkout.
#
# Similar to scripts/release.sh build-and-test.
auto() {
test/spec.sh install-shells
# Technically we need build-essential too?
sudo apt install python-dev
build/dev.sh all
build/codegen.sh lexer
_banner 'OSH dev build'
bin/osh -c 'echo OSH dev build'
build/prepare.sh configure
build/prepare.sh build-python
make _bin/oil.ovm
# This does what 'install' does.
scripts/run.sh make-bin-links
_banner 'OSH production build'
_bin/osh -c 'echo OSH production build'
run # make observations
# Then the summary report can be done on a central machine?
}
time-test() {
benchmarks/time.py \
--field bash --field foo.txt --output _tmp/bench.csv \
@@ -0,0 +1,213 @@
#!/bin/bash
#
# Test scripts found in the wild for both correctness and performance.
#
# Usage:
# ./runtime.sh <function name>
set -o nounset
set -o pipefail
set -o errexit
source test/common.sh
readonly TAR_DIR=$PWD/_tmp/osh-runtime
# Use the compiled version. Otherwise /proc/self/exe is the Python
# interpreter, which matters for yash's configure script!
readonly OSH=$PWD/_bin/osh
# NOTE: Same list in oilshell.org/blob/run.sh.
files() {
cat <<EOF
tcc-0.9.26.tar.bz2
yash-2.46.tar.xz
ocaml-4.06.0.tar.xz
uftrace-0.8.1.tar.gz
EOF
}
conf-dirs() {
cat <<EOF
$TAR_DIR/ocaml-4.06.0
$TAR_DIR/tcc-0.9.26
$TAR_DIR/uftrace-0.8.1
$TAR_DIR/yash-2.46
EOF
}
download() {
files | xargs -n 1 -I {} --verbose -- \
wget --directory $TAR_DIR 'https://www.oilshell.org/blob/testdata/{}'
}
extract() {
time for f in $TAR_DIR/*.{gz,bz2,xz}; do
tar -x --directory $TAR_DIR --file $f
done
ls -l $TAR_DIR
}
configure-and-copy() {
local src_dir=$1
local sh_path=$2
local out_dir=$3
mkdir -p $out_dir
# These hand-written configure scripts must be run from their own directory,
# unlike autoconf's scripts.
pushd $src_dir >/dev/null
touch __TIMESTAMP
#$OSH -x ./configure
#benchmarks/time.py --output $out_csv
$sh_path ./configure >$out_dir/STDOUT.txt
echo
echo "--- NEW FILES ---"
echo
find . -type f -newer __TIMESTAMP | xargs -I {} --verbose -- cp {} $out_dir
popd >/dev/null
}
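# Usage sketch (paths illustrative):
#
#   configure-and-copy $TAR_DIR/yash-2.46 $OSH $TAR_DIR/raw/yash-2.46__osh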
configure-one() {
local append_out=$1 # times
local vm_out_dir=$2 # pass to virtual memory
local sh_path=$3
local shell_hash=$4
local conf_dir=$5
local prog_label=$(basename $conf_dir)
local sh_label=$(basename $sh_path)
local out_dir=$TAR_DIR/raw/${prog_label}__${sh_label}
# TODO: Use benchmarks/time.py here.
# Except we don't want to time the copying.
configure-and-copy $conf_dir $sh_path $out_dir
}
# TODO:
# - Add Python's configure -- same or different?
# - Unify abuild -h -- time it
# - --runtime-dump-mem and rename to --parser-dump-mem
#
# benchmark_name,shell,out_dir
# abuild-h
# X-configure
# config.status?
#
# Yeah need to come up with a name. Not just conf-dirs.
# $dir-configure
# Do I add host/host_id? Or just host_label and rely on provenance?
# Turn this into write-tasks?
# And then run-tasks? run-all?
# Yeah it should be
# osh-parser.sh write-tasks
# osh-runtime.sh write-tasks
# virtual-memory.sh write-tasks
#
# And then auto.sh run-tasks? Then you can have consistent logging?
all() {
local provenance=$1
local base_dir=${2:-_tmp/osh-runtime/raw}
#local base_dir=${2:-../benchmark-data/osh-parser}
# The job ID is the filename with the .provenance.txt suffix stripped.
local name=$(basename $provenance)
local job_id=${name%.provenance.txt} # strip suffix
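# e.g. lisa.2017-12-22__10-15-30.provenance.txt -> lisa.2017-12-22__10-15-30
# (illustrative name)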
local times_out="$base_dir/$job_id.times.csv"
local vm_out_dir="$base_dir/$job_id.virtual-memory"
mkdir -p $vm_out_dir
# Write the header of the CSV file that is appended to.
echo 'status,elapsed_secs,shell_name,shell_hash,benchmark_name' \
> $times_out
# TODO: read the host and pass it
# job_id is a (host / host ID)?
# It's probably simpler just to thread through those 2 vars and keep it in the same format.
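# A provenance row is: job_id, host, host_hash, sh_path, shell_hash.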
cat $provenance | while read _ _ _ sh_path shell_hash; do
case $sh_path in
mksh|zsh|bin/osh)
log "--- Skipping $sh_path"
continue
;;
esac
# Need $PWD/$sh_path because we must change dirs to configure.
case $sh_path in
*/osh)
sh_path=$PWD/$sh_path
;;
esac
log "--- Running task with $sh_path"
conf-dirs | xargs -n 1 -- $0 \
configure-one $times_out $vm_out_dir $sh_path $shell_hash || true
done
cp -v $provenance $base_dir
}
#
# Non-configure scripts
#
abuild-h() {
local out_dir=_tmp/osh-runtime
mkdir -p $out_dir
# TODO: Should test the correctness too.
local out=$out_dir/abuild-h-times.csv
echo 'status,elapsed_secs,sh_path' > $out
for sh_path in bash dash mksh zsh $OSH; do
benchmarks/time.py --output $out --field "$sh_path" -- \
$sh_path benchmarks/testdata/abuild -h
done
}
#
# Misc
#
# Same problem as tcc
qemu-old() {
local out_dir=$PWD/_tmp/qemu-old
mkdir -p $out_dir
configure-and-copy ~/src/qemu-1.6.0 $OSH $out_dir
}
# This doesn't work for ash either, because it uses the busybox pattern. It
# says "exe: applet not found". I guess yash doesn't configure under ash!
self-exe() {
set +o errexit
dash <<EOF
/proc/self/exe -V
EOF
echo
_bin/osh <<EOF
/proc/self/exe -V
EOF
_tmp/shells/ash <<EOF
/proc/self/exe -V
EOF
}
"$@"
This file was deleted.
@@ -7,48 +7,47 @@ set -o nounset
set -o pipefail
set -o errexit
# TODO: What format should this be recorded in?
# I think a Python script can parse it to CSV / TSV2.
# Use benchmarks/id.sh too
source test/common.sh # log
baseline() {
local host=$(hostname)
local job_id="$host.$(date +%Y-%m-%d__%H-%M-%S)"
local out_dir="../benchmark-data/vm-baseline/$job_id"
mkdir -p $out_dir
local tmp_dir
tmp_dir=_tmp/host-id/$host
benchmarks/id.sh dump-host-id $tmp_dir
# TODO: Call this from benchmarks/auto.sh.
local host_hash=$(benchmarks/id.sh publish-host-id $tmp_dir)
echo $host $host_hash
vm-baseline() {
local provenance=$1
local base_dir=${2:-_tmp/vm-baseline}
#local base_dir=${2:-../benchmark-data/vm-baseline}
local shell_hash
# Strip everything after the first dot.
local name=$(basename $provenance)
local job_id=${name%%.*}
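# (${name%%.*} removes everything from the first dot on, e.g. a.b.c -> a.)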
# NOTE: For some reason, when zsh prints /proc/$$/status, $$ refers to a cat
# process, not the zsh process?  Check out /proc/$$/maps too.  Omitting zsh
# for now.
log "--- Job $job_id ---"
for sh_path in bash dash mksh bin/osh _bin/osh; do
echo "--- $sh_path"
local host=$(hostname)
local out_dir="$base_dir/$host.$job_id"
mkdir -p $out_dir
# Fourth and fifth columns are the shell path and its hash.
cat $provenance | while read _ _ _ sh_path shell_hash; do
local sh_name=$(basename $sh_path)
tmp_dir=_tmp/shell-id/$sh_name
benchmarks/id.sh dump-shell-id $sh_path $tmp_dir
shell_hash=$(benchmarks/id.sh publish-shell-id $tmp_dir)
# There is a race condition when reading the status, but the sleep helps.
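# (The single quotes matter: $$ must be expanded by the child shell so it
# reports its own /proc status, not this script's.)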
local out="$out_dir/${sh_name}-${shell_hash}.txt"
$sh_path -c 'sleep 0.001; cat /proc/$$/status' > $out
echo "Wrote $out"
echo
done
echo
echo "$out_dir:"
ls -l $out_dir
}
csv-demo() {
local -a job_dirs=(_tmp/vm-baseline/lisa.2017-*)
benchmarks/virtual_memory.py baseline ${job_dirs[-1]}
}
# Combine CSV files.
baseline-csv() {
local raw_dir=$1
local out=_tmp/vm-baseline/stage1
mkdir -p $out
@@ -63,6 +62,7 @@ baseline-csv() {
| tee $out/vm-baseline.csv
}
# Demo of the --dump-proc-status-to flag.
# NOTE: Could also add Python introspection.
dump-demo() {
local out=_tmp/virtual-memory
@@ -22,6 +22,7 @@ def main(argv):
out.writerow(
('host', 'shell_name', 'shell_hash', 'metric_name', 'metric_value'))
# Dir name looks like "$host.$job_id"
for input_dir in input_dirs:
d = os.path.basename(input_dir)
host, job_id = d.split('.')
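# e.g. 'lisa.2017-12-22__10-15-30' -> ('lisa', '2017-12-22__10-15-30')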
@@ -29,7 +29,7 @@ all() {
echo
echo 'BENCHMARKS'
wc -l benchmarks/*.sh | sort --numeric
wc -l benchmarks/*.{sh,py,R} | sort --numeric
echo
echo 'SPEC TESTS'
@@ -3,7 +3,8 @@
# Wild tests that actually run code.
#
# TODO:
# - Use a better name.
# - Use a better name. Maybe move Python's configure to conf-wild. This could
# be misc-wild.
# - There are a lot of hard-coded paths in this script.
#
# Usage:
@@ -13,7 +14,7 @@ set -o nounset
set -o pipefail
set -o errexit
readonly OSH=~/git/oil/bin/osh
readonly OSH=~/git/oilshell/oil/bin/osh
replace-shebang() {
local dir=$1
@@ -33,6 +34,10 @@ build-toybox() {
make
}
#
# Debootstrap
#
readonly DE_DIR=~/git/basis-build/_tmp/debootstrap
sh-debootstrap() {
@@ -42,7 +47,9 @@ sh-debootstrap() {
}
osh-de-help() {
sh-debootstrap $OSH --help
time sh-debootstrap $OSH --help
#time sh-debootstrap bash --help
#time sh-debootstrap dash --help
}
# Probably not great to run as root.
@@ -53,30 +60,34 @@ sh-de-xenial() {
time sudo $0 debootstrap $sh xenial $target_dir || true
}
#
# Configure
#
readonly PYTHON_DIR=$PWD/Python-2.7.13
sh-py-configure() {
local sh=${1:-bash}
local out=_tmp/wild2/$(basename $sh)-py-configure
mkdir -p $out
# Hm this seems to take a long time to parse. TODO: Show parse timing with
# -v or xtrace or something.
pushd $out
time $sh $PYTHON_DIR/configure || true
popd
tree $out
}
# ~18 seconds vs ~10 seconds.
osh-py-configure() {
OIL_TIMING=1 sh-py-configure $OSH
}
# NOTE: These would compare more equally if we put them in the same dir, and
# then copied it elsewhere.
compare-pyconfig() {
#diff -u -r _tmp/wild2/{bash,osh}-py-configure
diff -u -r _tmp/wild2/{bash,osh}-py-configure/config.status
diff -u -r _tmp/wild2/{bash,osh}-py-configure
#diff -u -r _tmp/wild2/{bash,osh}-py-configure/config.status
}
# Hm this behaves differently.  Ideas for better xtrace in osh:
@@ -101,10 +112,10 @@ sh-config-status() {
}
osh-config-status() {
OIL_TIMING=1 sh-config-status $OSH
#OIL_TIMING=1
sh-config-status $OSH
}
# TODO: Save these files and make sure they are the same!
"$@"