Permalink
Browse files

Work on ovm-build.sh benchmark, and test IFS splitting corner cases.

- Functions to track the provenance of compilers in benchmarks/id.sh.
- Filling out ovm-build.sh.

- Tests for word-eval.  Found a problem with IFS='\'.
- Another test case for the interaction between word splitting and
  globbing.
  • Loading branch information...
Andy Chu
Andy Chu committed Feb 11, 2018
1 parent 0a33187 commit 083486180572c82e737fed06b8a65dca9f19f23d
Showing with 316 additions and 121 deletions.
  1. +2 −75 benchmarks/auto.sh
  2. +10 −0 benchmarks/common.sh
  3. +136 −9 benchmarks/id.sh
  4. +1 −1 benchmarks/osh-runtime.sh
  5. +56 −33 benchmarks/ovm-build.sh
  6. +1 −0 build/oil-manifest.txt
  7. +8 −0 core/legacy.py
  8. +57 −0 gold/word-eval.sh
  9. +38 −2 spec/word-split.test.sh
  10. +6 −0 test/gold.sh
  11. +1 −1 test/spec.sh
View
@@ -33,68 +33,6 @@ prereq() {
test/spec.sh all
}
# Writes a table of host and shells to stdout. Writes text files and
# calculates IDs for them as a side effect.
#
# The table can be passed to other benchmarks to ensure that their provenance
# is recorded.
#
# Arguments:
#   $1 - job ID, copied into the first column of every row
# Outputs:
#   One space-separated row per shell on stdout:
#     job_id host host_hash sh_path shell_hash
#
# TODO: Move to id.sh/provenance.sh?
record-provenance() {
  local job_id=$1

  local host
  host=$(hostname)

  local tmp_dir=_tmp/host-id/$host
  benchmarks/id.sh dump-host-id "$tmp_dir"

  local host_hash
  host_hash=$(benchmarks/id.sh publish-host-id "$tmp_dir")

  # Declared separately from their assignments so that a failing command
  # substitution is not hidden behind the exit status of 'local'.
  local sh_path name shell_hash

  # There will be two different OSH (bin/osh and _bin/osh).  Both have the
  # basename 'osh', so they reuse the same tmp_dir one after the other.
  for sh_path in bash dash mksh zsh bin/osh _bin/osh; do
    name=$(basename "$sh_path")
    tmp_dir=_tmp/shell-id/$name
    benchmarks/id.sh dump-shell-id "$sh_path" "$tmp_dir"

    shell_hash=$(benchmarks/id.sh publish-shell-id "$tmp_dir")

    echo "$job_id $host $host_hash $sh_path $shell_hash"
  done
}
# Prints the default provenance file name for a job.
#
# Arguments:
#   $1 - job ID
# Outputs:
#   _tmp/<hostname>.<job_id>.provenance.txt on stdout
gen-prefix() {
  local job_id=$1

  local host
  host=$(hostname)

  # Quoted so the name is emitted verbatim (no word splitting or globbing).
  printf '%s\n' "_tmp/${host}.${job_id}.provenance.txt"
}
# Records provenance for a freshly generated job ID into a text file.
#
# Arguments:
#   $1 - output file (optional; defaults to the name printed by gen-prefix
#        for the new job ID)
# Outputs:
#   Writes the rows produced by record-provenance to the output file and logs
#   the file name.
write-provenance-txt() {
  local job_id
  job_id="$(date +%Y-%m-%d__%H-%M-%S)"

  # gen-prefix is only consulted when no explicit output path was given.
  local out=${1:-}
  if [[ -z $out ]]; then
    out=$(gen-prefix "$job_id")
  fi

  # Quoted: an unquoted redirect target with whitespace is an
  # "ambiguous redirect" error in bash.
  record-provenance "$job_id" > "$out"

  log "Wrote $out"
}
measure-all() {
local provenance=$1
local base_dir=${2:-../benchmark-data}
@@ -132,19 +70,8 @@ all() {
_bin/osh -c 'echo OSH production build'
# Make observations.
# TODO: Factor shell-id / host-id here. Every benchmark will use that.
# Just write a task file, like _tmp/benchmark-tasks.txt?
# And then have a function to execute the tasks.
# It has to make the write CSV files?
local job_id
job_id="$(date +%Y-%m-%d__%H-%M-%S)"
local provenance=$(gen-prefix $job_id)
record-provenance $job_id > $provenance
local provenance
provenance=$(benchmarks/id.sh shell-provenance) # capture the filename
measure-all $provenance
View
@@ -3,6 +3,16 @@
# Common functions for benchmarks.
#
# NOTE: This is in {build,test}/common.sh too.
# Prints "FATAL: <message>" to stderr and exits the shell with status 1.
#
# Arguments: the words of the message; they are joined with single spaces.
die() {
  # Pin IFS locally so "$*" joins with a space regardless of the caller's IFS,
  # and use printf so the message is never reinterpreted by echo.
  local IFS=' '
  printf 'FATAL: %s\n' "$*" 1>&2
  exit 1
}
# Prints its arguments, joined by single spaces, as one line on stderr.
log() {
  # printf instead of echo: a message that starts with -n or -e is printed
  # literally rather than being taken as an echo option.
  local IFS=' '
  printf '%s\n' "$*" 1>&2
}
# Runs tools/csv_concat.py, forwarding every argument unchanged.
csv-concat() {
  local -r tool=tools/csv_concat.py
  "$tool" "$@"
}
View
@@ -9,6 +9,9 @@ set -o nounset
set -o pipefail
set -o errexit
source build/common.sh # for $CLANG
source benchmarks/common.sh
# TODO: add benchmark labels/hashes for osh and all other shells
#
# Need to archive labels too.
@@ -50,15 +53,6 @@ set -o errexit
# host-id/
# lisa-$HASH/
# Prints "FATAL: <message>" to stderr and exits the shell with status 1.
#
# Arguments: the words of the message; they are joined with single spaces.
die() {
  # Pin IFS locally so "$*" joins with a space regardless of the caller's IFS,
  # and use printf so the message is never reinterpreted by echo.
  local IFS=' '
  printf 'FATAL: %s\n' "$*" 1>&2
  exit 1
}
# Prints its arguments, joined by single spaces, as one line on stderr.
log() {
  # printf instead of echo: a message that starts with -n or -e is printed
  # literally rather than being taken as an echo option.
  local IFS=' '
  printf '%s\n' "$*" 1>&2
}
_dump-if-exists() {
local path=$1
local out=$2
@@ -237,4 +231,137 @@ publish-host-id() {
echo $id
}
#
# Compilers
#
# Writes the compiler's --version output to <out_dir>/version.txt.
#
# Arguments:
#   $1 - compiler to run: a bare name or a path whose basename is 'gcc' or
#        'clang'
#   $2 - output directory (optional; default _tmp/compiler-id/<basename of $1>)
# Returns:
#   non-zero, without writing anything, if the compiler is not recognized.
dump-compiler-id() {
  local cc=$1  # path to the compiler

  local name
  name=$(basename "$cc")

  local out_dir=${2:-_tmp/compiler-id/$name}

  # Dispatch on the basename so that 'gcc', '/usr/bin/gcc', 'clang' and
  # '/path/to/clang' are all recognized.  Reject anything else up front
  # instead of silently leaving an empty version.txt behind.
  case $name in
    gcc|clang)
      ;;
    *)
      echo "dump-compiler-id: unrecognized compiler: $cc" 1>&2
      return 1
      ;;
  esac

  mkdir -p "$out_dir"

  case $name in
    gcc)
      "$cc" --version 2>&1
      # -v has more details, but they might be overkill.
      ;;
    clang)
      "$cc" --version
      # -v has stuff we don't want
      ;;
  esac > "$out_dir/version.txt"
}
# Manual smoke test: dumps IDs for gcc and $CLANG into the default
# directories, then shows the start of every version.txt under
# _tmp/compiler-id/.
test-compiler-id() {
  dump-compiler-id gcc
  # Quoted so a clang path containing whitespace stays a single argument.
  dump-compiler-id "$CLANG"
  head _tmp/compiler-id/*/version.txt
}
# Prints the part of a dumped compiler ID that is fed to the hash.
#
# Arguments:
#   $1 - directory containing version.txt
# Outputs:
#   version.txt on stdout, minus any line containing 'InstalledDir'.
_compiler-id-hash() {
  local src=$1

  # Remove some extraneous information from clang.
  grep -v InstalledDir "$src/version.txt"
}
# Copies a dumped compiler ID into a directory named after its hash.
# Writes a short ID to stdout.
#
# Arguments:
#   $1 - source directory, e.g. _tmp/compiler-id/clang
#   $2 - destination base directory (optional; default
#        ../benchmark-data/compiler-id)
# Outputs:
#   Creates <dest_base>/<name>-<id>/ with a copy of the source directory plus
#   HASH.txt, where <id> is the first 8 characters of the md5sum output.
publish-compiler-id() {
  local src=$1  # e.g. _tmp/compiler-id/clang
  local dest_base=${2:-../benchmark-data/compiler-id}

  # Assigned separately from 'local' so a basename failure is not masked.
  local name
  name=$(basename "$src")

  local hash
  hash=$(_compiler-id-hash "$src" | md5sum)  # not secure, an identifier

  local id="${hash:0:8}"
  local dest="$dest_base/$name-$id"

  mkdir -p "$dest"
  cp --no-target-directory --recursive "$src/" "$dest/"

  # Deliberately unquoted: word splitting collapses md5sum's two-space
  # separator, which keeps HASH.txt byte-for-byte what this function has
  # always written.
  # shellcheck disable=SC2086
  echo $hash > "$dest/HASH.txt"

  log "Published compiler ID to $dest"
  echo "$id"
}
#
# Table Output
#
# Writes a table of host and shells to a file under _tmp/.  Writes text files
# and calculates IDs for them as a side effect.
#
# The table can be passed to other benchmarks to ensure that their provenance
# is recorded.
#
# Outputs:
#   _tmp/<host>.<job_id>.provenance.txt, one space-separated row per shell:
#     job_id host host_hash sh_path shell_hash
#   stdout: the name of that file, so callers can capture it with $(...).
#
# TODO: Move to id.sh/provenance.sh?
shell-provenance() {
  local job_id
  job_id="$(date +%Y-%m-%d__%H-%M-%S)"

  local host
  host=$(hostname)

  # Filename
  local out=_tmp/${host}.${job_id}.provenance.txt

  local tmp_dir=_tmp/host-id/$host
  dump-host-id "$tmp_dir"

  local host_hash
  host_hash=$(publish-host-id "$tmp_dir")

  # Declared apart from their assignments so failures are not masked by
  # 'local', and so the loop variable does not leak into the caller.
  local sh_path name shell_hash

  # There will be two different OSH (bin/osh and _bin/osh).
  for sh_path in bash dash mksh zsh bin/osh _bin/osh; do
    name=$(basename "$sh_path")
    tmp_dir=_tmp/shell-id/$name
    dump-shell-id "$sh_path" "$tmp_dir"

    shell_hash=$(publish-shell-id "$tmp_dir")

    echo "$job_id $host $host_hash $sh_path $shell_hash"
  done > "$out"

  log "Wrote $out"

  # Return value for command substitution: the file that was written.
  echo "$out"
}
# Writes a table of host and compilers to a file under _tmp/.  Dumps and
# publishes the host and compiler IDs as a side effect.
#
# Globals:
#   CLANG (read) - path to clang
# Outputs:
#   _tmp/<host>.<job_id>.compiler-provenance.txt, one space-separated row per
#   compiler:
#     job_id host host_hash compiler_path compiler_hash
#   stdout: the name of that file, so callers can capture it with $(...).
# Returns:
#   non-zero if gcc cannot be found.
compiler-provenance() {
  local job_id
  job_id="$(date +%Y-%m-%d__%H-%M-%S)"

  local host
  host=$(hostname)

  # Filename
  local out=_tmp/${host}.${job_id}.compiler-provenance.txt

  local tmp_dir=_tmp/host-id/$host
  dump-host-id "$tmp_dir"

  local host_hash
  host_hash=$(publish-host-id "$tmp_dir")

  # gcc is assumed to be in the $PATH.  Fail loudly if it is not, rather than
  # silently producing a table without a gcc row.
  local gcc_path
  if ! gcc_path=$(command -v gcc); then
    log "compiler-provenance: gcc not found in \$PATH"
    return 1
  fi

  local compiler_path name compiler_hash
  for compiler_path in "$gcc_path" "$CLANG"; do
    name=$(basename "$compiler_path")

    tmp_dir=_tmp/compiler-id/$name
    dump-compiler-id "$compiler_path" "$tmp_dir"

    compiler_hash=$(publish-compiler-id "$tmp_dir")

    echo "$job_id $host $host_hash $compiler_path $compiler_hash"
  done > "$out"

  log "Wrote $out"

  # Return value for command substitution: the file that was written.
  echo "$out"
}
"$@"
@@ -228,7 +228,7 @@ readonly NUM_COLUMNS=7 # 5 from provenence, then task_type / task_arg
measure() {
local provenance=$1
local raw_dir=${2:-_tmp/osh-runtime/raw}
local raw_dir=${2:-$BASE_DIR/raw}
local pattern=${3:-}
#local base_dir=${2:-../benchmark-data/osh-parser}
View
@@ -11,6 +11,8 @@ set -o errexit
source build/common.sh # for $CLANG
readonly BASE_DIR=_tmp/ovm-build
# NOTE: build/test.sh measures the time already.
# Coarse Size and Time Benchmarks
@@ -84,24 +86,7 @@ clang() {
#
# It would be possible, but it complicates the makefile.
readonly HEADER='status,elapsed_secs,host_name,host_hash,compiler_name,compiler_hash,tarball,target,target_num_bytes'
# I think I want to do this for every version. Save it in
# ~/git/oil/benchmarks-data.
# Expands "host_name compiler_name" lines read from stdin into build tasks.
#
# Outputs:
#   For each input line, two lines on stdout: the input fields followed by the
#   tarball path and one build target (_build/oil/ovm, then
#   _build/oil/ovm-dbg).
expand-tasks() {
  # Locals, so the loop variables do not leak into the caller's scope.
  local host_name compiler_name prefix

  # NOTE: it MUST be a tarball and not the git repo, because we do the build
  # of bytecode.zip!  We care about the "package experience".
  local tarball=_release/oil.tar

  # -r: take backslashes in the input literally.
  while read -r host_name compiler_name; do
    prefix="$host_name $compiler_name"

    echo "$prefix" "$tarball" _build/oil/ovm
    echo "$prefix" "$tarball" _build/oil/ovm-dbg
  done
}
#readonly HEADER='status,elapsed_secs,host_name,host_hash,compiler_name,compiler_hash,tarball,target,target_num_bytes'
# 5 releases: 0.0.0 to 0.4.0. Or we could just do the 0.5.alpha1 release?
# Then you can show the drop.
@@ -119,28 +104,66 @@ other-shells() {
echo
}
# Placeholder for one build measurement; for now it only echoes a TODO line
# naming the tarball and target.
#
# Arguments (positional):
#   $1 raw_dir (output)   $2 job_id          $3 host       $4 host_hash
#   $5 compiler_path      $6 compiler_hash   $7 tarball    $8 target
build-task() {
  local raw_dir job_id host host_hash
  local compiler_path compiler_hash tarball target

  raw_dir=$1  # output
  job_id=$2
  host=$3
  host_hash=$4
  compiler_path=$5
  compiler_hash=$6
  tarball=$7
  target=$8

  # Really we should just measure "make", and then the ovm-dbg target can be
  # separate?
  # We also want to do ./configure.  Do that for bash/dash too.

  # time them with benchmarks/time.py
  echo TODO $tarball $target
}
# Prints build tasks to stdout, one task per line.
#
# NOTE(review): this body appears to interleave two versions of the function
# (the text comes from a diff view).  The first pipeline sends
# "<hostname> gcc" and "<hostname> clang" to expand-tasks; the rest reads the
# provenance file named by $1.  Confirm which version is intended.
print-tasks() {
local h=$(hostname)
{ echo $h gcc;
echo $h clang;
} | expand-tasks
# $1 is the provenance file whose lines are extended below.
local provenance=$1
# NOTE: it MUST be a tarball and not the git repo, because we do the build
# of bytecode.zip! We care about the "package experience".
local tarball='_release/oil.0.5.alpha1.gz'
# Add 1 field for each of 5 fields.
# Each provenance line produces two output lines: the line itself followed by
# the tarball and one build target.
cat $provenance | while read line; do
echo "$line" $tarball _build/oil/ovm
echo "$line" $tarball _build/oil/ovm-dbg
done
}
readonly HEADER='status,elapsed_secs,host_name,host_hash,compiler_path,compiler_hash,tarball,target'
readonly NUM_COLUMNS=7 # 5 from provenance, then tarball/target
measure() {
print-tasks
}
local provenance=$1 # from benchmarks/id.sh compiler-provenance
local raw_dir=${2:-$BASE_DIR/raw}
# TODO: Move to benchmarks/id.
#local base_dir=${2:-../benchmark-data/osh-parser}
gcc-hash() {
#gcc --version
gcc -v
}
# Job ID is everything up to the first dot in the filename.
local name=$(basename $provenance)
local prefix=${name%.compiler-provenance.txt} # strip suffix
local times_out="$raw_dir/$prefix.times.csv"
mkdir -p $BASE_DIR/{raw,stage1}
# Write Header of the CSV file that is appended to.
echo $HEADER > $times_out
local tasks=$BASE_DIR/tasks.txt
print-tasks $provenance > $tasks
time cat $tasks |
xargs -n $NUM_COLUMNS -- $0 build-task $raw_dir ||
die "*** Some tasks failed. ***"
clang-hash() {
#$CLANG -v
# -v has some output we don't want.
$CLANG --version | grep -v InstalledDir
cp -v $provenance $raw_dir
}
"$@"
Oops, something went wrong.

0 comments on commit 0834861

Please sign in to comment.