Skip to content

Commit

Permalink
[benchmarks] Remove old provenance function.
Browse files Browse the repository at this point in the history
- Add shell unit test
  • Loading branch information
Andy C committed Dec 29, 2022
1 parent f4b3844 commit 5db813b
Show file tree
Hide file tree
Showing 6 changed files with 33 additions and 124 deletions.
67 changes: 18 additions & 49 deletions benchmarks/auto.sh
Expand Up @@ -33,76 +33,45 @@ _banner() {
echo -----
}

# New interface for shell-provenance
# 3 fixed inputs:
# maybe_host - 'lenny' or 'no-host'
# job_id - use $(print-job-timestamp)
# out_dir - location to put shell-id, host-id, but TSV is first
# written to _tmp/provenance.tsv, and later COPIED TO EACH
# $out_dir/$bench_name/$host_job_id/ dir
# Variable inputs:
# list of shells

# shell-provenance-tsv 'no-host' $(print-job-id) _tmp \
# bash dash bin/osh $OSH_EVAL_NINJA_BUILD

# shell-provenance-tsv 'lenny' $(print-job-id) ../benchmark-data \
# bash dash bin/osh $OSH_EVAL_BENCHMARK_DATA
#
# - A key problem is that you need to concat the two provenances
# - and CHECK that you're comparing the same shells!
# - the number of hosts should be 2, and they should have an equal number
# of rows
# - and there should be exactly 2 of every hash?

measure-shells() {
  ### Record shell provenance, then run the 'measure' step of each benchmark
  #
  # Args:
  #   $1 host_name - host label, passed through to shell-provenance-2
  #   $2 job_id    - job ID, passed through to shell-provenance-2
  # Globals read: SHELLS (array), OSH_EVAL_BENCHMARK_DATA
  local host_name=$1
  local job_id=$2

  local out_dir=../benchmark-data

  # Writes _tmp/provenance.{txt,tsv} and $out_dir/{shell,host}-id
  benchmarks/id.sh shell-provenance-2 \
    "$host_name" "$job_id" "$out_dir" \
    "${SHELLS[@]}" "$OSH_EVAL_BENCHMARK_DATA" python2

  local host_job_id="$host_name.$job_id"

  # New Style doesn't need provenance -- it's joined later
  benchmarks/osh-runtime.sh measure \
    "$host_name" "$host_job_id" "$OSH_EVAL_BENCHMARK_DATA" "$out_dir/osh-runtime"

  # Old style needs provenance
  local provenance=_tmp/provenance.txt

  benchmarks/vm-baseline.sh measure \
    "$provenance" "$host_job_id" "$out_dir/vm-baseline"

  benchmarks/osh-parser.sh measure \
    "$provenance" "$host_job_id" "$out_dir/osh-parser"

  benchmarks/compute.sh measure \
    "$provenance" "$host_job_id" "$out_dir/compute"
}

measure-builds() {
  ### Record compiler provenance, then run the ovm-build 'measure' step once
  #
  # Args:
  #   $1 host_name - accepted from the caller; not referenced in this body
  #   $2 job_id    - accepted from the caller; not referenced in this body
  local host_name=$1
  local job_id=$2

  local out_dir=../benchmark-data

  local provenance
  provenance=$(benchmarks/id.sh compiler-provenance)  # capture the filename

  benchmarks/ovm-build.sh measure "$provenance" "$out_dir/ovm-build"
}

# Run all benchmarks from a clean git checkout.
Expand Down Expand Up @@ -131,8 +100,8 @@ all() {
benchmarks/osh-parser.sh cachegrind-main $host_job_id ''
fi

measure-shells $host_name $host_job_id
measure-builds
measure-shells $host_name $job_id
measure-builds $host_name $job_id
}

#
Expand Down
10 changes: 6 additions & 4 deletions benchmarks/compute.sh
Expand Up @@ -385,9 +385,10 @@ measure() {
hello-all $provenance $host_job_id $out_dir
fib-all $provenance $host_job_id $out_dir

if test -n "${QUICKLY:-}"; then
return
fi
# TODO: doesn't work because we would need duplicate logic in stage1
#if test -n "${QUICKLY:-}"; then
# return
#fi

word_freq-all $provenance $host_job_id $out_dir
parse_help-all $provenance $host_job_id $out_dir
Expand Down Expand Up @@ -456,7 +457,7 @@ stage1() {

local -a raw=()

# TODO: Doesn't respect QUICKLY=1
# TODO: We should respect QUICKLY=1
for metric in hello fib word_freq parse_help bubble_sort palindrome; do
local dir=$raw_dir/$metric

Expand Down Expand Up @@ -544,6 +545,7 @@ EOF
tsv2html $in_dir/bubble_sort.tsv

# Comment out until checksum is fixed

if false; then
cmark <<EOF
### palindrome (byte strings, unicode strings)
Expand Down
3 changes: 2 additions & 1 deletion benchmarks/id-test.sh
Expand Up @@ -11,7 +11,8 @@ set -o pipefail
set -o errexit

test-shell-prov() {
  ### Smoke test: run shell-provenance-2 on bin/osh with fixed inputs
  # Arguments are: host label, job ID, output dir, then the shell to record.
  shell-provenance-2 no-host 2022-12-29 _tmp/ \
    bin/osh
}

test-out-param() {
Expand Down
68 changes: 1 addition & 67 deletions benchmarks/id.sh
Expand Up @@ -3,7 +3,7 @@
# Keep track of benchmark data provenance.
#
# Usage:
# ./id.sh <function name>
# benchmarks/id.sh <function name>

set -o nounset
set -o pipefail
Expand Down Expand Up @@ -335,72 +335,6 @@ publish-compiler-id() {
# The table can be passed to other benchmarks to ensure that their provenance
# is recorded.

shell-provenance() {
  ### Write info about the given shells to a file, and print its name
  #
  # Args:
  #   $1     label - if non-empty, it is used as the host name and the
  #                  {shell,host}-id links go under _tmp/provenance;
  #                  if empty, $(hostname) and ../benchmark-data are used
  #   $2...  paths of the shells to record
  # Stdout: path of the legacy .provenance.txt file (used in command sub)
  local label=$1  # if it exists, it overrides the host
  shift

  local job_id
  job_id=$(print-job-id)

  local tmp_prov_dir=_tmp/provenance
  mkdir -p "$tmp_prov_dir"

  # Declare the name that is actually assigned below, so it stays local.
  local host_name
  local prov_dir  # for $prov_dir/{shell-id,host-id}

  if test -n "$label"; then  # label is often 'no-host'
    host_name=$label
    prov_dir=$tmp_prov_dir  # local links
  else
    host_name=$(hostname)
    prov_dir='../benchmark-data'  # shared links
  fi

  log "*** $label $host_name $prov_dir"

  local tmp_dir=_tmp/host-id/$host_name
  dump-host-id "$tmp_dir"

  local host_hash
  host_hash=$(publish-host-id "$tmp_dir" "$prov_dir/host-id")
  local shell_hash

  # Legacy text file. TODO: remove
  local out_txt=$tmp_prov_dir/${host_name}.${job_id}.provenance.txt
  : > "$out_txt"  # truncate, no header

  # TSV file
  local out_tsv=$tmp_prov_dir/${host_name}.${job_id}.provenance.tsv
  tsv-row job_id host_name host_hash sh_path shell_hash > "$out_tsv"

  # Declared separately from the command sub so a failing basename is not
  # masked by the exit status of 'local'.
  local sh_path
  local name
  for sh_path in "$@"; do
    # There will be two different OSH
    name=$(basename "$sh_path")

    tmp_dir=_tmp/shell-id/$name
    dump-shell-id "$sh_path" "$tmp_dir"

    # writes to ../benchmark-data or _tmp/provenance
    shell_hash=$(publish-shell-id "$tmp_dir" "$prov_dir/shell-id")

    # note: filter-provenance depends on $4 being $sh_path
    # APPEND to txt
    echo "$job_id $host_name $host_hash $sh_path $shell_hash" >> "$out_txt"

    tsv-row "$job_id" "$host_name" "$host_hash" "$sh_path" "$shell_hash" >> "$out_tsv"
  done

  log "Wrote $out_txt and $out_tsv"

  # Return value used in command sub
  echo "$out_txt"
}

shell-provenance-2() {
### Write to _tmp/provenance.{txt,tsv} and $out_dir/{shell,host-id}

Expand Down
8 changes: 5 additions & 3 deletions benchmarks/osh-runtime.sh
Expand Up @@ -222,6 +222,8 @@ measure() {
print-tasks $host_name $osh_native | run-tasks $tsv_out $files_base_dir

# TODO: call gc_stats_to_tsv.py here, adding HOST NAME, and put it in 'raw'

cp -v _tmp/provenance.tsv $out_dir
}

stage1() {
Expand Down Expand Up @@ -257,6 +259,9 @@ stage1() {
# - concat multiple hosts in stage1
benchmarks/gc_stats_to_tsv.py $raw_dir/gc-*.txt \
> $BASE_DIR/stage1/gc_stats.tsv

# TODO: Concatenate by host.
cp -v $raw_dir/provenance.tsv $out_dir
}

print-report() {
Expand Down Expand Up @@ -352,9 +357,6 @@ soil-run() {

measure $single_machine $host_job_id $OSH_EVAL_NINJA_BUILD

# R uses the TSV version of the provenance. TODO: concatenate per-host
cp -v _tmp/provenance.tsv $BASE_DIR/stage1/provenance.tsv

# Trivial concatenation for 1 machine
stage1 '' $single_machine

Expand Down
1 change: 1 addition & 0 deletions soil/worker.sh
Expand Up @@ -286,6 +286,7 @@ dump-distro soil/worker.sh dump-distro -
dump-locale soil/worker.sh dump-locale -
configure-test ./configure-test.sh soil_run -
time-test benchmarks/time-test.sh soil-run -
id-test benchmarks/id-test.sh soil-run -
csv-concat-test devtools/csv-concat-test.sh soil-run -
osh2oil test/osh2oil.sh soil-run -
R-test devtools/R-test.sh soil-run -
Expand Down

0 comments on commit 5db813b

Please sign in to comment.