[benchmarks] Remove old provenance function.

- Add shell unit test
oilshell · Dec 29, 2022 · 5db813b · 5db813b
1 parent f4b3844
commit 5db813b
Show file tree

Hide file tree

Showing 6 changed files with 33 additions and 124 deletions.
diff --git a/benchmarks/auto.sh b/benchmarks/auto.sh
@@ -33,76 +33,44 @@ _banner() {
   echo -----
 }
 
-  # New interface for shell-provenance
-  # 3 fixed inputs:
-  #   maybe_host   - 'lenny' or 'no-host'
-  #   job_id       - use $(print-job-timestamp)
-  #   out_dir      - location for put shell-id, host-id, but TSV is first
-  #                  written to _tmp/provenance.tsv, and later COPIED TO EACH
-  #                  $out_dir/$bench_name/$host_job_id/ dir
-  # Variable inputs:
-  #   list of shells
-
-  # shell-provenance-tsv 'no-host' $(print-job-id) _tmp \
-  #   bash dash bin/osh $OSH_EVAL_NINJA_BUILD
-
-  # shell-provenance-tsv 'lenny' $(print-job-id) ../benchmark-data \
-  #   bash dash bin/osh $OSH_EVAL_BENCHMARK_DATA
-  #
-  # - A key problem is that you need to concat the two provenances
-  #   - and CHECK that you're comparing the same shells!
-  #   - the number of hosts should be 2, and they should have an equal number
-  #   of rows
-  #   - and there should be exactly 2 of every hash?
-
 measure-shells() {
   local host_name=$1
-  local host_job_id=$2
-
-  # TODO:
+  local job_id=$2
 
-  # capture the filename
-  local provenance
-  # pass empty label, so it writes to ../benchmark-data/{shell,host}-id
-  provenance=$(benchmarks/id.sh shell-provenance '' \
+  local out_dir=../benchmark-data
+  benchmarks/id.sh shell-provenance-2 \
+    $host_name $job_id $out_dir \
     "${SHELLS[@]}" $OSH_EVAL_BENCHMARK_DATA python2
-  )
 
-  local out_dir=../benchmark-data
-
-  #local name
-  #name=$(basename $provenance)
-  #local host_job_id=${name%.provenance.txt}  # strip suffix
-
-  benchmarks/vm-baseline.sh measure \
-    $provenance $host_job_id $out_dir/vm-baseline
+  local host_job_id="$host_name.$job_id"
 
+  # New Style doesn't need provenance -- it's joined later
   benchmarks/osh-runtime.sh measure \
     $host_name $host_job_id $OSH_EVAL_BENCHMARK_DATA $out_dir/osh-runtime
 
-  # TODO: Either
-  # (OLD) cp -v _tmp/provenance.txt $out_dir/osh-runtime/$host.$job_id.provenance.txt
-  # (NEW) cp -v _tmp/provenance.tsv $out_dir/osh-runtime/raw.$host.$job_id/
-  #
-  # Eliminate $job_id calculation from shell-provenance altogether
-  # All soil-shell-provenance callers should just pass $job_id and $maybe_host
+  # Old style needs provenance
+  local provenance=_tmp/provenance.txt
 
-  # SAVE provenance so you know which 2 machines a benchmark ran on
-  cp -v $provenance $out_dir/osh-runtime
+  benchmarks/vm-baseline.sh measure \
+    $provenance $host_job_id $out_dir/vm-baseline
 
   benchmarks/osh-parser.sh measure \
     $provenance $host_job_id $out_dir/osh-parser
+
   benchmarks/compute.sh measure \
     $provenance $host_job_id $out_dir/compute
 }
 
 measure-builds() {
-  local base_dir=../benchmark-data
+  local host_name=$1
+  local job_id=$2
+
+  local out_dir=../benchmark-data
 
   local provenance
   provenance=$(benchmarks/id.sh compiler-provenance)  # capture the filename
 
-  benchmarks/ovm-build.sh measure $provenance $base_dir/ovm-build
+  benchmarks/ovm-build.sh measure $provenance $out_dir/ovm-build
 }
 
 # Run all benchmarks from a clean git checkout.
@@ -131,8 +100,8 @@ all() {
     benchmarks/osh-parser.sh cachegrind-main $host_job_id ''
   fi
 
-  measure-shells $host_name $host_job_id
-  measure-builds
+  measure-shells $host_name $job_id
+  measure-builds $host_name $job_id
 }
 
 #

diff --git a/benchmarks/compute.sh b/benchmarks/compute.sh
@@ -385,9 +385,10 @@ measure() {
   hello-all $provenance $host_job_id $out_dir
   fib-all $provenance $host_job_id $out_dir
 
-  if test -n "${QUICKLY:-}"; then
-    return
-  fi
+  # TODO: QUICKLY early return is disabled; stage1 would need the same skip logic
+  #if test -n "${QUICKLY:-}"; then
+  #  return
+  #fi
 
   word_freq-all $provenance $host_job_id $out_dir
   parse_help-all $provenance $host_job_id $out_dir
@@ -456,7 +457,7 @@ stage1() {
 
   local -a raw=()
 
-  # TODO: Doesn't respect QUICKLY=1
+  # TODO: We should respect QUICKLY=1
   for metric in hello fib word_freq parse_help bubble_sort palindrome; do
     local dir=$raw_dir/$metric
 
@@ -544,6 +545,7 @@ EOF
   tsv2html $in_dir/bubble_sort.tsv
 
   # Comment out until checksum is fixed
+
 if false; then
   cmark <<EOF
 ### palindrome (byte strings, unicode strings)

diff --git a/benchmarks/id-test.sh b/benchmarks/id-test.sh
@@ -11,7 +11,8 @@ set -o pipefail
 set -o errexit
 
 test-shell-prov() {
-  shell-provenance no-host bin/osh
+  shell-provenance-2 no-host 2022-12-29 _tmp/ \
+    bin/osh
 }
 
 test-out-param() {

diff --git a/benchmarks/id.sh b/benchmarks/id.sh
@@ -3,7 +3,7 @@
 # Keep track of benchmark data provenance.
 #
 # Usage:
-#   ./id.sh <function name>
+#   benchmarks/id.sh <function name>
 
 set -o nounset
 set -o pipefail
@@ -335,72 +335,6 @@ publish-compiler-id() {
 # The table can be passed to other benchmarks to ensure that their provenance
 # is recorded.
 
-shell-provenance() {
-  ### Write info about the given shells to a file, and print its name
-  local label=$1  # if it exists, it overrides the host
-  shift
-
-  # log "*** shell-provenance"
-
-  local job_id
-  job_id=$(print-job-id)
-
-  local tmp_prov_dir=_tmp/provenance
-  mkdir -p $tmp_prov_dir
-
-  local host
-  local prov_dir  # for $prov_dir/{shell-id,host-id}
-
-  if test -n "$label"; then  # label is often 'no-host'
-    host_name=$label
-    prov_dir=$tmp_prov_dir  # local links
-  else
-    host_name=$(hostname)
-    prov_dir='../benchmark-data'  # shared links
-  fi
-
-  log "*** $label $host_name $prov_dir"
-
-  #set -x
-
-  local tmp_dir=_tmp/host-id/$host_name
-  dump-host-id $tmp_dir
-
-  local host_hash
-  host_hash=$(publish-host-id $tmp_dir "$prov_dir/host-id")
-  local shell_hash
-
-  # Legacy text file.  TODO: remove
-  local out_txt=$tmp_prov_dir/${host_name}.${job_id}.provenance.txt
-  echo -n '' > $out_txt  # trunacte, no header
-
-  # TSV file
-  local out_tsv=$tmp_prov_dir/${host_name}.${job_id}.provenance.tsv
-  tsv-row job_id host_name host_hash sh_path shell_hash > $out_tsv
-
-  for sh_path in "$@"; do
-    # There will be two different OSH
-    local name=$(basename $sh_path)
-
-    tmp_dir=_tmp/shell-id/$name
-    dump-shell-id $sh_path $tmp_dir
-
-    # writes to ../benchmark-data or _tmp/provenance
-    shell_hash=$(publish-shell-id $tmp_dir "$prov_dir/shell-id")
-
-    # note: filter-provenance depends on $4 being $sh_path
-    # APPEND to txt
-    echo "$job_id $host_name $host_hash $sh_path $shell_hash" >> $out_txt
-
-    tsv-row "$job_id" "$host_name" "$host_hash" "$sh_path" "$shell_hash" >> $out_tsv
-  done
-
-  log "Wrote $out_txt and $out_tsv"
-
-  # Return value used in command sub
-  echo $out_txt
-}
-
 shell-provenance-2() {
   ### Write to _tmp/provenance.{txt,tsv} and $out_dir/{shell,host-id}
 

diff --git a/benchmarks/osh-runtime.sh b/benchmarks/osh-runtime.sh
@@ -222,6 +222,8 @@ measure() {
   print-tasks $host_name $osh_native | run-tasks $tsv_out $files_base_dir
 
   # TODO: call gc_stats_to_tsv.py here, adding HOST NAME, and put it in 'raw'
+
+  cp -v _tmp/provenance.tsv $out_dir
 }
 
 stage1() {
@@ -257,6 +259,9 @@ stage1() {
   # - concat multiple hosts in stage1
   benchmarks/gc_stats_to_tsv.py $raw_dir/gc-*.txt \
     > $BASE_DIR/stage1/gc_stats.tsv
+
+  # TODO: Concatenate by host.
+  cp -v $raw_dir/provenance.tsv $out_dir
 }
 
 print-report() {
@@ -352,9 +357,6 @@ soil-run() {
 
   measure $single_machine $host_job_id $OSH_EVAL_NINJA_BUILD
 
-  # R uses the TSV version of the provenance.  TODO: concatenate per-host
-  cp -v _tmp/provenance.tsv $BASE_DIR/stage1/provenance.tsv
-
   # Trivial concatenation for 1 machine
   stage1 '' $single_machine
 

diff --git a/soil/worker.sh b/soil/worker.sh
@@ -286,6 +286,7 @@ dump-distro            soil/worker.sh dump-distro                 -
 dump-locale            soil/worker.sh dump-locale                 -
 configure-test         ./configure-test.sh soil_run               -
 time-test              benchmarks/time-test.sh soil-run           -
+id-test                benchmarks/id-test.sh soil-run             -
 csv-concat-test        devtools/csv-concat-test.sh soil-run       -
 osh2oil                test/osh2oil.sh soil-run                   -
 R-test                 devtools/R-test.sh soil-run                -