From eac976109c11ada7d0bddbf88394d32acef6cf54 Mon Sep 17 00:00:00 2001
From: Andy C <andy@lenny>
Date: Thu, 10 Nov 2022 10:47:21 -0500
Subject: [PATCH] [benchmarks/mycpp refactor] Be consistent with other
 benchmarks

- copy Ninja task files into _tmp/mycpp-examples, and make an index.html
  for them

Also:

- Clean up release automation
- release-quality.md: Add link for benchmarks/gc/
---
 benchmarks/auto.sh     | 33 ++++++++++++++--------------
 benchmarks/gc.sh       |  2 +-
 benchmarks/mycpp.sh    | 50 ++++++++++++++++++++++++++++++++++--------
 benchmarks/report.sh   | 28 +++--------------------
 devtools/release.sh    | 24 +++++++++-----------
 doc/release-quality.md |  2 ++
 mycpp/TEST.sh          | 26 ++++------------------
 soil/worker.sh         |  2 +-
 8 files changed, 79 insertions(+), 88 deletions(-)
diff --git a/benchmarks/auto.sh b/benchmarks/auto.sh
index 787c2ee9f4..3a79b9a061 100755
--- a/benchmarks/auto.sh
+++ b/benchmarks/auto.sh
@@ -7,14 +7,17 @@
 #
 # List of benchmarks:
 #
-# - osh-parser
-# - osh-runtime (now called runtime.sh, or wild-run)
-# - vm-baseline
-# - compute
-#   - awk-python could be moved here
-#   - startup.sh could be moved here, it also has strace counts
-# - ovm-build
-# - gc
+# - Single machine (for now):
+#   - mycpp-examples
+#   - gc
+# - Multiple machines:
+#   - osh-parser
+#   - osh-runtime
+#   - vm-baseline
+#   - compute
+#     - awk-python could be moved here
+#     - startup.sh could be moved here, it also has strace counts
+#   - ovm-build
 
 set -o nounset
 set -o pipefail
@@ -29,12 +32,6 @@ _banner() {
   echo -----
 }
 
-# Check that the code is correct before measuring performance!
-prereq() {
-  test/unit.sh all
-  test/spec.sh all
-}
-
 osh-parser-quick() {
   ### Quick evaluation of the parser
   # Follow the instructions at the top of benchmarks/osh-parser.sh to use this
@@ -101,12 +98,16 @@ measure-builds() {
 # Before this, run devtools/release.sh benchmark-build.
 
 all() {
-  local do_cachegrind=${1:-}
+  local do_machine1=${1:-}
 
   # Notes:
   # - During release, this happens on machine1, but not machine2
   # - Depends on oil-native being built
-  if test -n "$do_cachegrind"; then
+  if test -n "$do_machine1"; then
+    # Only run on one machine
+    benchmarks/mycpp.sh soil-run
+    benchmarks/gc.sh soil-run
+
     benchmarks/osh-parser.sh cachegrind-main '' $OIL_NATIVE
   fi
 
diff --git a/benchmarks/gc.sh b/benchmarks/gc.sh
index 06f96c8df9..e15e63b50f 100755
--- a/benchmarks/gc.sh
+++ b/benchmarks/gc.sh
@@ -223,7 +223,7 @@ parse-compare-two() {
 print-report() {
   local in_dir=$1
 
-  benchmark-html-head 'Allocation and GC Overhead'
+  benchmark-html-head 'Memory Management Overhead'
 
   cat <<EOF
   <body class="width60">
diff --git a/benchmarks/mycpp.sh b/benchmarks/mycpp.sh
index a506cea84e..ad82dc2635 100755
--- a/benchmarks/mycpp.sh
+++ b/benchmarks/mycpp.sh
@@ -13,6 +13,8 @@ REPO_ROOT=$(cd $(dirname $0)/.. && pwd)
 readonly REPO_ROOT
 
 source benchmarks/common.sh
+source soil/common.sh  # find-dir-html
+source test/common.sh  # R_PATH
 source test/tsv-lib.sh  # tsv2html
 
 print-report() {
@@ -54,6 +56,14 @@ EOF
 
   tsv2html $in_dir/max_rss.tsv
 
+  # This file is benchmarks.wwz/mycpp-examples/ or _tmp/mycpp-examples/
+  # The link only exists in the latter case
+  cmark << 'EOF'
+---
+[raw benchmark files](raw/benchmark/index.html)
+
+EOF
+
 
 if false; then
   cmark <<EOF
@@ -64,20 +74,42 @@ EOF
   tsv2html $in_dir/details.tsv
 fi
 
-  cmark <<'EOF'
-### TODO
-
-- Benchmark with both GCC and Clang, and show compiler provenance.  Right now
-  the compiler is forced to be the system `c++`.
-- Run this benchmark on multiple machines.
-
-EOF
-
   cat <<EOF
   </body>
 </html>
 EOF
 }
 
+soil-run() {
+  ### Run and report mycpp/examples BENCHMARKS only.
+  # $1: output dir (default _tmp/mycpp-examples).  Safe to re-run: the raw/
+  #     copy of the task files is replaced, not nested or merged.
+  local base_dir=${1:-_tmp/mycpp-examples}
+  local in_tsv=_test/benchmark-table.tsv
+
+  # Force SERIAL reexecution of benchmarks
+  # Notes:
+  # - This is why benchmarks don't really belong in Ninja?
+  # - mycpp/TEST.sh test-translator does 'mycpp-logs-equal', which also runs
+  #   tests
+  local task_dir=_test/tasks/benchmark
+  rm -r -f --verbose "$task_dir"
+  ninja -j 1 "$in_tsv"
+
+  # The data is in _test/tasks; we could move it to _test/benchmarks/mycpp/ or
+  # something.  Remove the old copy first: if the destination exists, 'cp -R'
+  # nests the new files in raw/benchmark/benchmark/ and keeps the stale ones.
+  local raw_tasks=$base_dir/raw/benchmark
+  rm -r -f "$raw_tasks"
+  mkdir -p "$base_dir/raw"
+  cp -v "$in_tsv" "$base_dir/raw"
+  cp -R "$task_dir" "$raw_tasks"
+
+  local dir2=$base_dir/stage2
+  mkdir -p "$dir2"
+  R_LIBS_USER=$R_PATH benchmarks/report.R mycpp "$base_dir/raw" "$dir2"
+  benchmarks/report.sh stage3 "$base_dir" mycpp
+  find-dir-html "$raw_tasks"  # index.html for the copied task files
+}
 
 "$@"
diff --git a/benchmarks/report.sh b/benchmarks/report.sh
index b3f5555230..1edb8295ef 100755
--- a/benchmarks/report.sh
+++ b/benchmarks/report.sh
@@ -83,29 +83,6 @@ compute() {
   stage3 $base_dir
 }
 
-mycpp-examples() {
-  # Run AND report benchmarks.
-
-  local base_dir=${1:-_tmp/mycpp-examples}
-  local in_tsv=_test/benchmark-table.tsv
-
-  # Force SERIAL reexecution
-  # TODO: This is why benchmarks don't really belong in Ninja?
-  rm -r -f --verbose _test/tasks/benchmark/
-
-  ninja -j 1 $in_tsv
-
-  mkdir -p $base_dir/raw
-  cp -v $in_tsv $base_dir/raw
-
-  local dir2=$base_dir/stage2
-  mkdir -p $dir2
-
-  R_LIBS_USER=$R_PATH benchmarks/report.R mycpp $base_dir/raw $dir2
-
-  stage3 $base_dir mycpp
-}
-
 all() {
   osh-parser
   osh-runtime
@@ -113,8 +90,9 @@ all() {
   ovm-build
   compute
 
-  # Only run on one machine
-  mycpp-examples
+  # Note:
+  # benchmarks/mycpp and benchmarks/gc run on one machine, and are done in
+  # benchmarks/auto.sh
 }
 
 # For view
diff --git a/devtools/release.sh b/devtools/release.sh
index 3fad366992..c9ee8f1e8a 100755
--- a/devtools/release.sh
+++ b/devtools/release.sh
@@ -94,8 +94,7 @@ auto-machine1() {
   fi
 
   $0 spec-all  # spec tests run here again
-  $0 benchmark-run do_cachegrind
-  $0 mycpp-examples
+  $0 benchmark-run do_machine1
 }
 
 # Note: needs dep-benchmarks to run
@@ -352,10 +351,10 @@ benchmark-build() {
 
 # Run benchmarks with the binary built out of the tarball.
 benchmark-run() {
-  local do_cachegrind=${1:-}
+  local do_machine1=${1:-}
 
   _build-oil-native-benchmark-data
-  OSH_OVM=$OSH_RELEASE_BINARY benchmarks/auto.sh all "$do_cachegrind"
+  OSH_OVM=$OSH_RELEASE_BINARY benchmarks/auto.sh all "$do_machine1"
 }
 
 _compressed-tarball() {
@@ -457,28 +456,25 @@ compress-benchmarks() {
 
   local out="$root/benchmarks.wwz"
 
-  # Technically we only need index.html.  But it's nice to have stage1 and
-  # stage2 in case we need backup.
+  # - For benchmarks that run on multiple machines, technically we only need
+  #   index.html, but include stage1 and stage2.
+  # - For those that run on single machines, we also archive the raw/ dir.
+  #   - Although benchmarks/compute is saved in oilshell/benchmark-data
 
-  # note: mycpp-benchmarks only run on one machine
   pushd _tmp
   find \
-    compute/{stage1,stage2,index.html} \
     osh-parser/{stage1,stage2,index.html} \
     osh-runtime/{stage1,stage2,index.html} \
     vm-baseline/{stage1,stage2,index.html} \
     ovm-build/{stage1,stage2,index.html} \
-    mycpp-examples/{stage2,index.html} \
+    compute/{raw,stage1,stage2,index.html} \
+    gc/{raw,stage2,index.html} \
+    mycpp-examples/{raw,stage2,index.html} \
     -type f \
     | xargs --verbose -- zip -q $out 
   popd
 }
 
-mycpp-examples() {
-  ### Single machine benchmarks that show our GC progress
-  mycpp/TEST.sh test-translator
-}
-
 line-counts() {
   local out=$1  # should be an absolute path
   mkdir -p $out
diff --git a/doc/release-quality.md b/doc/release-quality.md
index f36e140db2..49762413ff 100644
--- a/doc/release-quality.md
+++ b/doc/release-quality.md
@@ -84,6 +84,8 @@ This is a supplement to the [main release page](index.html).
   shells use at startup?
 - [mycpp](benchmarks.wwz/mycpp-examples/).  Compares Python and generated C++
   on small examples.
+- [Memory Management Overhead](benchmarks.wwz/gc/).  How much time do we spend
+  managing memory, compared with the shell interpreter?
 
 ## Metrics
 
diff --git a/mycpp/TEST.sh b/mycpp/TEST.sh
index 9930a203d8..1336a87974 100755
--- a/mycpp/TEST.sh
+++ b/mycpp/TEST.sh
@@ -258,27 +258,6 @@ test-runtime() {
 # Translator
 #
 
-compare-examples() {
-  banner 'compare-examples'
-
-  ./NINJA-config.sh
-
-  # 'mycpp-all' has other stuff like type checking alone, stripping, clang builds
-  # Note: only tests CORRECTNESS of benchmarks.  To test speed, we run them
-  # SERIALLY with benchmarks/report.sh.  TODO: could move that here.
-
-  set +o errexit
-  ninja mycpp-logs-equal
-  local status=$?
-  set -o errexit
-
-  # Only for CI
-  find-dir-html _test mycpp-examples
-
-  # Now we want to zip up
-  return $status
-}
-
 test-translator() {
   ### Invoked by soil/worker.sh
 
@@ -294,7 +273,10 @@ test-translator() {
   run-test-func test-invalid-examples _test/mycpp/test-invalid-examples.log
 
   # Runs test in cxx-asan variant, and benchmarks in cxx-opt variant
-  compare-examples
+  if ! ninja mycpp-logs-equal; then
+    log 'FAIL mycpp-logs-equal'
+    return 1
+  fi
 }
 
 unit-test-coverage() {
diff --git a/soil/worker.sh b/soil/worker.sh
index d082e0b710..ead8b45afc 100755
--- a/soil/worker.sh
+++ b/soil/worker.sh
@@ -181,7 +181,7 @@ osh-runtime      benchmarks/osh-runtime.sh soil-run    _tmp/osh-runtime/index.ht
 vm-baseline      benchmarks/vm-baseline.sh soil-run    _tmp/vm-baseline/index.html
 compute          benchmarks/compute.sh soil-run        _tmp/compute/index.html
 gc               benchmarks/gc.sh soil-run             _tmp/gc/index.html
-mycpp-benchmarks benchmarks/report.sh mycpp-examples   _tmp/mycpp-examples/index.html
+mycpp-benchmarks benchmarks/mycpp.sh soil-run          _tmp/mycpp-examples/index.html
 EOF
 }