From eac976109c11ada7d0bddbf88394d32acef6cf54 Mon Sep 17 00:00:00 2001 From: Andy C Date: Thu, 10 Nov 2022 10:47:21 -0500 Subject: [PATCH] [benchmarks/mycpp refactor] Be consistent with other benchmarks - copy Ninja task files into _tmp/mycpp-examples, and make an index.html for them Also: - Clean up release automation - release-quality.md: Add link for benchmarks/gc/ --- benchmarks/auto.sh | 33 ++++++++++++++-------------- benchmarks/gc.sh | 2 +- benchmarks/mycpp.sh | 50 ++++++++++++++++++++++++++++++++++-------- benchmarks/report.sh | 28 +++-------------------- devtools/release.sh | 24 +++++++++----------- doc/release-quality.md | 2 ++ mycpp/TEST.sh | 26 ++++------------------ soil/worker.sh | 2 +- 8 files changed, 79 insertions(+), 88 deletions(-) diff --git a/benchmarks/auto.sh b/benchmarks/auto.sh index 787c2ee9f4..3a79b9a061 100755 --- a/benchmarks/auto.sh +++ b/benchmarks/auto.sh @@ -7,14 +7,17 @@ # # List of benchmarks: # -# - osh-parser -# - osh-runtime (now called runtime.sh, or wild-run) -# - vm-baseline -# - compute -# - awk-python could be moved here -# - startup.sh could be moved here, it also has strace counts -# - ovm-build -# - gc +# - Single Machine (for now): +# - mycpp-examples +# - gc +# - Multiple machines +# - osh-parser +# - osh-runtime +# - vm-baseline +# - compute +# - awk-python could be moved here +# - startup.sh could be moved here, it also has strace counts +# - ovm-build set -o nounset set -o pipefail @@ -29,12 +32,6 @@ _banner() { echo ----- } -# Check that the code is correct before measuring performance! -prereq() { - test/unit.sh all - test/spec.sh all -} - osh-parser-quick() { ### Quick evaluation of the parser # Follow the instructions at the top of benchmarks/osh-parser.sh to use this @@ -101,12 +98,16 @@ measure-builds() { # Before this, run devtools/release.sh benchmark-build. 
all() { - local do_cachegrind=${1:-} + local do_machine1=${1:-} # Notes: # - During release, this happens on machine1, but not machine2 # - Depends on oil-native being built - if test -n "$do_cachegrind"; then + if test -n "$do_machine1"; then + # Only run on one machine + benchmarks/mycpp.sh soil-run + benchmarks/gc.sh soil-run + benchmarks/osh-parser.sh cachegrind-main '' $OIL_NATIVE fi diff --git a/benchmarks/gc.sh b/benchmarks/gc.sh index 06f96c8df9..e15e63b50f 100755 --- a/benchmarks/gc.sh +++ b/benchmarks/gc.sh @@ -223,7 +223,7 @@ parse-compare-two() { print-report() { local in_dir=$1 - benchmark-html-head 'Allocation and GC Overhead' + benchmark-html-head 'Memory Management Overhead' cat < diff --git a/benchmarks/mycpp.sh b/benchmarks/mycpp.sh index a506cea84e..ad82dc2635 100755 --- a/benchmarks/mycpp.sh +++ b/benchmarks/mycpp.sh @@ -13,6 +13,8 @@ REPO_ROOT=$(cd $(dirname $0)/.. && pwd) readonly REPO_ROOT source benchmarks/common.sh +source soil/common.sh # find-dir-html +source test/common.sh # R_PATH source test/tsv-lib.sh # tsv2html print-report() { @@ -54,6 +56,14 @@ EOF tsv2html $in_dir/max_rss.tsv + # This file is benchmarks.wwz/mycpp-examples/ or _tmp/mycpp-examples/ + # The link only exists in the latter case + cmark << 'EOF' +--- +[raw benchmark files](raw/benchmark/index.html) + +EOF + if false; then cmark < EOF } +soil-run() { + # Run and report mycpp/examples BENCHMARKS only. + + local base_dir=${1:-_tmp/mycpp-examples} + local in_tsv=_test/benchmark-table.tsv + + # Force SERIAL reexecution of benchmarks + # Notes: + # - This is why benchmarks don't really belong in Ninja? 
+ # - mycpp/TEST.sh test-translator does 'mycpp-logs-equal', which also runs + # tests + + local task_dir=_test/tasks/benchmark + rm -r -f --verbose $task_dir + ninja -j 1 $in_tsv + + mkdir -p $base_dir/raw + cp -v $in_tsv $base_dir/raw + cp -R $task_dir/ $base_dir/raw/benchmark/ + + local dir2=$base_dir/stage2 + mkdir -p $dir2 + + R_LIBS_USER=$R_PATH benchmarks/report.R mycpp $base_dir/raw $dir2 + + benchmarks/report.sh stage3 $base_dir mycpp + + # The data is in _test/tasks; we could move it to _test/benchmarks/mycpp/ or + # something + find-dir-html $base_dir/raw/benchmark +} "$@" diff --git a/benchmarks/report.sh b/benchmarks/report.sh index b3f5555230..1edb8295ef 100755 --- a/benchmarks/report.sh +++ b/benchmarks/report.sh @@ -83,29 +83,6 @@ compute() { stage3 $base_dir } -mycpp-examples() { - # Run AND report benchmarks. - - local base_dir=${1:-_tmp/mycpp-examples} - local in_tsv=_test/benchmark-table.tsv - - # Force SERIAL reexecution - # TODO: This is why benchmarks don't really belong in Ninja? - rm -r -f --verbose _test/tasks/benchmark/ - - ninja -j 1 $in_tsv - - mkdir -p $base_dir/raw - cp -v $in_tsv $base_dir/raw - - local dir2=$base_dir/stage2 - mkdir -p $dir2 - - R_LIBS_USER=$R_PATH benchmarks/report.R mycpp $base_dir/raw $dir2 - - stage3 $base_dir mycpp -} - all() { osh-parser osh-runtime @@ -113,8 +90,9 @@ all() { ovm-build compute - # Only run on one machine - mycpp-examples + # Note: + # benchmarks/mycpp and benchmarks/gc run on one machine, and are done in + # benchmarks/auto.sh } # For view diff --git a/devtools/release.sh b/devtools/release.sh index 3fad366992..c9ee8f1e8a 100755 --- a/devtools/release.sh +++ b/devtools/release.sh @@ -94,8 +94,7 @@ auto-machine1() { fi $0 spec-all # spec tests run here again - $0 benchmark-run do_cachegrind - $0 mycpp-examples + $0 benchmark-run do_machine1 } # Note: needs dep-benchmarks to run @@ -352,10 +351,10 @@ benchmark-build() { # Run benchmarks with the binary built out of the tarball. 
benchmark-run() { - local do_cachegrind=${1:-} + local do_machine1=${1:-} _build-oil-native-benchmark-data - OSH_OVM=$OSH_RELEASE_BINARY benchmarks/auto.sh all "$do_cachegrind" + OSH_OVM=$OSH_RELEASE_BINARY benchmarks/auto.sh all "$do_machine1" } _compressed-tarball() { @@ -457,28 +456,25 @@ compress-benchmarks() { local out="$root/benchmarks.wwz" - # Technically we only need index.html. But it's nice to have stage1 and - # stage2 in case we need backup. + # - For benchmarks that run on multiple machines, technically we only need + # index.html, but include stage1 and stage2. + # - For those that run on single machines, we also archive the raw/ dir. + # - raw/ is archived for benchmarks/compute too, although it is saved in oilshell/benchmark-data - # note: mycpp-benchmarks only run on one machine pushd _tmp find \ - compute/{stage1,stage2,index.html} \ osh-parser/{stage1,stage2,index.html} \ osh-runtime/{stage1,stage2,index.html} \ vm-baseline/{stage1,stage2,index.html} \ ovm-build/{stage1,stage2,index.html} \ - mycpp-examples/{stage2,index.html} \ + compute/{raw,stage1,stage2,index.html} \ + gc/{raw,stage2,index.html} \ + mycpp-examples/{raw,stage2,index.html} \ -type f \ | xargs --verbose -- zip -q $out popd } -mycpp-examples() { - ### Single machine benchmarks that show our GC progress - mycpp/TEST.sh test-translator -} - line-counts() { local out=$1 # should be an absolute path mkdir -p $out diff --git a/doc/release-quality.md b/doc/release-quality.md index f36e140db2..49762413ff 100644 --- a/doc/release-quality.md +++ b/doc/release-quality.md @@ -84,6 +84,8 @@ This is a supplement to the [main release page](index.html). shells use at startup? - [mycpp](benchmarks.wwz/mycpp-examples/). Compares Python and generated C++ on small examples. +- [Memory Management Overhead](benchmarks.wwz/gc/). How much time do we spend + managing memory, compared with the shell interpreter? 
## Metrics diff --git a/mycpp/TEST.sh b/mycpp/TEST.sh index 9930a203d8..1336a87974 100755 --- a/mycpp/TEST.sh +++ b/mycpp/TEST.sh @@ -258,27 +258,6 @@ test-runtime() { # Translator # -compare-examples() { - banner 'compare-examples' - - ./NINJA-config.sh - - # 'mycpp-all' has other stuff like type checking alone, stripping, clang builds - # Note: only tests CORRECTNESS of benchmarks. To test speed, we run them - # SERIALLY with benchmarks/report.sh. TODO: could move that here. - - set +o errexit - ninja mycpp-logs-equal - local status=$? - set -o errexit - - # Only for CI - find-dir-html _test mycpp-examples - - # Now we want to zip up - return $status -} - test-translator() { ### Invoked by soil/worker.sh @@ -294,7 +273,10 @@ test-translator() { run-test-func test-invalid-examples _test/mycpp/test-invalid-examples.log # Runs test in cxx-asan variant, and benchmarks in cxx-opt variant - compare-examples + if ! ninja mycpp-logs-equal; then + log 'FAIL mycpp-logs-equal' + return 1 + fi } unit-test-coverage() { diff --git a/soil/worker.sh b/soil/worker.sh index d082e0b710..ead8b45afc 100755 --- a/soil/worker.sh +++ b/soil/worker.sh @@ -181,7 +181,7 @@ osh-runtime benchmarks/osh-runtime.sh soil-run _tmp/osh-runtime/index.ht vm-baseline benchmarks/vm-baseline.sh soil-run _tmp/vm-baseline/index.html compute benchmarks/compute.sh soil-run _tmp/compute/index.html gc benchmarks/gc.sh soil-run _tmp/gc/index.html -mycpp-benchmarks benchmarks/report.sh mycpp-examples _tmp/mycpp-examples/index.html +mycpp-benchmarks benchmarks/mycpp.sh soil-run _tmp/mycpp-examples/index.html EOF }