diff --git a/benchmarks/id.sh b/benchmarks/id.sh index a1f31d1f31..203bb97f46 100755 --- a/benchmarks/id.sh +++ b/benchmarks/id.sh @@ -338,7 +338,7 @@ shell-provenance() { if test -n "$label"; then # label is often 'no-host' host='no-host' - host_hash='-' + host_hash='no-hash' # Don't write to ../benchmark-data dest_base=_tmp/provenance diff --git a/benchmarks/osh-parser.sh b/benchmarks/osh-parser.sh index c395a28d65..b2002b2d2e 100755 --- a/benchmarks/osh-parser.sh +++ b/benchmarks/osh-parser.sh @@ -3,7 +3,7 @@ # Measure how fast the OSH parser is. # # Usage: -# ./osh-parser.sh +# benchmarks/osh-parser.sh # # Hacky way to run it by itself: # @@ -152,10 +152,12 @@ print-tasks() { # Add 1 field for each of 5 fields. cat $provenance | filter-provenance "$@" | while read fields; do - cat $SORTED | xargs -n 1 -- echo "$fields" - - # Quick test - #head -n 2 $SORTED | xargs -n 1 -- echo "$fields" + if test -n "${QUICKLY:-}"; then + # Quick test + head -n 2 $SORTED | xargs -n 1 -- echo "$fields" + else + cat $SORTED | xargs -n 1 -- echo "$fields" + fi done } @@ -199,6 +201,7 @@ readonly NUM_TASK_COLS=6 # input columns: 5 from provenance, 1 for file measure() { local provenance=$1 local raw_dir=${2:-$BASE_DIR/raw} + local oil_native=${3:-$OIL_NATIVE} # Job ID is everything up to the first dot in the filename. local name=$(basename $provenance) @@ -221,7 +224,7 @@ measure() { > $times_out local tasks=$BASE_DIR/tasks.txt - print-tasks $provenance "${SHELLS[@]}" $OIL_NATIVE > $tasks + print-tasks $provenance "${SHELLS[@]}" $oil_native > $tasks # Run them all cat $tasks | xargs -n $NUM_TASK_COLS -- $0 parser-task $raw_dir @@ -232,6 +235,7 @@ measure() { measure-cachegrind() { local provenance=$1 local raw_dir=${2:-$BASE_DIR/raw} + local oil_native=${3:-$OIL_NATIVE} # Job ID is everything up to the first dot in the filename. local name=$(basename $provenance) @@ -260,7 +264,7 @@ measure-cachegrind() { # zsh weirdly forks during zsh -n, which complicates our cachegrind # measurement. So just ignore it. (This can be seen with # strace -e fork -f -- zsh -n $file) - print-tasks $provenance bash dash mksh $OIL_NATIVE > $ctasks + print-tasks $provenance bash dash mksh $oil_native > $ctasks cat $ctasks | xargs -n $NUM_TASK_COLS -- $0 cachegrind-task $raw_dir @@ -312,6 +316,7 @@ stage1-cachegrind() { stage1() { local raw_dir=${1:-$BASE_DIR/raw} + local single_machine=${2:-} local out=$BASE_DIR/stage1 mkdir -p $out @@ -322,35 +327,45 @@ stage1() { stage1-cachegrind $raw_dir $out $raw_data_csv - local -a x=($raw_dir/$MACHINE1.*.virtual-memory) - local -a y=($raw_dir/$MACHINE2.*.virtual-memory) + local lines_csv=$out/lines.csv - local times_csv=$out/times.csv - # Globs are in lexicographical order, which works for our dates. - local -a a=($raw_dir/$MACHINE1.*.times.csv) - local -a b=($raw_dir/$MACHINE2.*.times.csv) + local -a raw=() + if test -n "$single_machine"; then + local -a a=($raw_dir/$single_machine.*.times.csv) + raw+=( ${a[-1]} ) + echo ${a[-1]} >> $raw_data_csv - csv-concat ${a[-1]} ${b[-1]} > $times_csv + # They are the same, output one of them. + cat $raw_dir/$single_machine.*.lines.csv > $lines_csv + else + # Globs are in lexicographical order, which works for our dates. + local -a a=($raw_dir/$MACHINE1.*.times.csv) + local -a b=($raw_dir/$MACHINE2.*.times.csv) - { - echo ${a[-1]} - echo ${b[-1]} - } >> $raw_data_csv + raw+=( ${a[-1]} ${b[-1]} ) + { + echo ${a[-1]} + echo ${b[-1]} + } >> $raw_data_csv - # Verify that the files are equal, and pass one of them. - local lines_csv=$out/lines.csv - local -a c=($raw_dir/$MACHINE1.*.lines.csv) - local -a d=($raw_dir/$MACHINE2.*.lines.csv) - local left=${c[-1]} - local right=${d[-1]} + # Verify that the files are equal, and pass one of them. + local -a c=($raw_dir/$MACHINE1.*.lines.csv) + local -a d=($raw_dir/$MACHINE2.*.lines.csv) - if ! diff $left $right; then - die "Benchmarks were run on different files ($left != $right)" + local left=${c[-1]} + local right=${d[-1]} + + if ! diff $left $right; then + die "Benchmarks were run on different files ($left != $right)" + fi + + # They are the same, output one of them. + cat $left > $lines_csv fi - # They are the same, output one of them. - cat $left > $lines_csv + local times_csv=$out/times.csv + csv-concat "${raw[@]}" > $times_csv head $out/* wc -l $out/* @@ -402,32 +417,43 @@ are chosen to minimize its effect. EOF csv2html $in_dir/summary.csv - cmark < @@ -460,39 +492,48 @@ cachegrind-main() { } -# Measure the parser with cachegrind in CI. -# -# TODO: -# - benchmarks/gc.sh -# - add HTML for this -# - benchmarks/vm-baseline -# - add bin/osh too? We have oil-native -# - benchmarks/compute -# - Enhance it to use cachegrind, not wall time. -# - number of allocations with uftrace - -# - benchmarks/osh-parser can also measure -# - the HTML report should accept one machine -# - number of allocations with uftrace -# -# Later: -# - benchmarks/ovm-build.sh -- binary size and timing -# - maybe just do it for oil-native +soil-shell-provenance() { + ### Only measure shells in the Docker image + + local label=$1 + shift + # This is a superset of shells; see filter-provenance + # - _bin/osh isn't available in the Docker image, so use bin/osh instead + + benchmarks/id.sh shell-provenance "$label" bash dash bin/osh "$@" +} soil-run() { - local base_dir=_tmp/benchmark-data - mkdir -p $base_dir + ### Run it on just this machine, and make a report + + rm -r -f $BASE_DIR + mkdir -p $BASE_DIR + + # TODO: could add _bin/cxx-bumpleak/osh_eval, but we would need to fix + # $shell_name - # Test the one that's IN TREE, NOT in ../benchmark-data local osh_eval=_bin/cxx-opt/osh_eval.stripped + local -a oil_bin=( $osh_eval ) + ninja "${oil_bin[@]}" + + local label='no-host' + + local provenance + provenance=$(soil-shell-provenance $label "${oil_bin[@]}") + + measure $provenance '' $osh_eval + + measure-cachegrind $provenance '' $osh_eval - # Assume ./NINJA-config.sh was already run - ninja $osh_eval + # Make it run on one machine + stage1 '' $label - OIL_NATIVE=$osh_eval cachegrind-main $base_dir + benchmarks/report.sh stage2 $BASE_DIR + benchmarks/report.sh stage3 $BASE_DIR - find-dir-html $base_dir + # Index of raw files + find-dir-html _tmp/osh-parser files } "$@" diff --git a/benchmarks/osh-runtime.sh b/benchmarks/osh-runtime.sh index 40a5a26c8e..4830a7d37f 100755 --- a/benchmarks/osh-runtime.sh +++ b/benchmarks/osh-runtime.sh @@ -180,6 +180,11 @@ print-tasks() { # NOTE: 'abuild-help' is a dummy label. echo "$prefix" hello-world hello-world echo "$prefix" abuild abuild-help + + if test -n "${QUICKLY:-}"; then + continue + fi + echo "$prefix" cpython cpython-configure for dir in "${TAR_SUBDIRS[@]}"; do diff --git a/benchmarks/report.R b/benchmarks/report.R index 63da51de3b..6dd2785898 100755 --- a/benchmarks/report.R +++ b/benchmarks/report.R @@ -41,14 +41,18 @@ GetOshLabel = function(shell_hash) { path = sprintf('../benchmark-data/shell-id/osh-%s/osh-version.txt', shell_hash) - Log('Reading %s', path) - lines = readLines(path) - if (length(grep('OVM', lines)) > 0) { - label = 'osh-ovm' - } else if (length(grep('CPython', lines)) > 0) { - label = 'osh-cpython' + if (file.exists(path)) { + Log('Reading %s', path) + lines = readLines(path) + if (length(grep('OVM', lines)) > 0) { + label = 'osh-ovm' + } else if (length(grep('CPython', lines)) > 0) { + label = 'osh-cpython' + } else { + stop("Couldn't find OVM or CPython in the version string") + } } else { - stop("Couldn't find OVM or CPython in the version string") + label = sprintf('osh-%s', shell_hash) } return(label) } @@ -121,9 +125,13 @@ ParserReport = function(in_dir, out_dir) { # lines_per_sec? times %>% left_join(lines, by = c('path')) %>% - mutate(elapsed_ms = elapsed_secs * 1000, + mutate(filename = basename(path), filename_HREF = sourceUrl(path), + max_rss_MB = max_rss_KiB * 1024 / 1e6, + elapsed_ms = elapsed_secs * 1000, + user_ms = user_secs * 1000, + sys_ms = sys_secs * 1000, lines_per_ms = num_lines / elapsed_ms) %>% - select(-c(elapsed_secs)) -> + select(-c(path, max_rss_KiB, elapsed_secs, user_secs, sys_secs)) -> joined_times #print(head(times)) @@ -175,11 +183,16 @@ ParserReport = function(in_dir, out_dir) { summarize(total_lines = sum(num_lines), total_ms = sum(elapsed_ms)) %>% mutate(lines_per_ms = total_lines / total_ms) %>% select(-c(total_ms)) %>% - spread(key = host_label, value = lines_per_ms) %>% - # sort by parsing rate on the fast machine - arrange(desc(`host lenny`)) -> + spread(key = host_label, value = lines_per_ms) -> times_summary + # Sort by parsing rate on the fast machine + if ("host lenny" %in% colnames(times_summary)) { + times_summary %>% arrange(desc(`host lenny`)) -> times_summary + } else { + times_summary %>% arrange(desc(`host no-host`)) -> times_summary + } + Log('times_summary:') print(times_summary) @@ -192,69 +205,82 @@ ParserReport = function(in_dir, out_dir) { select(-c(total_irefs)) -> cachegrind_summary - # Elapsed seconds for each shell by platform and file - joined_times %>% - select(-c(lines_per_ms, user_secs, sys_secs, max_rss_KiB)) %>% - spread(key = shell_label, value = elapsed_ms) %>% - arrange(host_label, num_lines) %>% - mutate(filename = basename(path), filename_HREF = sourceUrl(path), - osh_to_bash_ratio = `oil-native` / bash) %>% - select(c(host_label, bash, dash, mksh, zsh, - `osh-ovm`, `osh-cpython`, `oil-native`, - osh_to_bash_ratio, num_lines, filename, filename_HREF)) -> - elapsed + if ("no-host" %in% distinct_hosts$host_label) { - Log('\n') - Log('ELAPSED') - print(elapsed) + # We don't have all the shells + elapsed = NA + rate = NA + max_rss = NA + instructions = NA - # Rates by file and shell - joined_times %>% - select(-c(elapsed_ms, user_secs, sys_secs, max_rss_KiB)) %>% - spread(key = shell_label, value = lines_per_ms) %>% - arrange(host_label, num_lines) %>% - mutate(filename = basename(path), filename_HREF = sourceUrl(path)) %>% - select(c(host_label, bash, dash, mksh, zsh, - `osh-ovm`, `osh-cpython`, `oil-native`, - num_lines, filename, filename_HREF)) -> - rate - - Log('\n') - Log('RATE') - print(rate) - - # Memory usage by file - joined_times %>% - select(-c(elapsed_ms, lines_per_ms, user_secs, sys_secs)) %>% - mutate(max_rss_MB = max_rss_KiB * 1024 / 1e6) %>% - select(-c(max_rss_KiB)) %>% - spread(key = shell_label, value = max_rss_MB) %>% - arrange(host_label, num_lines) %>% - mutate(filename = basename(path), filename_HREF = sourceUrl(path)) %>% - select(c(host_label, bash, dash, mksh, zsh, - `osh-ovm`, `osh-cpython`, `oil-native`, - num_lines, filename, filename_HREF)) -> - max_rss + joined_times %>% + select(c(shell_label, elapsed_ms, user_ms, sys_ms, max_rss_MB, + num_lines, filename, filename_HREF)) %>% + arrange(filename, elapsed_ms) -> times_flat - Log('\n') - Log('joined_cachegrind has %d rows', nrow(joined_cachegrind)) - #print(joined_cachegrind) - print(joined_cachegrind %>% filter(path == 'benchmarks/testdata/configure-helper.sh')) + } else { - # Cachegrind instructions by file - joined_cachegrind %>% - mutate(thousand_irefs_per_line = irefs / num_lines / 1000) %>% - select(-c(irefs)) %>% - spread(key = shell_label, value = thousand_irefs_per_line) %>% - arrange(num_lines) %>% - mutate(filename = basename(path), filename_HREF = sourceUrl(path)) %>% - select(c(bash, dash, mksh, `oil-native`, - num_lines, filename, filename_HREF)) -> - instructions - - Log('\n') - Log('instructions has %d rows', nrow(instructions)) - print(instructions) + times_flat = NA + + # Elapsed seconds for each shell by platform and file + joined_times %>% + select(-c(lines_per_ms, user_ms, sys_ms, max_rss_MB)) %>% + spread(key = shell_label, value = elapsed_ms) %>% + arrange(host_label, num_lines) %>% + mutate(osh_to_bash_ratio = `oil-native` / bash) %>% + select(c(host_label, bash, dash, mksh, zsh, + `osh-ovm`, `osh-cpython`, `oil-native`, + osh_to_bash_ratio, num_lines, filename, filename_HREF)) -> + elapsed + + Log('\n') + Log('ELAPSED') + print(elapsed) + + # Rates by file and shell + joined_times %>% + select(-c(elapsed_ms, user_ms, sys_ms, max_rss_MB)) %>% + spread(key = shell_label, value = lines_per_ms) %>% + arrange(host_label, num_lines) %>% + select(c(host_label, bash, dash, mksh, zsh, + `osh-ovm`, `osh-cpython`, `oil-native`, + num_lines, filename, filename_HREF)) -> + rate + + Log('\n') + Log('RATE') + print(rate) + + # Memory usage by file + joined_times %>% + select(-c(elapsed_ms, lines_per_ms, user_ms, sys_ms)) %>% + spread(key = shell_label, value = max_rss_MB) %>% + arrange(host_label, num_lines) %>% + select(c(host_label, bash, dash, mksh, zsh, + `osh-ovm`, `osh-cpython`, `oil-native`, + num_lines, filename, filename_HREF)) -> + max_rss + + Log('\n') + Log('joined_cachegrind has %d rows', nrow(joined_cachegrind)) + #print(joined_cachegrind) + print(joined_cachegrind %>% filter(path == 'benchmarks/testdata/configure-helper.sh')) + + # Cachegrind instructions by file + joined_cachegrind %>% + mutate(thousand_irefs_per_line = irefs / num_lines / 1000) %>% + select(-c(irefs)) %>% + spread(key = shell_label, value = thousand_irefs_per_line) %>% + arrange(num_lines) %>% + mutate(filename = basename(path), filename_HREF = sourceUrl(path)) %>% + select(c(bash, dash, mksh, `oil-native`, + num_lines, filename, filename_HREF)) -> + instructions + + Log('\n') + Log('instructions has %d rows', nrow(instructions)) + print(instructions) + } WriteDetails(distinct_hosts, distinct_shells, out_dir) @@ -272,14 +298,21 @@ ParserReport = function(in_dir, out_dir) { precision = ColumnPrecision(list(), default = 1) writeTsv(cachegrind_summary, file.path(out_dir, 'cachegrind_summary'), precision) - # Round to nearest millisecond, but the ratio has a decimal point. - precision = ColumnPrecision(list(osh_to_bash_ratio = 1), default = 0) - writeCsv(elapsed, file.path(out_dir, 'elapsed'), precision) - writeCsv(rate, file.path(out_dir, 'rate')) - writeCsv(max_rss, file.path(out_dir, 'max_rss')) + if (!is.na(times_flat)) { + writeTsv(times_flat, file.path(out_dir, 'times_flat'), precision) + } - precision = ColumnPrecision(list(), default = 1) - writeTsv(instructions, file.path(out_dir, 'instructions'), precision) + if (!is.na(elapsed)) { # equivalent to no-host + # Round to nearest millisecond, but the ratio has a decimal point. + precision = ColumnPrecision(list(osh_to_bash_ratio = 1), default = 0) + + writeCsv(elapsed, file.path(out_dir, 'elapsed'), precision) + writeCsv(rate, file.path(out_dir, 'rate')) + writeCsv(max_rss, file.path(out_dir, 'max_rss')) + + precision = ColumnPrecision(list(), default = 1) + writeTsv(instructions, file.path(out_dir, 'instructions'), precision) + } Log('Wrote %s', out_dir) } diff --git a/benchmarks/vm-baseline.sh b/benchmarks/vm-baseline.sh index 27937fa866..3a16aff345 100755 --- a/benchmarks/vm-baseline.sh +++ b/benchmarks/vm-baseline.sh @@ -102,22 +102,26 @@ print-report() { -

Virtual Memory Baseline

+EOF + + cmark << 'EOF' +## Virtual Memory Baseline -

Memory Used at Startup (MB)

+### Memory Used at Startup (MB) -

Running under osh-ovm. Memory usage is measured in MB - (powers of 10), not MiB (powers of 2).

+Running under `osh-ovm`. Memory usage is measured in MB (powers of 10), not +MiB (powers of 2). EOF csv2html $in_dir/vm-baseline.csv - # TODO: This could be shared with osh-parser and osh-runtime? - cat <Shell and Host Details --> -EOF - #csv2html $in_dir/shells.csv - #csv2html $in_dir/hosts.csv + # R code doesn't generate this + if false; then + cmark <<< '### Shell and Host Details' + + csv2html $in_dir/shells.csv + csv2html $in_dir/hosts.csv + fi cat < @@ -137,7 +141,7 @@ soil-shell-provenance() { shift # TODO: mksh, zsh - benchmarks/id.sh shell-provenance "$label" bash dash "$@" + benchmarks/id.sh shell-provenance "$label" bash dash bin/osh "$@" } soil-run() { diff --git a/soil/web-remote.sh b/soil/web-remote.sh index 9cc8e29a91..8fb007d493 100644 --- a/soil/web-remote.sh +++ b/soil/web-remote.sh @@ -260,7 +260,7 @@ make-job-wwz() { zip -q -r $wwz \ index.html \ _test \ - _tmp/{soil,spec,stateful,syscall,benchmark-data,metrics,mycpp-examples,compute,gc,vm-baseline,osh-runtime} \ + _tmp/{soil,spec,stateful,syscall,benchmark-data,metrics,mycpp-examples,compute,gc,vm-baseline,osh-runtime,osh-parser} \ web/{base,spec-code,spec-tests,spec-cpp,line-counts,benchmarks}.css web/ajax.js \ web/table/table-sort.{css,js} \ _release/oil*.tar _release/VERSION/doc diff --git a/soil/worker.sh b/soil/worker.sh index 7664dc9eba..5e28d3ff80 100755 --- a/soil/worker.sh +++ b/soil/worker.sh @@ -176,13 +176,15 @@ benchmarks-tasks() { cat <