Permalink
Browse files

Add virtual memory benchmarks.

- The 'vm-baseline' benchmark compares OSH vs bash/dash/mksh.  OSH uses
  more memory than any of them.  Notably, the app bundle uses
  significantly less than the dev version running under CPython.
- Add a --dump-proc-status-to hook in bin/oil.py to measure virtual
  memory after parsing.
- Use this flag in benchmarks/osh-parser.sh.  The amount varies by file.
  It can get over 200 MB for the largest file in OSH!
  • Loading branch information...
Andy Chu
Andy Chu committed Nov 18, 2017
1 parent 4d1771a commit 3a6c86eb7e6833d779e2787bd84cb37888086ea4
Showing with 127 additions and 16 deletions.
  1. +10 −3 benchmarks/id.sh
  2. +27 −13 benchmarks/osh-parser.sh
  3. +72 −0 benchmarks/virtual-memory.sh
  4. +18 −0 bin/oil.py
View
@@ -55,6 +55,10 @@ die() {
exit 1
}
# Print all arguments as a single line on stderr, leaving stdout untouched.
log() {
  >&2 echo "$@"
}
_dump-if-exists() {
local path=$1
local out=$2
@@ -145,7 +149,8 @@ publish-shell-id() {
echo $hash > $dest/HASH.txt
ls -l $dest 1>&2
#ls -l $dest 1>&2 # don't write to stdout
log "Published shell ID to $dest"
echo $id
}
@@ -185,7 +190,7 @@ dump-host-id() {
# it's not swapping. But shouldn't be part of the hash.
cat /proc/meminfo > $out_dir/meminfo.txt
head $out_dir/*
#head $out_dir/* 1>&2 # don't write to stdout
}
# There is already concept of the triple?
@@ -226,7 +231,9 @@ publish-host-id() {
echo $hash > $dest/HASH.txt
ls -l $dest 1>&2
#ls -l $dest 1>&2
log "Published host ID to $dest"
echo $id
}
View
@@ -29,11 +29,12 @@ import-files() {
sh-one() {
local append_out=$1
local sh_path=$2
local host_name=$3
local host_hash=$4
local shell_hash=$5
local path=$6
local vm_out_dir=$2
local sh_path=$3
local host_name=$4
local host_hash=$5
local shell_hash=$6
local path=$7
echo "--- $sh_path $path ---"
local shell_name
@@ -43,7 +44,17 @@ sh-one() {
# 4.4.
extra_args=''
if test "$shell_name" = 'osh'; then
extra_args='--ast-format none'
#extra_args='--ast-format none'
local script_name
local vm_out_path
script_name=$(basename $path)
vm_out_path="${vm_out_dir}/${shell_name}-${shell_hash}__${script_name}.txt"
extra_args="--dump-proc-status-to $vm_out_path"
# And then add that as --field?
# This adds 0.01 seconds?
# or shell_hash
# Then you need a Python or R script to make a CSV file out of VmPeak VmRSS
# etc.
fi
# exit code, time in seconds, host_hash, shell_hash, path. \0
@@ -93,19 +104,22 @@ run() {
local job_id
job_id="$host.$(date +%Y-%m-%d__%H-%M-%S)"
local out_dir='../benchmark-data/osh-parser/'
local out="$out_dir/$job_id.times.csv"
local out_dir='../benchmark-data/osh-parser'
local times_out="$out_dir/$job_id.times.csv"
local lines_out="$out_dir/$job_id.lines.csv"
local vm_out_dir="$out_dir/$job_id.virtual-memory"
mkdir -p \
$(dirname $out) \
$(dirname $times_out) \
$vm_out_dir \
$BASE_DIR/{tmp,raw,stage1,www}
write-sorted-manifest '' $lines_out
local sorted=$SORTED
# Write Header of the CSV file that is appended to.
echo 'status,elapsed_secs,host_name,host_hash,shell_name,shell_hash,path' > $out
echo 'status,elapsed_secs,host_name,host_hash,shell_name,shell_hash,path' \
> $times_out
local tmp_dir=_tmp/host-id/$host
benchmarks/id.sh dump-host-id $tmp_dir
@@ -134,12 +148,12 @@ run() {
if ! test -n "$preview"; then
# 20ms for ltmain.sh; 34ms for configure
cat $sorted | xargs -n 1 -- $0 \
sh-one $out $sh_path $host $host_hash $shell_hash || true
sh-one $times_out $vm_out_dir $sh_path $host $host_hash $shell_hash || true
fi
done
cat $out
echo "Wrote $out"
cat $times_out
echo "Wrote $times_out, $lines_out, and $vm_out_dir/"
}
# TODO:
@@ -0,0 +1,72 @@
#!/bin/bash
#
# Usage:
# ./virtual-memory.sh <function name>
# Strict mode for the whole script:
# - nounset: expanding an unset variable is an error.
# - pipefail: a pipeline fails if any command in it fails.
# - errexit: exit as soon as a command fails outside of a condition.
set -o nounset
set -o pipefail
set -o errexit
# TODO: What format should this be recorded in?
# I think a Python script can parse it to CSV / TSV2.
# Use benchmarks/id.sh too
# Record /proc/<pid>/status for several shells right after startup, as a
# baseline for virtual memory usage.
#
# Output goes to ../benchmark-data/vm-baseline/<host>.<timestamp>/, with one
# <shell name>-<shell hash>.txt file per shell.  Host and shell IDs are dumped
# and published via benchmarks/id.sh.
#
# Command substitutions are assigned separately from 'local'.  Otherwise the
# exit status of 'local' (always 0) hides a failing hostname / id.sh call, and
# the run would continue with an empty hash instead of stopping under errexit.
baseline() {
  local host
  host=$(hostname)
  local job_id
  job_id="$host.$(date +%Y-%m-%d__%H-%M-%S)"
  local out_dir="../benchmark-data/vm-baseline/$job_id"
  mkdir -p "$out_dir"

  local tmp_dir
  tmp_dir=_tmp/host-id/$host
  benchmarks/id.sh dump-host-id "$tmp_dir"

  local host_hash
  host_hash=$(benchmarks/id.sh publish-host-id "$tmp_dir")
  echo "$host" "$host_hash"

  local sh_path
  local sh_name
  local shell_hash
  local out

  # NOTE: for some reason zsh when printing /proc/$$/status gets a cat process,
  # not a zsh process? Check out /proc/$$/maps too. Omitting it for now.
  #
  # NOTE(review): bin/osh and _bin/osh share the basename 'osh', so they use
  # the same _tmp/shell-id/osh directory and their output files only differ if
  # their shell hashes differ -- confirm that is intended.
  for sh_path in bash dash mksh bin/osh _bin/osh; do
    echo "--- $sh_path"

    sh_name=$(basename "$sh_path")
    tmp_dir=_tmp/shell-id/$sh_name
    benchmarks/id.sh dump-shell-id "$sh_path" "$tmp_dir"
    shell_hash=$(benchmarks/id.sh publish-shell-id "$tmp_dir")

    # There is a race condition on the status but sleep helps.
    out="$out_dir/${sh_name}-${shell_hash}.txt"
    "$sh_path" -c 'sleep 0.001; cat /proc/$$/status' > "$out"

    echo "Wrote $out"
    echo
  done
}
# TODO: parse 10 osh-parser files, measure virtual memory at the end. However
# this only applies to OSH, because you need a hook to dump the /proc/$$/status
# file.
# Parse one test script with bin/osh, have it dump its /proc status via
# --dump-proc-status-to, and print the dump.
demo() {
  local dump_dir=_tmp/virtual-memory
  local status_file="$dump_dir/demo.txt"

  # VmRSS: 46 MB for abuild, 200 MB for configure! That is bad. This
  # benchmark really is necessary.
  local script=benchmarks/testdata/abuild
  #local script=benchmarks/testdata/configure

  mkdir -p "$dump_dir"
  bin/osh --dump-proc-status-to "$status_file" "$script"
  cat "$status_file"
}
# Dispatch: run the command line as given, i.e. the function named by the first
# argument (see Usage above) with the remaining arguments passed to it.
"$@"
View
@@ -194,6 +194,9 @@ def OshMain(argv, login_shell):
spec.LongFlag('--trace', ['cmd-parse', 'word-parse', 'lexer']) # NOTE: can only trace one now
spec.LongFlag('--hijack-shebang')
# For benchmarks/virtual-memory.sh.
spec.LongFlag('--dump-proc-status-to', args.Str)
builtin.AddOptionsToArgSpec(spec)
try:
@@ -355,6 +358,21 @@ def OshMain(argv, login_shell):
if exec_opts.noexec:
do_exec = False
# Do this after parsing the entire file. There could be another option to
# do it before exiting runtime?
if opts.dump_proc_status_to:
import time
# This might be superstition, but we want to let the value stabilize
# after parsing. bash -c 'cat /proc/$$/status' gives different results
# with a sleep.
time.sleep(0.001)
input_path = '/proc/%d/status' % os.getpid()
with open(input_path) as f, open(opts.dump_proc_status_to, 'w') as f2:
contents = f.read()
f2.write(contents)
log('Wrote %s to %s', input_path, opts.dump_proc_status_to)
sys.exit(0)
# -n prints AST, --show-ast prints and executes
if exec_opts.noexec or opts.show_ast:
if opts.ast_format == 'none':

0 comments on commit 3a6c86e

Please sign in to comment.