@@ -16,28 +16,45 @@ source test/common.sh  # die
 
 readonly BASE_DIR=_tmp/osh-parser
 readonly SORTED=$BASE_DIR/tmp/sorted.txt
 
-# Where we copied them from.
-import-files() {
-  grep -v '^#' benchmarks/osh-parser-originals.txt |
-    xargs --verbose -I {} -- cp {} benchmarks/testdata
-}
-
-# NOTE --ast-format none eliminates print time!  That is more than
-# half of it!  (60 seconds with serialization, 29 seconds without.)
-# TODO: That is the only difference... hm.
-#
 # TODO:
 # - Have OSH --parse-and-dump-path
 #   - it can dump /proc/self/meminfo
 
-sh-one() {
-  local append_out=$1
-  local vm_out_dir=$2
-  local sh_path=$3
-  local host_name=$4
-  local host_hash=$5
+write-sorted-manifest() {
+  local files=${1:-benchmarks/osh-parser-files.txt}
+  local counts=$BASE_DIR/tmp/line-counts.txt
+  local csv_out=$2
+
+  # Remove comments and sort by line count
+  grep -v '^#' $files | xargs wc -l | sort -n > $counts
+
+  # Raw list of paths
+  cat $counts | awk '$2 != "total" { print $2 }' > $SORTED
+
+  # Make a CSV file from wc output
+  cat $counts | awk '
+      BEGIN { print "num_lines,path" }
+      $2 != "total" { print $1 "," $2 }' \
+      > $csv_out
+}
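
# Example (hypothetical files): the pipeline above turns `wc -l` output in
# $counts, e.g.
#
#     4 benchmarks/testdata/a.sh
#     9 benchmarks/testdata/b.sh
#    13 total
#
# into this $csv_out, with the "total" row filtered out by the awk pattern:
#
#   num_lines,path
#   4,benchmarks/testdata/a.sh
#   9,benchmarks/testdata/b.sh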
 
+# Called by xargs with a task row.
+parser-task() {
+  local raw_dir=$1  # output
+  local job_id=$2
+  local host=$3
+  local host_hash=$4
+  local sh_path=$5
   local shell_hash=$6
-  local path=$7
+  local script_path=$7
 
-  echo "--- $sh_path $path ---"
+  echo "--- $sh_path $script_path ---"
 
+  local times_out="$raw_dir/$host.$job_id.times.csv"
+  local vm_out_dir="$raw_dir/$host.$job_id.virtual-memory"
+  mkdir -p $vm_out_dir
 
   local shell_name
   shell_name=$(basename $sh_path)
 
@@ -49,110 +66,84 @@ sh-one() {
     #extra_args='--ast-format none'
 
     local script_name
     local vm_out_path
-    script_name=$(basename $path)
+    script_name=$(basename $script_path)
     vm_out_path="${vm_out_dir}/${shell_name}-${shell_hash}__${script_name}.txt"
     extra_args="--dump-proc-status-to $vm_out_path"
 
     # And then add that as --field?
     # This adds 0.01 seconds?
     # or shell_hash
     # Then you need a Python or R script to make a CSV file out of VmPeak VmRSS
     # etc.
 
     # Should we add a field here to say it has VM stats?
   fi
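
# (For reference: --dump-proc-status-to appears to save a copy of
# /proc/self/status, which contains lines like "VmPeak:  49732 kB" and
# "VmRSS:  12104 kB"; the sample values here are made up.)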
 
   # exit code, time in seconds, host_hash, shell_hash, path.  \0
   # would have been nice here!
   benchmarks/time.py \
-    --output $append_out \
-    --field "$host_name" --field "$host_hash" \
+    --output $times_out \
+    --field "$host" --field "$host_hash" \
     --field "$shell_name" --field "$shell_hash" \
-    --field "$path" -- \
-    "$sh_path" -n $extra_args "$path" || echo FAILED
+    --field "$script_path" -- \
+    "$sh_path" -n $extra_args "$script_path" || echo FAILED
 }
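
# Example of a single task row (values hypothetical).  A provenance row has
# 5 fields; print-tasks below appends one script path to make 6 columns,
# and xargs maps them onto $2..$7 of parser-task:
#
#   row:   2017-12-01__10-00-00 lisa 1234ab bin/osh cdef56 benchmarks/testdata/configure
#   call:  $0 parser-task $raw_dir \
#            2017-12-01__10-00-00 lisa 1234ab bin/osh cdef56 benchmarks/testdata/configure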
 
+import-files() {
+  grep -v '^#' benchmarks/osh-parser-originals.txt |
+    xargs --verbose -I {} -- cp {} benchmarks/testdata
+}
+
+# For each shell, print 10 script paths.
+print-tasks() {
+  local provenance=$1
 
-write-sorted-manifest() {
-  local files=${1:-benchmarks/osh-parser-files.txt}
-  local counts=$BASE_DIR/raw/line-counts.txt
-  local csv=$2
-
-  # Remove comments and sort by line count
-  grep -v '^#' $files | xargs wc -l | sort -n > $counts
-
-  # Raw list of paths
-  cat $counts | awk '$2 != "total" { print $2 }' > $SORTED
-
-  # Make a LINES_CSV from wc output
-  cat $counts | awk '
-      BEGIN { print "num_lines,path" }
-      $2 != "total" { print $1 "," $2 }' \
-      > $csv
-
-  cat $SORTED
-  echo ---
-  cat $csv
+  # Add 1 field for each of 5 fields.
+  cat $provenance | while read fields; do
+    cat $sorted | xargs -n 1 -- echo $fields
+  done
 }
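
# Example (hypothetical data): print-tasks emits the cross product of
# provenance rows and manifest paths.  With one row and two paths:
#
#   $ cat _tmp/prov.txt
#   2017-12-01__10-00-00 lisa 1234ab bin/osh cdef56
#   $ cat $sorted
#   a.sh
#   b.sh
#   $ print-tasks _tmp/prov.txt
#   2017-12-01__10-00-00 lisa 1234ab bin/osh cdef56 a.sh
#   2017-12-01__10-00-00 lisa 1234ab bin/osh cdef56 b.sh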
 
+# runtime_id, host_hash, toolchain_id (which sometimes you don't know)
+# 5 from provenance, 1 for file
+readonly NUM_COLUMNS=6
 
-run() {
-  local preview=${1:-}
-  local host
-  host=$(hostname)
-
-  local job_id
-  job_id="$host.$(date +%Y-%m-%d__%H-%M-%S)"
-
-  local out_dir='../benchmark-data/osh-parser'
-  local times_out="$out_dir/$job_id.times.csv"
-  local lines_out="$out_dir/$job_id.lines.csv"
-  local vm_out_dir="$out_dir/$job_id.virtual-memory"
-
-  mkdir -p \
-    $(dirname $times_out) \
-    $vm_out_dir \
-    $BASE_DIR/{tmp,raw,stage1,www}
+# Figure out all tasks to run, and run them.  When called from auto.sh, $2
+# should be the ../benchmarks-data repo.
+all() {
+  local provenance=$1
+  local raw_dir=${2:-$BASE_DIR/raw}
+
+  # Job ID is everything up to the first dot in the filename.
+  local name=$(basename $provenance)
+  local prefix=${name%.provenance.txt}  # strip suffix
+
+  local times_out="$raw_dir/$prefix.times.csv"
+  local lines_out="$raw_dir/$prefix.lines.csv"
+
+  mkdir -p $BASE_DIR/{tmp,raw,stage1,www}
 
   write-sorted-manifest '' $lines_out
   local sorted=$SORTED
 
   # Write Header of the CSV file that is appended to.
-  echo 'status,elapsed_secs,host_name,host_hash,shell_name,shell_hash,path' > $times_out
+  echo 'status,elapsed_secs,host_name,host_hash,shell_name,shell_hash,path' \
+    > $times_out
 
-  local tmp_dir=_tmp/host-id/$host
-  benchmarks/id.sh dump-host-id $tmp_dir
-
-  local host_hash
-  host_hash=$(benchmarks/id.sh publish-host-id $tmp_dir)
-  echo $host $host_hash
-
-  local shell_hash
-
-  #for sh_path in bash dash mksh zsh; do
-  for sh_path in bash dash mksh zsh bin/osh _bin/osh; do
-    # There will be two different OSH
-    local name=$(basename $sh_path)
-
-    tmp_dir=_tmp/shell-id/$name
-    benchmarks/id.sh dump-shell-id $sh_path $tmp_dir
-
-    shell_hash=$(benchmarks/id.sh publish-shell-id $tmp_dir)
-
-    echo "$sh_path ID: $shell_hash"
-
-    if ! test -n "$preview"; then
-      # 20ms for ltmain.sh; 34ms for configure
-      cat $sorted | xargs -n 1 -- $0 \
-        sh-one $times_out $vm_out_dir $sh_path $host $host_hash $shell_hash || true
-    fi
-  done
-
-  cat $times_out
-  echo "Wrote $times_out, $lines_out, and $vm_out_dir/"
+  local tasks=$raw_dir/tasks.txt
+  print-tasks $provenance > $tasks
+
+  # Run them all
+  cat $tasks | xargs -n $NUM_COLUMNS -- $0 parser-task $raw_dir
+
+  cp -v $provenance $raw_dir
 }
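
# Hypothetical usage, assuming a provenance file was already written (all
# names made up).  The prefix of the provenance file name determines the
# output names:
#
#   $ benchmarks/osh-parser.sh all _tmp/lisa.2017-12-01__10-00-00.provenance.txt
#   $ ls _tmp/osh-parser/raw/
#   lisa.2017-12-01__10-00-00.lines.csv
#   lisa.2017-12-01__10-00-00.provenance.txt
#   lisa.2017-12-01__10-00-00.times.csv
#   lisa.2017-12-01__10-00-00.virtual-memory/
#   tasks.txt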
 
+#
+# Testing
+#
+
+# Copy data so it looks like it's from another host
+fake-other-host() {
+  local dir=${1:-_tmp/osh-parser/raw}
+  for entry in $dir/lisa*; do
+    local fake=${entry/lisa/flanders}
+    #echo $entry $fake
+    mv -v $entry $fake
+
+    # The host ID isn't changed, but that's OK.
+    # provenance.txt has host names.
+    if test -f $fake; then
+      sed -i 's/lisa/flanders/g' $fake
+    fi
+  done
+}
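
# Note: ${entry/lisa/flanders} is bash pattern substitution, replacing the
# first occurrence of "lisa" in $entry.  Hypothetical example:
#
#   _tmp/osh-parser/raw/lisa.2017-12-01.times.csv
#     -> _tmp/osh-parser/raw/flanders.2017-12-01.times.csv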
 
@@ -164,18 +155,21 @@ csv-concat() {
 }
 
 stage1() {
+  local raw_dir=${1:-_tmp/osh-parser/raw}
+  #local raw_dir=${1:-../benchmark-data/osh-parser}
+
   local out=_tmp/osh-parser/stage1
   mkdir -p $out
 
   local vm_csv=$out/virtual-memory.csv
-  local -a x=(../benchmark-data/osh-parser/flanders.*.virtual-memory)
-  local -a y=(../benchmark-data/osh-parser/lisa.*.virtual-memory)
+  local -a x=($raw_dir/flanders.*.virtual-memory)
+  local -a y=($raw_dir/lisa.*.virtual-memory)
   benchmarks/virtual_memory.py osh-parser ${x[-1]} ${y[-1]} > $vm_csv
 
   local times_csv=$out/times.csv
   # Globs are in lexicographical order, which works for our dates.
-  local -a a=(../benchmark-data/osh-parser/flanders.*.times.csv)
-  local -a b=(../benchmark-data/osh-parser/lisa.*.times.csv)
+  local -a a=($raw_dir/flanders.*.times.csv)
+  local -a b=($raw_dir/lisa.*.times.csv)
   csv-concat ${a[-1]} ${b[-1]} > $times_csv
 
   # Construct a one-column CSV file
 
@@ -187,8 +181,8 @@ stage1() {
 
   # Verify that the files are equal, and pass one of them.
   local lines_csv=$out/lines.csv
-  local -a c=(../benchmark-data/osh-parser/flanders.*.lines.csv)
-  local -a d=(../benchmark-data/osh-parser/lisa.*.lines.csv)
+  local -a c=($raw_dir/flanders.*.lines.csv)
+  local -a d=($raw_dir/lisa.*.lines.csv)
 
   local left=${c[-1]}
   local right=${d[-1]}
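
# Note on ${a[-1]} and friends: bash 4.3+ supports negative array subscripts,
# which index from the end.  Since the date-stamped names sort
# lexicographically, the last glob match is the most recent run.
# Hypothetical example:
#
#   $ a=(raw/lisa.2017-11-29.times.csv raw/lisa.2017-12-01.times.csv)
#   $ echo ${a[-1]}
#   raw/lisa.2017-12-01.times.csv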
 
@@ -334,43 +328,6 @@ report() {
   stage3
 }
 
-_banner() {
-  echo -----
-  echo "$@"
-  echo -----
-}
-
-# Run the whole benchmark from a clean git checkout.
-#
-# Similar to scripts/release.sh build-and-test.
-auto() {
-  test/spec.sh install-shells
-
-  # Technically we need build-essential too?
-  sudo apt install python-dev
-
-  build/dev.sh all
-  build/codegen.sh lexer
-
-  _banner 'OSH dev build'
-  bin/osh -c 'echo OSH dev build'
-
-  build/prepare.sh configure
-  build/prepare.sh build-python
-
-  make _bin/oil.ovm
-  # This does what 'install' does.
-  scripts/run.sh make-bin-links
-
-  _banner 'OSH production build'
-  _bin/osh -c 'echo OSH production build'
-
-  run  # make observations
-
-  # Then the summarize and report steps can be done on a central machine?
-}
 
 time-test() {
   benchmarks/time.py \
     --field bash --field foo.txt --output _tmp/bench.csv \