Permalink
Browse files

Run the osh-parser benchmarks on different platforms.

- Added shell-id and platform-id concepts in benchmarks/id.sh.
  Hopefully this will retain the provenance of the data over the long
  term, so we can do historical comparisons.
- Adjust the summarization and report.
  • Loading branch information...
Andy Chu
Andy Chu committed Oct 30, 2017
1 parent 547056e commit 802064650618851d53013699b283f2af877e6c4d
Showing with 398 additions and 170 deletions.
  1. +30 −0 benchmarks/NOTES.txt
  2. +233 −0 benchmarks/id.sh
  3. +57 −46 benchmarks/osh-parser.R
  4. +78 −124 benchmarks/osh-parser.sh
View
@@ -0,0 +1,30 @@
OSH 0.2 comparisons
-------------------
dimension: shell runtime
- osh vs. bash vs. dash etc.
dimension: Python runtime (including toolchain flags, which is -O3)
- _bin/osh vs bin/osh
ovm vs system-python
dimension: machine (cpu, kernel, etc.)
- flanders vs. lisa
OSH 0.3 comparisons
-------------------
dimension: Python bytecode compiler
- opy vs. cpython
dimension: C compiler
- clang vs gcc
dimension: C compiler flags
- -O2 vs. -O3
- -m32 ?
TODO:
- Should add bytecode compiler version to oil --version
- What about compiler flags?
View
@@ -0,0 +1,233 @@
#!/bin/bash
#
# Keep track of benchmark data provenance.
#
# Usage:
# ./id.sh <function name>
# Strict mode: treat unset variables as errors, make a pipeline fail if any
# stage fails, and exit on any unhandled non-zero status.
set -o nounset
set -o pipefail
set -o errexit
# TODO: add benchmark labels/hashes for osh and all other shells
#
# Need to archive labels too.
#
# TODO: How do I make sure the zsh label is current? Across different
# machines?
#
# What happens when zsh is silently upgraded?
# I guess before every benchmark, you have to run the ID collection. Man
# that is a lot of code.
#
# Should I make symlinks to the published location?
#
# Maybe bash/dash/mksh/zsh should be invoked through a symlink?
# Every symlink is a shell runtime version, and it has an associated
# toolchain?
# Platform is ambient?
# _tmp/
#   shell-id/
#     bash/
#       HASH.txt
#       version.txt
#     dash/
#       HASH.txt
#       version.txt
#   platform-id/
#     lisa/
#       HASH.txt
#       cpuinfo.txt
#       cpuinfo.txt
# ../benchmark-data/
#   shell-id/
#     bash-$HASH/
#     osh-$HASH/   # osh-cpython, osh-ovm? osh-opy-ovm? Too many dimensions.
#                  # the other shells don't have this?
#     zsh-$HASH/
#   platform-id/
#     lisa-$HASH/
# Print a fatal error message on stderr and terminate the script.
#
# Arguments: the words of the message; they are joined with spaces.
# Outputs:   "FATAL: <message>" on stderr.
# Exits with status 1; never returns to the caller.
die() {
  # "$*" joins all arguments into a single word.  "$@" embedded in a larger
  # string would expand to several words instead (ShellCheck SC2145).
  echo "FATAL: $*" 1>&2
  exit 1
}
# Copy the contents of a file to another path, but only if the source is a
# regular file.
#
# Arguments:
#   $1 - source path
#   $2 - destination path (overwritten)
# Returns: 0 when the source is missing (nothing is written), otherwise the
#          status of the copy.
_dump-if-exists() {
  local path=$1
  local out=$2

  # A missing source is the expected "nothing to dump" case.  It must report
  # success: the script runs under errexit, so a non-zero status from a bare
  # call would abort the whole run.
  [[ -f "$path" ]] || return 0

  cat -- "$path" > "$out"
}
#
# Shell ID
#
# Record identifying information about a shell in a directory.
#
# Arguments:
#   $1 - path to the shell (e.g. bin/osh or /bin/bash); its basename selects
#        how the version is obtained
#   $2 - output directory (default: _tmp/shell-id/<basename of $1>)
# Outputs (files created under the output directory):
#   git-branch.txt, git-commit-hash.txt - only when $1 is bin/osh or _bin/osh
#   version.txt                         - bash, zsh
#   osh-version.txt                     - osh
#   dpkg-version.txt                    - dash, mksh
# Dies with "Invalid shell" for any other shell name.
dump-shell-id() {
  local sh=$1  # path to the shell

  local name
  name=$(basename -- "$sh")

  local out_dir=${2:-_tmp/shell-id/$name}
  mkdir -p -- "$out_dir"

  # Add extra repository info for osh.
  case "$sh" in
    bin/osh | _bin/osh)
      local branch
      branch=$(git rev-parse --abbrev-ref HEAD)
      echo "$branch" > "$out_dir/git-branch.txt"
      git rev-parse "$branch" > "$out_dir/git-commit-hash.txt"
      ;;
  esac

  case "$name" in
    bash | zsh)
      "$sh" --version > "$out_dir/version.txt"
      ;;
    osh)
      "$sh" --version > "$out_dir/osh-version.txt"
      ;;
    dash | mksh)
      # These don't have version strings, so ask the package manager.
      dpkg -s "$name" > "$out_dir/dpkg-version.txt"
      ;;
    *)
      die "Invalid shell '$name'"
      ;;
  esac
}
# Print the parts of a dumped shell ID that should contribute to its hash.
#
# Arguments:
#   $1 - directory of shell ID files (e.g. _tmp/shell-id/osh)
# Outputs (stdout, in this order, each only if the file exists):
#   version.txt          - whole file
#   dpkg-version.txt     - lines starting with "Version"
#   osh-version.txt      - lines starting with "Oil version" or "Interpreter"
#   git-commit-hash.txt  - whole file
# Returns: always 0; absent files and non-matching greps are not errors.
_shell-id-hash() {
  local src=$1
  local file

  file=$src/version.txt
  if [[ -f "$file" ]]; then
    cat -- "$file"
  fi

  # Only hash the dimensions we want to keep.
  # grep exits 1 when nothing matches; that is not a failure here.
  file=$src/dpkg-version.txt
  if [[ -f "$file" ]]; then
    grep -E -- '^Version' "$file" || true
  fi

  # Interpreter as CPython vs. OVM is what we care about now.
  file=$src/osh-version.txt
  if [[ -f "$file" ]]; then
    grep -E -- '^Oil version|^Interpreter' "$file" || true
  fi

  # For OSH
  file=$src/git-commit-hash.txt
  if [[ -f "$file" ]]; then
    cat -- "$file"
  fi

  return 0
}
# Copy a dumped shell ID into the published location under a hashed name.
#
# Arguments:
#   $1 - source directory, e.g. _tmp/shell-id/osh
#   $2 - base of the published tree (default: ../benchmark-data/shell-id)
# Outputs:
#   stdout - the short ID, "<basename of $1>-<first 8 hex digits of the md5>"
#   stderr - a listing of the published directory
#   files  - a copy of $1 at <base>/<id>/, plus HASH.txt
publish-shell-id() {
  local src=$1  # e.g. _tmp/shell-id/osh
  local dest_base=${2:-../benchmark-data/shell-id}

  # Declared separately so a failing command is not masked by 'local'.
  local name
  name=$(basename -- "$src")

  # Problem: OSH is built on each machine.  Get rid of the release date?
  # And use the commit hash or what?
  local hash
  hash=$(_shell-id-hash "$src" | md5sum)  # not secure, an identifier

  local id="$name-${hash:0:8}"
  local dest="$dest_base/$id"

  mkdir -p -- "$dest"
  cp --no-target-directory --recursive -- "$src/" "$dest/"

  # md5sum prints "<digest>  -" when reading stdin.  HASH.txt records the
  # digest followed by " -".
  printf '%s -\n' "${hash%% *}" > "$dest/HASH.txt"

  ls -l -- "$dest" 1>&2

  echo "$id"
}
#
# Platform ID
#
# Events that will change the env for a given machine:
# - kernel upgrade
# - distro upgrade
# How about ~/git/oilshell/benchmark-data/platform-id/lisa-$HASH
# How to calculate the hash though?
# Record identifying information about the current machine in a directory.
#
# Arguments:
#   $1 - output directory (default: _tmp/platform-id/<hostname>)
# Outputs:
#   files  - hostname.txt, machine.txt, kernel.txt, cpuinfo.txt, meminfo.txt,
#            and lsb-release.txt when /etc/lsb-release exists
#   stdout - the first lines of every file in the output directory
dump-platform-id() {
  local out_dir=${1:-}
  if [[ -z "$out_dir" ]]; then
    # Assigned outside 'local' so a failing hostname is not masked.
    out_dir=_tmp/platform-id/$(hostname)
  fi

  mkdir -p -- "$out_dir"

  hostname > "$out_dir/hostname.txt"

  # does it make sense to do individual fields like -m?
  # avoid parsing?
  # We care about the kernel and the CPU architecture.
  # There is a lot of redundant information there.
  uname -m > "$out_dir/machine.txt"

  {
    uname --kernel-release
    uname --kernel-version
  } > "$out_dir/kernel.txt"

  _dump-if-exists /etc/lsb-release "$out_dir/lsb-release.txt"

  cat /proc/cpuinfo > "$out_dir/cpuinfo.txt"

  # mem info doesn't make a difference?  I guess it's just nice to check that
  # it's not swapping.  But shouldn't be part of the hash.
  cat /proc/meminfo > "$out_dir/meminfo.txt"

  head -- "$out_dir"/*
}
# There is already the concept of a target triple?
# http://wiki.osdev.org/Target_Triplet
# It's not exactly the same as what we need here, but close.
# Print the parts of a dumped platform ID that should contribute to its hash.
#
# Arguments:
#   $1 - directory of platform ID files (e.g. _tmp/platform-id/lisa)
# Outputs (stdout, in this order):
#   machine.txt      - e.g. x86_64
#   kernel.txt
#   lsb-release.txt  - only if it exists
# Returns: always 0.
_platform-id-hash() {
  local src=$1

  # CPU info, memory info and the hostname (e.g. lisa) are deliberately not
  # part of the hash.
  cat -- "$src/machine.txt"  # e.g. x86_64
  cat -- "$src/kernel.txt"

  # OS
  local file=$src/lsb-release.txt
  if [[ -f "$file" ]]; then
    cat -- "$file"
  fi

  return 0
}
# Copy a dumped platform ID into the published location under a hashed name.
#
# Arguments:
#   $1 - source directory, e.g. _tmp/platform-id/lisa
#   $2 - base of the published tree (default: ../benchmark-data/platform-id)
# Outputs:
#   stdout - the short ID, "<basename of $1>-<first 8 hex digits of the md5>"
#   stderr - a listing of the published directory
#   files  - a copy of $1 at <base>/<id>/, plus HASH.txt
publish-platform-id() {
  local src=$1  # e.g. _tmp/platform-id/lisa
  local dest_base=${2:-../benchmark-data/platform-id}

  # Declared separately so a failing command is not masked by 'local'.
  local name
  name=$(basename -- "$src")

  local hash
  hash=$(_platform-id-hash "$src" | md5sum)  # not secure, an identifier

  local id="$name-${hash:0:8}"
  local dest="$dest_base/$id"

  mkdir -p -- "$dest"
  cp --no-target-directory --recursive -- "$src/" "$dest/"

  # md5sum prints "<digest>  -" when reading stdin.  HASH.txt records the
  # digest followed by " -".
  printf '%s -\n' "${hash%% *}" > "$dest/HASH.txt"

  ls -l -- "$dest" 1>&2

  echo "$id"
}
# Dispatch: run the function named by the first command-line argument,
# passing the remaining arguments to it (see Usage at the top of the file).
"$@"
Oops, something went wrong.

0 comments on commit 8020646

Please sign in to comment.