Permalink
Browse files

Prepare to analyze the osh-runtime benchmark in R.

Renamed the R script so it can be used for many benchmarks.
  • Loading branch information...
Andy Chu
Andy Chu committed Dec 5, 2017
1 parent 5c998d6 commit a8bb5c6f952aa3194c7b90dcac648fe307f28196
Showing with 71 additions and 29 deletions.
  1. +7 −9 benchmarks/osh-parser.sh
  2. +22 −2 benchmarks/osh-runtime.sh
  3. +42 −18 benchmarks/{osh-parser.R → report.R}
View
@@ -1,6 +1,6 @@
#!/bin/bash
#
# Measure how fast the OSH parser is.a
# Measure how fast the OSH parser is.
#
# Usage:
# ./osh-parser.sh <function name>
@@ -10,6 +10,7 @@ set -o pipefail
set -o errexit
source test/common.sh # die
source benchmarks/common.sh # csv-concat
# TODO: The raw files should be published. In both
# ~/git/oilshell/benchmarks-data and also in the /release/ hierarchy?
@@ -92,6 +93,7 @@ print-tasks() {
done
}
readonly HEADER='status,elapsed_secs,host_name,host_hash,shell_name,shell_hash,path'
readonly NUM_COLUMNS=6 # 5 from provenance, 1 for file
# Figure out all tasks to run, and run them. When called from auto.sh, $2
@@ -113,7 +115,7 @@ all() {
local sorted=$SORTED
# Write Header of the CSV file that is appended to.
echo 'status,elapsed_secs,host_name,host_hash,shell_name,shell_hash,path' > $times_out
echo $HEADER > $times_out
local tasks=$raw_dir/tasks.txt
print-tasks $provenance > $tasks
@@ -148,10 +150,6 @@ fake-other-host() {
# Data Preparation and Analysis
#
# Thin wrapper: forwards all of its arguments, unchanged, to
# tools/csv_concat.py (path is relative to the current directory).
csv-concat() {
tools/csv_concat.py "$@"
}
stage1() {
local raw_dir=${1:-_tmp/osh-parser/raw}
#local raw_dir=${1:-../benchmark-data/osh-parser}
@@ -197,12 +195,12 @@ stage1() {
}
# Stage 2 for osh-parser: create the stage2 output directory, run the R
# report script against the stage1 directory, then print the result with
# `tree`.
# NOTE(review): this span appears to contain both the pre-change and the
# post-change line of each edited statement (two `local out=` assignments,
# two R invocations, two `tree` calls).  Confirm which line of each pair is
# current before relying on this function as written.
stage2() {
local out=_tmp/osh-parser/stage2
local out=$BASE_DIR/stage2
mkdir -p $out
benchmarks/osh-parser.R _tmp/osh-parser/stage1 $out
benchmarks/report.R osh-parser $BASE_DIR/stage1 $out
tree $BASE_DIR
tree $out
}
# TODO:
View
@@ -10,6 +10,7 @@ set -o pipefail
set -o errexit
source test/common.sh
source benchmarks/common.sh # csv-concat
readonly BASE_DIR=_tmp/osh-runtime
readonly TAR_DIR=$PWD/$BASE_DIR # Make it absolute
@@ -97,8 +98,6 @@ conf-task() {
pushd $conf_dir >/dev/null
touch __TIMESTAMP
# TODO: write timestamp
# exit code, time in seconds, host_hash, shell_hash, path. \0
# would have been nice here!
$time_tool \
@@ -193,6 +192,27 @@ all() {
cp -v $provenance $raw_dir
}
# Stage 1 for osh-runtime: copy the raw timing CSV to a fixed file name.
#
# Globals:
#   BASE_DIR (read)
# Arguments:
#   $1 - directory containing the raw *.times.csv file
#        (optional; defaults to $BASE_DIR/raw)
# Outputs:
#   Writes $BASE_DIR/stage1/times.csv.  `cp -v` reports the copy on stdout;
#   errors go to stderr.
# Returns:
#   Non-zero when the raw directory does not contain exactly one
#   *.times.csv file, or when mkdir/cp fail.
stage1() {
  local raw_dir=${1:-$BASE_DIR/raw}
  #local raw_dir=${1:-../benchmark-data/osh-parser}
  local out_dir=$BASE_DIR/stage1
  mkdir -p "$out_dir"

  # Just copy for now.  The destination is a single file, so exactly one
  # input must match; otherwise cp would fail with a confusing
  # "target is not a directory" (many matches) or "no such file" (none).
  local -a times_files=( "$raw_dir"/*.times.csv )
  if [[ ${#times_files[@]} -ne 1 || ! -e ${times_files[0]} ]]; then
    echo "stage1: expected exactly one *.times.csv in '$raw_dir'" >&2
    return 1
  fi

  cp -v "${times_files[0]}" "$out_dir/times.csv"
}
# Stage 2 for osh-runtime: run the shared R report on the stage1 data.
#
# Globals:
#   BASE_DIR (read)
# Arguments:
#   None
# Outputs:
#   Creates $BASE_DIR/stage2, invokes benchmarks/report.R with the
#   'osh-runtime' action, the stage1 directory and the stage2 directory,
#   then lists the output directory with `tree`.
stage2() {
  local out=$BASE_DIR/stage2
  mkdir -p "$out"

  # Quoted so that a BASE_DIR containing whitespace or glob characters is
  # passed through as a single argument.
  benchmarks/report.R osh-runtime "$BASE_DIR/stage1" "$out"

  tree "$out"
}
#
# Non-configure scripts
#
@@ -27,10 +27,25 @@ sourceUrl2 = function(filename) {
filename)
}
main = function(argv) {
in_dir = argv[[1]]
out_dir = argv[[2]]
# Write a CSV file along with a schema.
#
# Args:
#   table: data frame to write.
#   prefix: output path without an extension.  Two files are produced:
#     <prefix>.csv         the data itself
#     <prefix>.schema.csv  one row per column: its name and its typeof()
writeCsv = function(table, prefix) {
  data_out_path = paste0(prefix, '.csv')
  # FALSE rather than F: F is an ordinary variable that can be reassigned.
  write.csv(table, data_out_path, row.names = FALSE)

  # typeof() of every column, in column order.  unname() so the column names
  # are not reused as row names of the schema.
  types = unname(vapply(table, typeof, character(1)))

  # Base data.frame instead of the deprecated data_frame(); the CSV written
  # below is the same and this function no longer needs a package for it.
  schema = data.frame(
    column_name = names(table),
    type = types,
    stringsAsFactors = FALSE
  )

  schema_out_path = paste0(prefix, '.schema.csv')
  write.csv(schema, schema_out_path, row.names = FALSE)
}
ParserReport = function(in_dir, out_dir) {
times = read.csv(file.path(in_dir, 'times.csv'))
lines = read.csv(file.path(in_dir, 'lines.csv'))
raw_data = read.csv(file.path(in_dir, 'raw-data.csv'))
@@ -203,26 +218,35 @@ main = function(argv) {
writeCsv(vm_table, file.path(out_dir, 'virtual-memory'))
Log('Wrote %s', out_dir)
Log('PID %d done', Sys.getpid())
}
# Write a CSV file along with a schema.
writeCsv = function(table, prefix) {
data_out_path = paste0(prefix, '.csv')
write.csv(table, data_out_path, row.names = F)
RuntimeReport = function(in_dir, out_dir) {
times = read.csv(file.path(in_dir, 'times.csv'))
fieldType = function(field_name) { typeof(table[[field_name]]) }
print(summary(times))
print(head(times))
types_list = lapply(names(table), fieldType)
types = as.character(types_list)
#lines = read.csv(file.path(in_dir, 'lines.csv'))
#raw_data = read.csv(file.path(in_dir, 'raw-data.csv'))
#vm = read.csv(file.path(in_dir, 'virtual-memory.csv'))
schema = data_frame(
column_name = names(table),
type = types
)
schema_out_path = paste0(prefix, '.schema.csv')
write.csv(schema, schema_out_path, row.names = F)
#writeCsv(host_table, file.path(out_dir, 'hosts'))
Log('Wrote %s', out_dir)
}
# Entry point: dispatch to the report for the requested benchmark.
#
# Args:
#   argv: command line arguments, in order:
#     [1] action   'osh-parser' or 'osh-runtime'
#     [2] in_dir   directory the report reads from
#     [3] out_dir  directory the report writes to
#
# Exits with status 1 when fewer than three arguments are given or the
# action is not recognized.
main = function(argv) {
  # Validate before indexing: argv[[2]] on a short vector would otherwise
  # abort with an unhelpful "subscript out of bounds".
  if (length(argv) < 3) {
    Log('Expected 3 arguments (action, in_dir, out_dir), got %d', length(argv))
    quit(status = 1)
  }

  action = argv[[1]]
  in_dir = argv[[2]]
  out_dir = argv[[3]]

  if (action == 'osh-parser') {
    ParserReport(in_dir, out_dir)

  } else if (action == 'osh-runtime') {
    RuntimeReport(in_dir, out_dir)

  } else {
    Log("Invalid action '%s'", action)
    quit(status = 1)
  }

  Log('PID %d done', Sys.getpid())
}
if (length(sys.frames()) == 0) {

0 comments on commit a8bb5c6

Please sign in to comment.