Permalink
Browse files

Adjust the precision of floating point numbers in the benchmark tables.

Most measurements need milliseconds, not fractions of a millisecond.

- The R code now has ways to specify precision by column.
- And csv2html.py reads precision out of the schema.

Also:

- A benchmark index
- Move tasks.txt out of the way in osh-{parser,runtime}.sh.
  • Loading branch information...
Andy Chu
Andy Chu committed Dec 8, 2017
1 parent 001647c commit 6a4683b70865c4826ae1d800c33733bcfacfda01
Showing with 114 additions and 20 deletions.
  1. +1 −1 benchmarks/osh-parser.sh
  2. +1 −1 benchmarks/osh-runtime.sh
  3. +45 −10 benchmarks/report.R
  4. +10 −0 benchmarks/report.sh
  5. +3 −1 scripts/release.sh
  6. +22 −0 web/table/csv2html-test.sh
  7. +32 −7 web/table/csv2html.py
View
@@ -110,7 +110,7 @@ measure() {
# Write Header of the CSV file that is appended to.
echo $HEADER > $times_out
local tasks=$raw_dir/tasks.txt
local tasks=$BASE_DIR/tasks.txt
print-tasks $provenance > $tasks
# Run them all
@@ -239,7 +239,7 @@ measure() {
# Write Header of the CSV file that is appended to.
echo $HEADER > $times_out
local tasks=$raw_dir/tasks.txt
local tasks=$BASE_DIR/tasks.txt
print-tasks $provenance > $tasks
# An empty pattern matches every line.
View
@@ -34,19 +34,46 @@ benchmarkDataLink = function(subdir, name, suffix) {
subdir, name, suffix)
}
# Same precision for all columns.
#
# Args:
#   precision: the precision value to report for every column (default 1).
#
# Returns:
#   A function(column_name) that ignores its argument and returns `precision`.
#   It is suitable as the precision_func argument of writeCsv.
SamePrecision = function(precision = 1) {
  # Evaluate the argument now.  Without this the closure holds a lazy promise,
  # and a caller that reassigns its variable before the first call would see
  # the new value instead of the one it passed in.
  force(precision)
  function(column_name) {
    precision
  }
}
# Precision by column.
#
# Args:
#   precision_map: named list (or named vector) of column name -> precision.
#   default: precision returned for columns that are not in precision_map.
#
# Returns:
#   A function(column_name) that returns the precision for that column.
#   It is suitable as the precision_func argument of writeCsv.
ColumnPrecision = function(precision_map, default = 1) {
  # Evaluate both arguments now so the closure does not hold lazy promises
  # that could pick up later changes to the caller's variables.
  force(precision_map)
  force(default)
  function(column_name) {
    # Check membership first: `[[` on a named atomic vector raises
    # "subscript out of bounds" for a missing name, whereas a list returns
    # NULL.  Looking the name up explicitly makes both kinds of map work.
    p = NULL
    if (column_name %in% names(precision_map)) {
      p = precision_map[[column_name]]
    }
    if (is.null(p)) {
      default
    } else {
      p
    }
  }
}
# Write a CSV file along with a schema.
writeCsv = function(table, prefix) {
#
# precision_func: function(column name) -> precision; NULL means 1 for every column
writeCsv = function(table, prefix, precision_func = NULL) {
data_out_path = paste0(prefix, '.csv')
write.csv(table, data_out_path, row.names = F)
fieldType = function(field_name) { typeof(table[[field_name]]) }
getFieldType = function(field_name) { typeof(table[[field_name]]) }
types_list = lapply(names(table), fieldType)
types = as.character(types_list)
if (is.null(precision_func)) {
precision_func = function(column_name) { 1 }
}
types_list = lapply(names(table), getFieldType)
precision_list = lapply(names(table), precision_func)
print(precision_list)
schema = data_frame(
column_name = names(table),
type = types
type = as.character(types_list),
precision = as.character(precision_list)
)
schema_out_path = paste0(prefix, '.schema.csv')
write.csv(schema, schema_out_path, row.names = F)
@@ -196,8 +223,12 @@ ParserReport = function(in_dir, out_dir) {
print(raw_data_table)
writeCsv(raw_data_table, file.path(out_dir, 'raw-data'))
writeCsv(shell_summary, file.path(out_dir, 'summary'))
writeCsv(elapsed, file.path(out_dir, 'elapsed'))
precision = ColumnPrecision(list(total_ms = 0)) # round to nearest millisecond
writeCsv(shell_summary, file.path(out_dir, 'summary'), precision)
precision = SamePrecision(0) # round to nearest millisecond
writeCsv(elapsed, file.path(out_dir, 'elapsed'), precision)
writeCsv(rate, file.path(out_dir, 'rate'))
writeCsv(vm_table, file.path(out_dir, 'virtual-memory'))
@@ -295,7 +326,9 @@ RuntimeReport = function(in_dir, out_dir) {
print(vm)
WriteDetails(distinct_hosts, distinct_shells, out_dir)
writeCsv(times, file.path(out_dir, 'times'))
precision = ColumnPrecision(list(bash = 0, dash = 0, osh = 0))
writeCsv(times, file.path(out_dir, 'times'), precision)
writeCsv(vm, file.path(out_dir, 'virtual-memory'))
Log('Wrote %s', out_dir)
@@ -339,8 +372,10 @@ OheapReport = function(in_dir, out_dir) {
print(ratios)
writeCsv(sizes, file.path(out_dir, 'encoding_size'))
writeCsv(ratios, file.path(out_dir, 'encoding_ratios'))
precision = SamePrecision(0)
writeCsv(sizes, file.path(out_dir, 'encoding_size'), precision)
precision = SamePrecision(2)
writeCsv(ratios, file.path(out_dir, 'encoding_ratios'), precision)
Log('Wrote %s', out_dir)
}
View
@@ -7,6 +7,7 @@ set -o nounset
set -o pipefail
set -o errexit
source test/common.sh # log
# TODO: Move stuff from osh-parser.sh, osh-runtime.sh, etc.
#
@@ -75,4 +76,13 @@ oheap() {
stage3 $base_dir
}
# For viewing during development: write _tmp/benchmarks.html, a list of links
# to each benchmark's index.html (hrefs are relative, e.g. osh-parser/index.html).
dev-index() {
  local out=_tmp/benchmarks.html
  local name  # keep the loop variable from leaking into the global scope

  for name in osh-parser osh-runtime vm-baseline oheap; do
    echo "<a href=\"$name/index.html\">$name</a> <br/>"
  done > "$out"  # one redirect for the whole loop, so the file is rewritten each run

  log "Wrote $out"
}
"$@"
View
@@ -172,7 +172,7 @@ print re.sub(
'
}
# TODO: It would be nice to column of bugs fixed / addressed!
# TODO: It would be nice to have a column of bugs fixed / addressed!
_git-changelog-body() {
local prev_branch=$1
@@ -335,6 +335,8 @@ compress() {
time zip -r -q $out . # recursive, quiet
popd
# TODO: _tmp/*/raw/ shouldn't be included. Raw data is in ../benchmarks-data.
log "--- benchmarks/osh-parser"
local out="$root/benchmarks/osh-parser.wwz"
pushd _tmp/osh-parser/
View
@@ -44,11 +44,33 @@ EOF
./csv2html.py _tmp/bar.csv
}
# Render a CSV whose schema file has a 'precision' column, giving the 'age'
# column a precision of 3.
test-precision() {
  local csv=_tmp/prec.csv
  local schema=_tmp/prec.schema.csv

  # Data: one string column and one floating point column.
  cat > "$csv" <<'EOF'
name,age
andy,1.2345
bob,2.3456789
EOF

  # Schema rows are listed in the same order as the CSV header here.
  # type: could be html-anchor:shell-id, html-href:shell-id
  cat > "$schema" <<'EOF'
column_name,type,precision
name,string,1
age,double,3
EOF

  # Only the data path is passed; presumably csv2html.py locates the sibling
  # .schema.csv file itself -- TODO confirm.
  ./csv2html.py "$csv"
}
# Entry point: with arguments, run them as a command (e.g. a single test
# function); with none, run every test in order, separated by '--' lines.
if [ "$#" -gt 0 ]; then
  "$@"
else
  test-no-schema
  echo '--'
  test-schema
  echo '--'
  test-precision
fi
View
@@ -74,11 +74,31 @@ def ColumnIndexHasHref(self, index):
class Schema:
def __init__(self, rows):
schema_col_names = rows[0]
assert schema_col_names[0] == 'column_name', schema_col_names[0]
assert schema_col_names[1] == 'type', schema_col_names[1]
body = rows[1:]
self.type_lookup = dict((row[0], row[1]) for row in body)
assert 'column_name' in schema_col_names, schema_col_names
assert 'type' in schema_col_names, schema_col_names
# Schema columns
s_cols = {}
s_cols['column_name'] = []
s_cols['type'] = []
s_cols['precision'] = []
for row in rows[1:]:
for i, cell in enumerate(row):
name = schema_col_names[i]
s_cols[name].append(cell)
self.type_lookup = dict(
(name, t) for (name, t) in
zip(s_cols['column_name'], s_cols['type']))
# NOTE: it's OK if precision is missing.
self.precision_lookup = dict(
(name, p) for (name, p) in
zip(s_cols['column_name'], s_cols['precision']))
#log('SCHEMA %s', schema_col_names)
#log('type_lookup %s', self.type_lookup)
#log('precision_lookup %s', self.precision_lookup)
self.col_names = None
self.col_has_href = None
@@ -114,6 +134,10 @@ def ColumnIndexHasHref(self, index):
Is the next one?
"""
return self.col_has_href[index]
def ColumnPrecision(self, index):
col_name = self.col_names[index]
return self.precision_lookup.get(col_name, 1) # default is arbitrary
def PrintRow(row, schema):
@@ -152,8 +176,9 @@ def PrintRow(row, schema):
except ValueError:
pass # NA
else:
# commas AND floating point
cell_str = '{:,.1f}'.format(cell_float)
# commas AND floating point to a given precision
precision = schema.ColumnPrecision(i)
cell_str = '{0:,.{precision}f}'.format(cell_float, precision=precision)
# Percentage
#cell_str = '{:.1f}%'.format(cell_float * 100)

0 comments on commit 6a4683b

Please sign in to comment.