View
@@ -29,28 +29,31 @@ import-files() {
sh-one() {
local append_out=$1
local sh=$2
local host=$3
local host_id=$4
local shell_id=$5
local sh_path=$2
local host_name=$3
local host_hash=$4
local shell_hash=$5
local path=$6
echo "--- $sh $path ---"
echo "--- $sh_path $path ---"
local shell_name
shell_name=$(basename $sh_path)
# Can't use array because of set -u bug!!! Only fixed in bash
# 4.4.
extra_args=''
if [[ $sh == */osh ]]; then
if test "$shell_name" = 'osh'; then
extra_args='--ast-format none'
fi
# exit code, time in seconds, host_id, shell_id, path. \0
# exit code, time in seconds, host_hash, shell_hash, path. \0
# would have been nice here!
benchmarks/time.py \
--output $append_out \
--field "$host" --field "$host_id" \
--field "$sh" --field "$shell_id" --field "$path" -- \
"$sh" -n $extra_args "$path" || echo FAILED
--field "$host_name" --field "$host_hash" \
--field "$shell_name" --field "$shell_hash" \
--field "$path" -- \
"$sh_path" -n $extra_args "$path" || echo FAILED
}
import-files() {
@@ -80,7 +83,7 @@ write-sorted-manifest() {
cat $csv
}
# runtime_id, host_id, toolchain_id (which sometimes you don't know)
# runtime_id, host_hash, toolchain_id (which sometimes you don't know)
run() {
local preview=${1:-}
@@ -102,16 +105,16 @@ run() {
local sorted=$SORTED
# Write Header of the CSV file that is appended to.
echo 'status,elapsed_secs,host,host_id,shell,shell_id,path' > $out
echo 'status,elapsed_secs,host_name,host_hash,shell_name,shell_hash,path' > $out
local tmp_dir=_tmp/platform-id/$host
benchmarks/id.sh dump-platform-id $tmp_dir
local tmp_dir=_tmp/host-id/$host
benchmarks/id.sh dump-host-id $tmp_dir
local host_id
host_id=$(benchmarks/id.sh publish-platform-id $tmp_dir)
echo $host $host_id
local host_hash
host_hash=$(benchmarks/id.sh publish-host-id $tmp_dir)
echo $host $host_hash
local shell_id
local shell_hash
#for sh_path in bash dash mksh zsh; do
for sh_path in bash dash mksh zsh bin/osh _bin/osh; do
@@ -121,17 +124,17 @@ run() {
tmp_dir=_tmp/shell-id/$name
benchmarks/id.sh dump-shell-id $sh_path $tmp_dir
shell_id=$(benchmarks/id.sh publish-shell-id $tmp_dir)
shell_hash=$(benchmarks/id.sh publish-shell-id $tmp_dir)
echo "$sh_path ID: $shell_id"
echo "$sh_path ID: $shell_hash"
# TODO: Shell ID should be separate columns?
# It's really shell_version_id?
if ! test -n "$preview"; then
# 20ms for ltmain.sh; 34ms for configure
cat $sorted | xargs -n 1 -- $0 \
sh-one $out $sh_path $host $host_id $shell_id || true
sh-one $out $sh_path $host $host_hash $shell_hash || true
fi
done
@@ -142,74 +145,109 @@ run() {
summarize() {
local out=_tmp/osh-parser/stage1
mkdir -p $out
benchmarks/osh-parser.R $out ../benchmark-data/osh-parser/*.times.csv
# Globs are in lexicographical order, which works for our dates.
local -a m1=(../benchmark-data/osh-parser/flanders.*.times.csv)
local -a m2=(../benchmark-data/osh-parser/lisa.*.times.csv)
# The last one
local -a latest=(${m1[-1]} ${m2[-1]})
benchmarks/osh-parser.R $out "${latest[@]}"
tree $BASE_DIR
}
# TODO:
# - maybe rowspan for hosts: flanders/lisa
# - does that interfere with sorting?
#
# NOTE: not bothering to make it sortable now. Just using the CSS.
_print-report() {
local base_url='../../../web/table'
cat <<EOF
<!DOCTYPE html>
<html>
<head>
<title>OSH Parser Benchmark</title>
<title>OSH Parser Performance</title>
<script type="text/javascript" src="$base_url/table-sort.js"></script>
<link rel="stylesheet" type="text/css" href="$base_url/table-sort.css" />
<style>
td { text-align: right; }
body {
margin: 0 auto;
width: 60em;
}
code { color: green; }
code {
color: green;
}
table {
margin-left: 3em;
font-family: sans-serif;
}
td {
padding: 8px; /* override default of 5px */
}
h3, h4 {
color: darkgreen;
}
/* these two tables are side by side */
#shells, #hosts {
display: inline-block;
vertical-align: top;
}
#home-link {
text-align: right;
}
/* columns */
#osh-ovm, #osh-cpython {
background-color: oldlace;
}
/* rows */
.osh-row {
background-color: oldlace;
}
</style>
</head>
<body>
<h2>OSH Parser Benchmark</h2>
<p id="home-link">
<a href="/">oilshell.org</a>
</p>
<h2>OSH Parser Performance</h2>
<p>We run <code>\$sh -n \$file</code> for various files under various
shells. This means that shell startup time is included in the
elapsed time measurements, but long files are chosen to minimize its
effect.</p>
<h3>Labels</h3>
<!-- TODO:
host ID | host label
[lisa-1234] lisa
[flanders-1234] flanders
shell ID | shell label
[osh-1234] osh-ovm
[osh-abcd] osh-host-cpython
-->
<h3>Summary</h3>
<table id="rate-summary">
EOF
web/table/csv_to_html.py < $BASE_DIR/stage1/rate_summary.csv
web/table/csv2html.py $BASE_DIR/stage1/summary.csv
cat <<EOF
</table>
<h3>Elapsed Time by File and Shell (milliseconds)</h3>
<h3>Shell and Host Details</h3>
EOF
web/table/csv2html.py $BASE_DIR/stage1/shells.csv
web/table/csv2html.py $BASE_DIR/stage1/hosts.csv
cat <<EOF
<h3>Per-File Breakdown</h3>
<table id="elapsed">
<h4>Elapsed Time in milliseconds</h4>
EOF
web/table/csv_to_html.py < $BASE_DIR/stage1/elapsed.csv
web/table/csv2html.py $BASE_DIR/stage1/elapsed.csv
cat <<EOF
</table>
<h3>Parsing Rate by File and Shell (lines/millisecond)</h3>
<table id="rate">
<h4>Parsing Rate in lines/millisecond</h4>
EOF
web/table/csv_to_html.py < $BASE_DIR/stage1/rate.csv
web/table/csv2html.py $BASE_DIR/stage1/rate.csv
cat <<EOF
</table>
</body>
</html>
EOF
View

This file was deleted.

Oops, something went wrong.
View
@@ -0,0 +1,56 @@
#!/bin/bash
#
# Usage:
# ./csv2html-test.sh <function name>
. ~/hg/taste/taste.sh
set -o nounset
set -o pipefail
set -o errexit
# Run csv2html.py on a small two-column CSV without writing a schema file.
#
# Writes _tmp/foo.csv (creating _tmp/ first if needed) and passes it to
# ./csv2html.py.  The last data row has NA in the first column.
test-no-schema() {
  # Under errexit the redirect below aborts the script when _tmp/ is missing.
  mkdir -p _tmp

  cat >_tmp/foo.csv <<EOF
a_number,b
1,2
3,4
NA,4
EOF

  ./csv2html.py _tmp/foo.csv
}
# Run csv2html.py on a CSV that has a schema file next to it.
#
# Writes _tmp/bar.csv and _tmp/bar.schema.csv (creating _tmp/ first if
# needed), then passes only the data file to ./csv2html.py.  The data has a
# name/name_HREF column pair and a num column whose last value is NA.
test-schema() {
  # Under errexit the redirects below abort the script when _tmp/ is missing.
  mkdir -p _tmp

  cat >_tmp/bar.csv <<EOF
name,name_HREF,num
spam,#spam,11
eggs,#eggs,22
ham,#ham,99
xxx,#xxx,123456
zzz,#zzz,NA
EOF

  # NOTE: Columns are out of order, which is OK.
  # type: could be html-anchor:shell-id, html-href:shell-id
  cat >_tmp/bar.schema.csv <<EOF
column_name,type
num,integer
name,string
name_HREF,string
EOF

  ./csv2html.py _tmp/bar.csv
}
# Entry point: with no arguments run both tests, separated by a '--' line;
# otherwise treat the arguments as a function name plus its arguments.
case $# in
  0)
    test-no-schema
    echo '--'
    test-schema
    ;;
  *)
    "$@"
    ;;
esac
View
@@ -0,0 +1,311 @@
#!/usr/bin/python
"""
csv2html.py
Usage:
csv2html.py foo.csv
Reads column types from foo.schema.csv, or from the file named by --schema.
If foo.schema.csv does not exist, every column is treated as a string.
Things it handles:
- table-sort.js integration <colgroup>
- <table id="foo"> for making columns sortable
- for choosing the comparator to use!
- for highlighting on sort
- static / visual
- Aligning right for number, left for strings.
- highlighting NA numbers in red (only if it's considered a number)
- formatting numbers to a certain precision
- or displaying them as percentages
- changing CSV headers like 'elapsed_ms' to 'elapsed ms'
- Accepting a column with a '_HREF' suffix to make an HTML link
- We could have something like type:
string/anchor:shell-id
string/href:shell-id
- But the simple _HREF suffix is simpler. Easier to write R code for.
Implementation notes:
- To align right: need a class on every cell, e.g. "num". Can't do it through
<colgroup>.
- To color, can use <colgroup>. table-sort.js needs this.
TODO:
Does it make sense to implement <rowspan> and <colspan> ? It's nice for
visualization.
"""
import cgi
import csv
import optparse
import os
import sys
def log(msg, *args):
if args:
msg = msg % args
print >>sys.stderr, msg
class NullSchema:
  """Stand-in used when there is no schema file.

  It accepts any column names and answers False to every question, so no
  column is reported as numeric, integer, or linked.
  """

  def VerifyColumnNames(self, col_names):
    """Accept any column names; there is no schema to check them against."""
    return None

  def IsNumeric(self, col_name):
    """Always False: no column is numeric."""
    return False

  def ColumnIndexIsNumeric(self, index):
    """Always False: no column is numeric."""
    return False

  def ColumnIndexIsInteger(self, index):
    """Always False: no column is an integer."""
    return False

  def ColumnIndexHasHref(self, index):
    """Always False: no column is reported as having an _HREF companion."""
    return False
# Schema types that PrintRow formats as whole numbers.
INTEGER_TYPES = ('integer',)

# Every schema type that counts as a number: for sorting, right-justification.
NUMERIC_TYPES = ('double', 'number') + INTEGER_TYPES


class Schema:
  """Column types read from a schema CSV whose header is column_name,type."""

  def __init__(self, rows):
    """
    Args:
      rows: Every row of the schema CSV, header first.  Each later row is
        [column name, type, ...]; only the first two fields are used.
    """
    header, entries = rows[0], rows[1:]
    assert header[0] == 'column_name', header[0]
    assert header[1] == 'type', header[1]

    self.type_lookup = {entry[0]: entry[1] for entry in entries}

    # Both are filled in by VerifyColumnNames().
    self.col_names = None
    self.col_has_href = None

  def VerifyColumnNames(self, col_names):
    """Assert that the column names we got are all in the schema.

    Also records the names, and which columns are immediately followed by
    their '<name>_HREF' companion column.

    Raises:
      KeyError: A column name has no entry in the schema.
    """
    for name in col_names:
      log('%s : %s', name, self.type_lookup[name])

    flags = [False] * len(col_names)
    for i, (this_name, next_name) in enumerate(zip(col_names, col_names[1:])):
      if next_name == this_name + '_HREF':
        flags[i] = True
    self.col_has_href = flags

    log('href: %s', self.col_has_href)
    self.col_names = col_names

  def IsNumeric(self, col_name):
    """True if the schema type of the named column is in NUMERIC_TYPES."""
    col_type = self.type_lookup[col_name]
    return col_type in NUMERIC_TYPES

  def ColumnIndexIsNumeric(self, index):
    """IsNumeric() for the column at this position in the data CSV."""
    return self.IsNumeric(self.col_names[index])

  def ColumnIndexIsInteger(self, index):
    """True if the schema type of the column at this position is an integer."""
    col_type = self.type_lookup[self.col_names[index]]
    return col_type in INTEGER_TYPES

  def ColumnIndexHasHref(self, index):
    """True if the next column is this column's '_HREF' companion."""
    return self.col_has_href[index]
def PrintRow(row, schema):
"""Print a CSV row as HTML, using the given formatting.
Returns:
An array of booleans indicating whether each cell is a number.
"""
i = 0
n = len(row)
while True:
if i == n:
break
cell = row[i]
css_classes = []
cell_str = cell # by default, we don't touch it
if schema.ColumnIndexIsInteger(i):
css_classes.append('num') # right justify
try:
cell_int = int(cell)
except ValueError:
pass # NA?
else:
# commas AND floating point
cell_str = '{:,}'.format(cell_int)
# Look up by index now?
elif schema.ColumnIndexIsNumeric(i):
css_classes.append('num') # right justify
try:
cell_float = float(cell)
except ValueError:
pass # NA
else:
# commas AND floating point
cell_str = '{:,.1f}'.format(cell_float)
# Percentage
#cell_str = '{:.1f}%'.format(cell_float * 100)
# Special CSS class for R NA values.
if cell.strip() == 'NA':
css_classes.append('na') # make it red
if css_classes:
print ' <td class="{}">'.format(' '.join(css_classes)),
else:
print ' <td>',
# Advance to next row if it's an _HREF.
if schema.ColumnIndexHasHref(i):
i += 1
href = row[i]
s = '<a href="%s">%s</a>' % (cgi.escape(href), cgi.escape(cell_str))
else:
s = cgi.escape(cell_str)
print s,
print '</td>'
i += 1
def PrintColGroup(col_names, schema):
"""Print HTML colgroup element, used for JavaScript sorting."""
print ' <colgroup>'
for i, col in enumerate(col_names):
if col.endswith('_HREF'):
continue
# CSS class is used for sorting
if schema.IsNumeric(col):
css_class = 'number'
else:
css_class = 'case-insensitive'
# NOTE: id is a comment only; not used
print ' <col id="{}" type="{}" />'.format(col, css_class)
print ' </colgroup>'
def PrintTable(css_id, schema, col_names, rows):
print '<table id="%s">' % css_id
print ' <thead>'
print ' <tr>'
for i, col in enumerate(col_names):
if col.endswith('_HREF'):
continue
heading_str = cgi.escape(col.replace('_', ' '))
if schema.ColumnIndexIsNumeric(i):
print ' <td class="num">%s</td>' % heading_str
else:
print ' <td>%s</td>' % heading_str
print ' </tr>'
print ' </thead>'
print ' <tbody>'
for row in rows:
# TODO: There should be a special column called CSS_CLASS. Output that
# from R.
for cell in row:
if cell.startswith('osh'):
row_class = 'class="osh-row"'
break
else:
row_class = ''
print ' <tr {}>'.format(row_class)
PrintRow(row, schema)
print ' </tr>'
print ' </tbody>'
PrintColGroup(col_names, schema)
print '</table>'
def ReadCsv(f):
  """Read the CSV file, returning the column names and rows.

  Args:
    f: Open file, or any iterable of lines accepted by csv.reader.

  Returns:
    (col_names, rows): the first row as the header and the remaining rows as
    data.  Both are empty lists when the input has no rows at all.
  """
  reader = csv.reader(f)
  # The first row of the CSV is assumed to be a header. The rest are data.
  col_names = next(reader, [])
  rows = list(reader)
  return col_names, rows
def CreateOptionsParser():
  """Build the command line parser; the only option is --schema PATH."""
  parser = optparse.OptionParser()

  # We are taking a path, and not using stdin, because we read it twice.
  parser.add_option(
      '--schema',
      dest='schema',
      metavar="PATH",
      type='str',
      help='Path to the schema.')

  return parser
def main(argv):
  """Print the CSV file named on the command line as an HTML table.

  Column types come from the file given by --schema, or else from
  '<name>.schema.csv' next to '<name>.csv'.  The latter may be absent, in
  which case a NullSchema is used.

  Args:
    argv: Full argument vector, program name first.

  Raises:
    RuntimeError: No CSV path was given, or the --schema file can't be
      opened.
  """
  (opts, argv) = CreateOptionsParser().parse_args(argv[1:])
  try:
    csv_path = argv[0]
  except IndexError:
    raise RuntimeError('Expected CSV filename.')

  if opts.schema:
    try:
      schema_f = open(opts.schema)
    except IOError as e:
      # Report the I/O error itself in the message.
      raise RuntimeError('Error opening %s: %s' % (opts.schema, e))
  else:
    # foo.csv -> foo.schema.csv.  Only the final extension is swapped, so a
    # '.csv' elsewhere in the path is left alone, and a path without '.csv'
    # never resolves to the data file itself.
    root, _ = os.path.splitext(csv_path)
    schema_path = root + '.schema.csv'
    log('schema path %s', schema_path)
    try:
      schema_f = open(schema_path)
    except IOError:
      schema_f = None  # allowed to have no schema

  if schema_f:
    with schema_f:  # close the schema file once it has been read
      schema = Schema(list(csv.reader(schema_f)))
  else:
    schema = NullSchema()  # Default string schema

  log('schema %s', schema)

  with open(csv_path) as f:
    col_names, rows = ReadCsv(f)

  schema.VerifyColumnNames(col_names)

  # The table id is the CSV file name without directory or extension.
  filename = os.path.basename(csv_path)
  css_id, _ = os.path.splitext(filename)
  PrintTable(css_id, schema, col_names, rows)
# Script entry point: a RuntimeError from main() becomes a one-line FATAL
# message on stderr and exit status 1.
if __name__ == '__main__':
  try:
    main(sys.argv)
  except RuntimeError as e:
    log('FATAL: %s', e)
    sys.exit(1)
View

This file was deleted.

Oops, something went wrong.
View

This file was deleted.

Oops, something went wrong.
View
@@ -22,6 +22,10 @@ td {
color: darkred;
}
.num {
text-align: right;
}
.highlight {
background-color: #f0f0f0;
}
@@ -30,3 +34,4 @@ tbody tr:hover {
background-color: lightcyan;
}