Permalink
Browse files

Polish the HTML report produced by benchmarks/osh-parser.sh.

- New csv2html.py that uses a .schema.csv file to determine column types
  and presentation.
- Remove the old csv_to_html.py and its tests.

- Label shells and hosts in R code.  Link to the versioned IDs.
- Rename "platform ID" to "host ID".
  • Loading branch information...
Andy Chu
Andy Chu committed Oct 31, 2017
1 parent 394b260 commit e7e8f19a5753a265670627b9c8c702f953b985b8
View
@@ -35,7 +35,7 @@ set -o errexit
# dash/
# HASH.txt
# version.txt
# platform-id/
# host-id/
# lisa/
# HASH.txt
# cpuinfo.txt
@@ -47,7 +47,7 @@ set -o errexit
# osh-$HASH/ # osh-cpython, osh-ovm? osh-opy-ovm? Too many dimensions.
# # the other shells don't have this?
# zsh-$HASH/
# platform-id/
# host-id/
# lisa-$HASH/
die() {
@@ -158,11 +158,11 @@ publish-shell-id() {
# - kernel upgrade
# - distro upgrade
# How about ~/git/oilshell/benchmark-data/platform-id/lisa-$HASH
# How about ~/git/oilshell/benchmark-data/host-id/lisa-$HASH
# How to calculate the hash though?
dump-platform-id() {
local out_dir=${1:-_tmp/platform-id/$(hostname)}
dump-host-id() {
local out_dir=${1:-_tmp/host-id/$(hostname)}
mkdir -p $out_dir
@@ -192,7 +192,7 @@ dump-platform-id() {
# http://wiki.osdev.org/Target_Triplet
# It's not exactly the same as what we need here, but close.
_platform-id-hash() {
_host-id-hash() {
local src=$1
# Don't hash CPU or memory
@@ -210,13 +210,13 @@ _platform-id-hash() {
}
# Writes a short ID to stdout.
publish-platform-id() {
local src=$1 # e.g. _tmp/platform-id/lisa
local dest_base=${2:-../benchmark-data/platform-id}
publish-host-id() {
local src=$1 # e.g. _tmp/host-id/lisa
local dest_base=${2:-../benchmark-data/host-id}
local name=$(basename $src)
local hash
hash=$(_platform-id-hash $src | md5sum) # not secure, an identifier
hash=$(_host-id-hash $src | md5sum) # not secure, an identifier
local id="${hash:0:8}"
local dest="$dest_base/$name-$id"
View
@@ -7,6 +7,7 @@
library(dplyr)
library(tidyr)
library(stringr)
options(stringsAsFactors = F)
@@ -15,9 +16,14 @@ Log = function(fmt, ...) {
cat('\n')
}
# Build the GitHub URL for viewing a repo-relative source path on the master
# branch of oilshell/oil.  Vectorized over `path` via sprintf().
sourceUrl = function(path) {
  url_format = 'https://github.com/oilshell/oil/blob/master/%s'
  sprintf(url_format, path)
}
main = function(argv) {
out_dir = argv[[1]]
# Merge all the inputs
hosts = list()
for (i in 2:length(argv)) {
times_path = argv[[i]]
@@ -45,32 +51,74 @@ main = function(argv) {
hosts[[i-1]] = host_rows
}
all_times = bind_rows(hosts)
print(all_times)
print(summary(all_times))
#
# Find distinct shells and hosts, and label them for readability.
#
all_times %>% distinct(platform_id) -> distinct_hosts
all_times %>% distinct(host_name, host_hash) -> distinct_hosts
# Just use the name
distinct_hosts$host_label = distinct_hosts$host_name
print(distinct_hosts)
all_times %>% distinct(shell_id) -> distinct_shells
all_times %>% distinct(shell_name, shell_hash) -> distinct_shells
print(distinct_shells)
return()
distinct_shells$shell_label = NA # the column we fill in below
Log('Labeling shells')
for (i in 1:nrow(distinct_shells)) {
row = distinct_shells[i, ]
if (row$shell_name == 'osh') {
path = sprintf('../benchmark-data/shell-id/osh-%s/osh-version.txt',
row$shell_hash)
Log('Reading %s', path)
lines = readLines(path)
if (length(grep('OVM', lines)) > 0) {
label = 'osh-ovm'
} else if (length(grep('CPython', lines)) > 0) {
label = 'osh-cpython'
}
} else { # same name for other shells
label = row$shell_name
}
distinct_shells[i, ]$shell_label = label
}
print(distinct_shells)
# Replace name/hash combinations with labels.
all_times %>%
left_join(distinct_hosts, by = c('host_name', 'host_hash')) %>%
left_join(distinct_shells, by = c('shell_name', 'shell_hash')) %>%
select(-c(host_name, host_hash, shell_name, shell_hash)) ->
all_times
print(summary(all_times))
print(head(all_times))
# Summarize rates by host/shell
all_times %>%
group_by(shell_id, platform_id) %>%
group_by(host_label, shell_label) %>%
summarize(total_lines = sum(num_lines), total_ms = sum(elapsed_ms)) %>%
mutate(lines_per_ms = total_lines / total_ms) ->
rate_summary
shell_summary
print(rate_summary)
print(shell_summary)
# Elapsed milliseconds for each shell by host and file
all_times %>%
select(-c(lines_per_ms)) %>%
spread(key = shell_id, value = elapsed_ms) %>%
arrange(platform_id, num_lines) ->
spread(key = shell_label, value = elapsed_ms) %>%
arrange(host_label, num_lines) %>%
mutate(filename = basename(path), filename_HREF = sourceUrl(path)) %>%
select(c(host_label, bash, dash, mksh, zsh, `osh-ovm`, `osh-cpython`,
num_lines, filename, filename_HREF)) ->
elapsed
#select(c(bash, dash, mksh, zsh, osh, num_lines, path)) ->
Log('\n')
Log('ELAPSED')
@@ -79,25 +127,65 @@ main = function(argv) {
# Rates by file and shell
all_times %>%
select(-c(elapsed_ms)) %>%
spread(key = shell_id, value = lines_per_ms) %>%
arrange(platform_id, num_lines) ->
spread(key = shell_label, value = lines_per_ms) %>%
arrange(host_label, num_lines) %>%
mutate(filename = basename(path), filename_HREF = sourceUrl(path)) %>%
select(c(host_label, bash, dash, mksh, zsh, `osh-ovm`, `osh-cpython`,
num_lines, filename, filename_HREF)) ->
rate
#select(c(bash, dash, mksh, zsh, osh, num_lines, path)) ->
Log('\n')
Log('RATE')
print(rate)
write.csv(rate_summary,
file.path(out_dir, 'rate_summary.csv'), row.names = F)
write.csv(elapsed, file.path(out_dir, 'elapsed.csv'), row.names = F)
write.csv(rate, file.path(out_dir, 'rate.csv'), row.names = F)
# Should be:
# host_id_url
# And then csv_to_html will be smart enough? It should take --url flag?
host_table = data_frame(
host_label = distinct_hosts$host_label,
host_id = paste(distinct_hosts$host_name,
distinct_hosts$host_hash, sep='-'),
host_id_HREF = sprintf('../../../../benchmark-data/host-id/%s', host_id)
)
print(host_table)
shell_table = data_frame(
shell_label = distinct_shells$shell_label,
shell_id = paste(distinct_shells$shell_name,
distinct_shells$shell_hash, sep='-'),
shell_id_HREF = sprintf('../../../../benchmark-data/shell-id/%s', shell_id)
)
print(shell_table)
writeCsv(host_table, file.path(out_dir, 'hosts'))
writeCsv(shell_table, file.path(out_dir, 'shells'))
writeCsv(shell_summary, file.path(out_dir, 'summary'))
writeCsv(elapsed, file.path(out_dir, 'elapsed'))
writeCsv(rate, file.path(out_dir, 'rate'))
Log('Wrote %s', out_dir)
Log('PID %d done', Sys.getpid())
}
# Write a table to <prefix>.csv, along with a schema in <prefix>.schema.csv.
#
# The schema file has one row per column of `table`, with two fields:
#   column_name: the name of the column
#   type: the column's storage type as reported by typeof(), e.g.
#         'character', 'integer' or 'double'.  Note that typeof() reports a
#         factor column as 'integer'.
#
# Args:
#   table: data frame (or tibble) to write.
#   prefix: output path without the extension; '.csv' and '.schema.csv' are
#           appended to it.
writeCsv = function(table, prefix) {
  write.csv(table, paste0(prefix, '.csv'), row.names = FALSE)

  column_names = names(table)
  # vapply() checks that every result is a single string, and yields
  # character(0) rather than an empty list for a table with no columns.
  types = vapply(column_names, function(name) typeof(table[[name]]),
                 character(1), USE.NAMES = FALSE)

  # Base data.frame() is enough for a two-column table, and avoids the
  # deprecated data_frame() constructor.
  schema = data.frame(column_name = column_names, type = types,
                      stringsAsFactors = FALSE)
  write.csv(schema, paste0(prefix, '.schema.csv'), row.names = FALSE)
}
if (length(sys.frames()) == 0) {
# increase ggplot font size globally
#theme_set(theme_grey(base_size = 20))
Oops, something went wrong.

0 comments on commit e7e8f19

Please sign in to comment.