Permalink
Browse files

Compress oheap files and make a report on the encoding size.

  • Loading branch information...
Andy Chu
Andy Chu committed Dec 5, 2017
1 parent 07b0119 commit 1edc52d1347e24b937ba6bd6d6ff9f00651cf95f
Showing with 115 additions and 11 deletions.
  1. +67 −11 benchmarks/oheap.sh
  2. +48 −0 benchmarks/report.R
View
@@ -9,6 +9,10 @@ set -o nounset
# Strict mode: a pipeline fails if any of its stages fails, and the script
# exits on the first command that fails.
set -o pipefail
set -o errexit
# Shared helpers.  NOTE(review): presumably this is where log() comes from --
# confirm against test/common.sh.
source test/common.sh
# Directory under which the compressed files, sizes.csv and the stage1 report
# are written by the functions below.
readonly BASE_DIR=_tmp/oheap
encode-one() {
local script=$1
local oheap_out=$2
@@ -17,21 +21,79 @@ encode-one() {
# For each path listed in benchmarks/osh-parser-files.txt, print task lines of
# the form "<script path> <oheap output path>" on stdout.
# NOTE(review): both the ".oheap" and the "__oheap" echo appear in this view,
# which looks like the removed and the added line of a diff shown together.
# Other functions in this script glob for *__oheap -- confirm that only the
# "__oheap" form is meant to remain.
task-spec() {
while read path; do
echo "$path _tmp/oheap/$(basename $path).oheap"
echo "$path _tmp/oheap/$(basename $path)__oheap"
done < benchmarks/osh-parser-files.txt
}
# Encode every task printed by task-spec.
# NOTE(review): this span appears to interleave the removed and the added
# lines of a diff (run -> encode-all, results -> times_csv), so the function
# header, the CSV setup and the time.py invocation each show up twice.
# Confirm against the real file before relying on it.
run() {
encode-all() {
mkdir -p _tmp/oheap
# Start the CSV with its header row.
local results=_tmp/oheap/results.csv
echo 'status,elapsed_secs' > $results
local times_csv=_tmp/oheap/times.csv
echo 'status,elapsed_secs' > $times_csv
# xargs -n 2 passes one "<script> <oheap_out>" pair per invocation, which ends
# up as the arguments of "$0 encode-one" run under benchmarks/time.py.
# Presumably time.py appends a status/elapsed row to the --output file -- confirm.
task-spec | xargs -n 2 --verbose -- \
benchmarks/time.py --output $results -- \
benchmarks/time.py --output $times_csv -- \
$0 encode-one
}
# Out of curiosity, compress oheap and originals.
# Compress every encoded oheap file with both gzip and xz so the compressed
# sizes can be compared.
#
# Reads:  $BASE_DIR/*__oheap
# Writes: $BASE_DIR/oheap-compressed/<name>.gz and <name>.xz
compress-oheap() {
  local c_dir=$BASE_DIR/oheap-compressed
  mkdir -p "$c_dir"

  # Declared separately from the assignment below: 'local name=$(...)' would
  # return the status of 'local' and hide a failing basename from errexit.
  local bin name
  for bin in "$BASE_DIR"/*__oheap; do
    name=$(basename "$bin")
    log "Compressing $name"
    gzip --stdout "$bin" > "$c_dir/$name.gz"
    xz --stdout "$bin" > "$c_dir/$name.xz"
  done
}
# Compress every source file listed in benchmarks/osh-parser-files.txt with
# both gzip and xz, as the text baseline for the oheap comparison.
#
# Writes: $BASE_DIR/src-compressed/<name>__text.gz and <name>__text.xz
compress-text() {
  local c_dir=$BASE_DIR/src-compressed
  mkdir -p "$c_dir"

  # Declared separately from the assignment below: 'local name=$(...)' would
  # return the status of 'local' and hide a failing basename from errexit.
  local src name
  # -r keeps any backslash in a listed path literal.
  while read -r src; do
    name=$(basename "$src")
    log "Compressing $name"
    gzip --stdout "$src" > "$c_dir/${name}__text.gz"
    xz --stdout "$src" > "$c_dir/${name}__text.xz"
  done < benchmarks/osh-parser-files.txt
}
# Print one CSV row "<size in bytes>,<$1>,<$2>,<path>" for each path given.
#
# $1, $2: literal values for the second and third columns
# $3...:  paths to measure
print-size() {
  local format_col=$1
  local compression_col=$2
  shift 2

  local row_fmt="%s,$format_col,$compression_col,%p\n"

  # -maxdepth 0 restricts find to the named paths themselves; it never
  # descends into them.
  find "$@" -maxdepth 0 -printf "$row_fmt"
}
# Print the size table on stdout as CSV: a header row, then one row per file
# for each (format, compression) combination.
print-csv() {
  echo 'num_bytes,format,compression,path'

  # Uncompressed sources come from the same list that compress-text reads, so
  # every "text none" row has matching gz/xz rows for the same file name.
  local src
  while read -r src; do
    print-size text none "$src"
  done < benchmarks/osh-parser-files.txt

  print-size text gz "$BASE_DIR"/src-compressed/*.gz
  print-size text xz "$BASE_DIR"/src-compressed/*.xz

  print-size oheap none "$BASE_DIR"/*__oheap
  print-size oheap gz "$BASE_DIR"/oheap-compressed/*.gz
  print-size oheap xz "$BASE_DIR"/oheap-compressed/*.xz
}
# Write $BASE_DIR/sizes.csv from print-csv, then run benchmarks/report.R with
# the 'oheap' action, $BASE_DIR as input directory and $BASE_DIR/stage1 as
# output directory.
report() {
  local sizes_csv="$BASE_DIR/sizes.csv"
  local stage1_dir="$BASE_DIR/stage1"

  # Creating stage1 also creates $BASE_DIR, which the redirect below needs.
  mkdir -p "$stage1_dir"
  print-csv > "$sizes_csv"

  benchmarks/report.R oheap "$BASE_DIR" "$stage1_dir"
}
# TODO: instead of running osh_demo, we should generate a C++ program that
# visits every node and counts it. The output might look like:
#
@@ -78,10 +140,4 @@ decode() {
done
}
# List _tmp/oheap with human-readable sizes, print a blank line, then dump the
# results CSV.
stats() {
  ls -lh _tmp/oheap
  printf '\n'
  cat _tmp/oheap/results.csv
}
# Dispatch: run the script's arguments as a command, so the first argument
# names the function to call and the remaining ones become its arguments
# (e.g. "$0 encode-one <script> <oheap_out>").
"$@"
View
@@ -291,14 +291,62 @@ RuntimeReport = function(in_dir, out_dir) {
Log('Wrote %s', out_dir)
}
# Strip the directory and everything from the first '__' onward.
#
#   foo/bar/name.sh__oheap -> name.sh
#
# Args:
#   path: character vector of file paths
#
# Returns:
#   Character vector with one file name per element of path.
filenameFromPath = function(path) {
  # https://stackoverflow.com/questions/33683862/first-entry-from-string-split
  # strsplit() returns a list holding one character vector per input path, so
  # [[1]] would select all pieces of the first path rather than the first
  # piece of every path.  Take the first piece of each list element instead.
  parts = strsplit(basename(path), '__', fixed = TRUE)

  # vapply() always yields a character vector, including for zero paths, where
  # sapply() would return an empty list.  An empty basename splits into zero
  # pieces; map that to ''.
  vapply(parts, function(p) if (length(p) == 0) '' else p[[1]], character(1))
}
# Report on encoding size: compare text and oheap, uncompressed and compressed.
#
# Args:
#   in_dir: directory containing sizes.csv, with columns num_bytes, format,
#     compression and path
#   out_dir: directory that receives the 'encoding_size' and
#     'encoding_ratios' tables via writeCsv()
OheapReport = function(in_dir, out_dir) {
  # Read strings as character regardless of the R version or the global
  # stringsAsFactors option: basename() in filenameFromPath() rejects factors.
  sizes = read.csv(file.path(in_dir, 'sizes.csv'), stringsAsFactors = FALSE)

  # One row per file, one column per format_compression pair, in kilobytes
  # (1 kB = 1000 bytes), ordered by uncompressed text size.
  sizes = sizes %>%
    mutate(filename = filenameFromPath(path),
           metric_name = paste(format, compression, sep = '_'),
           kilobytes = num_bytes / 1000) %>%
    select(-c(path, format, compression, num_bytes)) %>%
    spread(key = c(metric_name), value = kilobytes) %>%
    select(c(text_none, text_gz, text_xz, oheap_none, oheap_gz, oheap_xz,
             filename)) %>%
    arrange(text_none)

  print(sizes)

  # Interesting:
  # - oheap is 2-7x bigger uncompressed, and 4-12x bigger compressed.
  # - oheap is less compressible than text!

  # transmute() keeps only the columns named here, so this table has no
  # filename column; its rows are in the same order as 'sizes'.
  ratios = sizes %>%
    transmute(oheap_to_text = oheap_none / text_none,
              xz_text = text_xz / text_none,
              xz_oheap = oheap_xz / oheap_none,
              oheap_to_text_xz = oheap_xz / text_xz)

  print(ratios)

  writeCsv(sizes, file.path(out_dir, 'encoding_size'))
  writeCsv(ratios, file.path(out_dir, 'encoding_ratios'))

  Log('Wrote %s', out_dir)
}
main = function(argv) {
action = argv[[1]]
in_dir = argv[[2]]
out_dir = argv[[3]]
if (action == 'osh-parser') {
ParserReport(in_dir, out_dir)
} else if (action == 'osh-runtime') {
RuntimeReport(in_dir, out_dir)
} else if (action == 'oheap') {
OheapReport(in_dir, out_dir)
} else {
Log("Invalid action '%s'", action)
quit(status = 1)

0 comments on commit 1edc52d

Please sign in to comment.