Import revamped nix benchmark framework
The nix code for running benchmarks is now imported in-tree. This
makes it easy to automatically run benchmarks and also generate the
fancy R visualizations. See README.md for example usage.

The visualizations are now generated as PNG files instead of an
Rmarkdown document. This is to make it easier to introduce new
visualizations, or variations of existing ones, and easier to load
large images for zooming.

Under the hood the nix code is also updated so that each test run is a
separate derivation. This means that nix is able to parallelize the
test runs (one execution of each benchmark with one raptorjit version)
and distribute them between machines. This should make the tests run
faster on the Hydra CI cluster and also avoid tying up whole servers
with hours-long derivations that run hundreds of test runs at the same
time. (It also allows you to parallelize test runs on the local machine
to use multiple cores.)
lukego committed Mar 27, 2017
1 parent 742104f commit 7a35839
Showing 4 changed files with 210 additions and 0 deletions.
33 changes: 33 additions & 0 deletions README.md
@@ -116,6 +116,39 @@ $ make

... but make sure you have at least `make`, `clang`, and `luajit` in your `$PATH`.

### Run the benchmarks

Nix can also run the full benchmark suite and generate visualizations
with R/ggplot2.

The simplest incantation tests one branch:

```shell
$ nix-build testsuite/bench --arg Asrc ./.   # note: ./. is nix syntax for the current directory
```
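
The build output (the `result` symlink) should contain the merged CSV file and
the generated PNG plots. A sketch of what to expect, based on the output layout
defined in `testsuite/bench/default.nix` and `generate.R`:

```shell
$ ls result/
bench.csv  bench-ecdf.png  bench-jitter.png  nix-support
```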

You can also test up to five branches (A-E), give them names, pass extra
command-line arguments to raptorjit, choose how many runs to execute, and
allow parallel execution:

```shell
$ nix-build testsuite/bench \
--arg Asrc ~/git/raptorjit \
--argstr Aname master \
--arg Bsrc ~/git/raptorjit-hack \
--argstr Bname hacked \
--arg Csrc ~/git/raptorjit-hack2 \
--argstr Cname hacked-O1 \
--argstr Cargs -O1 \
--arg runs 100 \
-j 5 # Run up to 5 tests in parallel
```
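
The benchmark derivations also accept an optional `hardware` argument, which is
passed through to `requiredSystemFeatures` so that every run is pinned to build
machines advertising a matching system feature. A sketch (the feature name
`benchmark-box` is purely illustrative):

```shell
$ nix-build testsuite/bench \
--arg Asrc ./. \
--argstr hardware benchmark-box
```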

If you are using a distributed nix environment such
as [Hydra](https://nixos.org/hydra/), then the test runs can be
automatically parallelized and distributed across a suitable build
farm.

### Quotes

Here are some borrowed words to put this branch into context:
49 changes: 49 additions & 0 deletions testsuite/bench/bench.R
@@ -0,0 +1,49 @@
# R subroutines for reading and visualizing benchmark results.

suppressPackageStartupMessages({
library(dplyr)
library(ggplot2)
})

## R library routines for analyzing benchmark results
bench.read <- function(filename) {
data <- read.csv(filename)
## baseline is the mean performance of the "A" version
baseline <- data %>%
filter(letter=="A") %>%
group_by(benchmark) %>%
summarize(baseline = mean(cycles))
## Add 'relative' performance column: compared to mean from baseline branch
relative <- data %>%
left_join(baseline, by="benchmark") %>%
group_by(benchmark, version) %>%
mutate(relative = first(baseline) / cycles)
return(relative)
}

## Jitter plot faceted by benchmark
bench.jitterplot <- function(data) {
ggplot(aes(y=relative, x=version, color=version), data=data) +
geom_jitter(shape=1, alpha=0.5) +
scale_y_continuous(breaks=seq(0, 3, 0.1), labels=scales::percent) +
theme(aspect.ratio = 1) +
theme(axis.text.x = element_text(angle=90)) +
ylab("Performance relative to baseline average") +
ggtitle("Comparative performance between RaptorJIT versions") +
facet_wrap(~ benchmark, scales="free_x")
}

## ECDF plot faceted by benchmark
bench.ecdfplot <- function(data) {
ggplot(aes(x=relative, color=version), data=data) +
stat_ecdf() +
scale_x_continuous(labels=scales::percent) +
scale_y_log10(labels=scales::percent) +
theme(aspect.ratio = 1) +
theme(axis.text.x = element_text(angle=90)) +
ylab("Performance relative to baseline average") +
xlab("Percentage of results at or above this performance level") +
ggtitle("Comparative performance between RaptorJIT variants") +
facet_wrap(~ benchmark)
}

103 changes: 103 additions & 0 deletions testsuite/bench/default.nix
@@ -0,0 +1,103 @@
# Run a large parallel benchmark campaign and generate R/ggplot2 reports.

{ pkgs ? (import ../../pkgs.nix) {},
Asrc, Aname ? "A", Aargs ? "",
Bsrc ? null, Bname ? "B", Bargs ? "",
Csrc ? null, Cname ? "C", Cargs ? "",
Dsrc ? null, Dname ? "D", Dargs ? "",
Esrc ? null, Ename ? "E", Eargs ? "",
hardware ? null,
runs ? 30 }:

with pkgs;
with stdenv;

# Derivation to run benchmarks and produce a CSV result.
let benchmark = letter: name: src: args: run:
let raptorjit = (import src {inherit pkgs; version = name;}).raptorjit; in
mkDerivation {
name = "benchmark-${name}-${toString run}";
src = pkgs.lib.cleanSource ./.;
# Force consistent hardware
requiredSystemFeatures = if hardware != null then [hardware] else [];
buildInputs = [ raptorjit linuxPackages.perf utillinux ];
buildPhase = ''
# Run one iteration of the benchmark suite
echo "Run ${toString run}"
mkdir -p result/${toString run}
# Run each individual benchmark
cat PARAM_x86_CI.txt |
(while read benchmark params; do
echo "running $benchmark"
# Execute with performance monitoring & time supervision
# Note: discard stdout due to overwhelming output
timeout -sKILL 60 \
perf stat -x, -o result/${toString run}/$benchmark.perf \
raptorjit ${args} -e "math.randomseed(${toString run})" $benchmark.lua $params \
> /dev/null || \
rm result/${toString run}/$benchmark.perf
done)
'';
installPhase = ''
# Copy the raw perf output for reference
cp -r result $out
# Log the exact CPU
lscpu > $out/cpu.txt
# Create a CSV file
# Create the rows based on the perf logs
for result in result/*/*.perf; do
version=${name}
benchmark=$(basename -s.perf -a $result)
instructions=$(awk -F, -e '$3 == "instructions" { print $1; }' $result)
cycles=$( awk -F, -e '$3 == "cycles" { print $1; }' $result)
echo ${letter},$version,$benchmark,${toString run},$instructions,$cycles >> $out/bench.csv
done
'';
};

# Run a set of benchmarks and aggregate the results into a CSV file.
# Each benchmark run is a separate derivation. This allows nix to
# parallelize and distribute the benchmarking.
benchmarkSet = letter: name: src: args:
let benchmarks = map (benchmark letter name src args) (pkgs.lib.range 1 runs);
in
runCommand "benchmarks-${name}" { buildInputs = benchmarks; } ''
source $stdenv/setup
mkdir -p $out
for dir in ${pkgs.lib.fold (acc: x: "${acc} ${x}") "" benchmarks}; do
cat $dir/bench.csv >> $out/bench.csv
done
'';

benchA = (benchmarkSet "A" Aname Asrc Aargs);
benchB = if Bsrc != null then (benchmarkSet "B" Bname Bsrc Bargs) else "";
benchC = if Csrc != null then (benchmarkSet "C" Cname Csrc Cargs) else "";
benchD = if Dsrc != null then (benchmarkSet "D" Dname Dsrc Dargs) else "";
benchE = if Esrc != null then (benchmarkSet "E" Ename Esrc Eargs) else "";
in

rec {
benchmarkResults = mkDerivation {
name = "benchmark-results";
buildInputs = with pkgs.rPackages; [ pkgs.R ggplot2 dplyr ];
builder = pkgs.writeText "builder.sh" ''
source $stdenv/setup
# Assemble the merged CSV file
mkdir -p $out/nix-support
echo "letter,version,benchmark,run,instructions,cycles" > bench.csv
cat ${benchA}/bench.csv >> bench.csv
[ -n "${benchB}" ] && cat ${benchB}/bench.csv >> bench.csv
[ -n "${benchC}" ] && cat ${benchC}/bench.csv >> bench.csv
[ -n "${benchD}" ] && cat ${benchD}/bench.csv >> bench.csv
[ -n "${benchE}" ] && cat ${benchE}/bench.csv >> bench.csv
cp bench.csv $out
echo "file CSV $out/bench.csv" >> $out/nix-support/hydra-build-products
# Generate the report
(cd ${./.}; Rscript ./generate.R $out/bench.csv $out)
for png in $out/*.png; do
echo "file PNG $png" >> $out/nix-support/hydra-build-products
done
'';
};
}

25 changes: 25 additions & 0 deletions testsuite/bench/generate.R
@@ -0,0 +1,25 @@
#!/usr/bin/env nix-shell
#!nix-shell -i Rscript -p R rPackages.dplyr rPackages.ggplot2

# R command-line program for making visualizations from benchmark results.

suppressWarnings(source("bench.R"))

args <- commandArgs(trailingOnly=T)
if (length(args) != 2) {
message("Usage: generate.R <csv> <outdir>"); quit(status=1)
}

filename <- args[[1]]
outdir <- args[[2]]

data <- bench.read(filename)
if (!dir.exists(outdir)) { dir.create(outdir, recursive=T) }

ggsave(filename = file.path(outdir,"bench-jitter.png"),
plot = bench.jitterplot(data),
width=12, height=12)

ggsave(filename = file.path(outdir,"bench-ecdf.png"),
plot = bench.ecdfplot(data),
width=12, height=12)
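
Thanks to the `nix-shell` shebang, the script can also be run by hand against an
existing CSV file. A sketch, assuming the script is executable and is run from
its own directory so that it finds `bench.R` (file and directory names are
illustrative):

```shell
$ cd testsuite/bench
$ ./generate.R bench.csv plots/
```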
