-
Notifications
You must be signed in to change notification settings - Fork 298
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Import revamped nix benchmark framework
The nix code for running benchmarks is now imported in-tree. This makes it easy to automatically run benchmarks and also generate the fancy R visualizations. See README.md for example usage. The visualizations are now generated as PNG files instead of an Rmarkdown document. This is to make it easier to introduce new visualizations, or variations of existing ones, and easier to load large images for zooming. Under the hood the nix code is also updated so that each test run is a separate derivation. This means that nix is able to parallelize the test runs (one execution of each benchmark with one raptorjit version) and distribute them between machines. This should make the tests run faster on the Hydra CI cluster and also avoid tying up whole servers with hours-long derivations that run hundreds of test runs at the same time. (It also allows you to parallelize test runs on the local machine to use multiple cores.)
- Loading branch information
Showing 4 changed files with 210 additions and 0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
# R subroutines for reading and visualizing benchmark results. | ||
|
||
suppressPackageStartupMessages({ | ||
library(dplyr) | ||
library(ggplot2) | ||
}) | ||
|
||
## Read benchmark results from a CSV file and annotate them with
## per-benchmark relative performance.
##
## The CSV is expected to have at least the columns `letter`, `benchmark`,
## `version`, and `cycles` (as produced by the nix benchmark derivations).
## Returns the input data with two added columns:
##   baseline - mean cycles of the "A" (baseline) version for that benchmark
##   relative - baseline / cycles, so > 1 means faster than the baseline
bench.read <- function(filename) {
  data <- read.csv(filename)
  ## baseline is the mean performance of the "A" version
  baseline <- data %>%
    filter(letter == "A") %>%
    group_by(benchmark) %>%
    summarize(baseline = mean(cycles))
  ## Add 'relative' performance column: compared to mean from baseline branch.
  ## The joined `baseline` column is constant within each benchmark, so
  ## first() just picks that per-group constant.
  data %>%
    left_join(baseline, by = "benchmark") %>%
    group_by(benchmark, version) %>%
    mutate(relative = first(baseline) / cycles)
}
|
||
## Jitter plot of relative performance, one facet per benchmark.
## Each test run contributes one (jittered) point, so the spread of
## results for every version is visible at a glance.
bench.jitterplot <- function(data) {
  ggplot(data, aes(y = relative, x = version, color = version)) +
    geom_jitter(shape = 1, alpha = 0.5) +
    scale_y_continuous(breaks = seq(0, 3, 0.1), labels = scales::percent) +
    facet_wrap(~ benchmark, scales = "free_x") +
    theme(aspect.ratio = 1,
          axis.text.x = element_text(angle = 90)) +
    labs(y = "Performance relative to baseline average",
         title = "Comparative performance between RaptorJIT versions")
}
|
||
## ECDF plot of relative performance, one facet per benchmark.
##
## x is the relative-performance value and y is the empirical cumulative
## distribution (the proportion of results at or below x). The original
## code had the two axis labels swapped relative to the mapped aesthetics,
## and described the ECDF as "at or above" when stat_ecdf computes the
## proportion at or *below* each value; both are fixed here.
bench.ecdfplot <- function(data) {
  ggplot(aes(x = relative, color = version), data = data) +
    stat_ecdf() +
    scale_x_continuous(labels = scales::percent) +
    ## Log scale spreads out the tail of the distribution.
    scale_y_log10(labels = scales::percent) +
    theme(aspect.ratio = 1) +
    theme(axis.text.x = element_text(angle = 90)) +
    xlab("Performance relative to baseline average") +
    ylab("Percentage of results at or below this performance level") +
    ggtitle("Comparative performance between RaptorJIT variants") +
    facet_wrap(~ benchmark)
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
# Run a large parallel benchmark campaign and generate R/ggplot2 reports.
#
# Compares up to five RaptorJIT versions (A..E). Version A is mandatory
# and serves as the baseline; B..E are optional (enabled when their *src
# argument is non-null). Each (version, run) pair is built as a separate
# derivation so that nix can parallelize the runs and distribute them
# across build machines.

{ pkgs ? (import ../../pkgs.nix) {},
  # Per-version source tree, display name, and extra raptorjit CLI args.
  Asrc, Aname ? "A", Aargs ? "",
  Bsrc ? null, Bname ? "B", Bargs ? "",
  Csrc ? null, Cname ? "C", Cargs ? "",
  Dsrc ? null, Dname ? "D", Dargs ? "",
  Esrc ? null, Ename ? "E", Eargs ? "",
  # Optional requiredSystemFeatures tag to pin all runs to one machine class.
  hardware ? null,
  # Number of repetitions of the full benchmark suite per version.
  runs ? 30 }:

with pkgs;
with stdenv;

# Derivation to run benchmarks and produce a CSV result.
# One derivation per (version, run); emits $out/bench.csv rows of the form
#   letter,version,benchmark,run,instructions,cycles
let benchmark = letter: name: src: args: run:
  let raptorjit = (import src {inherit pkgs; version = name;}).raptorjit; in
  mkDerivation {
    name = "benchmark-${name}-${toString run}";
    src = pkgs.lib.cleanSource ./.;
    # Force consistent hardware
    requiredSystemFeatures = if hardware != null then [hardware] else [];
    buildInputs = [ raptorjit linuxPackages.perf utillinux ];
    buildPhase = ''
      # Run multiple iterations of the benchmarks
      echo "Run $run"
      mkdir -p result/$run
      # Run each individual benchmark
      cat PARAM_x86_CI.txt |
      (while read benchmark params; do
         echo "running $benchmark"
         # Execute with performance monitoring & time supervision
         # Note: discard stdout due to overwhelming output
         # The run number seeds the PRNG so repeated runs are not identical;
         # a hung benchmark is killed after 60s and its perf log discarded.
         timeout -sKILL 60 \
           perf stat -x, -o result/$run/$benchmark.perf \
           raptorjit ${args} -e "math.randomseed(${toString run})" $benchmark.lua $params \
           > /dev/null || \
           rm result/$run/$benchmark.perf
       done)
    '';
    installPhase = ''
      # Copy the raw perf output for reference
      cp -r result $out
      # Log the exact CPU
      lscpu > $out/cpu.txt
      # Create a CSV file
      # Create the rows based on the perf logs
      # NOTE(review): buildPhase writes logs under result/$run/*.perf but this
      # glob scans result/*.perf (one directory up) — confirm the loop actually
      # finds the logs, or whether it should be result/$run/*.perf.
      for result in result/*.perf; do
        version=${name}
        benchmark=$(basename -s.perf -a $result)
        # perf stat -x, emits CSV: field 1 = value, field 3 = event name.
        instructions=$(awk -F, -e '$3 == "instructions" { print $1; }' $result)
        cycles=$(      awk -F, -e '$3 == "cycles"       { print $1; }' $result)
        echo ${letter},$version,$benchmark,${toString run},$instructions,$cycles >> $out/bench.csv
      done
    '';
  };

  # Run a set of benchmarks and aggregate the results into a CSV file.
  # Each benchmark run is a separate derivation. This allows nix to
  # parallelize and distribute the benchmarking.
  benchmarkSet = letter: name: src: args:
    let benchmarks = map (benchmark letter name src args) (pkgs.lib.range 1 runs);
    in
      runCommand "benchmarks-${name}" { buildInputs = benchmarks; } ''
        source $stdenv/setup
        mkdir -p $out
        # Concatenate the per-run CSV fragments in run order.
        for dir in ${pkgs.lib.fold (acc: x: "${acc} ${x}") "" benchmarks}; do
          cat $dir/bench.csv >> $out/bench.csv
        done
      '';

  # Baseline set is always built; optional sets collapse to "" when their
  # source is not given, which interpolates to an empty string below.
  benchA = (benchmarkSet "A" Aname Asrc Aargs);
  benchB = if Bsrc != null then (benchmarkSet "B" Bname Bsrc Bargs) else "";
  benchC = if Csrc != null then (benchmarkSet "C" Cname Csrc Cargs) else "";
  benchD = if Dsrc != null then (benchmarkSet "D" Dname Dsrc Dargs) else "";
  benchE = if Esrc != null then (benchmarkSet "E" Ename Esrc Eargs) else "";
in

rec {
  # Merge all per-version CSVs, render the PNG visualizations with
  # generate.R, and register Hydra build products for CSV and PNGs.
  benchmarkResults = mkDerivation {
    name = "benchmark-results";
    buildInputs = with pkgs.rPackages; [ pkgs.R ggplot2 dplyr ];
    # NOTE(review): the builder is created with writeText, which is not
    # executable, and is named "builder.csv" though it is a shell script —
    # confirm this executes (writeScript / "builder.sh" would be expected).
    builder = pkgs.writeText "builder.csv" ''
      source $stdenv/setup
      # Get the CSV file
      mkdir -p $out/nix-support
      echo "letter,version,benchmark,run,instructions,cycles" > bench.csv
      cat ${benchA}/bench.csv >> bench.csv
      # NOTE(review): with `set -e` in effect, a failed `[ -n "" ]` test in an
      # `&& ` list aborts the build when a version is disabled — verify these
      # lines behave as intended (e.g. vs. `if ...; then ...; fi`).
      [ -n "${benchB}" ] && cat ${benchB}/bench.csv >> bench.csv
      [ -n "${benchC}" ] && cat ${benchC}/bench.csv >> bench.csv
      [ -n "${benchD}" ] && cat ${benchD}/bench.csv >> bench.csv
      [ -n "${benchE}" ] && cat ${benchE}/bench.csv >> bench.csv
      cp bench.csv $out
      echo "file CSV $out/bench.csv" >> $out/nix-support/hydra-build-products
      # Generate the report
      (cd ${./.}; Rscript ./generate.R $out/bench.csv $out)
      for png in $out/*.png; do
        echo "file PNG $png" >> $out/nix-support/hydra-build-products
      done
    '';
  };
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
#!/usr/bin/env nix-shell
#!nix-shell -i Rscript -p R rpkgs.dplyr rpkgs.ggplot2

# R command-line program for making visualizations from benchmark results.
#
# Usage: generate.R <csv> <outdir>
#   <csv>    benchmark results CSV (letter,version,benchmark,run,...)
#   <outdir> directory where the PNG plots are written (created if missing)
#
# Fix: use TRUE instead of the reassignable shorthand T.

# bench.R provides bench.read / bench.jitterplot / bench.ecdfplot.
suppressWarnings(source("bench.R"))

args <- commandArgs(trailingOnly = TRUE)
if (length(args) != 2) {
  message("Usage: generate.R <csv> <outdir>"); quit(status = 1)
}

filename <- args[[1]]
outdir <- args[[2]]

data <- bench.read(filename)
if (!dir.exists(outdir)) { dir.create(outdir, recursive = TRUE) }

# Large square images so details remain legible when zoomed.
ggsave(filename = file.path(outdir, "bench-jitter.png"),
       plot = bench.jitterplot(data),
       width = 12, height = 12)

ggsave(filename = file.path(outdir, "bench-ecdf.png"),
       plot = bench.ecdfplot(data),
       width = 12, height = 12)