diff --git a/.gitignore b/.gitignore index c37ddf5..8542310 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,9 @@ bin/.coursier bin/.scalafmt* results/ +reports/ *.iprof -.idea \ No newline at end of file +.idea +*.pid +jobs.sh +logs/ \ No newline at end of file diff --git a/README.md b/README.md index d200b09..bc560a1 100644 --- a/README.md +++ b/README.md @@ -13,8 +13,89 @@ as-is and summarized in a separate post-processing step. python scripts/run.py ``` -## Viewing result summary +## Creating result summary ``` python scripts/summary.py ``` + +The reports can be viewed in the `reports` directory. + +## Advanced use + +### Comparing specific versions + +You can run just the configurations you are interested in +```bash +scripts/run.py stable latest +``` + +Compare the latest `stable` release vs the `latest` snapshot +```bash +REPORT=$(scripts/summary.py stable latest) +``` + +### Specifying the number of runs, batches, and tests run in parallel +```bash +scripts/run.py --par 4 --runs 50 --batches 3000 stable +# 50 runs (4 in parallel) each with 3000 batches for the stable release. +``` + +These settings impact accuracy, which is why the names of the results folders include them, in this case `scala-native-0.3.8-r50-p40-b3000`. +Note that you can also use `stable-r50-p40-b3000` with `summary.py`. + +### Comparing an experimental feature with latest from master +1. Specify a suffix to identify it +```bash +NAME=PR9001-adding-a-kitchen-sink +SHA1=adcd124eee +``` +2. Run the benchmark and get the summary report + +If you provide a SHA1 or reference, the project in `../scala-native` will be compiled automatically. +```bash +SHA1=adcd124eee +scripts/run.py latest@master +scripts/run.py --suffix "$NAME" latest@"$SHA1" && +REPORT=$(scripts/summary.py --comment "$NAME" latest latest@"$SHA1"_"$NAME") +``` + +Alternatively, you can build the scala-native project from your branch manually and run +```bash +scripts/run.py --suffix "$NAME" latest && +REPORT=$(scripts/summary.py --comment "$NAME" latest latest_"$NAME") +``` + +## Persisting reports +The following commands assume that you have a git repository checked out at `gh-pages` under `../scala-native-benchmark-results`. + +They also assume that there is an executable script `just-upload.sh` in the root of that repository. +```bash +#just-upload.sh + + +#!/bin/bash +# move to the directory of the script +cd $(dirname "$0") + +git add . 
&& +git commit -m "automated commit" && git push +``` + +### saving experiment data +```bash +cp -r results/ ../scala-native-benchmark-results && +../scala-native-benchmark-results/just-upload.sh +``` + +### restoring experiment data +```bash +cp -r ../scala-native-benchmark-results results/ +``` + +### uploading a report +```bash +mkdir -p ../scala-native-benchmark-results/reports +cp -r "$REPORT" ../scala-native-benchmark-results/reports && +../scala-native-benchmark-results/just-upload.sh +``` \ No newline at end of file diff --git a/build.sbt b/build.sbt index 9874e27..ae87f31 100644 --- a/build.sbt +++ b/build.sbt @@ -1 +1,6 @@ scalaVersion := "2.11.12" +enablePlugins(ScalaNativePlugin) +nativeLinkStubs := true +nativeGC := "immix" +nativeMode := "release" +nativeLTO := "thin" diff --git a/confs/jvm/run b/confs/jvm/run index a4d5cce..3700501 100644 --- a/confs/jvm/run +++ b/confs/jvm/run @@ -1 +1 @@ -java -Xmx1024M -Xms1024M -classpath target/scala-2.11/classes:$HOME/.ivy2/cache/org.scala-lang/scala-library/jars/scala-library-2.11.12.jar $BENCH +java $JAVA_ARGS -classpath target/scala-2.11/classes:$HOME/.ivy2/cache/org.scala-lang/scala-library/jars/scala-library-2.11.12.jar $BENCH diff --git a/confs/native-image-pgo/run b/confs/native-image-pgo/run index a6b6dfc..baed793 100644 --- a/confs/native-image-pgo/run +++ b/confs/native-image-pgo/run @@ -1,2 +1,2 @@ -target/native-image-pgo-bench -Xmx1g -Xmx1g +target/native-image-pgo-bench $JAVA_SIZE_ARGS diff --git a/confs/native-image/run b/confs/native-image/run index 456cc5a..99bbc6e 100644 --- a/confs/native-image/run +++ b/confs/native-image/run @@ -1 +1 @@ -target/native-image-bench -Xmx1g -Xmx1g +target/native-image-bench $JAVA_SIZE_ARGS diff --git a/confs/scala-native-0.3.7/plugins.sbt b/confs/scala-native-0.3.7/plugins.sbt deleted file mode 100644 index afc9d5a..0000000 --- a/confs/scala-native-0.3.7/plugins.sbt +++ /dev/null @@ -1 +0,0 @@ -addSbtPlugin("org.scala-native" % "sbt-scala-native" % "0.3.7") diff --git a/confs/scala-native-0.3.7/build.sbt b/confs/scala-native-0.3.8/build.sbt similarity index 100% rename from confs/scala-native-0.3.7/build.sbt rename to confs/scala-native-0.3.8/build.sbt diff --git a/confs/scala-native-0.3.7/compile b/confs/scala-native-0.3.8/compile similarity index 100% rename from confs/scala-native-0.3.7/compile rename to confs/scala-native-0.3.8/compile diff --git a/confs/scala-native-0.3.8/plugins.sbt b/confs/scala-native-0.3.8/plugins.sbt new file mode 100644 index 0000000..2d38aa0 --- /dev/null +++ b/confs/scala-native-0.3.8/plugins.sbt @@ -0,0 +1 @@ +addSbtPlugin("org.scala-native" % "sbt-scala-native" % "0.3.8") diff --git a/confs/scala-native-0.3.7/run b/confs/scala-native-0.3.8/run similarity index 100% rename from confs/scala-native-0.3.7/run rename to confs/scala-native-0.3.8/run diff --git a/confs/scala-native-0.3.9-SNAPSHOT-commix/build.sbt b/confs/scala-native-0.3.9-SNAPSHOT-commix/build.sbt new file mode 100644 index 0000000..2104b7a --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT-commix/build.sbt @@ -0,0 +1,6 @@ +scalaVersion := "2.11.12" +enablePlugins(ScalaNativePlugin) +nativeLinkStubs := true +nativeGC := "commix" +nativeMode := "release" +nativeLTO := "thin" diff --git a/confs/scala-native-0.3.9-SNAPSHOT-commix/compile b/confs/scala-native-0.3.9-SNAPSHOT-commix/compile new file mode 100644 index 0000000..2f3f09f --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT-commix/compile @@ -0,0 +1 @@ +nativeLink diff --git a/confs/scala-native-0.3.9-SNAPSHOT-commix/plugins.sbt 
b/confs/scala-native-0.3.9-SNAPSHOT-commix/plugins.sbt new file mode 100644 index 0000000..c1423b6 --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT-commix/plugins.sbt @@ -0,0 +1 @@ +addSbtPlugin("org.scala-native" % "sbt-scala-native" % "0.3.9-SNAPSHOT") diff --git a/confs/scala-native-0.3.9-SNAPSHOT-commix/run b/confs/scala-native-0.3.9-SNAPSHOT-commix/run new file mode 100644 index 0000000..ae89e34 --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT-commix/run @@ -0,0 +1 @@ +target/scala-2.11/scala-native-benchmarks-out diff --git a/confs/scala-native-0.3.9-SNAPSHOT-full-lto/build.sbt b/confs/scala-native-0.3.9-SNAPSHOT-full-lto/build.sbt new file mode 100644 index 0000000..fae449a --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT-full-lto/build.sbt @@ -0,0 +1,6 @@ +scalaVersion := "2.11.12" +enablePlugins(ScalaNativePlugin) +nativeLinkStubs := true +nativeGC := "immix" +nativeMode := "release" +nativeLTO := "full" diff --git a/confs/scala-native-0.3.9-SNAPSHOT-full-lto/compile b/confs/scala-native-0.3.9-SNAPSHOT-full-lto/compile new file mode 100644 index 0000000..2f3f09f --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT-full-lto/compile @@ -0,0 +1 @@ +nativeLink diff --git a/confs/scala-native-0.3.9-SNAPSHOT-full-lto/plugins.sbt b/confs/scala-native-0.3.9-SNAPSHOT-full-lto/plugins.sbt new file mode 100644 index 0000000..c1423b6 --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT-full-lto/plugins.sbt @@ -0,0 +1 @@ +addSbtPlugin("org.scala-native" % "sbt-scala-native" % "0.3.9-SNAPSHOT") diff --git a/confs/scala-native-0.3.9-SNAPSHOT-full-lto/run b/confs/scala-native-0.3.9-SNAPSHOT-full-lto/run new file mode 100644 index 0000000..ae89e34 --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT-full-lto/run @@ -0,0 +1 @@ +target/scala-2.11/scala-native-benchmarks-out diff --git a/confs/scala-native-0.3.9-SNAPSHOT-no-lto/build.sbt b/confs/scala-native-0.3.9-SNAPSHOT-no-lto/build.sbt new file mode 100644 index 0000000..2fc1873 --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT-no-lto/build.sbt @@ -0,0 +1,6 @@ +scalaVersion := "2.11.12" +enablePlugins(ScalaNativePlugin) +nativeLinkStubs := true +nativeGC := "immix" +nativeMode := "release" +nativeLTO := "none" diff --git a/confs/scala-native-0.3.9-SNAPSHOT-no-lto/compile b/confs/scala-native-0.3.9-SNAPSHOT-no-lto/compile new file mode 100644 index 0000000..2f3f09f --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT-no-lto/compile @@ -0,0 +1 @@ +nativeLink diff --git a/confs/scala-native-0.3.9-SNAPSHOT-no-lto/plugins.sbt b/confs/scala-native-0.3.9-SNAPSHOT-no-lto/plugins.sbt new file mode 100644 index 0000000..c1423b6 --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT-no-lto/plugins.sbt @@ -0,0 +1 @@ +addSbtPlugin("org.scala-native" % "sbt-scala-native" % "0.3.9-SNAPSHOT") diff --git a/confs/scala-native-0.3.9-SNAPSHOT-no-lto/run b/confs/scala-native-0.3.9-SNAPSHOT-no-lto/run new file mode 100644 index 0000000..ae89e34 --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT-no-lto/run @@ -0,0 +1 @@ +target/scala-2.11/scala-native-benchmarks-out diff --git a/confs/scala-native-0.3.9-SNAPSHOT/build.sbt b/confs/scala-native-0.3.9-SNAPSHOT/build.sbt new file mode 100644 index 0000000..ae87f31 --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT/build.sbt @@ -0,0 +1,6 @@ +scalaVersion := "2.11.12" +enablePlugins(ScalaNativePlugin) +nativeLinkStubs := true +nativeGC := "immix" +nativeMode := "release" +nativeLTO := "thin" diff --git a/confs/scala-native-0.3.9-SNAPSHOT/compile b/confs/scala-native-0.3.9-SNAPSHOT/compile new file 
mode 100644 index 0000000..2f3f09f --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT/compile @@ -0,0 +1 @@ +nativeLink diff --git a/confs/scala-native-0.3.9-SNAPSHOT/plugins.sbt b/confs/scala-native-0.3.9-SNAPSHOT/plugins.sbt new file mode 100644 index 0000000..c1423b6 --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT/plugins.sbt @@ -0,0 +1 @@ +addSbtPlugin("org.scala-native" % "sbt-scala-native" % "0.3.9-SNAPSHOT") diff --git a/confs/scala-native-0.3.9-SNAPSHOT/run b/confs/scala-native-0.3.9-SNAPSHOT/run new file mode 100644 index 0000000..ae89e34 --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT/run @@ -0,0 +1 @@ +target/scala-2.11/scala-native-benchmarks-out diff --git a/confs/scala-native-0.4.0-SNAPSHOT-commix/build.sbt b/confs/scala-native-0.4.0-SNAPSHOT-commix/build.sbt new file mode 100644 index 0000000..2104b7a --- /dev/null +++ b/confs/scala-native-0.4.0-SNAPSHOT-commix/build.sbt @@ -0,0 +1,6 @@ +scalaVersion := "2.11.12" +enablePlugins(ScalaNativePlugin) +nativeLinkStubs := true +nativeGC := "commix" +nativeMode := "release" +nativeLTO := "thin" diff --git a/confs/scala-native-0.4.0-SNAPSHOT-commix/compile b/confs/scala-native-0.4.0-SNAPSHOT-commix/compile new file mode 100644 index 0000000..2f3f09f --- /dev/null +++ b/confs/scala-native-0.4.0-SNAPSHOT-commix/compile @@ -0,0 +1 @@ +nativeLink diff --git a/confs/scala-native-0.4.0-SNAPSHOT-commix/plugins.sbt b/confs/scala-native-0.4.0-SNAPSHOT-commix/plugins.sbt new file mode 100644 index 0000000..2a63bf0 --- /dev/null +++ b/confs/scala-native-0.4.0-SNAPSHOT-commix/plugins.sbt @@ -0,0 +1 @@ +addSbtPlugin("org.scala-native" % "sbt-scala-native" % "0.4.0-SNAPSHOT") diff --git a/confs/scala-native-0.4.0-SNAPSHOT-commix/run b/confs/scala-native-0.4.0-SNAPSHOT-commix/run new file mode 100644 index 0000000..ae89e34 --- /dev/null +++ b/confs/scala-native-0.4.0-SNAPSHOT-commix/run @@ -0,0 +1 @@ +target/scala-2.11/scala-native-benchmarks-out diff --git a/confs/scala-native-0.4.0-SNAPSHOT/build.sbt b/confs/scala-native-0.4.0-SNAPSHOT/build.sbt new file mode 100644 index 0000000..ae87f31 --- /dev/null +++ b/confs/scala-native-0.4.0-SNAPSHOT/build.sbt @@ -0,0 +1,6 @@ +scalaVersion := "2.11.12" +enablePlugins(ScalaNativePlugin) +nativeLinkStubs := true +nativeGC := "immix" +nativeMode := "release" +nativeLTO := "thin" diff --git a/confs/scala-native-0.4.0-SNAPSHOT/compile b/confs/scala-native-0.4.0-SNAPSHOT/compile new file mode 100644 index 0000000..2f3f09f --- /dev/null +++ b/confs/scala-native-0.4.0-SNAPSHOT/compile @@ -0,0 +1 @@ +nativeLink diff --git a/confs/scala-native-0.4.0-SNAPSHOT/plugins.sbt b/confs/scala-native-0.4.0-SNAPSHOT/plugins.sbt new file mode 100644 index 0000000..2a63bf0 --- /dev/null +++ b/confs/scala-native-0.4.0-SNAPSHOT/plugins.sbt @@ -0,0 +1 @@ +addSbtPlugin("org.scala-native" % "sbt-scala-native" % "0.4.0-SNAPSHOT") diff --git a/confs/scala-native-0.4.0-SNAPSHOT/run b/confs/scala-native-0.4.0-SNAPSHOT/run new file mode 100644 index 0000000..ae89e34 --- /dev/null +++ b/confs/scala-native-0.4.0-SNAPSHOT/run @@ -0,0 +1 @@ +target/scala-2.11/scala-native-benchmarks-out diff --git a/input/histogram.Histogram b/input/histogram.Histogram new file mode 100644 index 0000000..3365996 --- /dev/null +++ b/input/histogram.Histogram @@ -0,0 +1 @@ +300000,100000 \ No newline at end of file diff --git a/output/histogram.Histogram b/output/histogram.Histogram new file mode 100644 index 0000000..f32a580 --- /dev/null +++ b/output/histogram.Histogram @@ -0,0 +1 @@ +true \ No newline at end of file diff --git 
a/project/plugins.sbt b/project/plugins.sbt index afc9d5a..2a63bf0 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -1 +1 @@ -addSbtPlugin("org.scala-native" % "sbt-scala-native" % "0.3.7") +addSbtPlugin("org.scala-native" % "sbt-scala-native" % "0.4.0-SNAPSHOT") diff --git a/scripts/install-dependecies.sh b/scripts/install-dependecies.sh new file mode 100755 index 0000000..3b348d2 --- /dev/null +++ b/scripts/install-dependecies.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +sudo pip2 install numpy matplotlib +sudo apt update && sudo apt install python-tk \ No newline at end of file diff --git a/scripts/notebook.ipynb b/scripts/notebook.ipynb index 75c9347..d667762 100644 --- a/scripts/notebook.ipynb +++ b/scripts/notebook.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "collapsed": false, "scrolled": true @@ -11,7 +11,6 @@ { "ename": "ModuleNotFoundError", "evalue": "No module named 'matplotlib'", - "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", @@ -24,7 +23,8 @@ "\u001b[0;32m/usr/lib/python3/dist-packages/IPython/core/interactiveshell.py\u001b[0m in \u001b[0;36menable_matplotlib\u001b[0;34m(self, gui)\u001b[0m\n\u001b[1;32m 2933\u001b[0m \"\"\"\n\u001b[1;32m 2934\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mIPython\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcore\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mpylabtools\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mpt\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2935\u001b[0;31m \u001b[0mgui\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbackend\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfind_gui_and_backend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgui\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpylab_gui_select\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2936\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2937\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mgui\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;34m'inline'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/lib/python3/dist-packages/IPython/core/pylabtools.py\u001b[0m in \u001b[0;36mfind_gui_and_backend\u001b[0;34m(gui, gui_select)\u001b[0m\n\u001b[1;32m 257\u001b[0m \"\"\"\n\u001b[1;32m 258\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 259\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mmatplotlib\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 260\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 261\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mgui\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mgui\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;34m'auto'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'matplotlib'" - ] + ], + "output_type": "error" } ], "source": [ diff --git a/scripts/run.py b/scripts/run.py index 187069b..bac70cf 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -1,23 +1,29 @@ -#!/usr/bin/env python -import sys +#!/usr/bin/env python2 import os import errno import subprocess as subp import shutil as sh +import argparse +import multiprocessing as mp +import itertools + + def mkdir(path): try: os.makedirs(path) - except OSError as exc: # Python >2.5 + except OSError as exc: # 
Python >2.5 if exc.errno == errno.EEXIST and os.path.isdir(path): pass else: raise + def slurp(path): with open(path) as f: return f.read().strip() + def where(cmd): if os.path.isfile(cmd): return cmd @@ -30,82 +36,619 @@ def where(cmd): else: return None -def run(cmd): + +def try_run(cmd, env=None, wd=None): + try: + print run(cmd, env, wd) + return True + except subp.CalledProcessError as err: + print err.output + return False + + +def try_run_silent(cmd, env=None, wd=None): + try: + run(cmd, env, wd) + return True + except subp.CalledProcessError as err: + print err.output + return False + + +def run(cmd, env=None, wd=None): print(">>> " + str(cmd)) - return subp.check_output(cmd) + if wd == None: + return subp.check_output(cmd, stderr=subp.STDOUT, env=env) + else: + return subp.check_output(cmd, stderr=subp.STDOUT, env=env, cwd=wd) + + +scala_native_dir = os.path.join("..", "scala-native") +upload_dir = os.path.abspath(os.path.join("..", "scala-native-benchmark-results")) +local_scala_repo_dir = os.path.abspath(os.path.join("..", "scala-2.11.11-only")) + + +def git_add(dir, *items): + return try_run(["git", "add"] + list(items), wd=dir) + + +def git_commit(dir, msg): + return try_run(["git", "commit", "-m", msg], wd=dir) + + +def git_pull(dir): + my_env = os.environ.copy() + my_env["GIT_MERGE_AUTOEDIT"] = "no" + return try_run(["git", "pull"], env=my_env, wd=dir) + + +def git_push(dir): + return try_run(['git', 'push'], wd=dir) + + +def git_fetch(dir): + return try_run(['git', 'fetch', '--all'], wd=dir) + + +def get_ref(ref): + git_rev_parse = ['git', 'rev-parse', '--short', ref] + try: + return run(git_rev_parse, wd=scala_native_dir).strip() + except subp.CalledProcessError as err: + out = err.output + print "Cannot find", ref, "!" + print out + return None + + +def compile_scala_native(ref, sha1): + if ref != "HEAD": + git_checkout = ['git', 'checkout', sha1] + try: + print run(git_checkout, wd=scala_native_dir) + except subp.CalledProcessError as err: + out = err.output + print "Cannot checkout", sha1, "!" + print out + return False + + compile_cmd = [sbt, '-no-colors', '-J-Xmx2G', 'rebuild', 'sandbox/run'] + compile_env = os.environ.copy() + compile_env["SCALANATIVE_GC"] = "immix" + if os.path.isdir(local_scala_repo_dir): + compile_env["SCALANATIVE_SCALAREPO"] = local_scala_repo_dir + + try: + run(compile_cmd, compile_env, wd=scala_native_dir) + return True + except subp.CalledProcessError as err: + out = err.output + print "Compilation failure!" 
+ print out + return False -def compile(bench, compilecmd): - cmd = [sbt, '-J-Xmx2G', 'clean'] + +def compile(conf, bench, compilecmd, gcstats, debug, trace, extra_args): + cmd = [sbt, '-no-colors', '-J-Xmx2G', 'clean'] cmd.append('set mainClass in Compile := Some("{}")'.format(bench)) + if conf.startswith("scala-native"): + if debug or trace: + cmd.append('set nativeCompileOptions ++= Seq("-g", "-DDEBUG_ASSERT")') + if trace: + cmd.append('set nativeCompileOptions +="-DDEBUG_PRINT"') + if gcstats != None: + cmd.append('set nativeCompileOptions +="-DENABLE_GC_STATS{}"'.format(gcstats)) + for k,v in extra_args.iteritems(): + if k.endswith("?"): + cmd.append('set nativeCompileOptions +="-D{}"'.format(k[:-1])) + else: + cmd.append('set nativeCompileOptions +="-D{}={}"'.format(k,v)) cmd.append(compilecmd) - return run(cmd) + return try_run_silent(cmd) + sbt = where('sbt') -benchmarks = [ - 'bounce.BounceBenchmark', - 'list.ListBenchmark', - 'richards.RichardsBenchmark', - 'queens.QueensBenchmark', - 'permute.PermuteBenchmark', - 'deltablue.DeltaBlueBenchmark', - 'tracer.TracerBenchmark', - 'brainfuck.BrainfuckBenchmark', - 'json.JsonBenchmark', - 'cd.CDBenchmark', - 'kmeans.KmeansBenchmark', - 'gcbench.GCBenchBenchmark', - 'mandelbrot.MandelbrotBenchmark', - 'nbody.NbodyBenchmark', - 'sudoku.SudokuBenchmark', +default_benchmarks = [ + 'bounce.BounceBenchmark', + 'richards.RichardsBenchmark', + 'queens.QueensBenchmark', + 'permute.PermuteBenchmark', + 'deltablue.DeltaBlueBenchmark', + 'tracer.TracerBenchmark', + 'brainfuck.BrainfuckBenchmark', + 'json.JsonBenchmark', + 'cd.CDBenchmark', + 'kmeans.KmeansBenchmark', + 'gcbench.GCBenchBenchmark', + 'nbody.NbodyBenchmark', + 'sudoku.SudokuBenchmark', +] + +all_benchmarks = default_benchmarks + [ + 'histogram.Histogram', + 'list.ListBenchmark', + 'mandelbrot.MandelbrotBenchmark', +] + +stable = 'scala-native-0.3.8' +latest = 'scala-native-0.4.0-SNAPSHOT' +baseline = [ + 'jvm', + stable, ] +default = baseline + [latest] + +confs_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) + "/confs" -configurations = [ - 'jvm', - 'scala-native-0.3.7', +configurations = all_configs = next(os.walk(confs_path))[1] + +graalvm = [ + 'native-image', + 'native-image-pgo', ] if 'GRAALVM_HOME' in os.environ: - configurations += [ - 'native-image', - 'native-image-pgo', - ] + baseline += graalvm +else: + for g in graalvm: + all_configs.remove(g) -runs = 20 -batches = 3000 +default_runs = 20 +default_batches = 3000 +default_par = 1 batch_size = 1 + +def expand_wild_cards(arg): + if arg.startswith("latest"): + return latest + arg[len("latest"):] + elif arg.startswith("stable"): + return stable + arg[len("stable"):] + else: + return arg + + +def benchmark_parse(arg): + parts = arg.split("@") + if len(parts) == 2: + return parts[0], parts[1] + else: + return arg, None + + +def ref_parse(arg): + parts = arg.split("@") + if len(parts) == 3: + return parts[0], (parts[2] + "/" + parts[1]) + elif len(parts) == 2: + return parts[0], parts[1] + else: + return arg, None + + +def size_parse(arg): + parts = arg.split(":") + if len(parts) == 1: + return [arg, arg] + else: + return parts + + +def generate_choices(direct_choices): + results = direct_choices + for dir in direct_choices: + if dir.startswith(latest): + results += ["latest" + dir[len(latest):]] + if dir.startswith(stable): + results += ["stable" + dir[len(stable):]] + return results + + +def single_run(to_run): + n = to_run["n"] + runs = to_run["runs"] + unexpanded_cmd = to_run["cmd"] + resultsdir = 
to_run["resultsdir"] + conf = to_run["conf"] + bench = to_run["bench"] + gcstats = to_run["gcstats"] + minsize = to_run["size"][0] + maxsize = to_run["size"][1] + gcThreads = to_run["gcThreads"] + perf = to_run["perf"] + + print('--- run {}/{}'.format(n, runs)) + my_env = os.environ.copy() + if gcstats != None: + my_env["SCALANATIVE_STATS_FILE"] = os.path.join(resultsdir, str(n) + ".gc.csv") + + if minsize != "default": + my_env["SCALANATIVE_MIN_HEAP_SIZE"] = minsize + # in 0.4.0 the heap settings names changed. + my_env["SCALANATIVE_MIN_SIZE"] = minsize + else: + if "SCALANATIVE_MIN_HEAP_SIZE" in my_env: + del my_env["SCALANATIVE_MIN_HEAP_SIZE"] + if "SCALANATIVE_MIN_SIZE" in my_env: + del my_env["SCALANATIVE_MIN_SIZE"] + + + if maxsize != "default": + my_env["SCALANATIVE_MAX_HEAP_SIZE"] = maxsize + # in 0.4.0 the heap settings names changed. + my_env["SCALANATIVE_MAX_SIZE"] = maxsize + else: + if "SCALANATIVE_MAX_HEAP_SIZE" in my_env: + del my_env["SCALANATIVE_MAX_HEAP_SIZE"] + if "SCALANATIVE_MAX_SIZE" in my_env: + del my_env["SCALANATIVE_MAX_SIZE"] + + if gcThreads != "default": + my_env["SCALANATIVE_GC_THREADS"] = gcThreads + elif "SCALANATIVE_GC_THREADS" in my_env: + del my_env["SCALANATIVE_GC_THREADS"] + + if perf == "sudo": + myuser = os.environ.get('USER') + cmd = ["sudo", "-E", "perf", "record", "-o", os.path.join(resultsdir, str(n) + ".perf"), "-g", "-F", "15000", "--", "sudo", "-E", "-u", str(myuser)] + elif perf == "normal": + cmd = ["perf", "record", "-o", os.path.join(resultsdir, str(n) + ".perf"), "-g", "-F", "15000", "--"] + else: + cmd = [] + for token in unexpanded_cmd: + if token == "$JAVA_ARGS": + if minsize != "default": + cmd += ["-Xms" + minsize] + if maxsize != "default": + cmd += ["-Xmx" + maxsize] + if gcstats: + cmd += ["-XX:+PrintGCApplicationStoppedTime", "-Xloggc:" + os.path.join(resultsdir, str(n) + ".gc.txt")] + else: + cmd += [token] + + try: + out = run(cmd, my_env) + with open(os.path.join(resultsdir, str(n)), 'w+') as resultfile: + resultfile.write(out) + return [] + except subp.CalledProcessError as err: + out = err.output + print "Failure!" + print out + with open(os.path.join(resultsdir, str(n) + ".failed"), 'w+') as failfile: + failfile.write(out) + return [dict(conf=conf, bench=bench, run=n)] + + +def upload(subconfig_dir, symlink, use_git, overwrite): + if os.path.isdir(upload_dir): + target = os.path.join(upload_dir, subconfig_dir) + targetComplete = os.path.isfile(os.path.join(target, ".complete")) + targetExisted = os.path.isdir(target) + if (targetComplete and overwrite) or targetExisted: + mkdir(os.path.join("..", target)) + sh.rmtree(target, ignore_errors=True) + if not targetExisted or overwrite: + sh.copytree(subconfig_dir, target, symlinks=True) + if use_git: + if symlink != None: + git_add(upload_dir, symlink) + if git_add(upload_dir, target) \ + and git_commit(upload_dir, "automated commit " + subconfig_dir) \ + and git_pull(upload_dir) \ + and git_push(upload_dir): + pass + else: + print "WARN", upload_dir, "does not exist!" 
+ + +def create_symlink(generalized_dir, root_dir): + try: + os.unlink(generalized_dir) + except: + pass + print "creating symlink", generalized_dir, "->", root_dir + os.symlink(os.path.split(root_dir)[1], generalized_dir) + + if __name__ == "__main__": - for conf in configurations: - for bench in benchmarks: - print('--- conf: {}, bench: {}'.format(conf, bench)) + parser = argparse.ArgumentParser() + parser.add_argument("--suffix", help="suffix added to results") + parser.add_argument("--perf", help="records perf data", action="store_true") + parser.add_argument("--sperf", help="records perf data using sudo rights", action="store_true") + parser.add_argument("--runs", help="number of runs", type=int, default=default_runs) + parser.add_argument("--batches", help="number of batches per run", type=int, default=default_batches) + parser.add_argument("--benchmark", help="benchmarks to run", action='append') + parser.add_argument("--argnames", help="compile arguments to set, mark flags with a '?' at the end, split with ','", type=str) + parser.add_argument("--argv", help="argument values, split with ',', booleans as true or false", action='append') + parser.add_argument("--size", help="different size settings to use", action='append') + parser.add_argument("--gcthreads", help="different number of garbage collection threads to use", action='append') + parser.add_argument("--par", help="number of parallel processes", type=int, default=default_par) + parser.add_argument("--gc", help="gather gc statistics", action="store_true") + parser.add_argument("--gcv", help="gather gc statistics verbose - batches", action="store_true") + parser.add_argument("--gcvv", help="gather gc statistics very verbose - sync events", action="store_true") + parser.add_argument("--upload", help="copy the results to ../scala-native-benchmark-results", action="store_true") + parser.add_argument("--gitupload", help="copy the results to ../scala-native-benchmark-results and commit and push to git", action="store_true") + parser.add_argument("--overwrite", help="overwrite old results", action="store_true") + parser.add_argument("--append", help="do not delete old data", action="store_true") + parser.add_argument("--gcdebug", help="enable debug for GCs", action="store_true") + parser.add_argument("--gctrace", help="verbose logging for GCs to stdout", action="store_true") + parser.add_argument("set", nargs='*', default="default") + args = parser.parse_args() + print args - input = slurp(os.path.join('input', bench)) - output = slurp(os.path.join('output', bench)) - compilecmd = slurp(os.path.join('confs', conf, 'compile')) - runcmd = slurp(os.path.join('confs', conf, 'run')).replace('$BENCH', bench).replace('$HOME', os.environ['HOME']).split(' ') + runs = args.runs + batches = args.batches + par = args.par - if os.path.exists(os.path.join('confs', conf, 'build.sbt')): - sh.copyfile(os.path.join('confs', conf, 'build.sbt'), 'build.sbt') + if args.benchmark != None: + benchmarks = [] + for b in args.benchmark: + if b == "default": + benchmarks += default_benchmarks else: - os.remove('build.sbt') + bname, bargs = benchmark_parse(b) + matching = filter(lambda s: s.startswith(bname), all_benchmarks) + if bargs != None: + benchmarks += map(lambda x: (x, bargs), matching) + else: + benchmarks += matching + else: + benchmarks = default_benchmarks + + if args.size != None: + sizes = [] + for subconf_str in args.size: + parsed = size_parse(subconf_str) + if parsed == ["default", "default"]: + sizes = [parsed] + sizes + else: + sizes += 
[parsed] + else: + sizes = [["default", "default"]] + + if args.gcthreads != None: + gcThreadCounts = args.gcthreads + else: + gcThreadCounts = ["default"] + + configurations = [] + for choice in args.set: + expanded = expand_wild_cards(choice) + if expanded == "baseline": + configurations += baseline + elif expanded == "default": + configurations = default + else: + configurations += [expanded] + + print "configurations:", configurations + print "benchmarks:", benchmarks + print "heap sizes:", sizes + print "GC thread counts:", gcThreadCounts + + should_fetch = False + for conf in configurations: + if '@' in conf and not conf.endswith("@HEAD"): + should_fetch = True + break + + if args.argnames != None and args.argv != None: + derived_configs = [] + argnames = args.argnames.split(",") + for valset in args.argv : + values = valset.split(",") + suffix = "-a" + ("-".join(values)) + extra_args = dict() + for (name, value) in zip(argnames, values): + if name.endswith("?"): + if value in ["1", "true", "TRUE", "True"]: + extra_args[name[:-1]] = True + else: + extra_args[name] = value + derived_configs.append((suffix, extra_args)) + else: + derived_configs = [("", dict())] + + if should_fetch: + git_fetch(scala_native_dir) + + suffix = "" + if runs != default_runs: + suffix += "-r" + str(runs) + if batches != default_batches: + suffix += "-b" + str(batches) + if par != default_par: + suffix += "-p" + str(par) + + if args.sperf: + perf = "sudo" + suffix +="-Perf" + elif args.perf: + perf = "normal" + suffix +="-Perf" + else: + perf = None - if os.path.exists(os.path.join('confs', conf, 'plugins.sbt')): - sh.copyfile(os.path.join('confs', conf, 'plugins.sbt'), 'project/plugins.sbt') + if args.gcvv: + suffix += "-gcvv" + gcstats = "_SYNC" + elif args.gcv: + suffix += "-gcv" + gcstats = "_BATCHES" + elif args.gc: + suffix += "-gc" + gcstats = "" + else: + gcstats = None + + if args.gcdebug: + suffix += "-gcdebug" + if args.gctrace: + suffix += "-gctrace" + if args.suffix is not None: + suffix += "_" + args.suffix + + failed = [] + skipped = [] + compile_fail = [] + result_dirs = [] + symlinks = [] + + pool = None + if par > 1: + pool = mp.Pool(par) + + for conf in configurations: + conf_name, ref = ref_parse(conf) + + if ref == None: + sha1 = None + else: + sha1 = get_ref(ref) + if sha1 == None: + compile_fail += [conf] + continue + + if sha1 != None: + success = compile_scala_native(ref, sha1) + if not success: + compile_fail += [conf] + continue + + # derived configurations + for (der_suffix, extra_args) in derived_configs: + generalized_dir = os.path.join('results', conf + suffix + der_suffix) + if sha1 == None: + root_dir = generalized_dir + der_suffix else: - os.remove('project/plugins.sbt') + root_dir = os.path.join('results', conf + "." + sha1 + "." 
+ suffix + der_suffix) + + mkdir(root_dir) + symlink = None + if generalized_dir != root_dir: + create_symlink(generalized_dir, root_dir) + symlinks += [[generalized_dir, root_dir]] + symlink = generalized_dir + if args.upload or args.gitupload: + create_symlink(os.path.join(upload_dir, generalized_dir), root_dir) + + # subconfigurations + for (size, gcThreads) in itertools.product(sizes, gcThreadCounts): + + if size == ["default", "default"] and gcThreads == "default": + subconfig_dir = root_dir + else: + size_str = [] + if size != ["default", "default"] : + size_str = ["size_" + size[0] + "-" + size[1]] + gcThreads_str = [] + if gcThreads != "default": + gcThreads_str = ["gcthreads_" + gcThreads] + subconf_str = "_".join(size_str + gcThreads_str) + subconfig_dir = os.path.join(root_dir, subconf_str) + + if not args.overwrite and os.path.isfile(os.path.join(subconfig_dir, ".complete")): + print subconfig_dir, "already complete, skipping" + skipped += [subconfig_dir] + continue + + if not args.append: + sh.rmtree(subconfig_dir, ignore_errors=True) + + mkdir(subconfig_dir) + + for bconf in benchmarks: + if type(bconf) is tuple: + bench, input = bconf + bfullname = bench + "@" + input + else: + bench = bconf + input = slurp(os.path.join('input', bench)) + bfullname = bench + print('--- heap size: {} GC threads: {} conf: {}, bench: {}'.format(size, gcThreads, conf, bfullname)) + + output = slurp(os.path.join('output', bench)) + compilecmd = slurp(os.path.join('confs', conf_name, 'compile')) + runcmd = slurp(os.path.join('confs', conf_name, 'run')) \ + .replace('$BENCH', bench) \ + .replace('$HOME', os.environ['HOME']).split(' ') + + if os.path.exists(os.path.join('confs', conf_name, 'build.sbt')): + sh.copyfile(os.path.join('confs', conf_name, 'build.sbt'), 'build.sbt') + else: + os.remove('build.sbt') + + if os.path.exists(os.path.join('confs', conf_name, 'plugins.sbt')): + sh.copyfile(os.path.join('confs', conf_name, 'plugins.sbt'), 'project/plugins.sbt') + else: + os.remove('project/plugins.sbt') + + compile_success = compile(conf, bench, compilecmd, gcstats, args.gcdebug, args.gctrace, extra_args) + if not compile_success: + compile_fail += [conf] + break + + resultsdir = os.path.join(subconfig_dir, bfullname) + print "results in", resultsdir + mkdir(resultsdir) + + if conf.startswith("scala-native") and perf in ["sudo", "normal"]: + # perf needs the original binary for the reports to make any sense + runnable = runcmd[0] + saved_binary = os.path.join(resultsdir, "binary") + sh.copyfile(runnable, saved_binary) + os.chmod(saved_binary, 0775) + runcmd = [saved_binary] + runcmd[1:] + + cmd = [] + cmd.extend(runcmd) + cmd.extend([str(batches), str(batch_size), input, output]) + + to_run = [] + for n in xrange(runs): + to_run += [ + dict(runs=runs, cmd=cmd, resultsdir=resultsdir, conf=conf, bench=bench, n=n, gcstats=gcstats, + size=size, gcThreads=gcThreads, perf = perf)] + + if par == 1: + for tr in to_run: + failed += single_run(tr) + else: + failed += sum(pool.map(single_run, to_run), []) + + # mark it as complete + open(os.path.join(subconfig_dir, ".complete"), 'w+').close() + result_dirs += [subconfig_dir] + + if args.upload or args.gitupload: + upload(subconfig_dir, symlink, args.gitupload, args.overwrite) + + print "results:" + for dir in result_dirs: + print dir + + if len(symlinks) > 0: + print("{} symlinks ".format(len(symlinks))) + for symlink in symlinks: + print symlink[0], "->", symlink[1] + - compile(bench, compilecmd) - resultsdir = os.path.join('results', conf, bench) - 
mkdir(resultsdir) + if len(compile_fail) > 0: + print("{} compilation failed ".format(len(failed))) + for skip in compile_fail: + print skip - for n in xrange(runs): - print('--- run {}/{}'.format(n, runs)) + if len(skipped) > 0: + print("{} benchmarks skipped ".format(len(failed))) + for skip in skipped: + print skip - cmd = [] - cmd.extend(runcmd) - cmd.extend([str(batches), str(batch_size), input, output]) - out = run(cmd) - with open(os.path.join(resultsdir, str(n)), 'w+') as resultfile: - resultfile.write(out) + if len(failed) > 0: + print("{} benchmarks failed ".format(len(failed))) + for fail in failed: + print fail + if len(compile_fail) > 0 or len(failed) > 0: + exit(1) diff --git a/scripts/start.sh b/scripts/start.sh new file mode 100755 index 0000000..d6c4d46 --- /dev/null +++ b/scripts/start.sh @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +setsid scripts/start0.sh \ No newline at end of file diff --git a/scripts/start0.sh b/scripts/start0.sh new file mode 100755 index 0000000..3c50eca --- /dev/null +++ b/scripts/start0.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +echo $$ | tee .pid + +if [ -f jobs.sh ]; then + mkdir -p logs + ./jobs.sh | tee logs/job_$(date +%Y%m%d_%H%M%S).log +fi + +rm .pid \ No newline at end of file diff --git a/scripts/stop.sh b/scripts/stop.sh new file mode 100755 index 0000000..b6deb96 --- /dev/null +++ b/scripts/stop.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +if [ -f .pid ]; then + PID=$(cat .pid) + kill -9 $(ps -s $PID -o pid=) + rm .pid +fi + diff --git a/scripts/summary.py b/scripts/summary.py old mode 100644 new mode 100755 index b953295..cec2423 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -1,43 +1,1270 @@ -from run import benchmarks, runs, configurations +#!/usr/bin/env python2 +from run import mkdir, expand_wild_cards, generate_choices +import matplotlib +matplotlib.use('Agg') +import matplotlib.pyplot as plt +import matplotlib.patches as mpatches import numpy as np +import time +import os +import argparse -def config_data(bench, conf): - out = [] - for run in xrange(runs): + +def config_data_goodruns(bench, conf, warmup, p=50): + benchmark_dir = os.path.join("results", conf, bench) + files = next(os.walk(benchmark_dir), [[], [], []])[2] + runs = [] + for file in files: + if "." 
not in file: + # regular benchmark data + runs.append(file) + + points_with_50percentile = [] + for run in runs: try: - points = [] - with open('results/{}/{}/{}'.format(conf, bench, run)) as data: + raw_points = [] + with open(os.path.join("results", conf, bench, run)) as data: for line in data.readlines(): - points.append(float(line)) - # take only last 1000 to account for startup - points = points[-1000:] - # filter out 1% worst measurements as outliers - pmax = np.percentile(points, 99) - for point in points: - if point <= pmax: - out.append(point) + try: + # in ms + raw_points.append(float(line) / 1000000) + except Exception as e: + print e + points = raw_points[warmup:] + points_with_50percentile += [(points, np.percentile(points, p))] except IOError: pass + to_discard = int(0.2 * len(points_with_50percentile)) + if to_discard > 0: + sorted_arr = sorted(points_with_50percentile, key=lambda x: -x[1]) + out = map(lambda x: x[0], sorted_arr[to_discard:]) + else: + out = map(lambda x: x[0], points_with_50percentile) + return np.array(sum(out, [])) + + +def config_data_run(bench, conf, run, warmup): + out = [] + try: + points = [] + with open(os.path.join("results", conf, bench, str(run))) as data: + for line in data.readlines(): + try: + # in ms + points.append(float(line) / 1000000) + except Exception as e: + print e + out = points[warmup:] + except IOError: + pass return np.array(out) -def peak_performance(): + +def gc_pauses_main_thread(bench, conf): + benchmark_dir = os.path.join("results", conf, bench) + files = next(os.walk(benchmark_dir), [[], [], []])[2] + runs = [] + for file in files: + if file.endswith(".gc.csv"): + # gc stats data + runs.append(file) + + mark_times = [] + sweep_times = [] + gc_times = [] + for run in runs: + try: + file = os.path.join("results", conf, bench, run) + with open(file) as data: + mark, sweep, total = gc_parse_file(data, file) + mark_times += mark + sweep_times += sweep + gc_times += total + except IOError: + pass + return np.array(mark_times), np.array(sweep_times), np.array(gc_times) + + +def gc_parse_file(data, file): + header = data.readline().strip() + if header.startswith("event_type,"): + return parse_gc_pause_events(data, file, header) + else: + return parse_gc_tabular(data, file, header) + + +def parse_gc_pause_events(data, file, header): + mark_times = [] + sweep_times = [] + gc_times = [] + event_type_index = 0 + time_ns_index = -1 + ns_to_ms_div = 1000 * 1000 + for i, h in enumerate(header.split(',')): + if h == "time_ns": + time_ns_index = i + if time_ns_index == -1: + print "Header does not have time_ns", header, "at", file + return mark_times, sweep_times, gc_times + + for line in data.readlines(): + arr = line.split(",") + event = arr[event_type_index] + try: + time = float(arr[time_ns_index]) / ns_to_ms_div + if event == "mark": + mark_times.append(time) + elif event == "sweep": + sweep_times.append(time) + if event == "mark" or event == "sweep": + gc_times.append(time) + except Exception as e: + print e + + return mark_times, sweep_times, gc_times + + +def parse_gc_tabular(data, file, header): + mark_times = [] + sweep_times = [] + gc_times = [] + # analise header + mark_index = -1 + sweep_index = -1 + mark_to_ms = 0 + sweep_to_ms = 0 + unit2div = dict(ms=1, us=1000, ns=1000 * 1000) + for i, h in enumerate(header.split(',')): + arr = h.rsplit('_', 1) + if len(arr) != 2: + continue + prefix = arr[0] + unit = arr[1] + + if prefix == "mark_time": + mark_index = i + mark_to_ms = unit2div[unit] + elif prefix == "sweep_time": + 
sweep_index = i + sweep_to_ms = unit2div[unit] + if mark_index == -1: + print "Header does not have mark_time_", header, "at", file + if sweep_index == -1: + print "Header does not have sweep_time_", header, "at", file + if mark_index == -1 or sweep_index == -1: + return mark_times, sweep_times, gc_times + for line in data.readlines(): + arr = line.split(",") + # in ms + mark_time = float(arr[mark_index]) / mark_to_ms + mark_times.append(mark_time) + sweep_time = float(arr[sweep_index]) / sweep_to_ms + sweep_times.append(sweep_time) + gc_times.append(mark_time + sweep_time) + return mark_times, sweep_times, gc_times + + +def merge_or_create(dict1, dict2): + for key1 in dict1.keys(): + if dict2.has_key(key1): + dict1[key1].append(dict2[key1]) + for key2 in dict2.keys(): + if not dict1.has_key(key2): + dict1[key2] = dict2[key2] + + +def gc_events_for_last_n_collections(bench, conf, run=3, n=1): + benchmark_dir = os.path.join("results", conf, bench) + files = next(os.walk(benchmark_dir), [[], [], []])[2] + main_file_name = str(run) + ".gc.csv" + parts = [] + for file in files: + if file.startswith(main_file_name): + parts.append(file) + + collection_events = [] + for part in parts: + try: + file = os.path.join("results", conf, bench, part) + with open(file) as data: + header = data.readline().strip() + collection_events0, _, _, _ = parse_events(data, file, header) + collection_events += collection_events0 + except IOError: + pass + + collection_events = collection_events[-n:] + if len(collection_events) == 0: + return [], dict(), dict(), dict() + + min_time = collection_events[0][1] + time_filter = (lambda t: t > min_time) + + phase_events_by_thread = dict() + batch_events_by_thread = dict() + internal_events_by_thread = dict() + + for part in parts: + try: + file = os.path.join("results", conf, bench, part) + with open(file) as data: + header = data.readline().strip() + # no collection events on other threads + _, phase_events_by_thread0, batch_events_by_thread0, internal_events_by_thread0 = parse_events(data, file, header, time_filter) + merge_or_create(phase_events_by_thread, phase_events_by_thread0) + merge_or_create(batch_events_by_thread, batch_events_by_thread0) + merge_or_create(internal_events_by_thread, internal_events_by_thread0) + except IOError: + pass + + return collection_events, phase_events_by_thread, batch_events_by_thread, internal_events_by_thread + + +def append_or_create(dict, key, value): + if dict.has_key(key): + dict[key].append(value) + else: + dict[key] = [value] + + +phase_event_types = ["mark", "sweep", "concmark", "concsweep"] +batch_events_types = ["mark_batch", "sweep_batch", "coalesce_batch", "mark_waiting"] +internal_events_types = ["sync"] + + +# event = [type, start, end] +def parse_events(data, file, header, timeFilter=(lambda t: True)): + collection_types = ["collection"] + + collection_events = [] + phase_events_by_thread = dict() + batch_events_by_thread = dict() + internal_events_by_thread = dict() + + event_type_index = 0 + start_ns_index = -1 + time_ns_index = -1 + thread_index = -1 + ns_to_ms_div = 1000 * 1000 + for i, h in enumerate(header.split(',')): + if h == "start_ns": + start_ns_index = i + if h == "time_ns": + time_ns_index = i + if h == "gc_thread": + thread_index = i + + if start_ns_index == -1: + print "Header does not have start_ns", header, "at", file + if time_ns_index == -1: + print "Header does not have time_ns", header, "at", file + if thread_index == -1: + print "Header does not have gc_thread", header, "at", file + if 
start_ns_index == -1 or time_ns_index == -1 or thread_index == -1: + return collection_events, phase_events_by_thread, batch_events_by_thread, internal_events_by_thread + + for line in data.readlines(): + arr = line.split(",") + event = arr[event_type_index] + start = float(arr[start_ns_index]) / ns_to_ms_div + if not timeFilter(start): + continue + time = float(arr[time_ns_index]) / ns_to_ms_div + thread = arr[thread_index] + if event in collection_types: + collection_events.append([event, start, time]) + elif event in phase_event_types: + append_or_create(phase_events_by_thread, thread, [event, start, time]) + elif event in batch_events_types: + append_or_create(batch_events_by_thread, thread, [event, start, time]) + elif event in internal_events_types: + append_or_create(internal_events_by_thread, thread, [event, start, time]) + + return collection_events, phase_events_by_thread, batch_events_by_thread, internal_events_by_thread + + +def parse_batch_times(data, file, header): + + mark_batches = [] + sweep_batches = [] + + event_type_index = 0 + start_ns_index = -1 + time_ns_index = -1 + thread_index = -1 + ns_to_ms_div = 1000 * 1000 + for i, h in enumerate(header.split(',')): + if h == "start_ns": + start_ns_index = i + if h == "time_ns": + time_ns_index = i + if h == "gc_thread": + thread_index = i + + if start_ns_index == -1: + print "Header does not have start_ns", header, "at", file + if time_ns_index == -1: + print "Header does not have time_ns", header, "at", file + if thread_index == -1: + print "Header does not have gc_thread", header, "at", file + if start_ns_index == -1 or time_ns_index == -1 or thread_index == -1: + return mark_batches, sweep_batches + + for line in data.readlines(): + arr = line.split(",") + event = arr[event_type_index] + time = float(arr[time_ns_index]) / ns_to_ms_div + if event == "mark_batch": + mark_batches.append(time) + elif event == "sweep_batch": + sweep_batches.append(time) + + return mark_batches, sweep_batches + + +def gc_batch_times(bench, conf): + benchmark_dir = os.path.join("results", conf, bench) + files = next(os.walk(benchmark_dir), [[], [], []])[2] + parts = [] + for file in files: + if ".gc.csv" in file: + parts.append(file) + sweep_batches = [] + mark_batches = [] + for part in parts: + try: + file = os.path.join("results", conf, bench, part) + with open(file) as data: + header = data.readline().strip() + mark_batches0, sweep_batches0 = parse_batch_times(data, file, header) + mark_batches += mark_batches0 + sweep_batches += sweep_batches0 + except IOError: + pass + + return mark_batches, sweep_batches + + +def gc_stats_total(bench, conf): + _, _, total = gc_pauses_main_thread(bench, conf) + return total + + +def gc_stats_mark_batches(bench, conf): + mark,_ = gc_batch_times(bench, conf) + return np.array(mark) + + +def gc_stats_sweep_batches(bench, conf): + _, sweep = gc_batch_times(bench, conf) + return np.array(sweep) + + +def percentile_gc(configurations, benchmarks, percentile): + out_mark = [] + out_sweep = [] + out_total = [] + for bench in benchmarks: + res_mark, res_sweep, res_total = percentile_gc_bench(configurations, bench, percentile) + out_mark.append(res_mark) + out_sweep.append(res_sweep) + out_total.append(res_total) + + return out_mark, out_sweep, out_total + + +def total_gc(configurations, benchmarks): + out_mark = [] + out_sweep = [] + out_total = [] + for bench in benchmarks: + res_mark, res_sweep, res_total = total_gc_bench(configurations, bench) + out_mark.append(res_mark) + out_sweep.append(res_sweep) + 
out_total.append(res_total) + return out_mark, out_sweep, out_total + + +def percentile_gc_bench(configurations, bench, p): + res_mark = [] + res_sweep = [] + res_total = [] + for conf in configurations: + try: + mark, sweep, total = gc_pauses_main_thread(bench, conf) + res_mark.append(np.percentile(mark, p)) + res_sweep.append(np.percentile(sweep, p)) + res_total.append(np.percentile(total, p)) + except IndexError: + res_mark.append(0) + res_sweep.append(0) + res_total.append(0) + return res_mark, res_sweep, res_total + + +def total_gc_bench(configurations, bench): + res_mark = [] + res_sweep = [] + res_total = [] + for conf in configurations: + try: + mark, sweep, total = gc_pauses_main_thread(bench, conf) + res_mark.append(np.sum(mark)) + res_sweep.append(np.sum(sweep)) + res_total.append(np.sum(total)) + except IndexError: + res_mark.append(0) + res_sweep.append(0) + res_total.append(0) + return res_mark, res_sweep, res_total + + +def percentile_gc_bench_mark(configurations, bench, p): + mark, _, _ = percentile_gc_bench(configurations, bench, p) + return mark + + +def percentile_gc_bench_sweep(configurations, bench, p): + _, sweep, _ = percentile_gc_bench(configurations, bench, p) + return sweep + + +def percentile_gc_bench_total(configurations, bench, p): + _, _, total = percentile_gc_bench(configurations, bench, p) + return total + + +def percentile(configurations, benchmarks, warmup, p): + out = [] + for bench in benchmarks: + out.append(percentile_bench(configurations, bench, warmup, p)) + return out + + +def percentile_bench(configurations, bench, warmup, p): + res = [] + for conf in configurations: + try: + res.append(np.percentile(config_data_goodruns(bench, conf, warmup, p), p)) + except IndexError: + res.append(0) + return res + + +def totals(configurations, benchmarks, warmup): out = [] for bench in benchmarks: + out.append(totals_bench(configurations, bench, warmup)) + return out + + +def totals_bench(configurations, bench, warmup): + res = [] + for conf in configurations: + try: + res.append(np.sum(config_data_goodruns(bench, conf, warmup))) + except IndexError: + res.append(0) + return res + + +def bar_chart_relative(plt, configurations, benchmarks, data): + plt.clf() + plt.cla() + ind = np.arange(len(benchmarks)) + conf_count = len(configurations) + 1 + base = [] + ref = [] + for bench_idx, bench in enumerate(benchmarks): + try: + base_val = data[bench_idx][0] + if base_val > 0: + base.append(base_val) + ref.append(1.0) + else: + base.append(0.0) + ref.append(0.0) + except IndexError: + base.append(0.0) + ref.append(0.0) + plt.bar(ind * conf_count, ref, label=configurations[0]) + + for i, conf in enumerate(configurations[1:]): + conf_idx = i + 1 res = [] - for conf in configurations: + for bench_idx, (bench, base_val) in enumerate(zip(benchmarks, base)): try: - res.append(np.percentile(config_data(bench, conf), 50)) + if base_val > 0: + res.append(data[bench_idx][conf_idx] / base_val) + else: + res.append(0.0) except IndexError: res.append(0) - out.append(res) - return out + plt.bar(ind * conf_count + conf_idx, res, label=conf) + plt.xticks((ind * conf_count + (conf_count - 1) / 2.0), map(benchmark_short_name, benchmarks)) + plt.legend() + return plt -if __name__ == '__main__': + +def total_execution_times(plt, configurations, benchmarks, data): + plt = bar_chart_relative(plt, configurations, benchmarks, data) + plt.title("Total test execution times against " + configurations[0]) + return plt + + +def relative_execution_times(plt, configurations, benchmarks, data, p): + 
plt = bar_chart_relative(plt, configurations, benchmarks, data) + plt.title("Relative test execution times against " + configurations[0] + " at " + str(p) + " percentile") + return plt + + +def relative_gc_pauses(plt, configurations, benchmarks, data, p): + plt = bar_chart_relative(plt, configurations, benchmarks, data) + plt.title("Relative GC pauses against " + configurations[0] + " at " + str(p) + " percentile") + return plt + + +def bar_chart_gc_relative(plt, configurations, benchmarks, mark_data, total_data): + plt.clf() + plt.cla() + ind = np.arange(len(benchmarks)) + conf_count = len(configurations) + 1 + base = [] + ref = [] + mark_ref = [] + for bench_idx, bench in enumerate(benchmarks): + mark = mark_data[bench_idx][0] + total = total_data[bench_idx][0] + if total > 0: + base.append(total) + ref.append(1.0) + mark_ref.append(mark / total) + else: + base.append(0) + ref.append(0.0) + mark_ref.append(0.0) + plt.bar(ind * conf_count, ref, label=configurations[0] + "-sweep") # total (look like sweep) + plt.bar(ind * conf_count, mark_ref, label=configurations[0] + "-mark") # mark time + + for i, conf in enumerate(configurations[1:]): + conf_idx = i + 1 + res = [] + mark_res = [] + for bench_idx, (bench, base_val) in enumerate(zip(benchmarks, base)): + if base_val > 0: + mark, _, total = gc_pauses_main_thread(bench, conf) + mark = mark_data[bench_idx][conf_idx] + total = total_data[bench_idx][conf_idx] + res.append(np.array(total) / base_val) + mark_res.append(np.array(mark) / base_val) + else: + res.append(0) + mark_res.append(0) + plt.bar(ind * conf_count + i + 1, res, label=conf + "-sweep") # total (look like sweep) + plt.bar(ind * conf_count + i + 1, mark_res, label=conf + "-mark") # mark time + plt.xticks((ind * conf_count + (conf_count - 1) / 2.0), map(benchmark_short_name, benchmarks)) + plt.title("Relative gc times against " + configurations[0]) + plt.legend() + return plt + + +def bar_chart_gc_absolute(plt, configurations, benchmarks, percentile): + plt.clf() + plt.cla() + ind = np.arange(len(benchmarks)) + conf_count = len(configurations) + 1 + + for i, conf in enumerate(configurations): + res = [] + mark_res = [] + for bench in benchmarks: + try: + mark, _, total = gc_pauses_main_thread(bench, conf) + res.append(np.percentile(total, percentile)) + mark_res.append(np.percentile(mark, percentile)) + except IndexError: + res.append(0) + plt.bar(ind * conf_count + i + 1, res, label=conf + "-sweep") # total (look like sweep) + plt.bar(ind * conf_count + i + 1, mark_res, label=conf + "-mark") # mark time + plt.xticks((ind * conf_count + (conf_count - 1) / 2.0), map(benchmark_short_name, benchmarks)) + plt.title("Garbage collector pause times at " + str(percentile) + " percentile") + plt.legend() + return plt + + +def example_run_plot(plt, configurations, bench, run=3, lastn=-1): + plt.clf() + plt.cla() + + for conf in configurations: + rawpoints = [] + try: + with open('results/{}/{}/{}'.format(conf, bench, run)) as data: + for line in data.readlines(): + try: + rawpoints.append(float(line) / 1000000) + except Exception as e: + print e + except IOError: + pass + + total_len = len(rawpoints) + if lastn != -1: + first = total_len - lastn + else: + first = 0 + ind = np.arange(first, total_len) + points = rawpoints[first:] + plt.plot(ind, points, label=conf) + plt.title("{} run #{}".format(bench, str(run))) + plt.xlabel("Iteration") + plt.ylabel("Run time (ms)") + plt.legend() + return plt + + +def example_all_runs_plot(plt, conf, bench, lastn=-1): + plt.clf() + plt.cla() + max_run = 
find_last_run(conf, bench) + + for run in np.arange(0, max_run + 1): + rawpoints = [] + try: + with open('results/{}/{}/{}'.format(conf, bench, run)) as data: + for line in data.readlines(): + try: + rawpoints.append(float(line) / 1000000) + except Exception as e: + print e + except IOError: + pass + + total_len = len(rawpoints) + if total_len == 0: + continue + if lastn != -1: + first = total_len - lastn + else: + first = 0 + ind = np.arange(first, total_len) + points = rawpoints[first:] + plt.plot(ind, points, label=run) + plt.title("{} all runs for {}".format(bench, conf)) + plt.xlabel("Iteration") + plt.ylabel("Run time (ms)") + plt.legend() + return plt + + +def to_gb(size_str): + if size_str[-1] == "k" or size_str[-1] == "K": + return float(size_str[:-1]) / 1024 / 1024 + elif size_str[-1] == "m" or size_str[-1] == "M": + return float(size_str[:-1]) / 1024 + elif size_str[-1] == "g" or size_str[-1] == "G": + return float(size_str[:-1]) + else: + # bytes + return float(size_str) / 1024 / 1024 / 1024 + + +def sizes_per_conf(parent_configuration): + parent_folder = os.path.join("results", parent_configuration) + min_sizes = [] + max_sizes = [] + child_confs = [] + folders = next(os.walk(parent_folder))[1] + for f in folders: + if f.startswith("size_"): + parts = f[len("size_"):].split("-") + min_sizes.append(to_gb(parts[0])) + max_sizes.append(to_gb(parts[1])) + child_confs.append(os.path.join(parent_configuration, f)) + return min_sizes, max_sizes, child_confs + + +def size_compare_chart_generic(plt, parent_configurations, bench, get_percentile, p): + plt.clf() + plt.cla() + for parent_conf in parent_configurations: + min_sizes, max_sizes, child_confs = sizes_per_conf(parent_conf) + equal_sizes = [] + equal_confs = [] + for min_size, max_size, child_conf in zip(min_sizes, max_sizes, child_confs): + if min_size == max_size: + equal_sizes.append(min_size) + equal_confs.append(child_conf) + + # sorts all by size in GB + equal_sizes, equal_confs = zip(*[(x, y) for x, y in sorted(zip(equal_sizes, equal_confs))]) + percentiles = get_percentile(equal_confs, bench, p) + plt.plot(np.array(equal_sizes), percentiles, label=parent_conf) + plt.legend() + plt.xlim(xmin=0) + plt.ylim(ymin=0) + plt.xlabel("Heap Size (GB)") + + return plt + + +def size_compare_chart_gc_combined(plt, parent_configurations, bench): + plt.clf() + plt.cla() + for parent_conf in parent_configurations: + min_sizes, max_sizes, child_confs = sizes_per_conf(parent_conf) + equal_sizes = [] + equal_confs = [] + for min_size, max_size, child_conf in zip(min_sizes, max_sizes, child_confs): + if min_size == max_size: + equal_sizes.append(min_size) + equal_confs.append(child_conf) + + # sorts all by size in GB + equal_sizes, equal_confs = zip(*[(x, y) for x, y in sorted(zip(equal_sizes, equal_confs))]) + + mark, _, total = total_gc_bench(equal_confs, bench) + plt.plot(np.array(equal_sizes), total, label=parent_conf + "-sweep") # total (look like sweep) + plt.plot(np.array(equal_sizes), mark, label=parent_conf + "-mark") # mark time + plt.legend() + plt.xlim(xmin=0) + plt.ylim(ymin=0) + plt.xlabel("Heap Size (GB)") + plt.title("{}: GC total time".format(bench)) + plt.ylabel("Time (ms)") + + return plt + + +def size_compare_chart(plt, parent_configurations, bench, warmup, p): + plt = size_compare_chart_generic(plt, parent_configurations, bench, lambda configurations, benchmark, p: percentile_bench(configurations, benchmark, warmup, p), p) + plt.title("{} at {} percentile".format(bench, p)) + plt.ylabel("Run time (ms)") + return plt + + 
+def size_compare_chart_gc(plt, parent_configurations, bench, p):
+    plt = size_compare_chart_generic(plt, parent_configurations, bench, percentile_gc_bench_total, p)
+    plt.title("{}: GC pause time at {} percentile".format(bench, p))
+    plt.ylabel("GC pause time (ms)")
+    return plt
+
+
+def size_compare_chart_gc_mark(plt, parent_configurations, bench, p):
+    plt = size_compare_chart_generic(plt, parent_configurations, bench, percentile_gc_bench_mark, p)
+    plt.title("{}: GC mark pause time at {} percentile".format(bench, p))
+    plt.ylabel("GC mark time (ms)")
+    return plt
+
+
+def size_compare_chart_gc_sweep(plt, parent_configurations, bench, p):
+    plt = size_compare_chart_generic(plt, parent_configurations, bench, percentile_gc_bench_sweep, p)
+    plt.title("{}: GC sweep pause time at {} percentile".format(bench, p))
+    plt.ylabel("GC sweep time (ms)")
+    return plt
+
+
+def percentiles_chart_generic_runs(plt, conf, bench, get_data, first, last, step):
+    plt.clf()
+    plt.cla()
+    max_run = find_last_run(conf, bench)
+
+    for run in np.arange(0, max_run + 1):
+        data = get_data(bench, conf, run)
+        if data.size > 0:
+            percentiles = filter(lambda x: 0 <= x <= 100, np.arange(first, last + step, step))
+            percvalue = np.array([np.percentile(data, perc) for perc in percentiles])
+            plt.plot(percentiles, percvalue, label=run)
+    plt.legend()
+    plt.ylim(ymin=0)
+    plt.xlabel("Percentile")
+    return plt
+
+
+def percentiles_chart_generic(plt, configurations, bench, get_data, first, last, step):
+    plt.clf()
+    plt.cla()
+    for conf in configurations:
+        data = get_data(bench, conf)
+        if data.size > 0:
+            percentiles = filter(lambda x: 0 <= x <= 100, np.arange(first, last + step, step))
+            percvalue = np.array([np.percentile(data, perc) for perc in percentiles])
+            plt.plot(percentiles, percvalue, label=conf)
+    plt.legend()
+    plt.ylim(ymin=0)
+    plt.xlabel("Percentile")
+    return plt
+
+
+def percentiles_chart(plt, configurations, bench, warmup, first=0, last=100, step=0.1):
+    plt = percentiles_chart_generic(plt, configurations, bench,
+                                    lambda bench, conf: config_data_goodruns(bench, conf, warmup), first, last, step)
+    plt.title(bench)
+    plt.ylabel("Run time (ms)")
+    return plt
+
+
+def percentiles_chart_runs(plt, conf, bench, warmup, first=0, last=100, step=0.1):
+    plt = percentiles_chart_generic_runs(plt, conf, bench,
+                                         lambda bench, conf, run: config_data_run(bench, conf, run, warmup), first,
+                                         last, step)
+    plt.title(bench + " " + conf)
+    plt.ylabel("Run time (ms)")
+    return plt
+
+
+def gc_pause_time_chart(plt, configurations, bench, first=0, last=100, step=0.1):
+    plt = percentiles_chart_generic(plt, configurations, bench, gc_stats_total, first, last, step)
+    plt.title(bench + ": Garbage Collector Pause Times")
+    plt.ylabel("GC pause time (ms)")
+    return plt
+
+
+def gc_mark_batch_time_chart(plt, configurations, bench, first=0, last=100, step=0.1):
+    plt = percentiles_chart_generic(plt, configurations, bench, gc_stats_mark_batches, first, last, step)
+    plt.title(bench + ": Mark Batch Times")
+    plt.ylabel("Mark Batch Time (ms)")
+    return plt
+
+
+def gc_sweep_batch_time_chart(plt, configurations, bench, first=0, last=100, step=0.1):
+    plt = percentiles_chart_generic(plt, configurations, bench, gc_stats_sweep_batches, first, last, step)
+    plt.title(bench + ": Sweep Batch Times")
+    plt.ylabel("Sweep Batch Time (ms)")
+    return plt
+
+
+def print_table(configurations, benchmarks, data):
     leading = ['name']
     for conf in configurations:
         leading.append(conf)
     print ','.join(leading)
-    for bench, res in zip(benchmarks, peak_performance()):
+    for bench, res in zip(benchmarks, data):
         print ','.join([bench] + list(map(str, res)))
+
+def thread_id_tostring(n):
+    if int(n) < 0:
+        return "mutator" + n
+    else:
+        return n
+
+
+def gc_gantt_chart(plt, conf, bench, data, only_batches=False):
+    plt.clf()
+    plt.cla()
+    plt.figure(figsize=(100, 24))
+    labels = []
+    collection_events, phase_events_by_thread, batch_events_by_thread, internal_events_by_thread = data
+
+    values = []
+    event_type_to_color = {
+        "mark": ("red", "darkred"), "sweep": ("blue", "darkblue"), "concmark": ("red", "darkred"),
+        "concsweep": ("blue", "darkblue"),
+        "mark_batch": ("red", "darkred"), "sweep_batch": ("blue", "darkblue"), "coalesce_batch": ("green", "darkgreen"),
+        "mark_waiting": ("grey", "dimgrey"), "sync": ("yellow", "gold"),
+    }
+
+    all_keys = phase_events_by_thread.keys() + batch_events_by_thread.keys() + internal_events_by_thread.keys()
+    all_threads = sorted(list(set(all_keys)))
+
+    for thread in all_threads:
+        end = len(labels)
+        labels.append(thread_id_tostring(thread))
+        phase_values = phase_events_by_thread.get(thread, [])
+        batch_values = batch_events_by_thread.get(thread, [])
+        internal_values = internal_events_by_thread.get(thread, [])
+        if not only_batches:
+            values = []
+            for e in collection_events:
+                # [event, start, time] => (start, time)
+                values.append((e[1], e[2]))
+            plt.broken_barh(values, (end, 0.25), color="black", label="collection")
+        for et in phase_event_types:
+            values = []
+            for e in phase_values:
+                event = e[0]
+                start = e[1]
+                time = e[2]
+                if event == et:
+                    values.append((start, time))
+            plt.broken_barh(values, (end + 0.25, 0.25), facecolors=event_type_to_color[et], label=et)
+        for et in batch_events_types:
+            values = []
+            for e in batch_values:
+                event = e[0]
+                start = e[1]
+                time = e[2]
+                if event == et:
+                    values.append((start, time))
+            if only_batches:
+                plt.broken_barh(values, (end, 0.5), facecolors=event_type_to_color[et], label=et)
+            else:
+                plt.broken_barh(values, (end + 0.50, 0.25), facecolors=event_type_to_color[et], label=et)
+        for et in internal_events_types:
+            values = []
+            for e in internal_values:
+                event = e[0]
+                start = e[1]
+                time = e[2]
+                if event == et:
+                    values.append((start, time))
+            if only_batches:
+                plt.broken_barh(values, (end + 0.5, 0.5), facecolors=event_type_to_color[et], label=et)
+            else:
+                plt.broken_barh(values, (end + 0.75, 0.25), facecolors=event_type_to_color[et], label=et)
+
+    plt.yticks(np.arange(len(labels)), labels)
+    plt.xlabel("Time since start (ms)")
+    plt.title(conf + " " + bench + " last garbage collection")
+    plt.legend(handles=[(mpatches.Patch(color='black', label='collection')),
+                        (mpatches.Patch(color='red', label='mark')),
+                        (mpatches.Patch(color='blue', label='sweep')),
+                        (mpatches.Patch(color='green', label='coalesce')),
+                        (mpatches.Patch(color='grey', label='mark waiting')),
+                        (mpatches.Patch(color='yellow', label='sync')),
+                        ])
+
+    return plt
+
+
+def write_md_table(file, configurations, benchmarks, data):
+    header = ['name']
+    header.append(configurations[0])
+    for conf in configurations[1:]:
+        header.append(conf)
+        header.append("")
+    file.write('|')
+    file.write(' | '.join(header))
+    file.write('|\n')
+
+    file.write('|')
+    for _ in header:
+        file.write(' -- |')
+    file.write('\n')
+
+    gmul = np.ones(len(configurations) - 1)
+    gcount = np.zeros(len(configurations) - 1)
+    for bench, res0 in zip(benchmarks, data):
+        base = res0[0]
+        res = [("%.4f" % base)] + sum(map(lambda x: cell(x, base), res0[1:]), [])
+        file.write('|')
+        file.write('|'.join([benchmark_md_link(bench)] + list(res)))
+        file.write('|\n')
+
+        for i, d0 in enumerate(res0[1:]):
+            if d0 != 0 and base != 0:
+                gmul[i] *= (float(d0) / base)
+                gcount[i] += 1
+
+    file.write('| __Geometric mean:__|')
+    for gm, count in zip(gmul, gcount):
+        file.write('| |')
+        if count > 0:
+            gmean = float(gm) ** (1.0 / count)
+            percent_diff = (gmean - 1) * 100
+            percent_diff_cell = ("+" if percent_diff > 0 else "__") + ("%.2f" % percent_diff) + "%" + (
+                "" if percent_diff > 0 else "__")
+            file.write(percent_diff_cell)
+        else:
+            file.write(" ")
+    file.write("|\n")
+
+
+def write_md_table_gc(file, configurations, benchmarks, mark_data, sweep_data, total_data):
+    header = ['name', ""]
+    header.append(configurations[0])
+    for conf in configurations[1:]:
+        header.append(conf)
+        header.append("")
+    file.write('|')
+    file.write(' | '.join(header))
+    file.write('|\n')
+
+    file.write('|')
+    for _ in header:
+        file.write(' -- |')
+    file.write('\n')
+
+    mark_gmul = np.ones(len(configurations) - 1)
+    mark_gcount = np.zeros(len(configurations) - 1)
+    sweep_gmul = np.ones(len(configurations) - 1)
+    sweep_gcount = np.zeros(len(configurations) - 1)
+    total_gmul = np.ones(len(configurations) - 1)
+    total_gcount = np.zeros(len(configurations) - 1)
+    for bench, mark_res0, sweep_res0, total_res0 in zip(benchmarks, mark_data, sweep_data, total_data):
+        for name, res0, gmul, gcount in zip(["mark", "sweep", "total"], [mark_res0, sweep_res0, total_res0],
+                                            [mark_gmul, sweep_gmul, total_gmul],
+                                            [mark_gcount, sweep_gcount, total_gcount]):
+            base = res0[0]
+            res = [("%.4f" % base)] + sum(map(lambda x: cell(x, base), res0[1:]), [])
+
+            if name == "mark":
+                link = [benchmark_md_link(bench)]
+            else:
+                link = [""]
+
+            file.write('|')
+            file.write('|'.join(link + list([name]) + list(res)))
+            file.write('|\n')
+
+            for i, d0 in enumerate(res0[1:]):
+                if d0 != 0 and base != 0:
+                    gmul[i] *= (float(d0) / base)
+                    gcount[i] += 1
+
+    for name, gmul, gcount in zip(["mark", "sweep", "total"],
+                                  [mark_gmul, sweep_gmul, total_gmul],
+                                  [mark_gcount, sweep_gcount, total_gcount]):
+        if name == "mark":
+            link = "__Geometric mean:__"
+        else:
+            link = ""
+
+        file.write('|' + link + '|' + name + '|')
+        for gm, count in zip(gmul, gcount):
+            file.write('| |')
+            if count > 0:
+                gmean = float(gm) ** (1.0 / count)
+                percent_diff = (gmean - 1) * 100
+                percent_diff_cell = ("+" if percent_diff > 0 else "__") + ("%.2f" % percent_diff) + "%" + (
+                    "" if percent_diff > 0 else "__")
+                file.write(percent_diff_cell)
+            else:
+                file.write(" ")
+        file.write("|\n")
+
+
+def cell(x, base):
+    if base > 0:
+        percent_diff = (float(x) / base - 1) * 100
+        percent_diff_cell = ("+" if percent_diff > 0 else "__") + ("%.2f" % percent_diff) + "%" + (
+            "" if percent_diff > 0 else "__")
+    else:
+        percent_diff_cell = "N/A"
+    return [("%.4f" % x), percent_diff_cell]
+
+
+def benchmark_md_link(bench):
+    return "[{}](#{})".format(bench, bench.replace(".", "").lower())
+
+
+def benchmark_short_name(bench):
+    return bench.split(".")[0]
+
+
+def chart_md(md_file, plt, rootdir, name):
+    plt.savefig(rootdir + name, pad_inches=0, bbox_inches='tight')
+    plt.close("all")
+    plt.figure(figsize=(32, 24))
+    md_file.write("![Chart]({})\n\n".format(name))
+
+
+def write_md_file(rootdir, md_file, parent_configurations, configurations, benchmarks, warmup, gc_charts=False,
+                  size_charts=False):
+    interesting_percentiles = [50, 90, 99, 99.9]
+    md_file.write("# Summary\n")
+    for p in interesting_percentiles:
+        md_file.write("## Benchmark run time (ms) at {} percentile \n".format(p))
percentile \n".format(p)) + data = percentile(configurations, benchmarks, warmup, p) + chart_md(md_file, relative_execution_times(plt, configurations, benchmarks, data, p), rootdir, + "relative_percentile_" + str(p) + ".png") + write_md_table(md_file, configurations, benchmarks, data) + + md_file.write("## Benchmark total run time (ms) \n") + data = totals(configurations, benchmarks, warmup) + chart_md(md_file, total_execution_times(plt, configurations, benchmarks, data), rootdir, + "relative_total.png") + write_md_table(md_file, configurations, benchmarks, data) + + if gc_charts: + md_file.write("## Total GC time on Application thread (ms) \n") + mark, sweep, total = total_gc(configurations, benchmarks) + chart_md(md_file, bar_chart_gc_relative(plt, configurations, benchmarks, mark, total), rootdir, + "relative_gc_total.png") + write_md_table_gc(md_file, configurations, benchmarks, mark, sweep, total) + + for p in interesting_percentiles: + md_file.write("## GC pause time (ms) at {} percentile \n".format(p)) + _, _, total = percentile_gc(configurations, benchmarks, p) + chart_md(md_file, relative_gc_pauses(plt, configurations, benchmarks, total, p), rootdir, + "relative_gc_percentile_" + str(p) + ".png") + write_md_table(md_file, configurations, benchmarks, total) + + md_file.write("# Individual benchmarks\n") + for bench in benchmarks: + if not any_run_exists(bench, configurations, 0): + continue + + md_file.write("## ") + md_file.write(bench) + md_file.write("\n") + + chart_md(md_file, percentiles_chart(plt, configurations, bench, warmup), rootdir, "percentile_" + bench + ".png") + chart_md(md_file, percentiles_chart(plt, configurations, bench, warmup, first=95, step=0.01), rootdir, "percentile_95plus_" + bench + ".png") + if gc_charts: + chart_md(md_file, gc_pause_time_chart(plt, configurations, bench), rootdir, + "gc_pause_times_" + bench + ".png") + chart_md(md_file, gc_pause_time_chart(plt, configurations, bench, first=95, step=0.01), rootdir, + "gc_pause_times_95plus_" + bench + ".png") + chart_md(md_file, gc_mark_batch_time_chart(plt, configurations, bench), rootdir, + "gc_mark_batches_" + bench + ".png") + chart_md(md_file, gc_mark_batch_time_chart(plt, configurations, bench, first=95, step=0.01), rootdir, + "gc_mark_batches_95plus_" + bench + ".png") + chart_md(md_file, gc_sweep_batch_time_chart(plt, configurations, bench), rootdir, + "gc_sweep_batches_" + bench + ".png") + chart_md(md_file, gc_sweep_batch_time_chart(plt, configurations, bench, first=95, step=0.01), rootdir, + "gc_sweep_batches_95plus_" + bench + ".png") + if size_charts: + for p in interesting_percentiles: + chart_md(md_file, size_compare_chart_gc_mark(plt, parent_configurations, bench, p), rootdir, + "gc_size_chart" + bench + "percentile_" + str(p) + "_mark.png") + for p in interesting_percentiles: + chart_md(md_file, size_compare_chart_gc_sweep(plt, parent_configurations, bench, p), rootdir, + "gc_size_chart" + bench + "percentile_" + str(p) + "_sweep.png") + for p in interesting_percentiles: + chart_md(md_file, size_compare_chart_gc_sweep(plt, parent_configurations, bench, p), rootdir, + "gc_size_chart" + bench + "percentile_" + str(p) + "_total.png") + chart_md(md_file, size_compare_chart_gc_combined(plt, parent_configurations, bench), rootdir, + "gc_size_chart_total" + bench + ".png") + + if size_charts: + for p in interesting_percentiles: + chart_md(md_file, size_compare_chart(plt, parent_configurations, bench, warmup, p), rootdir, + "size_chart_" + bench + "percentile_" + str(p) + ".png") + + run = 3 + 
+        while run >= 0 and not any_run_exists(bench, configurations, run):
+            run -= 1
+
+        if run >= 0:
+            # chart_md(md_file, example_run_plot(plt, configurations, bench, run, 1000), rootdir,
+            #          "example_run_last1000_" + str(run) + "_" + bench + ".png")
+            chart_md(md_file, example_run_plot(plt, configurations, bench, run), rootdir,
+                     "example_run_full_" + str(run) + "_" + bench + ".png")
+            for conf in configurations:
+                chart_md(md_file, percentiles_chart_runs(plt, conf, bench, warmup), rootdir,
+                         "percentile_" + bench + "_conf" + str(configurations.index(conf)) + ".png")
+                chart_md(md_file, percentiles_chart_runs(plt, conf, bench, warmup, first=95, step=0.01), rootdir,
+                         "percentile_95plus_" + bench + "_conf" + str(configurations.index(conf)) + ".png")
+                # chart_md(md_file, example_all_runs_plot(plt, conf, bench, 1000), rootdir,
+                #          "example_allruns_last1000_conf" + str(configurations.index(conf)) + "_" + bench + ".png")
+                # chart_md(md_file, example_all_runs_plot(plt, conf, bench), rootdir,
+                #          "example_allruns_full_conf" + str(configurations.index(conf)) + "_" + bench + ".png")
+                if gc_charts:
+                    gc_data = gc_events_for_last_n_collections(bench, conf, run)
+                    chart_md(md_file,
+                             gc_gantt_chart(plt, conf, bench, gc_data),
+                             rootdir,
+                             "example_gc_last_" + "_conf" + str(configurations.index(conf)) + "_" + str(run) + "_" + bench + ".png")
+                    chart_md(md_file,
+                             gc_gantt_chart(plt, conf, bench, gc_data, only_batches=True),
+                             rootdir,
+                             "example_gc_last_batches" + "_conf" + str(configurations.index(conf)) + "_" + str(run) + "_" + bench + ".png")
+
+
+def any_run_exists(bench, configurations, run):
+    exists = False
+    for conf in configurations:
+        file = 'results/{}/{}/{}'.format(conf, bench, run)
+        if os.path.exists(file):
+            exists = True
+            break
+    return exists
+
+
+def find_last_run(conf, bench):
+    max_run = 0
+    while True:
+        file = 'results/{}/{}/{}'.format(conf, bench, max_run)
+        if not os.path.exists(file):
+            break
+        max_run += 1
+    max_run -= 1
+
+    return max_run
+
+
+def discover_benchmarks(configurations):
+    benchmarks = []
+    for conf in configurations:
+        parent_folders = next(os.walk(os.path.join("results", conf)))[1]
+        for pf in parent_folders:
+            if is_subconfig(pf):
+                for child in next(os.walk(os.path.join("results", conf, pf)))[1]:
+                    if child not in benchmarks:
+                        benchmarks.append(child)
+            else:
+                if pf not in benchmarks:
+                    benchmarks.append(pf)
+
+    return benchmarks
+
+
+def is_subconfig(subconf):
+    return subconf.startswith("size_") or subconf.startswith("gcthreads_")
+
+
+default_warmup = 2000
+
+
+if __name__ == '__main__':
+    all_configs = next(os.walk("results"))[1]
+    # add subconfigurations (iterate over a copy so the appended entries are not rescanned)
+    for conf in list(all_configs):
+        folder = os.path.join("results", conf)
+        subfolders = next(os.walk(folder))[1]
+        for subconf in subfolders:
+            if is_subconfig(subconf):
+                all_configs.append(os.path.join(conf, subconf))
+
+    results = generate_choices(all_configs)
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--comment", help="comment appended as a suffix to the report name")
+    parser.add_argument("--gc", help="enable charts about the garbage collector", action="store_true")
+    parser.add_argument("--vssize", help="enable charts against heap size", action="store_true")
+    parser.add_argument("--warmup", help="number of iterations to skip before calculating percentiles", type=int, default=default_warmup)
+    parser.add_argument("--benchmark", help="benchmarks to use in the comparison", action='append')
+    parser.add_argument("comparisons", nargs='*', choices=results + ["all"],
+                        default="all")
+    args = parser.parse_args()
+
+    configurations = []
+    if args.comparisons == "all":
+        configurations = all_configs
+    else:
+        for arg in args.comparisons:
+            configurations.append(expand_wild_cards(arg))
+
+    comment = "_vs_".join(configurations).replace(os.sep, "_")
+    if args.comment is not None:
+        comment = args.comment
+
+    parent_configurations = []
+    for conf in configurations:
+        if os.sep in conf:
+            parent = os.path.split(conf)[0]
+        else:
+            parent = conf
+        if parent not in parent_configurations:
+            parent_configurations.append(parent)
+
+    all_benchmarks = discover_benchmarks(parent_configurations)
+
+    if args.benchmark is not None:
+        benchmarks = []
+        for b in args.benchmark:
+            benchmarks += filter(lambda s: s.startswith(b), all_benchmarks)
+    else:
+        excluded_benchmarks = ['list.ListBenchmark', 'mandelbrot.MandelbrotBenchmark']
+        benchmarks = [x for x in all_benchmarks if x not in excluded_benchmarks]
+
+    report_dir = "reports/summary_" + time.strftime('%Y%m%d_%H%M%S') + "_" + comment + "/"
+    plt.figure(figsize=(32, 24))
+    plt.rcParams["font.size"] = 20.0
+    mkdir(report_dir)
+    with open(os.path.join(report_dir, "Readme.md"), 'w+') as md_file:
+        write_md_file(report_dir, md_file, parent_configurations, configurations, benchmarks, args.warmup, args.gc, args.vssize)
+
+    print report_dir
diff --git a/src/main/scala/histogram/Histogram.scala b/src/main/scala/histogram/Histogram.scala
new file mode 100644
index 0000000..b5b4e54
--- /dev/null
+++ b/src/main/scala/histogram/Histogram.scala
@@ -0,0 +1,21 @@
+package histogram
+
+import scala.util.Random
+
+object Histogram extends communitybench.Benchmark {
+  override def run(input: String): Any = {
+    val Array(items, k) = input.split(",").map(_.toInt)
+    var histogram = Map.empty[Int, Int]
+    val random = new Random(13371337)
+    (1 to items).foreach {
+      _ =>
+        val key = random.nextInt(k)
+        val newValue = histogram.getOrElse(key, 0) + 1
+        histogram += key -> newValue
+    }
+    histogram.values.sum == items
+  }
+
+  override def main(args: Array[String]): Unit =
+    super.main(args)
+}
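+
+// Usage sketch (the numbers below are only an example): the input string is a
+// comma-separated pair "<items>,<k>"; run("1000000,128") inserts one million
+// random keys drawn from [0, 128) into an immutable Map and returns true when
+// the accumulated counts sum back to the item count.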