diff --git a/.gitignore b/.gitignore index c37ddf5..8542310 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,9 @@ bin/.coursier bin/.scalafmt* results/ +reports/ *.iprof -.idea \ No newline at end of file +.idea +*.pid +jobs.sh +logs/ \ No newline at end of file diff --git a/README.md b/README.md index d200b09..bc560a1 100644 --- a/README.md +++ b/README.md @@ -13,8 +13,89 @@ as-is and summarized in a separate post-processing step. python scripts/run.py ``` -## Viewing result summary +## Creating result summary ``` python scripts/summary.py ``` + +The reports can be viewed in the `reports` directory. + +## Advanced use + +### Comparing specific versions + +You can run just the configurations you are interested in +```bash +scripts/run.py stable latest +``` + +Compare the latest `stable` release vs the `latest` snapshot +```bash +REPORT=$(scripts/summary.py stable latest) +``` + +### Specifying the number of runs, batches, and tests run in parallel +```bash +scripts/run.py --par 4 --runs 50 --batches 3000 stable +# 50 runs (4 in parallel) each with 3000 batches for the stable release. +``` + +These settings impact accuracy, which is why the names of the results folders include them, in this case `scala-native-0.3.8-r50-p40-b3000`. +Note that you can also use `stable-r50-p40-b3000` with `summary.py`. + +### Comparing an experimental feature with latest from master +1. Specify a suffix to identify it +```bash +NAME=PR9001-adding-a-kitchen-sink +SHA1=adcd124eee +``` +2. Run the benchmark and get the summary report + +If you provide a SHA1 or reference, the project in `../scala-native` will be compiled automatically. +```bash +SHA1=adcd124eee +scripts/run.py latest@master +scripts/run.py --suffix "$NAME" latest@"$SHA1" && +REPORT=$(scripts/summary.py --comment "$NAME" latest latest@"$SHA1"_"$NAME") +``` + +Alternatively, you can build the scala-native project from your branch manually and run +```bash +scripts/run.py --suffix "$NAME" latest && +REPORT=$(scripts/summary.py --comment "$NAME" latest latest_"$NAME") +``` + +## Persisting reports +The following commands assume that you have a git repository checked out at `gh-pages` under `../scala-native-benchmark-results`. + +They also assume that there is an executable script `just-upload.sh` in the root of that repository. +```bash +#just-upload.sh + + +#!/bin/bash +# move to the directory of the script +cd $(dirname "$0") + +git add . 
&& +git commit -m "automated commit" && git push +``` + +### saving experiment data +```bash +cp -r results/ ../scala-native-benchmark-results && +../scala-native-benchmark-results/just-upload.sh +``` + +### restoring experiment data +```bash +cp -r ../scala-native-benchmark-results results/ +``` + +### uploading a report +```bash +mkdir -p ../scala-native-benchmark-results/reports +cp -r "$REPORT" ../scala-native-benchmark-results/reports && +../scala-native-benchmark-results/just-upload.sh +``` \ No newline at end of file diff --git a/build.sbt b/build.sbt index 9874e27..ae87f31 100644 --- a/build.sbt +++ b/build.sbt @@ -1 +1,6 @@ scalaVersion := "2.11.12" +enablePlugins(ScalaNativePlugin) +nativeLinkStubs := true +nativeGC := "immix" +nativeMode := "release" +nativeLTO := "thin" diff --git a/confs/jvm/run b/confs/jvm/run index a4d5cce..3700501 100644 --- a/confs/jvm/run +++ b/confs/jvm/run @@ -1 +1 @@ -java -Xmx1024M -Xms1024M -classpath target/scala-2.11/classes:$HOME/.ivy2/cache/org.scala-lang/scala-library/jars/scala-library-2.11.12.jar $BENCH +java $JAVA_ARGS -classpath target/scala-2.11/classes:$HOME/.ivy2/cache/org.scala-lang/scala-library/jars/scala-library-2.11.12.jar $BENCH diff --git a/confs/native-image-pgo/run b/confs/native-image-pgo/run index a6b6dfc..baed793 100644 --- a/confs/native-image-pgo/run +++ b/confs/native-image-pgo/run @@ -1,2 +1,2 @@ -target/native-image-pgo-bench -Xmx1g -Xmx1g +target/native-image-pgo-bench $JAVA_SIZE_ARGS diff --git a/confs/native-image/run b/confs/native-image/run index 456cc5a..99bbc6e 100644 --- a/confs/native-image/run +++ b/confs/native-image/run @@ -1 +1 @@ -target/native-image-bench -Xmx1g -Xmx1g +target/native-image-bench $JAVA_SIZE_ARGS diff --git a/confs/scala-native-0.3.7/plugins.sbt b/confs/scala-native-0.3.7/plugins.sbt deleted file mode 100644 index afc9d5a..0000000 --- a/confs/scala-native-0.3.7/plugins.sbt +++ /dev/null @@ -1 +0,0 @@ -addSbtPlugin("org.scala-native" % "sbt-scala-native" % "0.3.7") diff --git a/confs/scala-native-0.3.7/build.sbt b/confs/scala-native-0.3.8/build.sbt similarity index 100% rename from confs/scala-native-0.3.7/build.sbt rename to confs/scala-native-0.3.8/build.sbt diff --git a/confs/scala-native-0.3.7/compile b/confs/scala-native-0.3.8/compile similarity index 100% rename from confs/scala-native-0.3.7/compile rename to confs/scala-native-0.3.8/compile diff --git a/confs/scala-native-0.3.8/plugins.sbt b/confs/scala-native-0.3.8/plugins.sbt new file mode 100644 index 0000000..2d38aa0 --- /dev/null +++ b/confs/scala-native-0.3.8/plugins.sbt @@ -0,0 +1 @@ +addSbtPlugin("org.scala-native" % "sbt-scala-native" % "0.3.8") diff --git a/confs/scala-native-0.3.7/run b/confs/scala-native-0.3.8/run similarity index 100% rename from confs/scala-native-0.3.7/run rename to confs/scala-native-0.3.8/run diff --git a/confs/scala-native-0.3.9-SNAPSHOT-commix/build.sbt b/confs/scala-native-0.3.9-SNAPSHOT-commix/build.sbt new file mode 100644 index 0000000..2104b7a --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT-commix/build.sbt @@ -0,0 +1,6 @@ +scalaVersion := "2.11.12" +enablePlugins(ScalaNativePlugin) +nativeLinkStubs := true +nativeGC := "commix" +nativeMode := "release" +nativeLTO := "thin" diff --git a/confs/scala-native-0.3.9-SNAPSHOT-commix/compile b/confs/scala-native-0.3.9-SNAPSHOT-commix/compile new file mode 100644 index 0000000..2f3f09f --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT-commix/compile @@ -0,0 +1 @@ +nativeLink diff --git a/confs/scala-native-0.3.9-SNAPSHOT-commix/plugins.sbt 
b/confs/scala-native-0.3.9-SNAPSHOT-commix/plugins.sbt new file mode 100644 index 0000000..c1423b6 --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT-commix/plugins.sbt @@ -0,0 +1 @@ +addSbtPlugin("org.scala-native" % "sbt-scala-native" % "0.3.9-SNAPSHOT") diff --git a/confs/scala-native-0.3.9-SNAPSHOT-commix/run b/confs/scala-native-0.3.9-SNAPSHOT-commix/run new file mode 100644 index 0000000..ae89e34 --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT-commix/run @@ -0,0 +1 @@ +target/scala-2.11/scala-native-benchmarks-out diff --git a/confs/scala-native-0.3.9-SNAPSHOT-full-lto/build.sbt b/confs/scala-native-0.3.9-SNAPSHOT-full-lto/build.sbt new file mode 100644 index 0000000..fae449a --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT-full-lto/build.sbt @@ -0,0 +1,6 @@ +scalaVersion := "2.11.12" +enablePlugins(ScalaNativePlugin) +nativeLinkStubs := true +nativeGC := "immix" +nativeMode := "release" +nativeLTO := "full" diff --git a/confs/scala-native-0.3.9-SNAPSHOT-full-lto/compile b/confs/scala-native-0.3.9-SNAPSHOT-full-lto/compile new file mode 100644 index 0000000..2f3f09f --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT-full-lto/compile @@ -0,0 +1 @@ +nativeLink diff --git a/confs/scala-native-0.3.9-SNAPSHOT-full-lto/plugins.sbt b/confs/scala-native-0.3.9-SNAPSHOT-full-lto/plugins.sbt new file mode 100644 index 0000000..c1423b6 --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT-full-lto/plugins.sbt @@ -0,0 +1 @@ +addSbtPlugin("org.scala-native" % "sbt-scala-native" % "0.3.9-SNAPSHOT") diff --git a/confs/scala-native-0.3.9-SNAPSHOT-full-lto/run b/confs/scala-native-0.3.9-SNAPSHOT-full-lto/run new file mode 100644 index 0000000..ae89e34 --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT-full-lto/run @@ -0,0 +1 @@ +target/scala-2.11/scala-native-benchmarks-out diff --git a/confs/scala-native-0.3.9-SNAPSHOT-no-lto/build.sbt b/confs/scala-native-0.3.9-SNAPSHOT-no-lto/build.sbt new file mode 100644 index 0000000..2fc1873 --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT-no-lto/build.sbt @@ -0,0 +1,6 @@ +scalaVersion := "2.11.12" +enablePlugins(ScalaNativePlugin) +nativeLinkStubs := true +nativeGC := "immix" +nativeMode := "release" +nativeLTO := "none" diff --git a/confs/scala-native-0.3.9-SNAPSHOT-no-lto/compile b/confs/scala-native-0.3.9-SNAPSHOT-no-lto/compile new file mode 100644 index 0000000..2f3f09f --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT-no-lto/compile @@ -0,0 +1 @@ +nativeLink diff --git a/confs/scala-native-0.3.9-SNAPSHOT-no-lto/plugins.sbt b/confs/scala-native-0.3.9-SNAPSHOT-no-lto/plugins.sbt new file mode 100644 index 0000000..c1423b6 --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT-no-lto/plugins.sbt @@ -0,0 +1 @@ +addSbtPlugin("org.scala-native" % "sbt-scala-native" % "0.3.9-SNAPSHOT") diff --git a/confs/scala-native-0.3.9-SNAPSHOT-no-lto/run b/confs/scala-native-0.3.9-SNAPSHOT-no-lto/run new file mode 100644 index 0000000..ae89e34 --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT-no-lto/run @@ -0,0 +1 @@ +target/scala-2.11/scala-native-benchmarks-out diff --git a/confs/scala-native-0.3.9-SNAPSHOT/build.sbt b/confs/scala-native-0.3.9-SNAPSHOT/build.sbt new file mode 100644 index 0000000..ae87f31 --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT/build.sbt @@ -0,0 +1,6 @@ +scalaVersion := "2.11.12" +enablePlugins(ScalaNativePlugin) +nativeLinkStubs := true +nativeGC := "immix" +nativeMode := "release" +nativeLTO := "thin" diff --git a/confs/scala-native-0.3.9-SNAPSHOT/compile b/confs/scala-native-0.3.9-SNAPSHOT/compile new file 
mode 100644 index 0000000..2f3f09f --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT/compile @@ -0,0 +1 @@ +nativeLink diff --git a/confs/scala-native-0.3.9-SNAPSHOT/plugins.sbt b/confs/scala-native-0.3.9-SNAPSHOT/plugins.sbt new file mode 100644 index 0000000..c1423b6 --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT/plugins.sbt @@ -0,0 +1 @@ +addSbtPlugin("org.scala-native" % "sbt-scala-native" % "0.3.9-SNAPSHOT") diff --git a/confs/scala-native-0.3.9-SNAPSHOT/run b/confs/scala-native-0.3.9-SNAPSHOT/run new file mode 100644 index 0000000..ae89e34 --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT/run @@ -0,0 +1 @@ +target/scala-2.11/scala-native-benchmarks-out diff --git a/confs/scala-native-0.4.0-SNAPSHOT-commix/build.sbt b/confs/scala-native-0.4.0-SNAPSHOT-commix/build.sbt new file mode 100644 index 0000000..2104b7a --- /dev/null +++ b/confs/scala-native-0.4.0-SNAPSHOT-commix/build.sbt @@ -0,0 +1,6 @@ +scalaVersion := "2.11.12" +enablePlugins(ScalaNativePlugin) +nativeLinkStubs := true +nativeGC := "commix" +nativeMode := "release" +nativeLTO := "thin" diff --git a/confs/scala-native-0.4.0-SNAPSHOT-commix/compile b/confs/scala-native-0.4.0-SNAPSHOT-commix/compile new file mode 100644 index 0000000..2f3f09f --- /dev/null +++ b/confs/scala-native-0.4.0-SNAPSHOT-commix/compile @@ -0,0 +1 @@ +nativeLink diff --git a/confs/scala-native-0.4.0-SNAPSHOT-commix/plugins.sbt b/confs/scala-native-0.4.0-SNAPSHOT-commix/plugins.sbt new file mode 100644 index 0000000..2a63bf0 --- /dev/null +++ b/confs/scala-native-0.4.0-SNAPSHOT-commix/plugins.sbt @@ -0,0 +1 @@ +addSbtPlugin("org.scala-native" % "sbt-scala-native" % "0.4.0-SNAPSHOT") diff --git a/confs/scala-native-0.4.0-SNAPSHOT-commix/run b/confs/scala-native-0.4.0-SNAPSHOT-commix/run new file mode 100644 index 0000000..ae89e34 --- /dev/null +++ b/confs/scala-native-0.4.0-SNAPSHOT-commix/run @@ -0,0 +1 @@ +target/scala-2.11/scala-native-benchmarks-out diff --git a/confs/scala-native-0.4.0-SNAPSHOT/build.sbt b/confs/scala-native-0.4.0-SNAPSHOT/build.sbt new file mode 100644 index 0000000..ae87f31 --- /dev/null +++ b/confs/scala-native-0.4.0-SNAPSHOT/build.sbt @@ -0,0 +1,6 @@ +scalaVersion := "2.11.12" +enablePlugins(ScalaNativePlugin) +nativeLinkStubs := true +nativeGC := "immix" +nativeMode := "release" +nativeLTO := "thin" diff --git a/confs/scala-native-0.4.0-SNAPSHOT/compile b/confs/scala-native-0.4.0-SNAPSHOT/compile new file mode 100644 index 0000000..2f3f09f --- /dev/null +++ b/confs/scala-native-0.4.0-SNAPSHOT/compile @@ -0,0 +1 @@ +nativeLink diff --git a/confs/scala-native-0.4.0-SNAPSHOT/plugins.sbt b/confs/scala-native-0.4.0-SNAPSHOT/plugins.sbt new file mode 100644 index 0000000..2a63bf0 --- /dev/null +++ b/confs/scala-native-0.4.0-SNAPSHOT/plugins.sbt @@ -0,0 +1 @@ +addSbtPlugin("org.scala-native" % "sbt-scala-native" % "0.4.0-SNAPSHOT") diff --git a/confs/scala-native-0.4.0-SNAPSHOT/run b/confs/scala-native-0.4.0-SNAPSHOT/run new file mode 100644 index 0000000..ae89e34 --- /dev/null +++ b/confs/scala-native-0.4.0-SNAPSHOT/run @@ -0,0 +1 @@ +target/scala-2.11/scala-native-benchmarks-out diff --git a/input/histogram.Histogram b/input/histogram.Histogram new file mode 100644 index 0000000..3365996 --- /dev/null +++ b/input/histogram.Histogram @@ -0,0 +1 @@ +300000,100000 \ No newline at end of file diff --git a/output/histogram.Histogram b/output/histogram.Histogram new file mode 100644 index 0000000..f32a580 --- /dev/null +++ b/output/histogram.Histogram @@ -0,0 +1 @@ +true \ No newline at end of file diff --git 
a/project/plugins.sbt b/project/plugins.sbt index afc9d5a..2a63bf0 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -1 +1 @@ -addSbtPlugin("org.scala-native" % "sbt-scala-native" % "0.3.7") +addSbtPlugin("org.scala-native" % "sbt-scala-native" % "0.4.0-SNAPSHOT") diff --git a/scripts/install-dependecies.sh b/scripts/install-dependecies.sh new file mode 100755 index 0000000..3b348d2 --- /dev/null +++ b/scripts/install-dependecies.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +sudo pip2 install numpy matplotlib +sudo apt update && sudo apt install python-tk \ No newline at end of file diff --git a/scripts/notebook.ipynb b/scripts/notebook.ipynb index 75c9347..d667762 100644 --- a/scripts/notebook.ipynb +++ b/scripts/notebook.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "collapsed": false, "scrolled": true @@ -11,7 +11,6 @@ { "ename": "ModuleNotFoundError", "evalue": "No module named 'matplotlib'", - "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", @@ -24,7 +23,8 @@ "\u001b[0;32m/usr/lib/python3/dist-packages/IPython/core/interactiveshell.py\u001b[0m in \u001b[0;36menable_matplotlib\u001b[0;34m(self, gui)\u001b[0m\n\u001b[1;32m 2933\u001b[0m \"\"\"\n\u001b[1;32m 2934\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mIPython\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcore\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mpylabtools\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mpt\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2935\u001b[0;31m \u001b[0mgui\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbackend\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfind_gui_and_backend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgui\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpylab_gui_select\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2936\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2937\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mgui\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;34m'inline'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/lib/python3/dist-packages/IPython/core/pylabtools.py\u001b[0m in \u001b[0;36mfind_gui_and_backend\u001b[0;34m(gui, gui_select)\u001b[0m\n\u001b[1;32m 257\u001b[0m \"\"\"\n\u001b[1;32m 258\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 259\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mmatplotlib\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 260\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 261\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mgui\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mgui\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;34m'auto'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'matplotlib'" - ] + ], + "output_type": "error" } ], "source": [ diff --git a/scripts/run.py b/scripts/run.py index 187069b..bac70cf 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -1,23 +1,29 @@ -#!/usr/bin/env python -import sys +#!/usr/bin/env python2 import os import errno import subprocess as subp import shutil as sh +import argparse +import multiprocessing as mp +import itertools + + def mkdir(path): try: os.makedirs(path) - except OSError as exc: # Python >2.5 + except OSError as exc: # 
Python >2.5 if exc.errno == errno.EEXIST and os.path.isdir(path): pass else: raise + def slurp(path): with open(path) as f: return f.read().strip() + def where(cmd): if os.path.isfile(cmd): return cmd @@ -30,82 +36,619 @@ def where(cmd): else: return None -def run(cmd): + +def try_run(cmd, env=None, wd=None): + try: + print run(cmd, env, wd) + return True + except subp.CalledProcessError as err: + print err.output + return False + + +def try_run_silent(cmd, env=None, wd=None): + try: + run(cmd, env, wd) + return True + except subp.CalledProcessError as err: + print err.output + return False + + +def run(cmd, env=None, wd=None): print(">>> " + str(cmd)) - return subp.check_output(cmd) + if wd == None: + return subp.check_output(cmd, stderr=subp.STDOUT, env=env) + else: + return subp.check_output(cmd, stderr=subp.STDOUT, env=env, cwd=wd) + + +scala_native_dir = os.path.join("..", "scala-native") +upload_dir = os.path.abspath(os.path.join("..", "scala-native-benchmark-results")) +local_scala_repo_dir = os.path.abspath(os.path.join("..", "scala-2.11.11-only")) + + +def git_add(dir, *items): + return try_run(["git", "add"] + list(items), wd=dir) + + +def git_commit(dir, msg): + return try_run(["git", "commit", "-m", msg], wd=dir) + + +def git_pull(dir): + my_env = os.environ.copy() + my_env["GIT_MERGE_AUTOEDIT"] = "no" + return try_run(["git", "pull"], env=my_env, wd=dir) + + +def git_push(dir): + return try_run(['git', 'push'], wd=dir) + + +def git_fetch(dir): + return try_run(['git', 'fetch', '--all'], wd=dir) + + +def get_ref(ref): + git_rev_parse = ['git', 'rev-parse', '--short', ref] + try: + return run(git_rev_parse, wd=scala_native_dir).strip() + except subp.CalledProcessError as err: + out = err.output + print "Cannot find", ref, "!" + print out + return None + + +def compile_scala_native(ref, sha1): + if ref != "HEAD": + git_checkout = ['git', 'checkout', sha1] + try: + print run(git_checkout, wd=scala_native_dir) + except subp.CalledProcessError as err: + out = err.output + print "Cannot checkout", sha1, "!" + print out + return False + + compile_cmd = [sbt, '-no-colors', '-J-Xmx2G', 'rebuild', 'sandbox/run'] + compile_env = os.environ.copy() + compile_env["SCALANATIVE_GC"] = "immix" + if os.path.isdir(local_scala_repo_dir): + compile_env["SCALANATIVE_SCALAREPO"] = local_scala_repo_dir + + try: + run(compile_cmd, compile_env, wd=scala_native_dir) + return True + except subp.CalledProcessError as err: + out = err.output + print "Compilation failure!" 
+ print out + return False -def compile(bench, compilecmd): - cmd = [sbt, '-J-Xmx2G', 'clean'] + +def compile(conf, bench, compilecmd, gcstats, debug, trace, extra_args): + cmd = [sbt, '-no-colors', '-J-Xmx2G', 'clean'] cmd.append('set mainClass in Compile := Some("{}")'.format(bench)) + if conf.startswith("scala-native"): + if debug or trace: + cmd.append('set nativeCompileOptions ++= Seq("-g", "-DDEBUG_ASSERT")') + if trace: + cmd.append('set nativeCompileOptions +="-DDEBUG_PRINT"') + if gcstats != None: + cmd.append('set nativeCompileOptions +="-DENABLE_GC_STATS{}"'.format(gcstats)) + for k,v in extra_args.iteritems(): + if k.endswith("?"): + cmd.append('set nativeCompileOptions +="-D{}"'.format(k[:-1])) + else: + cmd.append('set nativeCompileOptions +="-D{}={}"'.format(k,v)) cmd.append(compilecmd) - return run(cmd) + return try_run_silent(cmd) + sbt = where('sbt') -benchmarks = [ - 'bounce.BounceBenchmark', - 'list.ListBenchmark', - 'richards.RichardsBenchmark', - 'queens.QueensBenchmark', - 'permute.PermuteBenchmark', - 'deltablue.DeltaBlueBenchmark', - 'tracer.TracerBenchmark', - 'brainfuck.BrainfuckBenchmark', - 'json.JsonBenchmark', - 'cd.CDBenchmark', - 'kmeans.KmeansBenchmark', - 'gcbench.GCBenchBenchmark', - 'mandelbrot.MandelbrotBenchmark', - 'nbody.NbodyBenchmark', - 'sudoku.SudokuBenchmark', +default_benchmarks = [ + 'bounce.BounceBenchmark', + 'richards.RichardsBenchmark', + 'queens.QueensBenchmark', + 'permute.PermuteBenchmark', + 'deltablue.DeltaBlueBenchmark', + 'tracer.TracerBenchmark', + 'brainfuck.BrainfuckBenchmark', + 'json.JsonBenchmark', + 'cd.CDBenchmark', + 'kmeans.KmeansBenchmark', + 'gcbench.GCBenchBenchmark', + 'nbody.NbodyBenchmark', + 'sudoku.SudokuBenchmark', +] + +all_benchmarks = default_benchmarks + [ + 'histogram.Histogram', + 'list.ListBenchmark', + 'mandelbrot.MandelbrotBenchmark', +] + +stable = 'scala-native-0.3.8' +latest = 'scala-native-0.4.0-SNAPSHOT' +baseline = [ + 'jvm', + stable, ] +default = baseline + [latest] + +confs_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) + "/confs" -configurations = [ - 'jvm', - 'scala-native-0.3.7', +configurations = all_configs = next(os.walk(confs_path))[1] + +graalvm = [ + 'native-image', + 'native-image-pgo', ] if 'GRAALVM_HOME' in os.environ: - configurations += [ - 'native-image', - 'native-image-pgo', - ] + baseline += graalvm +else: + for g in graalvm: + all_configs.remove(g) -runs = 20 -batches = 3000 +default_runs = 20 +default_batches = 3000 +default_par = 1 batch_size = 1 + +def expand_wild_cards(arg): + if arg.startswith("latest"): + return latest + arg[len("latest"):] + elif arg.startswith("stable"): + return stable + arg[len("stable"):] + else: + return arg + + +def benchmark_parse(arg): + parts = arg.split("@") + if len(parts) == 2: + return parts[0], parts[1] + else: + return arg, None + + +def ref_parse(arg): + parts = arg.split("@") + if len(parts) == 3: + return parts[0], (parts[2] + "/" + parts[1]) + elif len(parts) == 2: + return parts[0], parts[1] + else: + return arg, None + + +def size_parse(arg): + parts = arg.split(":") + if len(parts) == 1: + return [arg, arg] + else: + return parts + + +def generate_choices(direct_choices): + results = direct_choices + for dir in direct_choices: + if dir.startswith(latest): + results += ["latest" + dir[len(latest):]] + if dir.startswith(stable): + results += ["stable" + dir[len(stable):]] + return results + + +def single_run(to_run): + n = to_run["n"] + runs = to_run["runs"] + unexpanded_cmd = to_run["cmd"] + resultsdir = 
to_run["resultsdir"] + conf = to_run["conf"] + bench = to_run["bench"] + gcstats = to_run["gcstats"] + minsize = to_run["size"][0] + maxsize = to_run["size"][1] + gcThreads = to_run["gcThreads"] + perf = to_run["perf"] + + print('--- run {}/{}'.format(n, runs)) + my_env = os.environ.copy() + if gcstats != None: + my_env["SCALANATIVE_STATS_FILE"] = os.path.join(resultsdir, str(n) + ".gc.csv") + + if minsize != "default": + my_env["SCALANATIVE_MIN_HEAP_SIZE"] = minsize + # in 0.4.0 the heap settings names changed. + my_env["SCALANATIVE_MIN_SIZE"] = minsize + else: + if "SCALANATIVE_MIN_HEAP_SIZE" in my_env: + del my_env["SCALANATIVE_MIN_HEAP_SIZE"] + if "SCALANATIVE_MIN_SIZE" in my_env: + del my_env["SCALANATIVE_MIN_SIZE"] + + + if maxsize != "default": + my_env["SCALANATIVE_MAX_HEAP_SIZE"] = maxsize + # in 0.4.0 the heap settings names changed. + my_env["SCALANATIVE_MAX_SIZE"] = maxsize + else: + if "SCALANATIVE_MAX_HEAP_SIZE" in my_env: + del my_env["SCALANATIVE_MAX_HEAP_SIZE"] + if "SCALANATIVE_MAX_SIZE" in my_env: + del my_env["SCALANATIVE_MAX_SIZE"] + + if gcThreads != "default": + my_env["SCALANATIVE_GC_THREADS"] = gcThreads + elif "SCALANATIVE_GC_THREADS" in my_env: + del my_env["SCALANATIVE_GC_THREADS"] + + if perf == "sudo": + myuser = os.environ.get('USER') + cmd = ["sudo", "-E", "perf", "record", "-o", os.path.join(resultsdir, str(n) + ".perf"), "-g", "-F", "15000", "--", "sudo", "-E", "-u", str(myuser)] + elif perf == "normal": + cmd = ["perf", "record", "-o", os.path.join(resultsdir, str(n) + ".perf"), "-g", "-F", "15000", "--"] + else: + cmd = [] + for token in unexpanded_cmd: + if token == "$JAVA_ARGS": + if minsize != "default": + cmd += ["-Xms" + minsize] + if maxsize != "default": + cmd += ["-Xmx" + maxsize] + if gcstats: + cmd += ["-XX:+PrintGCApplicationStoppedTime", "-Xloggc:" + os.path.join(resultsdir, str(n) + ".gc.txt")] + else: + cmd += [token] + + try: + out = run(cmd, my_env) + with open(os.path.join(resultsdir, str(n)), 'w+') as resultfile: + resultfile.write(out) + return [] + except subp.CalledProcessError as err: + out = err.output + print "Failure!" + print out + with open(os.path.join(resultsdir, str(n) + ".failed"), 'w+') as failfile: + failfile.write(out) + return [dict(conf=conf, bench=bench, run=n)] + + +def upload(subconfig_dir, symlink, use_git, overwrite): + if os.path.isdir(upload_dir): + target = os.path.join(upload_dir, subconfig_dir) + targetComplete = os.path.isfile(os.path.join(target, ".complete")) + targetExisted = os.path.isdir(target) + if (targetComplete and overwrite) or targetExisted: + mkdir(os.path.join("..", target)) + sh.rmtree(target, ignore_errors=True) + if not targetExisted or overwrite: + sh.copytree(subconfig_dir, target, symlinks=True) + if use_git: + if symlink != None: + git_add(upload_dir, symlink) + if git_add(upload_dir, target) \ + and git_commit(upload_dir, "automated commit " + subconfig_dir) \ + and git_pull(upload_dir) \ + and git_push(upload_dir): + pass + else: + print "WARN", upload_dir, "does not exist!" 
+ + +def create_symlink(generalized_dir, root_dir): + try: + os.unlink(generalized_dir) + except: + pass + print "creating symlink", generalized_dir, "->", root_dir + os.symlink(os.path.split(root_dir)[1], generalized_dir) + + if __name__ == "__main__": - for conf in configurations: - for bench in benchmarks: - print('--- conf: {}, bench: {}'.format(conf, bench)) + parser = argparse.ArgumentParser() + parser.add_argument("--suffix", help="suffix added to results") + parser.add_argument("--perf", help="records perf data", action="store_true") + parser.add_argument("--sperf", help="records perf data using sudo rights", action="store_true") + parser.add_argument("--runs", help="number of runs", type=int, default=default_runs) + parser.add_argument("--batches", help="number of batches per run", type=int, default=default_batches) + parser.add_argument("--benchmark", help="benchmarks to run", action='append') + parser.add_argument("--argnames", help="compile arguments to set, mark flags with a '?' at the end, split with ','", type=str) + parser.add_argument("--argv", help="argument values, split with ',', booleans as true or false", action='append') + parser.add_argument("--size", help="different size settings to use", action='append') + parser.add_argument("--gcthreads", help="different number of garbage collection threads to use", action='append') + parser.add_argument("--par", help="number of parallel processes", type=int, default=default_par) + parser.add_argument("--gc", help="gather gc statistics", action="store_true") + parser.add_argument("--gcv", help="gather gc statistics verbose - batches", action="store_true") + parser.add_argument("--gcvv", help="gather gc statistics very verbose - sync events", action="store_true") + parser.add_argument("--upload", help="copy the results to ../scala-native-benchmark-results", action="store_true") + parser.add_argument("--gitupload", help="copy the results to ../scala-native-benchmark-results and commit and push to git", action="store_true") + parser.add_argument("--overwrite", help="overwrite old results", action="store_true") + parser.add_argument("--append", help="do not delete old data", action="store_true") + parser.add_argument("--gcdebug", help="enable debug for GCs", action="store_true") + parser.add_argument("--gctrace", help="verbose logging for GCs to stdout", action="store_true") + parser.add_argument("set", nargs='*', default="default") + args = parser.parse_args() + print args - input = slurp(os.path.join('input', bench)) - output = slurp(os.path.join('output', bench)) - compilecmd = slurp(os.path.join('confs', conf, 'compile')) - runcmd = slurp(os.path.join('confs', conf, 'run')).replace('$BENCH', bench).replace('$HOME', os.environ['HOME']).split(' ') + runs = args.runs + batches = args.batches + par = args.par - if os.path.exists(os.path.join('confs', conf, 'build.sbt')): - sh.copyfile(os.path.join('confs', conf, 'build.sbt'), 'build.sbt') + if args.benchmark != None: + benchmarks = [] + for b in args.benchmark: + if b == "default": + benchmarks += default_benchmarks else: - os.remove('build.sbt') + bname, bargs = benchmark_parse(b) + matching = filter(lambda s: s.startswith(bname), all_benchmarks) + if bargs != None: + benchmarks += map(lambda x: (x, bargs), matching) + else: + benchmarks += matching + else: + benchmarks = default_benchmarks + + if args.size != None: + sizes = [] + for subconf_str in args.size: + parsed = size_parse(subconf_str) + if parsed == ["default", "default"]: + sizes = [parsed] + sizes + else: + sizes += 
[parsed] + else: + sizes = [["default", "default"]] + + if args.gcthreads != None: + gcThreadCounts = args.gcthreads + else: + gcThreadCounts = ["default"] + + configurations = [] + for choice in args.set: + expanded = expand_wild_cards(choice) + if expanded == "baseline": + configurations += baseline + elif expanded == "default": + configurations = default + else: + configurations += [expanded] + + print "configurations:", configurations + print "benchmarks:", benchmarks + print "heap sizes:", sizes + print "GC thread counts:", gcThreadCounts + + should_fetch = False + for conf in configurations: + if '@' in conf and not conf.endswith("@HEAD"): + should_fetch = True + break + + if args.argnames != None and args.argv != None: + derived_configs = [] + argnames = args.argnames.split(",") + for valset in args.argv : + values = valset.split(",") + suffix = "-a" + ("-".join(values)) + extra_args = dict() + for (name, value) in zip(argnames, values): + if name.endswith("?"): + if value in ["1", "true", "TRUE", "True"]: + extra_args[name[:-1]] = True + else: + extra_args[name] = value + derived_configs.append((suffix, extra_args)) + else: + derived_configs = [("", dict())] + + if should_fetch: + git_fetch(scala_native_dir) + + suffix = "" + if runs != default_runs: + suffix += "-r" + str(runs) + if batches != default_batches: + suffix += "-b" + str(batches) + if par != default_par: + suffix += "-p" + str(par) + + if args.sperf: + perf = "sudo" + suffix +="-Perf" + elif args.perf: + perf = "normal" + suffix +="-Perf" + else: + perf = None - if os.path.exists(os.path.join('confs', conf, 'plugins.sbt')): - sh.copyfile(os.path.join('confs', conf, 'plugins.sbt'), 'project/plugins.sbt') + if args.gcvv: + suffix += "-gcvv" + gcstats = "_SYNC" + elif args.gcv: + suffix += "-gcv" + gcstats = "_BATCHES" + elif args.gc: + suffix += "-gc" + gcstats = "" + else: + gcstats = None + + if args.gcdebug: + suffix += "-gcdebug" + if args.gctrace: + suffix += "-gctrace" + if args.suffix is not None: + suffix += "_" + args.suffix + + failed = [] + skipped = [] + compile_fail = [] + result_dirs = [] + symlinks = [] + + pool = None + if par > 1: + pool = mp.Pool(par) + + for conf in configurations: + conf_name, ref = ref_parse(conf) + + if ref == None: + sha1 = None + else: + sha1 = get_ref(ref) + if sha1 == None: + compile_fail += [conf] + continue + + if sha1 != None: + success = compile_scala_native(ref, sha1) + if not success: + compile_fail += [conf] + continue + + # derived configurations + for (der_suffix, extra_args) in derived_configs: + generalized_dir = os.path.join('results', conf + suffix + der_suffix) + if sha1 == None: + root_dir = generalized_dir + der_suffix else: - os.remove('project/plugins.sbt') + root_dir = os.path.join('results', conf + "." + sha1 + "." 
+ suffix + der_suffix) + + mkdir(root_dir) + symlink = None + if generalized_dir != root_dir: + create_symlink(generalized_dir, root_dir) + symlinks += [[generalized_dir, root_dir]] + symlink = generalized_dir + if args.upload or args.gitupload: + create_symlink(os.path.join(upload_dir, generalized_dir), root_dir) + + # subconfigurations + for (size, gcThreads) in itertools.product(sizes, gcThreadCounts): + + if size == ["default", "default"] and gcThreads == "default": + subconfig_dir = root_dir + else: + size_str = [] + if size != ["default", "default"] : + size_str = ["size_" + size[0] + "-" + size[1]] + gcThreads_str = [] + if gcThreads != "default": + gcThreads_str = ["gcthreads_" + gcThreads] + subconf_str = "_".join(size_str + gcThreads_str) + subconfig_dir = os.path.join(root_dir, subconf_str) + + if not args.overwrite and os.path.isfile(os.path.join(subconfig_dir, ".complete")): + print subconfig_dir, "already complete, skipping" + skipped += [subconfig_dir] + continue + + if not args.append: + sh.rmtree(subconfig_dir, ignore_errors=True) + + mkdir(subconfig_dir) + + for bconf in benchmarks: + if type(bconf) is tuple: + bench, input = bconf + bfullname = bench + "@" + input + else: + bench = bconf + input = slurp(os.path.join('input', bench)) + bfullname = bench + print('--- heap size: {} GC threads: {} conf: {}, bench: {}'.format(size, gcThreads, conf, bfullname)) + + output = slurp(os.path.join('output', bench)) + compilecmd = slurp(os.path.join('confs', conf_name, 'compile')) + runcmd = slurp(os.path.join('confs', conf_name, 'run')) \ + .replace('$BENCH', bench) \ + .replace('$HOME', os.environ['HOME']).split(' ') + + if os.path.exists(os.path.join('confs', conf_name, 'build.sbt')): + sh.copyfile(os.path.join('confs', conf_name, 'build.sbt'), 'build.sbt') + else: + os.remove('build.sbt') + + if os.path.exists(os.path.join('confs', conf_name, 'plugins.sbt')): + sh.copyfile(os.path.join('confs', conf_name, 'plugins.sbt'), 'project/plugins.sbt') + else: + os.remove('project/plugins.sbt') + + compile_success = compile(conf, bench, compilecmd, gcstats, args.gcdebug, args.gctrace, extra_args) + if not compile_success: + compile_fail += [conf] + break + + resultsdir = os.path.join(subconfig_dir, bfullname) + print "results in", resultsdir + mkdir(resultsdir) + + if conf.startswith("scala-native") and perf in ["sudo", "normal"]: + # perf needs the original binary for the reports to make any sense + runnable = runcmd[0] + saved_binary = os.path.join(resultsdir, "binary") + sh.copyfile(runnable, saved_binary) + os.chmod(saved_binary, 0775) + runcmd = [saved_binary] + runcmd[1:] + + cmd = [] + cmd.extend(runcmd) + cmd.extend([str(batches), str(batch_size), input, output]) + + to_run = [] + for n in xrange(runs): + to_run += [ + dict(runs=runs, cmd=cmd, resultsdir=resultsdir, conf=conf, bench=bench, n=n, gcstats=gcstats, + size=size, gcThreads=gcThreads, perf = perf)] + + if par == 1: + for tr in to_run: + failed += single_run(tr) + else: + failed += sum(pool.map(single_run, to_run), []) + + # mark it as complete + open(os.path.join(subconfig_dir, ".complete"), 'w+').close() + result_dirs += [subconfig_dir] + + if args.upload or args.gitupload: + upload(subconfig_dir, symlink, args.gitupload, args.overwrite) + + print "results:" + for dir in result_dirs: + print dir + + if len(symlinks) > 0: + print("{} symlinks ".format(len(symlinks))) + for symlink in symlinks: + print symlink[0], "->", symlink[1] + - compile(bench, compilecmd) - resultsdir = os.path.join('results', conf, bench) - 
mkdir(resultsdir) + if len(compile_fail) > 0: + print("{} compilation failed ".format(len(failed))) + for skip in compile_fail: + print skip - for n in xrange(runs): - print('--- run {}/{}'.format(n, runs)) + if len(skipped) > 0: + print("{} benchmarks skipped ".format(len(failed))) + for skip in skipped: + print skip - cmd = [] - cmd.extend(runcmd) - cmd.extend([str(batches), str(batch_size), input, output]) - out = run(cmd) - with open(os.path.join(resultsdir, str(n)), 'w+') as resultfile: - resultfile.write(out) + if len(failed) > 0: + print("{} benchmarks failed ".format(len(failed))) + for fail in failed: + print fail + if len(compile_fail) > 0 or len(failed) > 0: + exit(1) diff --git a/scripts/start.sh b/scripts/start.sh new file mode 100755 index 0000000..d6c4d46 --- /dev/null +++ b/scripts/start.sh @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +setsid scripts/start0.sh \ No newline at end of file diff --git a/scripts/start0.sh b/scripts/start0.sh new file mode 100755 index 0000000..3c50eca --- /dev/null +++ b/scripts/start0.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +echo $$ | tee .pid + +if [ -f jobs.sh ]; then + mkdir -p logs + ./jobs.sh | tee logs/job_$(date +%Y%m%d_%H%M%S).log +fi + +rm .pid \ No newline at end of file diff --git a/scripts/stop.sh b/scripts/stop.sh new file mode 100755 index 0000000..b6deb96 --- /dev/null +++ b/scripts/stop.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +if [ -f .pid ]; then + PID=$(cat .pid) + kill -9 $(ps -s $PID -o pid=) + rm .pid +fi + diff --git a/scripts/summary.py b/scripts/summary.py old mode 100644 new mode 100755 index b953295..cec2423 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -1,43 +1,1270 @@ -from run import benchmarks, runs, configurations +#!/usr/bin/env python2 +from run import mkdir, expand_wild_cards, generate_choices +import matplotlib +matplotlib.use('Agg') +import matplotlib.pyplot as plt +import matplotlib.patches as mpatches import numpy as np +import time +import os +import argparse -def config_data(bench, conf): - out = [] - for run in xrange(runs): + +def config_data_goodruns(bench, conf, warmup, p=50): + benchmark_dir = os.path.join("results", conf, bench) + files = next(os.walk(benchmark_dir), [[], [], []])[2] + runs = [] + for file in files: + if "." 
not in file: + # regular benchmark data + runs.append(file) + + points_with_50percentile = [] + for run in runs: try: - points = [] - with open('results/{}/{}/{}'.format(conf, bench, run)) as data: + raw_points = [] + with open(os.path.join("results", conf, bench, run)) as data: for line in data.readlines(): - points.append(float(line)) - # take only last 1000 to account for startup - points = points[-1000:] - # filter out 1% worst measurements as outliers - pmax = np.percentile(points, 99) - for point in points: - if point <= pmax: - out.append(point) + try: + # in ms + raw_points.append(float(line) / 1000000) + except Exception as e: + print e + points = raw_points[warmup:] + points_with_50percentile += [(points, np.percentile(points, p))] except IOError: pass + to_discard = int(0.2 * len(points_with_50percentile)) + if to_discard > 0: + sorted_arr = sorted(points_with_50percentile, key=lambda x: -x[1]) + out = map(lambda x: x[0], sorted_arr[to_discard:]) + else: + out = map(lambda x: x[0], points_with_50percentile) + return np.array(sum(out, [])) + + +def config_data_run(bench, conf, run, warmup): + out = [] + try: + points = [] + with open(os.path.join("results", conf, bench, str(run))) as data: + for line in data.readlines(): + try: + # in ms + points.append(float(line) / 1000000) + except Exception as e: + print e + out = points[warmup:] + except IOError: + pass return np.array(out) -def peak_performance(): + +def gc_pauses_main_thread(bench, conf): + benchmark_dir = os.path.join("results", conf, bench) + files = next(os.walk(benchmark_dir), [[], [], []])[2] + runs = [] + for file in files: + if file.endswith(".gc.csv"): + # gc stats data + runs.append(file) + + mark_times = [] + sweep_times = [] + gc_times = [] + for run in runs: + try: + file = os.path.join("results", conf, bench, run) + with open(file) as data: + mark, sweep, total = gc_parse_file(data, file) + mark_times += mark + sweep_times += sweep + gc_times += total + except IOError: + pass + return np.array(mark_times), np.array(sweep_times), np.array(gc_times) + + +def gc_parse_file(data, file): + header = data.readline().strip() + if header.startswith("event_type,"): + return parse_gc_pause_events(data, file, header) + else: + return parse_gc_tabular(data, file, header) + + +def parse_gc_pause_events(data, file, header): + mark_times = [] + sweep_times = [] + gc_times = [] + event_type_index = 0 + time_ns_index = -1 + ns_to_ms_div = 1000 * 1000 + for i, h in enumerate(header.split(',')): + if h == "time_ns": + time_ns_index = i + if time_ns_index == -1: + print "Header does not have time_ns", header, "at", file + return mark_times, sweep_times, gc_times + + for line in data.readlines(): + arr = line.split(",") + event = arr[event_type_index] + try: + time = float(arr[time_ns_index]) / ns_to_ms_div + if event == "mark": + mark_times.append(time) + elif event == "sweep": + sweep_times.append(time) + if event == "mark" or event == "sweep": + gc_times.append(time) + except Exception as e: + print e + + return mark_times, sweep_times, gc_times + + +def parse_gc_tabular(data, file, header): + mark_times = [] + sweep_times = [] + gc_times = [] + # analise header + mark_index = -1 + sweep_index = -1 + mark_to_ms = 0 + sweep_to_ms = 0 + unit2div = dict(ms=1, us=1000, ns=1000 * 1000) + for i, h in enumerate(header.split(',')): + arr = h.rsplit('_', 1) + if len(arr) != 2: + continue + prefix = arr[0] + unit = arr[1] + + if prefix == "mark_time": + mark_index = i + mark_to_ms = unit2div[unit] + elif prefix == "sweep_time": + 
sweep_index = i + sweep_to_ms = unit2div[unit] + if mark_index == -1: + print "Header does not have mark_time_", header, "at", file + if sweep_index == -1: + print "Header does not have sweep_time_", header, "at", file + if mark_index == -1 or sweep_index == -1: + return mark_times, sweep_times, gc_times + for line in data.readlines(): + arr = line.split(",") + # in ms + mark_time = float(arr[mark_index]) / mark_to_ms + mark_times.append(mark_time) + sweep_time = float(arr[sweep_index]) / sweep_to_ms + sweep_times.append(sweep_time) + gc_times.append(mark_time + sweep_time) + return mark_times, sweep_times, gc_times + + +def merge_or_create(dict1, dict2): + for key1 in dict1.keys(): + if dict2.has_key(key1): + dict1[key1].append(dict2[key1]) + for key2 in dict2.keys(): + if not dict1.has_key(key2): + dict1[key2] = dict2[key2] + + +def gc_events_for_last_n_collections(bench, conf, run=3, n=1): + benchmark_dir = os.path.join("results", conf, bench) + files = next(os.walk(benchmark_dir), [[], [], []])[2] + main_file_name = str(run) + ".gc.csv" + parts = [] + for file in files: + if file.startswith(main_file_name): + parts.append(file) + + collection_events = [] + for part in parts: + try: + file = os.path.join("results", conf, bench, part) + with open(file) as data: + header = data.readline().strip() + collection_events0, _, _, _ = parse_events(data, file, header) + collection_events += collection_events0 + except IOError: + pass + + collection_events = collection_events[-n:] + if len(collection_events) == 0: + return [], dict(), dict(), dict() + + min_time = collection_events[0][1] + time_filter = (lambda t: t > min_time) + + phase_events_by_thread = dict() + batch_events_by_thread = dict() + internal_events_by_thread = dict() + + for part in parts: + try: + file = os.path.join("results", conf, bench, part) + with open(file) as data: + header = data.readline().strip() + # no collection events on other threads + _, phase_events_by_thread0, batch_events_by_thread0, internal_events_by_thread0 = parse_events(data, file, header, time_filter) + merge_or_create(phase_events_by_thread, phase_events_by_thread0) + merge_or_create(batch_events_by_thread, batch_events_by_thread0) + merge_or_create(internal_events_by_thread, internal_events_by_thread0) + except IOError: + pass + + return collection_events, phase_events_by_thread, batch_events_by_thread, internal_events_by_thread + + +def append_or_create(dict, key, value): + if dict.has_key(key): + dict[key].append(value) + else: + dict[key] = [value] + + +phase_event_types = ["mark", "sweep", "concmark", "concsweep"] +batch_events_types = ["mark_batch", "sweep_batch", "coalesce_batch", "mark_waiting"] +internal_events_types = ["sync"] + + +# event = [type, start, end] +def parse_events(data, file, header, timeFilter=(lambda t: True)): + collection_types = ["collection"] + + collection_events = [] + phase_events_by_thread = dict() + batch_events_by_thread = dict() + internal_events_by_thread = dict() + + event_type_index = 0 + start_ns_index = -1 + time_ns_index = -1 + thread_index = -1 + ns_to_ms_div = 1000 * 1000 + for i, h in enumerate(header.split(',')): + if h == "start_ns": + start_ns_index = i + if h == "time_ns": + time_ns_index = i + if h == "gc_thread": + thread_index = i + + if start_ns_index == -1: + print "Header does not have start_ns", header, "at", file + if time_ns_index == -1: + print "Header does not have time_ns", header, "at", file + if thread_index == -1: + print "Header does not have gc_thread", header, "at", file + if 
start_ns_index == -1 or time_ns_index == -1 or thread_index == -1: + return collection_events, phase_events_by_thread, batch_events_by_thread, internal_events_by_thread + + for line in data.readlines(): + arr = line.split(",") + event = arr[event_type_index] + start = float(arr[start_ns_index]) / ns_to_ms_div + if not timeFilter(start): + continue + time = float(arr[time_ns_index]) / ns_to_ms_div + thread = arr[thread_index] + if event in collection_types: + collection_events.append([event, start, time]) + elif event in phase_event_types: + append_or_create(phase_events_by_thread, thread, [event, start, time]) + elif event in batch_events_types: + append_or_create(batch_events_by_thread, thread, [event, start, time]) + elif event in internal_events_types: + append_or_create(internal_events_by_thread, thread, [event, start, time]) + + return collection_events, phase_events_by_thread, batch_events_by_thread, internal_events_by_thread + + +def parse_batch_times(data, file, header): + + mark_batches = [] + sweep_batches = [] + + event_type_index = 0 + start_ns_index = -1 + time_ns_index = -1 + thread_index = -1 + ns_to_ms_div = 1000 * 1000 + for i, h in enumerate(header.split(',')): + if h == "start_ns": + start_ns_index = i + if h == "time_ns": + time_ns_index = i + if h == "gc_thread": + thread_index = i + + if start_ns_index == -1: + print "Header does not have start_ns", header, "at", file + if time_ns_index == -1: + print "Header does not have time_ns", header, "at", file + if thread_index == -1: + print "Header does not have gc_thread", header, "at", file + if start_ns_index == -1 or time_ns_index == -1 or thread_index == -1: + return mark_batches, sweep_batches + + for line in data.readlines(): + arr = line.split(",") + event = arr[event_type_index] + time = float(arr[time_ns_index]) / ns_to_ms_div + if event == "mark_batch": + mark_batches.append(time) + elif event == "sweep_batch": + sweep_batches.append(time) + + return mark_batches, sweep_batches + + +def gc_batch_times(bench, conf): + benchmark_dir = os.path.join("results", conf, bench) + files = next(os.walk(benchmark_dir), [[], [], []])[2] + parts = [] + for file in files: + if ".gc.csv" in file: + parts.append(file) + sweep_batches = [] + mark_batches = [] + for part in parts: + try: + file = os.path.join("results", conf, bench, part) + with open(file) as data: + header = data.readline().strip() + mark_batches0, sweep_batches0 = parse_batch_times(data, file, header) + mark_batches += mark_batches0 + sweep_batches += sweep_batches0 + except IOError: + pass + + return mark_batches, sweep_batches + + +def gc_stats_total(bench, conf): + _, _, total = gc_pauses_main_thread(bench, conf) + return total + + +def gc_stats_mark_batches(bench, conf): + mark,_ = gc_batch_times(bench, conf) + return np.array(mark) + + +def gc_stats_sweep_batches(bench, conf): + _, sweep = gc_batch_times(bench, conf) + return np.array(sweep) + + +def percentile_gc(configurations, benchmarks, percentile): + out_mark = [] + out_sweep = [] + out_total = [] + for bench in benchmarks: + res_mark, res_sweep, res_total = percentile_gc_bench(configurations, bench, percentile) + out_mark.append(res_mark) + out_sweep.append(res_sweep) + out_total.append(res_total) + + return out_mark, out_sweep, out_total + + +def total_gc(configurations, benchmarks): + out_mark = [] + out_sweep = [] + out_total = [] + for bench in benchmarks: + res_mark, res_sweep, res_total = total_gc_bench(configurations, bench) + out_mark.append(res_mark) + out_sweep.append(res_sweep) + 
out_total.append(res_total) + return out_mark, out_sweep, out_total + + +def percentile_gc_bench(configurations, bench, p): + res_mark = [] + res_sweep = [] + res_total = [] + for conf in configurations: + try: + mark, sweep, total = gc_pauses_main_thread(bench, conf) + res_mark.append(np.percentile(mark, p)) + res_sweep.append(np.percentile(sweep, p)) + res_total.append(np.percentile(total, p)) + except IndexError: + res_mark.append(0) + res_sweep.append(0) + res_total.append(0) + return res_mark, res_sweep, res_total + + +def total_gc_bench(configurations, bench): + res_mark = [] + res_sweep = [] + res_total = [] + for conf in configurations: + try: + mark, sweep, total = gc_pauses_main_thread(bench, conf) + res_mark.append(np.sum(mark)) + res_sweep.append(np.sum(sweep)) + res_total.append(np.sum(total)) + except IndexError: + res_mark.append(0) + res_sweep.append(0) + res_total.append(0) + return res_mark, res_sweep, res_total + + +def percentile_gc_bench_mark(configurations, bench, p): + mark, _, _ = percentile_gc_bench(configurations, bench, p) + return mark + + +def percentile_gc_bench_sweep(configurations, bench, p): + _, sweep, _ = percentile_gc_bench(configurations, bench, p) + return sweep + + +def percentile_gc_bench_total(configurations, bench, p): + _, _, total = percentile_gc_bench(configurations, bench, p) + return total + + +def percentile(configurations, benchmarks, warmup, p): + out = [] + for bench in benchmarks: + out.append(percentile_bench(configurations, bench, warmup, p)) + return out + + +def percentile_bench(configurations, bench, warmup, p): + res = [] + for conf in configurations: + try: + res.append(np.percentile(config_data_goodruns(bench, conf, warmup, p), p)) + except IndexError: + res.append(0) + return res + + +def totals(configurations, benchmarks, warmup): out = [] for bench in benchmarks: + out.append(totals_bench(configurations, bench, warmup)) + return out + + +def totals_bench(configurations, bench, warmup): + res = [] + for conf in configurations: + try: + res.append(np.sum(config_data_goodruns(bench, conf, warmup))) + except IndexError: + res.append(0) + return res + + +def bar_chart_relative(plt, configurations, benchmarks, data): + plt.clf() + plt.cla() + ind = np.arange(len(benchmarks)) + conf_count = len(configurations) + 1 + base = [] + ref = [] + for bench_idx, bench in enumerate(benchmarks): + try: + base_val = data[bench_idx][0] + if base_val > 0: + base.append(base_val) + ref.append(1.0) + else: + base.append(0.0) + ref.append(0.0) + except IndexError: + base.append(0.0) + ref.append(0.0) + plt.bar(ind * conf_count, ref, label=configurations[0]) + + for i, conf in enumerate(configurations[1:]): + conf_idx = i + 1 res = [] - for conf in configurations: + for bench_idx, (bench, base_val) in enumerate(zip(benchmarks, base)): try: - res.append(np.percentile(config_data(bench, conf), 50)) + if base_val > 0: + res.append(data[bench_idx][conf_idx] / base_val) + else: + res.append(0.0) except IndexError: res.append(0) - out.append(res) - return out + plt.bar(ind * conf_count + conf_idx, res, label=conf) + plt.xticks((ind * conf_count + (conf_count - 1) / 2.0), map(benchmark_short_name, benchmarks)) + plt.legend() + return plt -if __name__ == '__main__': + +def total_execution_times(plt, configurations, benchmarks, data): + plt = bar_chart_relative(plt, configurations, benchmarks, data) + plt.title("Total test execution times against " + configurations[0]) + return plt + + +def relative_execution_times(plt, configurations, benchmarks, data, p): + 
plt = bar_chart_relative(plt, configurations, benchmarks, data) + plt.title("Relative test execution times against " + configurations[0] + " at " + str(p) + " percentile") + return plt + + +def relative_gc_pauses(plt, configurations, benchmarks, data, p): + plt = bar_chart_relative(plt, configurations, benchmarks, data) + plt.title("Relative GC pauses against " + configurations[0] + " at " + str(p) + " percentile") + return plt + + +def bar_chart_gc_relative(plt, configurations, benchmarks, mark_data, total_data): + plt.clf() + plt.cla() + ind = np.arange(len(benchmarks)) + conf_count = len(configurations) + 1 + base = [] + ref = [] + mark_ref = [] + for bench_idx, bench in enumerate(benchmarks): + mark = mark_data[bench_idx][0] + total = total_data[bench_idx][0] + if total > 0: + base.append(total) + ref.append(1.0) + mark_ref.append(mark / total) + else: + base.append(0) + ref.append(0.0) + mark_ref.append(0.0) + plt.bar(ind * conf_count, ref, label=configurations[0] + "-sweep") # total (look like sweep) + plt.bar(ind * conf_count, mark_ref, label=configurations[0] + "-mark") # mark time + + for i, conf in enumerate(configurations[1:]): + conf_idx = i + 1 + res = [] + mark_res = [] + for bench_idx, (bench, base_val) in enumerate(zip(benchmarks, base)): + if base_val > 0: + mark, _, total = gc_pauses_main_thread(bench, conf) + mark = mark_data[bench_idx][conf_idx] + total = total_data[bench_idx][conf_idx] + res.append(np.array(total) / base_val) + mark_res.append(np.array(mark) / base_val) + else: + res.append(0) + mark_res.append(0) + plt.bar(ind * conf_count + i + 1, res, label=conf + "-sweep") # total (look like sweep) + plt.bar(ind * conf_count + i + 1, mark_res, label=conf + "-mark") # mark time + plt.xticks((ind * conf_count + (conf_count - 1) / 2.0), map(benchmark_short_name, benchmarks)) + plt.title("Relative gc times against " + configurations[0]) + plt.legend() + return plt + + +def bar_chart_gc_absolute(plt, configurations, benchmarks, percentile): + plt.clf() + plt.cla() + ind = np.arange(len(benchmarks)) + conf_count = len(configurations) + 1 + + for i, conf in enumerate(configurations): + res = [] + mark_res = [] + for bench in benchmarks: + try: + mark, _, total = gc_pauses_main_thread(bench, conf) + res.append(np.percentile(total, percentile)) + mark_res.append(np.percentile(mark, percentile)) + except IndexError: + res.append(0) + plt.bar(ind * conf_count + i + 1, res, label=conf + "-sweep") # total (look like sweep) + plt.bar(ind * conf_count + i + 1, mark_res, label=conf + "-mark") # mark time + plt.xticks((ind * conf_count + (conf_count - 1) / 2.0), map(benchmark_short_name, benchmarks)) + plt.title("Garbage collector pause times at " + str(percentile) + " percentile") + plt.legend() + return plt + + +def example_run_plot(plt, configurations, bench, run=3, lastn=-1): + plt.clf() + plt.cla() + + for conf in configurations: + rawpoints = [] + try: + with open('results/{}/{}/{}'.format(conf, bench, run)) as data: + for line in data.readlines(): + try: + rawpoints.append(float(line) / 1000000) + except Exception as e: + print e + except IOError: + pass + + total_len = len(rawpoints) + if lastn != -1: + first = total_len - lastn + else: + first = 0 + ind = np.arange(first, total_len) + points = rawpoints[first:] + plt.plot(ind, points, label=conf) + plt.title("{} run #{}".format(bench, str(run))) + plt.xlabel("Iteration") + plt.ylabel("Run time (ms)") + plt.legend() + return plt + + +def example_all_runs_plot(plt, conf, bench, lastn=-1): + plt.clf() + plt.cla() + max_run = 
find_last_run(conf, bench) + + for run in np.arange(0, max_run + 1): + rawpoints = [] + try: + with open('results/{}/{}/{}'.format(conf, bench, run)) as data: + for line in data.readlines(): + try: + rawpoints.append(float(line) / 1000000) + except Exception as e: + print e + except IOError: + pass + + total_len = len(rawpoints) + if total_len == 0: + continue + if lastn != -1: + first = total_len - lastn + else: + first = 0 + ind = np.arange(first, total_len) + points = rawpoints[first:] + plt.plot(ind, points, label=run) + plt.title("{} all runs for {}".format(bench, conf)) + plt.xlabel("Iteration") + plt.ylabel("Run time (ms)") + plt.legend() + return plt + + +def to_gb(size_str): + if size_str[-1] == "k" or size_str[-1] == "K": + return float(size_str[:-1]) / 1024 / 1024 + elif size_str[-1] == "m" or size_str[-1] == "M": + return float(size_str[:-1]) / 1024 + elif size_str[-1] == "g" or size_str[-1] == "G": + return float(size_str[:-1]) + else: + # bytes + return float(size_str) / 1024 / 1024 / 1024 + + +def sizes_per_conf(parent_configuration): + parent_folder = os.path.join("results", parent_configuration) + min_sizes = [] + max_sizes = [] + child_confs = [] + folders = next(os.walk(parent_folder))[1] + for f in folders: + if f.startswith("size_"): + parts = f[len("size_"):].split("-") + min_sizes.append(to_gb(parts[0])) + max_sizes.append(to_gb(parts[1])) + child_confs.append(os.path.join(parent_configuration, f)) + return min_sizes, max_sizes, child_confs + + +def size_compare_chart_generic(plt, parent_configurations, bench, get_percentile, p): + plt.clf() + plt.cla() + for parent_conf in parent_configurations: + min_sizes, max_sizes, child_confs = sizes_per_conf(parent_conf) + equal_sizes = [] + equal_confs = [] + for min_size, max_size, child_conf in zip(min_sizes, max_sizes, child_confs): + if min_size == max_size: + equal_sizes.append(min_size) + equal_confs.append(child_conf) + + # sorts all by size in GB + equal_sizes, equal_confs = zip(*[(x, y) for x, y in sorted(zip(equal_sizes, equal_confs))]) + percentiles = get_percentile(equal_confs, bench, p) + plt.plot(np.array(equal_sizes), percentiles, label=parent_conf) + plt.legend() + plt.xlim(xmin=0) + plt.ylim(ymin=0) + plt.xlabel("Heap Size (GB)") + + return plt + + +def size_compare_chart_gc_combined(plt, parent_configurations, bench): + plt.clf() + plt.cla() + for parent_conf in parent_configurations: + min_sizes, max_sizes, child_confs = sizes_per_conf(parent_conf) + equal_sizes = [] + equal_confs = [] + for min_size, max_size, child_conf in zip(min_sizes, max_sizes, child_confs): + if min_size == max_size: + equal_sizes.append(min_size) + equal_confs.append(child_conf) + + # sorts all by size in GB + equal_sizes, equal_confs = zip(*[(x, y) for x, y in sorted(zip(equal_sizes, equal_confs))]) + + mark, _, total = total_gc_bench(equal_confs, bench) + plt.plot(np.array(equal_sizes), total, label=parent_conf + "-sweep") # total (look like sweep) + plt.plot(np.array(equal_sizes), mark, label=parent_conf + "-mark") # mark time + plt.legend() + plt.xlim(xmin=0) + plt.ylim(ymin=0) + plt.xlabel("Heap Size (GB)") + plt.title("{}: GC total time".format(bench)) + plt.ylabel("Time (ms)") + + return plt + + +def size_compare_chart(plt, parent_configurations, bench, warmup, p): + plt = size_compare_chart_generic(plt, parent_configurations, bench, lambda configurations, benchmark, p: percentile_bench(configurations, benchmark, warmup, p), p) + plt.title("{} at {} percentile".format(bench, p)) + plt.ylabel("Run time (ms)") + return plt + + 
+def size_compare_chart_gc(plt, parent_configurations, bench, p):
+    plt = size_compare_chart_generic(plt, parent_configurations, bench, percentile_gc_bench_total, p)
+    plt.title("{}: GC pause time at {} percentile".format(bench, p))
+    plt.ylabel("GC pause time (ms)")
+    return plt
+
+
+def size_compare_chart_gc_mark(plt, parent_configurations, bench, p):
+    plt = size_compare_chart_generic(plt, parent_configurations, bench, percentile_gc_bench_mark, p)
+    plt.title("{}: GC mark pause time at {} percentile".format(bench, p))
+    plt.ylabel("GC mark time (ms)")
+    return plt
+
+
+def size_compare_chart_gc_sweep(plt, parent_configurations, bench, p):
+    plt = size_compare_chart_generic(plt, parent_configurations, bench, percentile_gc_bench_sweep, p)
+    plt.title("{}: GC sweep pause time at {} percentile".format(bench, p))
+    plt.ylabel("GC sweep time (ms)")
+    return plt
+
+
+def percentiles_chart_generic_runs(plt, conf, bench, get_data, first, last, step):
+    plt.clf()
+    plt.cla()
+    max_run = find_last_run(conf, bench)
+
+    for run in np.arange(0, max_run + 1):
+        data = get_data(bench, conf, run)
+        if data.size > 0:
+            percentiles = filter(lambda x: 0 <= x <= 100, np.arange(first, last + step, step))
+            percvalue = np.array([np.percentile(data, perc) for perc in percentiles])
+            plt.plot(percentiles, percvalue, label=run)
+    plt.legend()
+    plt.ylim(ymin=0)
+    plt.xlabel("Percentile")
+    return plt
+
+
+def percentiles_chart_generic(plt, configurations, bench, get_data, first, last, step):
+    plt.clf()
+    plt.cla()
+    for conf in configurations:
+        data = get_data(bench, conf)
+        if data.size > 0:
+            percentiles = filter(lambda x: 0 <= x <= 100, np.arange(first, last + step, step))
+            percvalue = np.array([np.percentile(data, perc) for perc in percentiles])
+            plt.plot(percentiles, percvalue, label=conf)
+    plt.legend()
+    plt.ylim(ymin=0)
+    plt.xlabel("Percentile")
+    return plt
+
+
+def percentiles_chart(plt, configurations, bench, warmup, first=0, last=100, step=0.1):
+    plt = percentiles_chart_generic(plt, configurations, bench,
+                                    lambda bench, conf: config_data_goodruns(bench, conf, warmup), first, last, step)
+    plt.title(bench)
+    plt.ylabel("Run time (ms)")
+    return plt
+
+
+def percentiles_chart_runs(plt, conf, bench, warmup, first=0, last=100, step=0.1):
+    plt = percentiles_chart_generic_runs(plt, conf, bench,
+                                         lambda bench, conf, run: config_data_run(bench, conf, run, warmup), first,
+                                         last, step)
+    plt.title(bench + " " + conf)
+    plt.ylabel("Run time (ms)")
+    return plt
+
+
+def gc_pause_time_chart(plt, configurations, bench, first=0, last=100, step=0.1):
+    plt = percentiles_chart_generic(plt, configurations, bench, gc_stats_total, first, last, step)
+    plt.title(bench + ": Garbage Collector Pause Times")
+    plt.ylabel("GC pause time (ms)")
+    return plt
+
+
+def gc_mark_batch_time_chart(plt, configurations, bench, first=0, last=100, step=0.1):
+    plt = percentiles_chart_generic(plt, configurations, bench, gc_stats_mark_batches, first, last, step)
+    plt.title(bench + ": Mark Batch Times")
+    plt.ylabel("Mark Batch Time (ms)")
+    return plt
+
+
+def gc_sweep_batch_time_chart(plt, configurations, bench, first=0, last=100, step=0.1):
+    plt = percentiles_chart_generic(plt, configurations, bench, gc_stats_sweep_batches, first, last, step)
+    plt.title(bench + ": Sweep Batch Times")
+    plt.ylabel("Sweep Batch Time (ms)")
+    return plt
+
+
+def print_table(configurations, benchmarks, data):
     leading = ['name']
     for conf in configurations:
         leading.append(conf)
     print ','.join(leading)
-    for bench, res in zip(benchmarks, peak_performance()):
+    for bench, res in zip(benchmarks, data):
         print ','.join([bench] + list(map(str, res)))
+
+def thread_id_tostring(n):
+    if int(n) < 0:
+        return "mutator" + n
+    else:
+        return n
+
+
+def gc_gantt_chart(plt, conf, bench, data, only_batches=False):
+    plt.clf()
+    plt.cla()
+    plt.figure(figsize=(100, 24))
+    labels = []
+    collection_events, phase_events_by_thread, batch_events_by_thread, internal_events_by_thread = data
+
+    values = []
+    event_type_to_color = {
+        "mark": ("red", "darkred"), "sweep": ("blue", "darkblue"), "concmark": ("red", "darkred"),
+        "concsweep": ("blue", "darkblue"),
+        "mark_batch": ("red", "darkred"), "sweep_batch": ("blue", "darkblue"), "coalesce_batch": ("green", "darkgreen"),
+        "mark_waiting": ("grey", "dimgrey"), "sync": ("yellow", "gold"),
+    }
+
+    all_keys = phase_events_by_thread.keys() + batch_events_by_thread.keys() + internal_events_by_thread.keys()
+    all_threads = sorted(list(set(all_keys)))
+
+    for thread in all_threads:
+        end = len(labels)
+        labels.append(thread_id_tostring(thread))
+        phase_values = phase_events_by_thread.get(thread, [])
+        batch_values = batch_events_by_thread.get(thread, [])
+        internal_values = internal_events_by_thread.get(thread, [])
+        if not only_batches:
+            values = []
+            for e in collection_events:
+                # [event, start, time] => (start, time)
+                values.append((e[1], e[2]))
+            plt.broken_barh(values, (end, 0.25), color="black", label="collection")
+        for et in phase_event_types:
+            values = []
+            for e in phase_values:
+                event = e[0]
+                start = e[1]
+                time = e[2]
+                if event == et:
+                    values.append((start, time))
+            plt.broken_barh(values, (end + 0.25, 0.25), facecolors=event_type_to_color[et], label=et)
+        for et in batch_events_types:
+            values = []
+            for e in batch_values:
+                event = e[0]
+                start = e[1]
+                time = e[2]
+                if event == et:
+                    values.append((start, time))
+            if only_batches:
+                plt.broken_barh(values, (end, 0.5), facecolors=event_type_to_color[et], label=et)
+            else:
+                plt.broken_barh(values, (end + 0.50, 0.25), facecolors=event_type_to_color[et], label=et)
+        for et in internal_events_types:
+            values = []
+            for e in internal_values:
+                event = e[0]
+                start = e[1]
+                time = e[2]
+                if event == et:
+                    values.append((start, time))
+            if only_batches:
+                plt.broken_barh(values, (end + 0.5, 0.5), facecolors=event_type_to_color[et], label=et)
+            else:
+                plt.broken_barh(values, (end + 0.75, 0.25), facecolors=event_type_to_color[et], label=et)
+
+    plt.yticks(np.arange(len(labels)), labels)
+    plt.xlabel("Time since start (ms)")
+    plt.title(conf + " " + bench + " last garbage collection")
+    plt.legend(handles=[(mpatches.Patch(color='black', label='collection')),
+                        (mpatches.Patch(color='red', label='mark')),
+                        (mpatches.Patch(color='blue', label='sweep')),
+                        (mpatches.Patch(color='green', label='coalesce')),
+                        (mpatches.Patch(color='grey', label='mark waiting')),
+                        (mpatches.Patch(color='yellow', label='sync')),
+                        ])
+
+    return plt
+
+
+def write_md_table(file, configurations, benchmarks, data):
+    header = ['name']
+    header.append(configurations[0])
+    for conf in configurations[1:]:
+        header.append(conf)
+        header.append("")
+    file.write('|')
+    file.write(' | '.join(header))
+    file.write('|\n')
+
+    file.write('|')
+    for _ in header:
+        file.write(' -- |')
+    file.write('\n')
+
+    gmul = np.ones(len(configurations) - 1)
+    gcount = np.zeros(len(configurations) - 1)
+    for bench, res0 in zip(benchmarks, data):
+        base = res0[0]
+        res = [("%.4f" % base)] + sum(map(lambda x: cell(x, base), res0[1:]), [])
+        file.write('|')
+        file.write('|'.join([benchmark_md_link(bench)] + list(res)))
+        file.write('|\n')
+
+        for i, d0 in enumerate(res0[1:]):
+            if d0 != 0 and base != 0:
+                gmul[i] *= (float(d0) / base)
+                gcount[i] += 1
+
+    file.write('| __Geometric mean:__|')
+    for gm, count in zip(gmul, gcount):
+        file.write('| |')
+        if count > 0:
+            gmean = float(gm) ** (1.0 / count)
+            percent_diff = (gmean - 1) * 100
+            percent_diff_cell = ("+" if percent_diff > 0 else "__") + ("%.2f" % percent_diff) + "%" + (
+                "" if percent_diff > 0 else "__")
+            file.write(percent_diff_cell)
+        else:
+            file.write(" ")
+    file.write("|\n")
+
+
+def write_md_table_gc(file, configurations, benchmarks, mark_data, sweep_data, total_data):
+    header = ['name', ""]
+    header.append(configurations[0])
+    for conf in configurations[1:]:
+        header.append(conf)
+        header.append("")
+    file.write('|')
+    file.write(' | '.join(header))
+    file.write('|\n')
+
+    file.write('|')
+    for _ in header:
+        file.write(' -- |')
+    file.write('\n')
+
+    mark_gmul = np.ones(len(configurations) - 1)
+    mark_gcount = np.zeros(len(configurations) - 1)
+    sweep_gmul = np.ones(len(configurations) - 1)
+    sweep_gcount = np.zeros(len(configurations) - 1)
+    total_gmul = np.ones(len(configurations) - 1)
+    total_gcount = np.zeros(len(configurations) - 1)
+    for bench, mark_res0, sweep_res0, total_res0 in zip(benchmarks, mark_data, sweep_data, total_data):
+        for name, res0, gmul, gcount in zip(["mark", "sweep", "total"], [mark_res0, sweep_res0, total_res0],
+                                            [mark_gmul, sweep_gmul, total_gmul],
+                                            [mark_gcount, sweep_gcount, total_gcount]):
+            base = res0[0]
+            res = [("%.4f" % base)] + sum(map(lambda x: cell(x, base), res0[1:]), [])
+
+            if name == "mark":
+                link = [benchmark_md_link(bench)]
+            else:
+                link = [""]
+
+            file.write('|')
+            file.write('|'.join(link + list([name]) + list(res)))
+            file.write('|\n')
+
+            for i, d0 in enumerate(res0[1:]):
+                if d0 != 0 and base != 0:
+                    gmul[i] *= (float(d0) / base)
+                    gcount[i] += 1
+
+    for name, gmul, gcount in zip(["mark", "sweep", "total"],
+                                  [mark_gmul, sweep_gmul, total_gmul],
+                                  [mark_gcount, sweep_gcount, total_gcount]):
+        if name == "mark":
+            link = "__Geometric mean:__"
+        else:
+            link = ""
+
+        file.write('|' + link + '|' + name + '|')
+        for gm, count in zip(gmul, gcount):
+            file.write('| |')
+            if count > 0:
+                gmean = float(gm) ** (1.0 / count)
+                percent_diff = (gmean - 1) * 100
+                percent_diff_cell = ("+" if percent_diff > 0 else "__") + ("%.2f" % percent_diff) + "%" + (
+                    "" if percent_diff > 0 else "__")
+                file.write(percent_diff_cell)
+            else:
+                file.write(" ")
+        file.write("|\n")
+
+
+def cell(x, base):
+    if base > 0:
+        percent_diff = (float(x) / base - 1) * 100
+        percent_diff_cell = ("+" if percent_diff > 0 else "__") + ("%.2f" % percent_diff) + "%" + (
+            "" if percent_diff > 0 else "__")
+    else:
+        percent_diff_cell = "N/A"
+    return [("%.4f" % x), percent_diff_cell]
+
+
+def benchmark_md_link(bench):
+    return "[{}](#{})".format(bench, bench.replace(".", "").lower())
+
+
+def benchmark_short_name(bench):
+    return bench.split(".")[0]
+
+
+def chart_md(md_file, plt, rootdir, name):
+    plt.savefig(rootdir + name, pad_inches=0, bbox_inches='tight')
+    plt.close("all")
+    plt.figure(figsize=(32, 24))
+    md_file.write("![Chart]({})\n\n".format(name))
+
+
+def write_md_file(rootdir, md_file, parent_configurations, configurations, benchmarks, warmup, gc_charts=False,
+                  size_charts=False):
+    interesting_percentiles = [50, 90, 99, 99.9]
+    md_file.write("# Summary\n")
+    for p in interesting_percentiles:
+        md_file.write("## Benchmark run time (ms) at {} percentile \n".format(p))
percentile \n".format(p)) + data = percentile(configurations, benchmarks, warmup, p) + chart_md(md_file, relative_execution_times(plt, configurations, benchmarks, data, p), rootdir, + "relative_percentile_" + str(p) + ".png") + write_md_table(md_file, configurations, benchmarks, data) + + md_file.write("## Benchmark total run time (ms) \n") + data = totals(configurations, benchmarks, warmup) + chart_md(md_file, total_execution_times(plt, configurations, benchmarks, data), rootdir, + "relative_total.png") + write_md_table(md_file, configurations, benchmarks, data) + + if gc_charts: + md_file.write("## Total GC time on Application thread (ms) \n") + mark, sweep, total = total_gc(configurations, benchmarks) + chart_md(md_file, bar_chart_gc_relative(plt, configurations, benchmarks, mark, total), rootdir, + "relative_gc_total.png") + write_md_table_gc(md_file, configurations, benchmarks, mark, sweep, total) + + for p in interesting_percentiles: + md_file.write("## GC pause time (ms) at {} percentile \n".format(p)) + _, _, total = percentile_gc(configurations, benchmarks, p) + chart_md(md_file, relative_gc_pauses(plt, configurations, benchmarks, total, p), rootdir, + "relative_gc_percentile_" + str(p) + ".png") + write_md_table(md_file, configurations, benchmarks, total) + + md_file.write("# Individual benchmarks\n") + for bench in benchmarks: + if not any_run_exists(bench, configurations, 0): + continue + + md_file.write("## ") + md_file.write(bench) + md_file.write("\n") + + chart_md(md_file, percentiles_chart(plt, configurations, bench, warmup), rootdir, "percentile_" + bench + ".png") + chart_md(md_file, percentiles_chart(plt, configurations, bench, warmup, first=95, step=0.01), rootdir, "percentile_95plus_" + bench + ".png") + if gc_charts: + chart_md(md_file, gc_pause_time_chart(plt, configurations, bench), rootdir, + "gc_pause_times_" + bench + ".png") + chart_md(md_file, gc_pause_time_chart(plt, configurations, bench, first=95, step=0.01), rootdir, + "gc_pause_times_95plus_" + bench + ".png") + chart_md(md_file, gc_mark_batch_time_chart(plt, configurations, bench), rootdir, + "gc_mark_batches_" + bench + ".png") + chart_md(md_file, gc_mark_batch_time_chart(plt, configurations, bench, first=95, step=0.01), rootdir, + "gc_mark_batches_95plus_" + bench + ".png") + chart_md(md_file, gc_sweep_batch_time_chart(plt, configurations, bench), rootdir, + "gc_sweep_batches_" + bench + ".png") + chart_md(md_file, gc_sweep_batch_time_chart(plt, configurations, bench, first=95, step=0.01), rootdir, + "gc_sweep_batches_95plus_" + bench + ".png") + if size_charts: + for p in interesting_percentiles: + chart_md(md_file, size_compare_chart_gc_mark(plt, parent_configurations, bench, p), rootdir, + "gc_size_chart" + bench + "percentile_" + str(p) + "_mark.png") + for p in interesting_percentiles: + chart_md(md_file, size_compare_chart_gc_sweep(plt, parent_configurations, bench, p), rootdir, + "gc_size_chart" + bench + "percentile_" + str(p) + "_sweep.png") + for p in interesting_percentiles: + chart_md(md_file, size_compare_chart_gc_sweep(plt, parent_configurations, bench, p), rootdir, + "gc_size_chart" + bench + "percentile_" + str(p) + "_total.png") + chart_md(md_file, size_compare_chart_gc_combined(plt, parent_configurations, bench), rootdir, + "gc_size_chart_total" + bench + ".png") + + if size_charts: + for p in interesting_percentiles: + chart_md(md_file, size_compare_chart(plt, parent_configurations, bench, warmup, p), rootdir, + "size_chart_" + bench + "percentile_" + str(p) + ".png") + + run = 3 + 
+        while run >= 0 and not any_run_exists(bench, configurations, run):
+            run -= 1
+
+        if run >= 0:
+            # chart_md(md_file, example_run_plot(plt, configurations, bench, run, 1000), rootdir,
+            #          "example_run_last1000_" + str(run) + "_" + bench + ".png")
+            chart_md(md_file, example_run_plot(plt, configurations, bench, run), rootdir,
+                     "example_run_full_" + str(run) + "_" + bench + ".png")
+            for conf in configurations:
+                chart_md(md_file, percentiles_chart_runs(plt, conf, bench, warmup), rootdir,
+                         "percentile_" + bench + "_conf" + str(configurations.index(conf)) + ".png")
+                chart_md(md_file, percentiles_chart_runs(plt, conf, bench, warmup, first=95, step=0.01), rootdir,
+                         "percentile_95plus_" + bench + "_conf" + str(configurations.index(conf)) + ".png")
+                # chart_md(md_file, example_all_runs_plot(plt, conf, bench, 1000), rootdir,
+                #          "example_allruns_last1000_conf" + str(configurations.index(conf)) + "_" + bench + ".png")
+                # chart_md(md_file, example_all_runs_plot(plt, conf, bench), rootdir,
+                #          "example_allruns_full_conf" + str(configurations.index(conf)) + "_" + bench + ".png")
+                if gc_charts:
+                    gc_data = gc_events_for_last_n_collections(bench, conf, run)
+                    chart_md(md_file,
+                             gc_gantt_chart(plt, conf, bench, gc_data),
+                             rootdir,
+                             "example_gc_last_" + "_conf" + str(configurations.index(conf)) + "_" + str(run) + "_" + bench + ".png")
+                    chart_md(md_file,
+                             gc_gantt_chart(plt, conf, bench, gc_data, only_batches=True),
+                             rootdir,
+                             "example_gc_last_batches" + "_conf" + str(configurations.index(conf)) + "_" + str(run) + "_" + bench + ".png")
+
+
+def any_run_exists(bench, configurations, run):
+    exists = False
+    for conf in configurations:
+        file = 'results/{}/{}/{}'.format(conf, bench, run)
+        if os.path.exists(file):
+            exists = True
+            break
+    return exists
+
+
+def find_last_run(conf, bench):
+    max_run = 0
+    while True:
+        file = 'results/{}/{}/{}'.format(conf, bench, max_run)
+        if not os.path.exists(file):
+            break
+        max_run += 1
+    max_run -= 1
+
+    return max_run
+
+
+def discover_benchmarks(configurations):
+    benchmarks = []
+    for conf in configurations:
+        parent_folders = next(os.walk(os.path.join("results", conf)))[1]
+        for pf in parent_folders:
+            if is_subconfig(pf):
+                for child in next(os.walk(os.path.join("results", conf, pf)))[1]:
+                    if child not in benchmarks:
+                        benchmarks.append(child)
+            else:
+                if pf not in benchmarks:
+                    benchmarks.append(pf)
+
+    return benchmarks
+
+
+def is_subconfig(subconf):
+    return subconf.startswith("size_") or subconf.startswith("gcthreads_")
+
+
+default_warmup = 2000
+
+
+if __name__ == '__main__':
+    all_configs = next(os.walk("results"))[1]
+    # add subconfigurations (iterate over a copy so the appended entries are not rescanned)
+    for conf in list(all_configs):
+        folder = os.path.join("results", conf)
+        subfolders = next(os.walk(folder))[1]
+        for subconf in subfolders:
+            if is_subconfig(subconf):
+                all_configs.append(os.path.join(conf, subconf))
+
+    results = generate_choices(all_configs)
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--comment", help="comment appended as a suffix to the report name")
+    parser.add_argument("--gc", help="enable charts about the garbage collector", action="store_true")
+    parser.add_argument("--vssize", help="enable charts against heap size", action="store_true")
+    parser.add_argument("--warmup", help="number of iterations to skip before calculating percentiles", type=int, default=default_warmup)
+    parser.add_argument("--benchmark", help="benchmarks to use in the comparison", action='append')
+    parser.add_argument("comparisons", nargs='*', choices=results + ["all"],
+                        default="all")
+    args = parser.parse_args()
+
+    configurations = []
+    if args.comparisons == "all":
+        configurations = all_configs
+    else:
+        for arg in args.comparisons:
+            configurations.append(expand_wild_cards(arg))
+
+    comment = "_vs_".join(configurations).replace(os.sep, "_")
+    if args.comment is not None:
+        comment = args.comment
+
+    parent_configurations = []
+    for conf in configurations:
+        if os.sep in conf:
+            parent = os.path.split(conf)[0]
+        else:
+            parent = conf
+        if parent not in parent_configurations:
+            parent_configurations.append(parent)
+
+    all_benchmarks = discover_benchmarks(parent_configurations)
+
+    if args.benchmark is not None:
+        benchmarks = []
+        for b in args.benchmark:
+            benchmarks += filter(lambda s: s.startswith(b), all_benchmarks)
+    else:
+        excluded_benchmarks = ['list.ListBenchmark', 'mandelbrot.MandelbrotBenchmark']
+        benchmarks = [x for x in all_benchmarks if x not in excluded_benchmarks]
+
+    report_dir = "reports/summary_" + time.strftime('%Y%m%d_%H%M%S') + "_" + comment + "/"
+    plt.figure(figsize=(32, 24))
+    plt.rcParams["font.size"] = 20.0
+    mkdir(report_dir)
+    with open(os.path.join(report_dir, "Readme.md"), 'w+') as md_file:
+        write_md_file(report_dir, md_file, parent_configurations, configurations, benchmarks, args.warmup, args.gc, args.vssize)
+
+    print report_dir
diff --git a/src/main/scala/histogram/Histogram.scala b/src/main/scala/histogram/Histogram.scala
new file mode 100644
index 0000000..b5b4e54
--- /dev/null
+++ b/src/main/scala/histogram/Histogram.scala
@@ -0,0 +1,21 @@
+package histogram
+
+import scala.util.Random
+
+object Histogram extends communitybench.Benchmark {
+  override def run(input: String): Any = {
+    val Array(items, k) = input.split(",").map(_.toInt)
+    var histogram = Map.empty[Int, Int]
+    val random = new Random(13371337)
+    (1 to items).foreach {
+      _ =>
+        val key = random.nextInt(k)
+        val newValue = histogram.getOrElse(key, 0) + 1
+        histogram += key -> newValue
+    }
+    histogram.values.sum == items
+  }
+
+  override def main(args: Array[String]): Unit =
+    super.main(args)
+}
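+
+// Usage sketch (the numbers below are only an example): the input string is a
+// comma-separated pair "<items>,<k>"; run("1000000,128") inserts one million
+// random keys drawn from [0, 128) into an immutable Map and returns true when
+// the accumulated counts sum back to the item count.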