From 59f0883202dcf6c39585b74834bd4ca05e3d9e1d Mon Sep 17 00:00:00 2001 From: Valdis Date: Fri, 14 Sep 2018 16:35:55 +0200 Subject: [PATCH 001/169] latest snapshot --- confs/scala-native-0.3.9-SNAPSHOT/build.sbt | 5 +++++ confs/scala-native-0.3.9-SNAPSHOT/compile | 1 + confs/scala-native-0.3.9-SNAPSHOT/plugins.sbt | 1 + confs/scala-native-0.3.9-SNAPSHOT/run | 1 + 4 files changed, 8 insertions(+) create mode 100644 confs/scala-native-0.3.9-SNAPSHOT/build.sbt create mode 100644 confs/scala-native-0.3.9-SNAPSHOT/compile create mode 100644 confs/scala-native-0.3.9-SNAPSHOT/plugins.sbt create mode 100644 confs/scala-native-0.3.9-SNAPSHOT/run diff --git a/confs/scala-native-0.3.9-SNAPSHOT/build.sbt b/confs/scala-native-0.3.9-SNAPSHOT/build.sbt new file mode 100644 index 0000000..b4a5690 --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT/build.sbt @@ -0,0 +1,5 @@ +scalaVersion := "2.11.12" +enablePlugins(ScalaNativePlugin) +nativeLinkStubs := true +nativeGC := "immix" +nativeMode := "release" diff --git a/confs/scala-native-0.3.9-SNAPSHOT/compile b/confs/scala-native-0.3.9-SNAPSHOT/compile new file mode 100644 index 0000000..2f3f09f --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT/compile @@ -0,0 +1 @@ +nativeLink diff --git a/confs/scala-native-0.3.9-SNAPSHOT/plugins.sbt b/confs/scala-native-0.3.9-SNAPSHOT/plugins.sbt new file mode 100644 index 0000000..c1423b6 --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT/plugins.sbt @@ -0,0 +1 @@ +addSbtPlugin("org.scala-native" % "sbt-scala-native" % "0.3.9-SNAPSHOT") diff --git a/confs/scala-native-0.3.9-SNAPSHOT/run b/confs/scala-native-0.3.9-SNAPSHOT/run new file mode 100644 index 0000000..ae89e34 --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT/run @@ -0,0 +1 @@ +target/scala-2.11/scala-native-benchmarks-out From 497300d74aa14818f67acc40b3478e72c7b11b29 Mon Sep 17 00:00:00 2001 From: Valdis Date: Fri, 21 Sep 2018 13:03:54 +0200 Subject: [PATCH 002/169] using latest snapshot and latest stable version --- confs/scala-native-0.3.7/plugins.sbt | 1 - confs/{scala-native-0.3.7 => scala-native-0.3.8}/build.sbt | 0 confs/{scala-native-0.3.7 => scala-native-0.3.8}/compile | 0 confs/scala-native-0.3.8/plugins.sbt | 1 + confs/{scala-native-0.3.7 => scala-native-0.3.8}/run | 0 scripts/run.py | 3 ++- 6 files changed, 3 insertions(+), 2 deletions(-) delete mode 100644 confs/scala-native-0.3.7/plugins.sbt rename confs/{scala-native-0.3.7 => scala-native-0.3.8}/build.sbt (100%) rename confs/{scala-native-0.3.7 => scala-native-0.3.8}/compile (100%) create mode 100644 confs/scala-native-0.3.8/plugins.sbt rename confs/{scala-native-0.3.7 => scala-native-0.3.8}/run (100%) diff --git a/confs/scala-native-0.3.7/plugins.sbt b/confs/scala-native-0.3.7/plugins.sbt deleted file mode 100644 index afc9d5a..0000000 --- a/confs/scala-native-0.3.7/plugins.sbt +++ /dev/null @@ -1 +0,0 @@ -addSbtPlugin("org.scala-native" % "sbt-scala-native" % "0.3.7") diff --git a/confs/scala-native-0.3.7/build.sbt b/confs/scala-native-0.3.8/build.sbt similarity index 100% rename from confs/scala-native-0.3.7/build.sbt rename to confs/scala-native-0.3.8/build.sbt diff --git a/confs/scala-native-0.3.7/compile b/confs/scala-native-0.3.8/compile similarity index 100% rename from confs/scala-native-0.3.7/compile rename to confs/scala-native-0.3.8/compile diff --git a/confs/scala-native-0.3.8/plugins.sbt b/confs/scala-native-0.3.8/plugins.sbt new file mode 100644 index 0000000..2d38aa0 --- /dev/null +++ b/confs/scala-native-0.3.8/plugins.sbt @@ -0,0 +1 @@ +addSbtPlugin("org.scala-native" 
% "sbt-scala-native" % "0.3.8") diff --git a/confs/scala-native-0.3.7/run b/confs/scala-native-0.3.8/run similarity index 100% rename from confs/scala-native-0.3.7/run rename to confs/scala-native-0.3.8/run diff --git a/scripts/run.py b/scripts/run.py index 187069b..c6bf667 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -62,7 +62,8 @@ def compile(bench, compilecmd): configurations = [ 'jvm', - 'scala-native-0.3.7', + 'scala-native-0.3.8', + 'scala-native-0.3.9-SNAPSHOT', ] if 'GRAALVM_HOME' in os.environ: From f9f49c4a11b2b93bc25e1cc69b8f3985348d6acc Mon Sep 17 00:00:00 2001 From: Valdis Date: Fri, 21 Sep 2018 14:18:31 +0200 Subject: [PATCH 003/169] enable running just a part of the benchmarks --- scripts/run.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/scripts/run.py b/scripts/run.py index c6bf667..9e889f8 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -60,14 +60,20 @@ def compile(bench, compilecmd): 'sudoku.SudokuBenchmark', ] -configurations = [ + +baseline = [ 'jvm', 'scala-native-0.3.8', +] + +latest = [ 'scala-native-0.3.9-SNAPSHOT', ] +configurations = baseline + latest + if 'GRAALVM_HOME' in os.environ: - configurations += [ + baseline += [ 'native-image', 'native-image-pgo', ] @@ -77,6 +83,11 @@ def compile(bench, compilecmd): batch_size = 1 if __name__ == "__main__": + if "baseline" in sys.argv: + configurations = baseline + elif "latest" in sys.argv: + configurations = latest + for conf in configurations: for bench in benchmarks: print('--- conf: {}, bench: {}'.format(conf, bench)) From cfcf3d432718770ee4b92d17da5f5802a52d21a3 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sat, 22 Sep 2018 15:17:31 +0200 Subject: [PATCH 004/169] dependency install script --- scripts/install-dependecies.sh | 2 ++ 1 file changed, 2 insertions(+) create mode 100755 scripts/install-dependecies.sh diff --git a/scripts/install-dependecies.sh b/scripts/install-dependecies.sh new file mode 100755 index 0000000..c0dea34 --- /dev/null +++ b/scripts/install-dependecies.sh @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +sudo pip2 install numpy matplotlib \ No newline at end of file From 875c64e6d0f0d0f13939dffa0eb4171cff4b3369 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sat, 22 Sep 2018 15:18:08 +0200 Subject: [PATCH 005/169] attempt at a barchart --- scripts/run.py | 4 ++-- scripts/summary.py | 21 ++++++++++++++++++++- 2 files changed, 22 insertions(+), 3 deletions(-) mode change 100644 => 100755 scripts/summary.py diff --git a/scripts/run.py b/scripts/run.py index 9e889f8..749ef7f 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python2 import sys import os import errno @@ -62,7 +62,7 @@ def compile(bench, compilecmd): baseline = [ - 'jvm', +# 'jvm', 'scala-native-0.3.8', ] diff --git a/scripts/summary.py b/scripts/summary.py old mode 100644 new mode 100755 index b953295..1eaa024 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -1,6 +1,9 @@ +#!/usr/bin/env python2 from run import benchmarks, runs, configurations import numpy as np +import matplotlib +import matplotlib.pyplot as plt def config_data(bench, conf): out = [] @@ -33,11 +36,27 @@ def peak_performance(): out.append(res) return out +def p50_chart(plt): + ind = np.arange(len(benchmarks)) + for conf in configurations: + res = [] + for bench in benchmarks: + try: + res.append(np.percentile(config_data(bench, conf), 50)) + except IndexError: + res.append(0) + plt.bar(ind, res, align='center', label=conf) + plt.xticks(ind, map(lambda x: 
x.split(".")[0],benchmarks)) + plt.legend() + plt.show() + + if __name__ == '__main__': leading = ['name'] for conf in configurations: leading.append(conf) print ','.join(leading) for bench, res in zip(benchmarks, peak_performance()): - print ','.join([bench] + list(map(str, res))) + print ','.join([bench.split(".")[0]] + list(map(str, res))) + p50_chart(plt) From dda3461a90f256c02d12d005c77d55cd67e35788 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sun, 23 Sep 2018 13:10:40 +0200 Subject: [PATCH 006/169] save charts to reports --- .gitignore | 1 + scripts/install-dependecies.sh | 3 +- scripts/summary.py | 63 ++++++++++++++++++++++++++++------ 3 files changed, 55 insertions(+), 12 deletions(-) diff --git a/.gitignore b/.gitignore index c37ddf5..dfc26b9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ bin/.coursier bin/.scalafmt* results/ +reports/ *.iprof .idea \ No newline at end of file diff --git a/scripts/install-dependecies.sh b/scripts/install-dependecies.sh index c0dea34..3b348d2 100755 --- a/scripts/install-dependecies.sh +++ b/scripts/install-dependecies.sh @@ -1,2 +1,3 @@ #!/usr/bin/env bash -sudo pip2 install numpy matplotlib \ No newline at end of file +sudo pip2 install numpy matplotlib +sudo apt update && sudo apt install python-tk \ No newline at end of file diff --git a/scripts/summary.py b/scripts/summary.py index 1eaa024..84376da 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -1,10 +1,11 @@ #!/usr/bin/env python2 -from run import benchmarks, runs, configurations +from run import benchmarks, runs, configurations, mkdir import numpy as np import matplotlib import matplotlib.pyplot as plt + def config_data(bench, conf): out = [] for run in xrange(runs): @@ -24,39 +25,79 @@ def config_data(bench, conf): pass return np.array(out) -def peak_performance(): + +def hot_config_data(bench, conf): + out = [] + for run in xrange(runs): + try: + points = [] + with open('results/{}/{}/{}'.format(conf, bench, run)) as data: + for line in data.readlines(): + points.append(float(line)) + # take only last 1000 to account for startup + out += points[-1000:] + except IOError: + pass + return np.array(out) + + +def peak_performance(percentile): out = [] for bench in benchmarks: res = [] for conf in configurations: try: - res.append(np.percentile(config_data(bench, conf), 50)) + res.append(np.percentile(config_data(bench, conf), percentile)) except IndexError: res.append(0) out.append(res) return out -def p50_chart(plt): + +# not good +def bar_chart(plt, percentile): ind = np.arange(len(benchmarks)) for conf in configurations: res = [] for bench in benchmarks: try: - res.append(np.percentile(config_data(bench, conf), 50)) + res.append(np.percentile(config_data(bench, conf), percentile)) except IndexError: res.append(0) plt.bar(ind, res, align='center', label=conf) - plt.xticks(ind, map(lambda x: x.split(".")[0],benchmarks)) + plt.xticks(ind, map(benchmark_short_name, benchmarks)) plt.legend() - plt.show() + return plt -if __name__ == '__main__': +def percentiles_chart(plt, bench, limit=99): + for conf in configurations: + data = hot_config_data(bench, conf) + percentiles = np.arange(0, limit) + percvalue = np.array([np.percentile(data, perc) for perc in percentiles]) + plt.plot(percentiles, percvalue, label = conf) + plt.legend() + plt.title(bench) + return plt + +def print_table(data): leading = ['name'] for conf in configurations: leading.append(conf) print ','.join(leading) - for bench, res in zip(benchmarks, peak_performance()): - print ','.join([bench.split(".")[0]] + 
list(map(str, res))) - p50_chart(plt) + for bench, res in zip(benchmarks, data): + print ','.join([benchmark_short_name(bench)] + list(map(str, res))) + + +def benchmark_short_name(bench): + return bench.split(".")[0] + +if __name__ == '__main__': + print_table(peak_performance(50)) + # bar_chart(plt, 50).show() + mkdir("reports") + for bench in benchmarks: + percentiles_chart(plt, bench).savefig("reports/percentile_" + bench + ".png") + plt.clf() + plt.cla() From ba7af040e0a7cb33681cdaef5b05a5b22669b99c Mon Sep 17 00:00:00 2001 From: Valdis Date: Sun, 23 Sep 2018 13:31:45 +0200 Subject: [PATCH 007/169] include jvm as well --- scripts/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/run.py b/scripts/run.py index 749ef7f..0107187 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -62,7 +62,7 @@ def compile(bench, compilecmd): baseline = [ -# 'jvm', + 'jvm', 'scala-native-0.3.8', ] From a1962d9755455c9ec59a71119f21bdc503203a83 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sun, 23 Sep 2018 13:34:28 +0200 Subject: [PATCH 008/169] do not remove 1% outliers when doing percentiles anyway --- scripts/summary.py | 37 +++++++++++-------------------------- 1 file changed, 11 insertions(+), 26 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index 84376da..c91fcd5 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -2,31 +2,12 @@ from run import benchmarks, runs, configurations, mkdir import numpy as np +import time import matplotlib import matplotlib.pyplot as plt def config_data(bench, conf): - out = [] - for run in xrange(runs): - try: - points = [] - with open('results/{}/{}/{}'.format(conf, bench, run)) as data: - for line in data.readlines(): - points.append(float(line)) - # take only last 1000 to account for startup - points = points[-1000:] - # filter out 1% worst measurements as outliers - pmax = np.percentile(points, 99) - for point in points: - if point <= pmax: - out.append(point) - except IOError: - pass - return np.array(out) - - -def hot_config_data(bench, conf): out = [] for run in xrange(runs): try: @@ -41,7 +22,7 @@ def hot_config_data(bench, conf): return np.array(out) -def peak_performance(percentile): +def percentile(percentile): out = [] for bench in benchmarks: res = [] @@ -72,14 +53,17 @@ def bar_chart(plt, percentile): def percentiles_chart(plt, bench, limit=99): for conf in configurations: - data = hot_config_data(bench, conf) + data = config_data(bench, conf) percentiles = np.arange(0, limit) percvalue = np.array([np.percentile(data, perc) for perc in percentiles]) - plt.plot(percentiles, percvalue, label = conf) + plt.plot(percentiles, percvalue, label=conf) plt.legend() plt.title(bench) + plt.xlabel("Percentile (%)") + plt.ylabel("Run time (s)") return plt + def print_table(data): leading = ['name'] for conf in configurations: @@ -94,10 +78,11 @@ def benchmark_short_name(bench): if __name__ == '__main__': - print_table(peak_performance(50)) + print_table(percentile(50)) # bar_chart(plt, 50).show() - mkdir("reports") + rootdir = "reports/summary_" + time.strftime('%Y%m%d_%H%M%S') + "/" + mkdir(rootdir) for bench in benchmarks: - percentiles_chart(plt, bench).savefig("reports/percentile_" + bench + ".png") + percentiles_chart(plt, bench).savefig(rootdir + "percentile_" + bench + ".png") plt.clf() plt.cla() From 055d434d8b1e366172472afea4c93c5c895b88b7 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sun, 23 Sep 2018 16:58:03 +0200 Subject: [PATCH 009/169] all the charts inside markdown --- scripts/summary.py | 55 
++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 48 insertions(+), 7 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index c91fcd5..da974bf 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -3,8 +3,10 @@ import numpy as np import time +import sys import matplotlib import matplotlib.pyplot as plt +import os def config_data(bench, conf): @@ -59,7 +61,7 @@ def percentiles_chart(plt, bench, limit=99): plt.plot(percentiles, percvalue, label=conf) plt.legend() plt.title(bench) - plt.xlabel("Percentile (%)") + plt.xlabel("Percentile") plt.ylabel("Run time (s)") return plt @@ -70,7 +72,26 @@ def print_table(data): leading.append(conf) print ','.join(leading) for bench, res in zip(benchmarks, data): - print ','.join([benchmark_short_name(bench)] + list(map(str, res))) + print ','.join([bench] + list(map(str, res))) + + +def write_md_table(file, data): + leading = ['name'] + for conf in configurations: + leading.append(conf) + file.write('|') + file.write(' | '.join(leading)) + file.write('|\n') + + file.write('|') + for _ in leading: + file.write(' -- |') + file.write('\n') + + for bench, res in zip(benchmarks, data): + file.write('|') + file.write('|'.join([bench] + list(map(str, res)))) + file.write('|\n') def benchmark_short_name(bench): @@ -78,11 +99,31 @@ def benchmark_short_name(bench): if __name__ == '__main__': + if len(sys.argv) > 1: + configurations = sys.argv[1:] print_table(percentile(50)) # bar_chart(plt, 50).show() - rootdir = "reports/summary_" + time.strftime('%Y%m%d_%H%M%S') + "/" + rootdir = "reports/summary_" + time.strftime('%Y%m%d_%H%M%S') + "_" + "_vs_".join(configurations) + "/" mkdir(rootdir) - for bench in benchmarks: - percentiles_chart(plt, bench).savefig(rootdir + "percentile_" + bench + ".png") - plt.clf() - plt.cla() + with open(os.path.join(rootdir, "Readme.md"), 'w+') as md_file: + md_file.write("# Summary\n") + md_file.write("## Benchmark run time (s) at 50 percentile \n") + write_md_table(md_file, percentile(50)) + md_file.write("## Benchmark run time (s) at 90 percentile \n") + write_md_table(md_file, percentile(90)) + md_file.write("## Benchmark run time (s) at 99 percentile \n") + write_md_table(md_file, percentile(99)) + + md_file.write("# Individual benchmarks\n") + for bench in benchmarks: + md_file.write("## ") + md_file.write(bench) + md_file.write("\n") + + chart_name = "percentile_" + bench + ".png" + chart_file = rootdir + chart_name + percentiles_chart(plt, bench).savefig(chart_file) + plt.clf() + plt.cla() + + md_file.write("![Chart]({})\n".format(chart_name)) \ No newline at end of file From 448977d01d667595580aee0e1f2b80f47699d672 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sun, 23 Sep 2018 17:25:15 +0200 Subject: [PATCH 010/169] links, using seconds, correct yaxis --- scripts/summary.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index da974bf..964c484 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -16,7 +16,7 @@ def config_data(bench, conf): points = [] with open('results/{}/{}/{}'.format(conf, bench, run)) as data: for line in data.readlines(): - points.append(float(line)) + points.append(float(line) / 1000000) # take only last 1000 to account for startup out += points[-1000:] except IOError: @@ -61,6 +61,7 @@ def percentiles_chart(plt, bench, limit=99): plt.plot(percentiles, percvalue, label=conf) plt.legend() plt.title(bench) + plt.ylim(ymin=0) plt.xlabel("Percentile") plt.ylabel("Run time (s)") return plt @@ -90,10 
+91,14 @@ def write_md_table(file, data): for bench, res in zip(benchmarks, data): file.write('|') - file.write('|'.join([bench] + list(map(str, res)))) + file.write('|'.join([benchmark_md_link(bench)] + list(map(str, res)))) file.write('|\n') +def benchmark_md_link(bench): + return "[{}]({})".format(bench, bench.replace(".","").lower()) + + def benchmark_short_name(bench): From 1cd997bc746391aa7b2a199b60755de611aed5ad Mon Sep 17 00:00:00 2001 From: Valdis Date: Sun, 23 Sep 2018 17:49:55 +0200 Subject: [PATCH 011/169] decimal places and relative difference --- scripts/summary.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index 964c484..36a51e1 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -77,26 +77,35 @@ def print_table(data): def write_md_table(file, data): - leading = ['name'] - for conf in configurations: - leading.append(conf) + header = ['name'] + header.append(configurations[0]) + for conf in configurations[1:]: + header.append(conf) + header.append("") file.write('|') - file.write(' | '.join(leading)) + file.write(' | '.join(header)) file.write('|\n') file.write('|') - for _ in leading: + for _ in header: file.write(' -- |') file.write('\n') - for bench, res in zip(benchmarks, data): + for bench, res0 in zip(benchmarks, data): + base = res0[0] + res = [("%.4f" % base)] + sum(map(lambda x: cell(x, base), res0[1:]), []) file.write('|') - file.write('|'.join([benchmark_md_link(bench)] + list(map(str, res)))) + file.write('|'.join([benchmark_md_link(bench)] + list(res))) file.write('|\n') +def cell(x, base): + percent_diff = (float(x) / base - 1) * 100 + return [("%.4f" % x), ("+" if percent_diff > 0 else "") + ("%.2f" % percent_diff) + "%"] + def benchmark_md_link(bench): - return "[{}]({})".format(bench, bench.replace(".","").lower()) + return "[{}]({})".format(bench, bench.replace(".", "").lower()) def benchmark_short_name(bench): From c7422394dbd539a039fe4385c2e4192afd732635 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sun, 23 Sep 2018 18:02:19 +0200 Subject: [PATCH 012/169] make it bold when there is an improvement --- scripts/summary.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/summary.py b/scripts/summary.py index 36a51e1..d674a60 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -101,7 +101,7 @@ def write_md_table(file, data): def cell(x, base): percent_diff = (float(x) / base - 1) * 100 - return [("%.4f" % x), ("+" if percent_diff > 0 else "__") + ("%.2f" % percent_diff) + "%" + ("" if percent_diff > 0 else "__")] def benchmark_md_link(bench): From 67e8199eb058a82280aa19d0a323de53e7550a55 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sun, 23 Sep 2018 18:29:02 +0200 Subject: [PATCH 013/169] relative bar chart --- scripts/summary.py | 81 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 57 insertions(+), 24 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index d674a60..13ec850 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -53,6 +53,34 @@ def bar_chart(plt, percentile): return plt +def bar_chart_relative(plt, percentile): + ind = np.arange(len(benchmarks)) + conf_count = 
len(configurations) + 1 + base = [] + ref = [] + for bench in benchmarks: + try: + base.append(np.percentile(config_data(bench, configurations[0]), percentile)) + ref.append(1.0) + except IndexError: + base.append(0) + ref.append(0.0) + plt.bar(ind * conf_count, ref, label=configurations[0]) + + for i,conf in enumerate(configurations[1:]): + res = [] + for bench, base_val in zip(benchmarks, base): + try: + res.append(np.percentile(config_data(bench, conf), percentile) / base_val) + except IndexError: + res.append(0) + plt.bar(ind * conf_count + i + 1, res, label=conf) + plt.xticks((ind * conf_count + (conf_count - 1)/2.0), map(benchmark_short_name, benchmarks)) + plt.title("Relative performance against " + configurations[0] + " at " + str(percentile) + " percentile") + plt.legend() + return plt + + def percentiles_chart(plt, bench, limit=99): for conf in configurations: data = config_data(bench, conf) @@ -101,7 +129,8 @@ def write_md_table(file, data): def cell(x, base): percent_diff = (float(x) / base - 1) * 100 - return [("%.4f" % x), ("+" if percent_diff > 0 else "__") + ("%.2f" % percent_diff) + "%" + ("" if percent_diff > 0 else "__")] + return [("%.4f" % x), + ("+" if percent_diff > 0 else "__") + ("%.2f" % percent_diff) + "%" + ("" if percent_diff > 0 else "__")] def benchmark_md_link(bench): @@ -112,32 +141,36 @@ def benchmark_short_name(bench): return bench.split(".")[0] +def write_md_file(md_file): + md_file.write("# Summary\n") + md_file.write("## Benchmark run time (s) at 50 percentile \n") + write_md_table(md_file, percentile(50)) + md_file.write("## Benchmark run time (s) at 90 percentile \n") + write_md_table(md_file, percentile(90)) + md_file.write("## Benchmark run time (s) at 99 percentile \n") + write_md_table(md_file, percentile(99)) + md_file.write("# Individual benchmarks\n") + for bench in benchmarks: + md_file.write("## ") + md_file.write(bench) + md_file.write("\n") + + chart_name = "percentile_" + bench + ".png" + chart_file = rootdir + chart_name + percentiles_chart(plt, bench).savefig(chart_file) + plt.clf() + plt.cla() + + md_file.write("![Chart]({})\n".format(chart_name)) + + if __name__ == '__main__': if len(sys.argv) > 1: configurations = sys.argv[1:] - print_table(percentile(50)) + # print_table(percentile(50)) # bar_chart(plt, 50).show() rootdir = "reports/summary_" + time.strftime('%Y%m%d_%H%M%S') + "_" + "_vs_".join(configurations) + "/" mkdir(rootdir) - with open(os.path.join(rootdir, "Readme.md"), 'w+') as md_file: - md_file.write("# Summary\n") - md_file.write("## Benchmark run time (s) at 50 percentile \n") - write_md_table(md_file, percentile(50)) - md_file.write("## Benchmark run time (s) at 90 percentile \n") - write_md_table(md_file, percentile(90)) - md_file.write("## Benchmark run time (s) at 99 percentile \n") - write_md_table(md_file, percentile(99)) - - md_file.write("# Individual benchmarks\n") - for bench in benchmarks: - md_file.write("## ") - md_file.write(bench) - md_file.write("\n") - - chart_name = "percentile_" + bench + ".png" - chart_file = rootdir + chart_name - percentiles_chart(plt, bench).savefig(chart_file) - plt.clf() - plt.cla() - - md_file.write("![Chart]({})\n".format(chart_name)) + # with open(os.path.join(rootdir, "Readme.md"), 'w+') as md_file: + # write_md_file(md_file) + bar_chart_relative(plt, 50).show() From 14451c9cdaf31c3ba045c8ef615b3f9217dfc18a Mon Sep 17 00:00:00 2001 From: Valdis Date: Sun, 23 Sep 2018 18:59:11 +0200 Subject: [PATCH 014/169] good bar charts for summary --- scripts/summary.py | 48 
+++++++++++++++++++++----------------------------- 1 file changed, 18 insertions(+), 30 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index 13ec850..8436a2f 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -37,22 +37,6 @@ def percentile(percentile): return out -# not good -def bar_chart(plt, percentile): - ind = np.arange(len(benchmarks)) - for conf in configurations: - res = [] - for bench in benchmarks: - try: - res.append(np.percentile(config_data(bench, conf), percentile)) - except IndexError: - res.append(0) - plt.bar(ind, res, align='center', label=conf) - plt.xticks(ind, map(benchmark_short_name, benchmarks)) - plt.legend() - return plt - def bar_chart_relative(plt, percentile): ind = np.arange(len(benchmarks)) conf_count = len(configurations) + 1 @@ -67,7 +51,7 @@ def bar_chart_relative(plt, percentile): ref.append(0.0) plt.bar(ind * conf_count, ref, label=configurations[0]) - for i,conf in enumerate(configurations[1:]): + for i, conf in enumerate(configurations[1:]): res = [] for bench, base_val in zip(benchmarks, base): try: @@ -75,8 +59,8 @@ def bar_chart_relative(plt, percentile): except IndexError: res.append(0) plt.bar(ind * conf_count + i + 1, res, label=conf) - plt.xticks((ind * conf_count + (conf_count - 1)/2.0), map(benchmark_short_name, benchmarks)) - plt.title("Relative performance against " + configurations[0] + " at " + str(percentile) + " percentile") + plt.xticks((ind * conf_count + (conf_count - 1) / 2.0), map(benchmark_short_name, benchmarks)) + plt.title("Relative test execution times against " + configurations[0] + " at " + str(percentile) + " percentile") plt.legend() return plt @@ -141,14 +125,19 @@ def benchmark_short_name(bench): return bench.split(".")[0] -def write_md_file(rootdir, md_file): md_file.write("# Summary\n") - md_file.write("## Benchmark run time (s) at 50 percentile \n") - write_md_table(md_file, percentile(50)) - md_file.write("## Benchmark run time (s) at 90 percentile \n") - write_md_table(md_file, percentile(90)) - md_file.write("## Benchmark run time (s) at 99 percentile \n") - write_md_table(md_file, percentile(99)) + for p in [50, 90, 99]: + md_file.write("## Benchmark run time (s) at {} percentile \n".format(p)) + chart_name = "relative_percentile_" + str(p) + ".png" + bar_chart_relative(plt, p).savefig(rootdir + chart_name) + plt.clf() + plt.cla() + + md_file.write("![Chart]({})\n\n".format(chart_name)) + + write_md_table(md_file, percentile(p)) + md_file.write("# Individual benchmarks\n") for bench in benchmarks: md_file.write("## ") md_file.write(bench) md_file.write("\n") chart_name = "percentile_" + bench + ".png" chart_file = rootdir + chart_name percentiles_chart(plt, bench).savefig(chart_file) plt.clf() plt.cla() md_file.write("![Chart]({})\n".format(chart_name)) if __name__ == '__main__': if len(sys.argv) > 1: configurations = sys.argv[1:] # print_table(percentile(50)) plt.rcParams["figure.figsize"] = [16.0, 12.0] rootdir = "reports/summary_" + time.strftime('%Y%m%d_%H%M%S') + "_" + "_vs_".join(configurations) + "/" mkdir(rootdir) with open(os.path.join(rootdir, "Readme.md"), 'w+') as md_file: write_md_file(rootdir, md_file) From 836b66f18ab3ba9b5b29cc41a14a774c64835128 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sun, 23 Sep 2018 19:34:07 +0200 Subject: [PATCH 015/169] fixed alignment --- scripts/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/run.py b/scripts/run.py index 0107187..14fde56 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -62,7 +62,7 @@ def compile(bench, compilecmd): 
baseline = [ - 'jvm', + 'jvm', 'scala-native-0.3.8', ] From 5e01ccecbbdb898a4ee8ae94fea64046c2f06a9e Mon Sep 17 00:00:00 2001 From: Valdis Date: Sun, 23 Sep 2018 21:01:34 +0200 Subject: [PATCH 016/169] proper argument parsing --- scripts/run.py | 39 +++++++++++++++++++++++++++------------ scripts/summary.py | 33 +++++++++++++++++++++------------ 2 files changed, 48 insertions(+), 24 deletions(-) diff --git a/scripts/run.py b/scripts/run.py index 14fde56..0626a8e 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -4,6 +4,7 @@ import errno import subprocess as subp import shutil as sh +import argparse def mkdir(path): try: @@ -60,17 +61,16 @@ def compile(bench, compilecmd): 'sudoku.SudokuBenchmark', ] - +stable = 'scala-native-0.3.8' baseline = [ - 'jvm', - 'scala-native-0.3.8', + 'jvm', + stable, ] -latest = [ - 'scala-native-0.3.9-SNAPSHOT', -] +latest = 'scala-native-0.3.9-SNAPSHOT' + -configurations = baseline + latest +configurations = all_configs = baseline + [latest] if 'GRAALVM_HOME' in os.environ: baseline += [ @@ -83,10 +83,25 @@ def compile(bench, compilecmd): batch_size = 1 if __name__ == "__main__": - if "baseline" in sys.argv: - configurations = baseline - elif "latest" in sys.argv: - configurations = latest + parser = argparse.ArgumentParser() + parser.add_argument("--suffix", help="suffix added to results") + parser.add_argument("set", nargs='*', choices=configurations + ["baseline", "latest", "stable", "all"], + default="all") + args = parser.parse_args() + + if args.set != all_configs: + configurations = [] + for choice in args.set: + if choice == "baseline": + configurations += baseline + elif choice == "latest" in args.set: + configurations += [latest] + elif choice == "stable" in args.set: + configurations += [stable] + else: + configurations + [choice] + else: + configurations = all_configs for conf in configurations: for bench in benchmarks: @@ -108,7 +123,7 @@ def compile(bench, compilecmd): os.remove('project/plugins.sbt') compile(bench, compilecmd) - resultsdir = os.path.join('results', conf, bench) + resultsdir = os.path.join('results', conf + "_" + args.suffix, bench) mkdir(resultsdir) for n in xrange(runs): diff --git a/scripts/summary.py b/scripts/summary.py index 8436a2f..65cadc4 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -1,5 +1,5 @@ #!/usr/bin/env python2 -from run import benchmarks, runs, configurations, mkdir +from run import benchmarks, runs, mkdir, all_configs, latest, stable import numpy as np import time @@ -24,7 +24,7 @@ def config_data(bench, conf): return np.array(out) -def percentile(percentile): +def percentile(configurations, percentile): out = [] for bench in benchmarks: res = [] @@ -37,7 +37,7 @@ def percentile(percentile): return out -def bar_chart_relative(plt, percentile): +def bar_chart_relative(plt, configurations, percentile): ind = np.arange(len(benchmarks)) conf_count = len(configurations) + 1 base = [] @@ -65,7 +65,7 @@ def bar_chart_relative(plt, percentile): return plt -def percentiles_chart(plt, bench, limit=99): +def percentiles_chart(plt, configurations, bench, limit=99): for conf in configurations: data = config_data(bench, conf) percentiles = np.arange(0, limit) @@ -79,7 +79,7 @@ def percentiles_chart(plt, bench, limit=99): return plt -def print_table(data): +def print_table(configurations, data): leading = ['name'] for conf in configurations: leading.append(conf) @@ -88,7 +88,7 @@ def print_table(data): print ','.join([bench] + list(map(str, res))) -def write_md_table(file, data): +def write_md_table(file, 
configurations, data): header = ['name'] header.append(configurations[0]) for conf in configurations[1:]: @@ -125,18 +125,18 @@ def benchmark_short_name(bench): return bench.split(".")[0] -def write_md_file(rootdir, md_file): +def write_md_file(rootdir, md_file, configurations): md_file.write("# Summary\n") for p in [50, 90, 99]: md_file.write("## Benchmark run time (s) at {} percentile \n".format(p)) chart_name = "relative_percentile_" + str(p) + ".png" - bar_chart_relative(plt, p).savefig(rootdir + chart_name) + bar_chart_relative(plt, configurations, p).savefig(rootdir + chart_name) plt.clf() plt.cla() md_file.write("![Chart]({})\n\n".format(chart_name)) - write_md_table(md_file, percentile(p)) + write_md_table(md_file, configurations, percentile(configurations, p)) md_file.write("# Individual benchmarks\n") for bench in benchmarks: @@ -146,7 +146,7 @@ def write_md_file(rootdir, md_file): chart_name = "percentile_" + bench + ".png" chart_file = rootdir + chart_name - percentiles_chart(plt, bench).savefig(chart_file) + percentiles_chart(plt, configurations, bench).savefig(chart_file) plt.clf() plt.cla() @@ -154,11 +154,20 @@ def write_md_file(rootdir, md_file): if __name__ == '__main__': + configurations = [] if len(sys.argv) > 1: - configurations = sys.argv[1:] + for arg in sys.argv[1:]: + if arg == "latest": + configurations += [latest] + elif arg == "stable": + configurations += [stable] + else: + configurations += arg + else: + configurations = all_configs # print_table(percentile(50)) plt.rcParams["figure.figsize"] = [16.0, 12.0] rootdir = "reports/summary_" + time.strftime('%Y%m%d_%H%M%S') + "_" + "_vs_".join(configurations) + "/" mkdir(rootdir) with open(os.path.join(rootdir, "Readme.md"), 'w+') as md_file: - write_md_file(rootdir, md_file) + write_md_file(rootdir, md_file, configurations) From 74b24e99b28ce781115df7d5151bbebdd2cf516d Mon Sep 17 00:00:00 2001 From: Valdis Date: Sun, 23 Sep 2018 21:16:39 +0200 Subject: [PATCH 017/169] prints resulting report path --- scripts/summary.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index 65cadc4..c352b48 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -165,9 +165,10 @@ def write_md_file(rootdir, md_file, configurations): configurations += arg else: configurations = all_configs - # print_table(percentile(50)) plt.rcParams["figure.figsize"] = [16.0, 12.0] - rootdir = "reports/summary_" + time.strftime('%Y%m%d_%H%M%S') + "_" + "_vs_".join(configurations) + "/" - mkdir(rootdir) - with open(os.path.join(rootdir, "Readme.md"), 'w+') as md_file: - write_md_file(rootdir, md_file, configurations) + report_dir = "reports/summary_" + time.strftime('%Y%m%d_%H%M%S') + "_" + "_vs_".join(configurations) + "/" + mkdir(report_dir) + with open(os.path.join(report_dir, "Readme.md"), 'w+') as md_file: + write_md_file(report_dir, md_file, configurations) + + print report_dir \ No newline at end of file From 168e35769154cd4ef55a28e6059c59a5df18e906 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sun, 23 Sep 2018 21:26:13 +0200 Subject: [PATCH 018/169] allow "latest" and "stable" as dynamic suffixes --- scripts/summary.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index c352b48..ecbf735 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -157,10 +157,10 @@ def write_md_file(rootdir, md_file, configurations): configurations = [] if len(sys.argv) > 1: for arg in sys.argv[1:]: - if arg == "latest": - 
configurations += [latest] - elif arg == "stable": - configurations += [stable] + if arg.startswith("latest"): + configurations += [latest + arg[len("latest"):]] + elif arg.startswith("stable"): + configurations += [stable + arg[len("stable"):]] else: configurations += arg else: From 9596ba36ceeabe2d25fa870b1a29087567ea8c2f Mon Sep 17 00:00:00 2001 From: Valdis Date: Sun, 23 Sep 2018 21:26:25 +0200 Subject: [PATCH 019/169] formatting --- scripts/run.py | 47 ++++++++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/scripts/run.py b/scripts/run.py index 0626a8e..07f86ad 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -6,19 +6,22 @@ import shutil as sh import argparse + def mkdir(path): try: os.makedirs(path) - except OSError as exc: # Python >2.5 + except OSError as exc: # Python >2.5 if exc.errno == errno.EEXIST and os.path.isdir(path): pass else: raise + def slurp(path): with open(path) as f: return f.read().strip() + def where(cmd): if os.path.isfile(cmd): return cmd @@ -31,34 +34,37 @@ def where(cmd): else: return None + def run(cmd): print(">>> " + str(cmd)) return subp.check_output(cmd) + def compile(bench, compilecmd): cmd = [sbt, '-J-Xmx2G', 'clean'] cmd.append('set mainClass in Compile := Some("{}")'.format(bench)) cmd.append(compilecmd) return run(cmd) + sbt = where('sbt') benchmarks = [ - 'bounce.BounceBenchmark', - 'list.ListBenchmark', - 'richards.RichardsBenchmark', - 'queens.QueensBenchmark', - 'permute.PermuteBenchmark', - 'deltablue.DeltaBlueBenchmark', - 'tracer.TracerBenchmark', - 'brainfuck.BrainfuckBenchmark', - 'json.JsonBenchmark', - 'cd.CDBenchmark', - 'kmeans.KmeansBenchmark', - 'gcbench.GCBenchBenchmark', - 'mandelbrot.MandelbrotBenchmark', - 'nbody.NbodyBenchmark', - 'sudoku.SudokuBenchmark', + 'bounce.BounceBenchmark', + 'list.ListBenchmark', + 'richards.RichardsBenchmark', + 'queens.QueensBenchmark', + 'permute.PermuteBenchmark', + 'deltablue.DeltaBlueBenchmark', + 'tracer.TracerBenchmark', + 'brainfuck.BrainfuckBenchmark', + 'json.JsonBenchmark', + 'cd.CDBenchmark', + 'kmeans.KmeansBenchmark', + 'gcbench.GCBenchBenchmark', + 'mandelbrot.MandelbrotBenchmark', + 'nbody.NbodyBenchmark', + 'sudoku.SudokuBenchmark', ] stable = 'scala-native-0.3.8' @@ -69,13 +75,12 @@ def compile(bench, compilecmd): latest = 'scala-native-0.3.9-SNAPSHOT' - configurations = all_configs = baseline + [latest] if 'GRAALVM_HOME' in os.environ: baseline += [ - 'native-image', - 'native-image-pgo', + 'native-image', + 'native-image-pgo', ] runs = 20 @@ -110,7 +115,8 @@ def compile(bench, compilecmd): input = slurp(os.path.join('input', bench)) output = slurp(os.path.join('output', bench)) compilecmd = slurp(os.path.join('confs', conf, 'compile')) - runcmd = slurp(os.path.join('confs', conf, 'run')).replace('$BENCH', bench).replace('$HOME', os.environ['HOME']).split(' ') + runcmd = slurp(os.path.join('confs', conf, 'run')).replace('$BENCH', bench).replace('$HOME', os.environ[ + 'HOME']).split(' ') if os.path.exists(os.path.join('confs', conf, 'build.sbt')): sh.copyfile(os.path.join('confs', conf, 'build.sbt'), 'build.sbt') @@ -135,4 +141,3 @@ def compile(bench, compilecmd): out = run(cmd) with open(os.path.join(resultsdir, str(n)), 'w+') as resultfile: resultfile.write(out) - From bd59e88f9c666f48a084e02026544ae296d0b538 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sun, 23 Sep 2018 21:44:34 +0200 Subject: [PATCH 020/169] can change the default comment on the suffix --- scripts/summary.py | 30 +++++++++++++++++++++++++----- 1 file 
changed, 25 insertions(+), 5 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index ecbf735..5ba84ea 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -7,6 +7,7 @@ import matplotlib import matplotlib.pyplot as plt import os +import argparse def config_data(bench, conf): @@ -154,19 +155,38 @@ def write_md_file(rootdir, md_file, configurations): if __name__ == '__main__': + dirs = next(os.walk("results"))[1] + results = dirs + for dir in dirs: + if dir.startswith(latest): + results += ["latest" + dir[len(latest):]] + if dir.startswith(stable): + results += ["stable" + dir[len(stable):]] + + parser = argparse.ArgumentParser() + parser.add_argument("--comment", help="comment at the suffix of the report name") + parser.add_argument("comparisons", nargs='*', choices= results + ["all"], + default="all") + args = parser.parse_args() + configurations = [] - if len(sys.argv) > 1: - for arg in sys.argv[1:]: + if args.comparisons == "all": + configurations = all_configs + else: + for arg in args.comparisons: if arg.startswith("latest"): configurations += [latest + arg[len("latest"):]] elif arg.startswith("stable"): configurations += [stable + arg[len("stable"):]] else: configurations += arg - else: - configurations = all_configs + + comment = "_vs_".join(configurations) + if args.comment is not None: + comment = args.comment + + report_dir = "reports/summary_" + time.strftime('%Y%m%d_%H%M%S') + "_" + comment + "/" plt.rcParams["figure.figsize"] = [16.0, 12.0] - report_dir = "reports/summary_" + time.strftime('%Y%m%d_%H%M%S') + "_" + "_vs_".join(configurations) + "/" mkdir(report_dir) with open(os.path.join(report_dir, "Readme.md"), 'w+') as md_file: write_md_file(report_dir, md_file, configurations) From fd96b79a848ad79096dd4970711a65e5599bbcef Mon Sep 17 00:00:00 2001 From: Valdis Date: Sun, 23 Sep 2018 22:17:48 +0200 Subject: [PATCH 021/169] include example runs --- scripts/summary.py | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index 5ba84ea..8c5ff12 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -39,6 +39,8 @@ def percentile(configurations, percentile): def bar_chart_relative(plt, configurations, percentile): + plt.clf() + plt.cla() ind = np.arange(len(benchmarks)) conf_count = len(configurations) + 1 base = [] @@ -66,7 +68,30 @@ def bar_chart_relative(plt, configurations, percentile): return plt +def example_run_plot(plt, configurations, bench, run=3): + plt.clf() + plt.cla() + + for conf in configurations: + points = [] + try: + with open('results/{}/{}/{}'.format(conf, bench, run)) as data: + for line in data.readlines(): + points.append(float(line) / 1000000) + except IOError: + pass + ind = np.arange(len(points)) + plt.plot(ind, points, label=conf) + plt.title("{} run #{}".format(bench, str(run))) + plt.xlabel("Iteration") + plt.ylabel("Run time (s)") + plt.legend() + return plt + + def percentiles_chart(plt, configurations, bench, limit=99): + plt.clf() + plt.cla() for conf in configurations: data = config_data(bench, conf) percentiles = np.arange(0, limit) @@ -132,8 +157,6 @@ def write_md_file(rootdir, md_file, configurations): md_file.write("## Benchmark run time (s) at {} percentile \n".format(p)) chart_name = "relative_percentile_" + str(p) + ".png" bar_chart_relative(plt, configurations, p).savefig(rootdir + chart_name) - plt.clf() - plt.cla() md_file.write("![Chart]({})\n\n".format(chart_name)) @@ -148,11 +171,14 @@ def write_md_file(rootdir, md_file, 
configurations): chart_name = "percentile_" + bench + ".png" chart_file = rootdir + chart_name percentiles_chart(plt, configurations, bench).savefig(chart_file) md_file.write("![Chart]({})\n".format(chart_name)) + chart_name = "example_run_3_" + bench + ".png" + chart_file = rootdir + chart_name + example_run_plot(plt, configurations, bench).savefig(chart_file) + md_file.write("![Chart]({})\n".format(chart_name)) if __name__ == '__main__': dirs = next(os.walk("results"))[1] From e3df4b62f040fe357184cdd6dac7070f82fda33e Mon Sep 17 00:00:00 2001 From: Valdis Date: Sun, 23 Sep 2018 22:22:26 +0200 Subject: [PATCH 022/169] fix links --- scripts/summary.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/summary.py b/scripts/summary.py index 8c5ff12..a905837 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -144,7 +144,7 @@ def cell(x, base): def benchmark_md_link(bench): - return "[{}]({})".format(bench, bench.replace(".", "").lower()) + return "[{}](#{})".format(bench, bench.replace(".", "").lower()) def benchmark_short_name(bench): From bde818f2f831e0849e147afd0507fe75640f9421 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sun, 23 Sep 2018 22:48:11 +0200 Subject: [PATCH 023/169] explain usage --- README.md | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index d200b09..053e3b9 100644 --- a/README.md +++ b/README.md @@ -13,8 +13,75 @@ as-is and summarized in a separate post-processing step. python scripts/run.py ``` -## Viewing result summary +## Creating result summary ``` python scripts/summary.py ``` + +The reports can be viewed in the `reports` directory. + +## Advanced use + +### Comparing specific versions + +You can run just the configurations you are interested in: +```bash +scripts/run.py stable latest +``` + +Compare the latest `stable` release with the `latest` snapshot: +```bash +REPORT=$(scripts/summary.py stable latest) +``` + +### Comparing an experimental feature with latest from master +1. build `scala-native` from latest master +2. run the benchmark for it +```bash +scripts/run.py latest +``` +3. build `scala-native` from your branch +4. specify a suffix to identify it +```bash +NAME=PR9001-adding-a-kitchen-sink +``` +5. run the benchmark and get the summary report +```bash +scripts/run.py --suffix "$NAME" latest && +REPORT=$(scripts/summary.py --comment "$NAME" latest latest_"$NAME") +``` + +## Persisting reports +The following commands assume that you have a git repository checked out at the `gh-pages` branch under `../scala-native-benchmark-results`. + +It is also assumed that there is an executable script `just-upload.sh` in the root of that repository. +```bash +#just-upload.sh + + +#!/bin/bash +# move to the directory of the script +cd $(dirname "$0") + +git add . 
&& +git commit -m "automated commit" && git push +``` + +### saving experiment data +```bash +cp -r results/ ../scala-native-benchmark-results && +../scala-native-benchmark-results/just-upload.sh +``` + +### restoring experiment data +```bash +cp -r ../scala-native-benchmark-results results/ +``` + +### uploading a report +```bash +mkdir -p ../scala-native-benchmark-results/reports +cp -r "$REPORT" ../scala-native-benchmark-results/reports && +../scala-native-benchmark-results/just-upload.sh +``` \ No newline at end of file From 43216a9d5610b041a4595caf8e5eb7cb53235300 Mon Sep 17 00:00:00 2001 From: Valdis Date: Tue, 25 Sep 2018 13:46:14 +0200 Subject: [PATCH 024/169] handle empty suffixes --- scripts/run.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scripts/run.py b/scripts/run.py index 07f86ad..1a9caf4 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -129,7 +129,11 @@ def compile(bench, compilecmd): os.remove('project/plugins.sbt') compile(bench, compilecmd) - resultsdir = os.path.join('results', conf + "_" + args.suffix, bench) + suffix = "" + if args.suffix is not None: + suffix = "_" + args.suffix + + resultsdir = os.path.join('results', conf + suffix, bench) mkdir(resultsdir) for n in xrange(runs): From 33b32e8da49d222ab18143672b39fb9dee19f22c Mon Sep 17 00:00:00 2001 From: Valdis Date: Tue, 25 Sep 2018 22:35:49 +0200 Subject: [PATCH 025/169] support 2 new lto variants: full and thin --- .../build.sbt | 6 +++ .../compile | 1 + .../plugins.sbt | 1 + .../scala-native-0.3.9-SNAPSHOT-full-tlo/run | 1 + .../build.sbt | 6 +++ .../compile | 1 + .../plugins.sbt | 1 + .../scala-native-0.3.9-SNAPSHOT-thin-tlo/run | 1 + scripts/run.py | 51 ++++++++++++++----- scripts/summary.py | 18 ++----- 10 files changed, 59 insertions(+), 28 deletions(-) create mode 100644 confs/scala-native-0.3.9-SNAPSHOT-full-tlo/build.sbt create mode 100644 confs/scala-native-0.3.9-SNAPSHOT-full-tlo/compile create mode 100644 confs/scala-native-0.3.9-SNAPSHOT-full-tlo/plugins.sbt create mode 100644 confs/scala-native-0.3.9-SNAPSHOT-full-tlo/run create mode 100644 confs/scala-native-0.3.9-SNAPSHOT-thin-tlo/build.sbt create mode 100644 confs/scala-native-0.3.9-SNAPSHOT-thin-tlo/compile create mode 100644 confs/scala-native-0.3.9-SNAPSHOT-thin-tlo/plugins.sbt create mode 100644 confs/scala-native-0.3.9-SNAPSHOT-thin-tlo/run diff --git a/confs/scala-native-0.3.9-SNAPSHOT-full-tlo/build.sbt b/confs/scala-native-0.3.9-SNAPSHOT-full-tlo/build.sbt new file mode 100644 index 0000000..fae449a --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT-full-tlo/build.sbt @@ -0,0 +1,6 @@ +scalaVersion := "2.11.12" +enablePlugins(ScalaNativePlugin) +nativeLinkStubs := true +nativeGC := "immix" +nativeMode := "release" +nativeLTO := "full" diff --git a/confs/scala-native-0.3.9-SNAPSHOT-full-tlo/compile b/confs/scala-native-0.3.9-SNAPSHOT-full-tlo/compile new file mode 100644 index 0000000..2f3f09f --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT-full-tlo/compile @@ -0,0 +1 @@ +nativeLink diff --git a/confs/scala-native-0.3.9-SNAPSHOT-full-tlo/plugins.sbt b/confs/scala-native-0.3.9-SNAPSHOT-full-tlo/plugins.sbt new file mode 100644 index 0000000..c1423b6 --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT-full-tlo/plugins.sbt @@ -0,0 +1 @@ +addSbtPlugin("org.scala-native" % "sbt-scala-native" % "0.3.9-SNAPSHOT") diff --git a/confs/scala-native-0.3.9-SNAPSHOT-full-tlo/run b/confs/scala-native-0.3.9-SNAPSHOT-full-tlo/run new file mode 100644 index 0000000..ae89e34 --- /dev/null +++ 
b/confs/scala-native-0.3.9-SNAPSHOT-full-tlo/run @@ -0,0 +1 @@ +target/scala-2.11/scala-native-benchmarks-out diff --git a/confs/scala-native-0.3.9-SNAPSHOT-thin-tlo/build.sbt b/confs/scala-native-0.3.9-SNAPSHOT-thin-tlo/build.sbt new file mode 100644 index 0000000..ae87f31 --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT-thin-tlo/build.sbt @@ -0,0 +1,6 @@ +scalaVersion := "2.11.12" +enablePlugins(ScalaNativePlugin) +nativeLinkStubs := true +nativeGC := "immix" +nativeMode := "release" +nativeLTO := "thin" diff --git a/confs/scala-native-0.3.9-SNAPSHOT-thin-tlo/compile b/confs/scala-native-0.3.9-SNAPSHOT-thin-tlo/compile new file mode 100644 index 0000000..2f3f09f --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT-thin-tlo/compile @@ -0,0 +1 @@ +nativeLink diff --git a/confs/scala-native-0.3.9-SNAPSHOT-thin-tlo/plugins.sbt b/confs/scala-native-0.3.9-SNAPSHOT-thin-tlo/plugins.sbt new file mode 100644 index 0000000..c1423b6 --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT-thin-tlo/plugins.sbt @@ -0,0 +1 @@ +addSbtPlugin("org.scala-native" % "sbt-scala-native" % "0.3.9-SNAPSHOT") diff --git a/confs/scala-native-0.3.9-SNAPSHOT-thin-tlo/run b/confs/scala-native-0.3.9-SNAPSHOT-thin-tlo/run new file mode 100644 index 0000000..ae89e34 --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT-thin-tlo/run @@ -0,0 +1 @@ +target/scala-2.11/scala-native-benchmarks-out diff --git a/scripts/run.py b/scripts/run.py index 1a9caf4..3a4dea8 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -68,43 +68,66 @@ def compile(bench, compilecmd): ] stable = 'scala-native-0.3.8' +latest = 'scala-native-0.3.9-SNAPSHOT' baseline = [ 'jvm', stable, ] -latest = 'scala-native-0.3.9-SNAPSHOT' +confs_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) + "/confs" -configurations = all_configs = baseline + [latest] +configurations = all_configs = next(os.walk(confs_path))[1] + +graalvm = [ + 'native-image', + 'native-image-pgo', +] if 'GRAALVM_HOME' in os.environ: - baseline += [ - 'native-image', - 'native-image-pgo', - ] + baseline += graalvm +else: + for g in graalvm: + all_configs.remove(g) runs = 20 batches = 3000 batch_size = 1 + +def expand_wild_cards(arg): + if arg.startswith("latest"): + return latest + arg[len("latest"):] + elif arg.startswith("stable"): + return stable + arg[len("stable"):] + else: + return arg + + +def generate_choices(direct_choices): + results = direct_choices + for dir in direct_choices: + if dir.startswith(latest): + results += ["latest" + dir[len(latest):]] + if dir.startswith(stable): + results += ["stable" + dir[len(stable):]] + return results + + if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--suffix", help="suffix added to results") - parser.add_argument("set", nargs='*', choices=configurations + ["baseline", "latest", "stable", "all"], + parser.add_argument("set", nargs='*', choices=generate_choices(configurations) + ["baseline", "all"], default="all") args = parser.parse_args() - if args.set != all_configs: + if args.set != "all": configurations = [] for choice in args.set: - if choice == "baseline": + expanded = expand_wild_cards(choice) + if expanded == "baseline": configurations += baseline - elif choice == "latest" in args.set: - configurations += [latest] - elif choice == "stable" in args.set: - configurations += [stable] else: - configurations + [choice] + configurations += [expanded] else: configurations = all_configs diff --git a/scripts/summary.py b/scripts/summary.py index a905837..ce61859 100755 --- 
a/scripts/summary.py +++ b/scripts/summary.py @@ -1,5 +1,5 @@ #!/usr/bin/env python2 -from run import benchmarks, runs, mkdir, all_configs, latest, stable +from run import benchmarks, runs, mkdir, expand_wild_cards, generate_choices import numpy as np import time @@ -181,13 +181,8 @@ def write_md_file(rootdir, md_file, configurations): if __name__ == '__main__': - dirs = next(os.walk("results"))[1] - results = dirs - for dir in dirs: - if dir.startswith(latest): - results += ["latest" + dir[len(latest):]] - if dir.startswith(stable): - results += ["stable" + dir[len(stable):]] + all_configs = next(os.walk("results"))[1] + results = generate_choices(all_configs) parser = argparse.ArgumentParser() parser.add_argument("--comment", help="comment at the suffix of the report name") @@ -200,12 +195,7 @@ def write_md_file(rootdir, md_file, configurations): configurations = all_configs else: for arg in args.comparisons: - if arg.startswith("latest"): - configurations += [latest + arg[len("latest"):]] - elif arg.startswith("stable"): - configurations += [stable + arg[len("stable"):]] - else: - configurations += arg + configurations += [expand_wild_cards(arg)] comment = "_vs_".join(configurations) if args.comment is not None: From 15e2b52e802142fdcfd175ce58b132b96d2510a8 Mon Sep 17 00:00:00 2001 From: Valdis Date: Thu, 27 Sep 2018 16:43:28 +0200 Subject: [PATCH 026/169] fixed typo in the config names --- .../build.sbt | 0 .../compile | 0 .../plugins.sbt | 0 .../run | 0 .../build.sbt | 0 .../compile | 0 .../plugins.sbt | 0 .../run | 0 8 files changed, 0 insertions(+), 0 deletions(-) rename confs/{scala-native-0.3.9-SNAPSHOT-full-tlo => scala-native-0.3.9-SNAPSHOT-full-lto}/build.sbt (100%) rename confs/{scala-native-0.3.9-SNAPSHOT-full-tlo => scala-native-0.3.9-SNAPSHOT-full-lto}/compile (100%) rename confs/{scala-native-0.3.9-SNAPSHOT-full-tlo => scala-native-0.3.9-SNAPSHOT-full-lto}/plugins.sbt (100%) rename confs/{scala-native-0.3.9-SNAPSHOT-full-tlo => scala-native-0.3.9-SNAPSHOT-full-lto}/run (100%) rename confs/{scala-native-0.3.9-SNAPSHOT-thin-tlo => scala-native-0.3.9-SNAPSHOT-thin-lto}/build.sbt (100%) rename confs/{scala-native-0.3.9-SNAPSHOT-thin-tlo => scala-native-0.3.9-SNAPSHOT-thin-lto}/compile (100%) rename confs/{scala-native-0.3.9-SNAPSHOT-thin-tlo => scala-native-0.3.9-SNAPSHOT-thin-lto}/plugins.sbt (100%) rename confs/{scala-native-0.3.9-SNAPSHOT-thin-tlo => scala-native-0.3.9-SNAPSHOT-thin-lto}/run (100%) diff --git a/confs/scala-native-0.3.9-SNAPSHOT-full-tlo/build.sbt b/confs/scala-native-0.3.9-SNAPSHOT-full-lto/build.sbt similarity index 100% rename from confs/scala-native-0.3.9-SNAPSHOT-full-tlo/build.sbt rename to confs/scala-native-0.3.9-SNAPSHOT-full-lto/build.sbt diff --git a/confs/scala-native-0.3.9-SNAPSHOT-full-tlo/compile b/confs/scala-native-0.3.9-SNAPSHOT-full-lto/compile similarity index 100% rename from confs/scala-native-0.3.9-SNAPSHOT-full-tlo/compile rename to confs/scala-native-0.3.9-SNAPSHOT-full-lto/compile diff --git a/confs/scala-native-0.3.9-SNAPSHOT-full-tlo/plugins.sbt b/confs/scala-native-0.3.9-SNAPSHOT-full-lto/plugins.sbt similarity index 100% rename from confs/scala-native-0.3.9-SNAPSHOT-full-tlo/plugins.sbt rename to confs/scala-native-0.3.9-SNAPSHOT-full-lto/plugins.sbt diff --git a/confs/scala-native-0.3.9-SNAPSHOT-full-tlo/run b/confs/scala-native-0.3.9-SNAPSHOT-full-lto/run similarity index 100% rename from confs/scala-native-0.3.9-SNAPSHOT-full-tlo/run rename to confs/scala-native-0.3.9-SNAPSHOT-full-lto/run diff --git 
a/confs/scala-native-0.3.9-SNAPSHOT-thin-tlo/build.sbt b/confs/scala-native-0.3.9-SNAPSHOT-thin-lto/build.sbt similarity index 100% rename from confs/scala-native-0.3.9-SNAPSHOT-thin-tlo/build.sbt rename to confs/scala-native-0.3.9-SNAPSHOT-thin-lto/build.sbt diff --git a/confs/scala-native-0.3.9-SNAPSHOT-thin-tlo/compile b/confs/scala-native-0.3.9-SNAPSHOT-thin-lto/compile similarity index 100% rename from confs/scala-native-0.3.9-SNAPSHOT-thin-tlo/compile rename to confs/scala-native-0.3.9-SNAPSHOT-thin-lto/compile diff --git a/confs/scala-native-0.3.9-SNAPSHOT-thin-tlo/plugins.sbt b/confs/scala-native-0.3.9-SNAPSHOT-thin-lto/plugins.sbt similarity index 100% rename from confs/scala-native-0.3.9-SNAPSHOT-thin-tlo/plugins.sbt rename to confs/scala-native-0.3.9-SNAPSHOT-thin-lto/plugins.sbt diff --git a/confs/scala-native-0.3.9-SNAPSHOT-thin-tlo/run b/confs/scala-native-0.3.9-SNAPSHOT-thin-lto/run similarity index 100% rename from confs/scala-native-0.3.9-SNAPSHOT-thin-tlo/run rename to confs/scala-native-0.3.9-SNAPSHOT-thin-lto/run From dbeaadd51a74ab54a467ca81510e12847ca2e612 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sun, 30 Sep 2018 14:20:17 +0200 Subject: [PATCH 027/169] extra arguments --- scripts/run.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/scripts/run.py b/scripts/run.py index 3a4dea8..99fb0a0 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -89,8 +89,9 @@ def compile(bench, compilecmd): for g in graalvm: all_configs.remove(g) -runs = 20 -batches = 3000 +default_runs = 20 +default_batches = 3000 +default_par = 1 batch_size = 1 @@ -116,10 +117,27 @@ def generate_choices(direct_choices): if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--suffix", help="suffix added to results") + parser.add_argument("--runs", help="number of runs", type=int, default=default_runs) + parser.add_argument("--batches", help="number of batches per run", type=int, default=default_batches) + parser.add_argument("--par", help="number of parallel processes per run", type=int, default=default_par) parser.add_argument("set", nargs='*', choices=generate_choices(configurations) + ["baseline", "all"], default="all") args = parser.parse_args() + runs = args.runs + batches = args.batches + par = args.par + + suffix = "" + if runs != default_runs: + suffix += "-r" + runs + if batches != default_batches: + suffix += "-b" + batches + if par != default_par: + suffix += "-p" + par + if args.suffix is not None: + suffix += "_" + args.suffix + if args.set != "all": configurations = [] for choice in args.set: @@ -152,11 +170,9 @@ def generate_choices(direct_choices): os.remove('project/plugins.sbt') compile(bench, compilecmd) - suffix = "" - if args.suffix is not None: - suffix = "_" + args.suffix - resultsdir = os.path.join('results', conf + suffix, bench) + + resultsdir = os.path.join('results', conf + suffix, bench) mkdir(resultsdir) for n in xrange(runs): From 81b28ee3e6f5c0894a7be380bb8577a2a8a927e2 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sun, 30 Sep 2018 14:40:47 +0200 Subject: [PATCH 028/169] handling failures --- scripts/run.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/scripts/run.py b/scripts/run.py index 99fb0a0..6a9cd95 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -149,6 +149,8 @@ def generate_choices(direct_choices): else: configurations = all_configs + failed = [] + for conf in configurations: for bench in benchmarks: print('--- conf: {}, bench: 
{}'.format(conf, bench)) @@ -171,7 +173,6 @@ def generate_choices(direct_choices): compile(bench, compilecmd) - resultsdir = os.path.join('results', conf + suffix, bench) mkdir(resultsdir) @@ -181,6 +182,19 @@ def generate_choices(direct_choices): cmd = [] cmd.extend(runcmd) cmd.extend([str(batches), str(batch_size), input, output]) - out = run(cmd) - with open(os.path.join(resultsdir, str(n)), 'w+') as resultfile: - resultfile.write(out) + try: + out = run(cmd) + with open(os.path.join(resultsdir, str(n)), 'w+') as resultfile: + resultfile.write(out) + except subp.CalledProcessError as err: + out = err.output + print "Failure!" + print out + with open(os.path.join(resultsdir, str(n) + ".failed"), 'w+') as failfile: + failfile.write(out) + failed += [dict(conf=conf, bench=bench, run=n)] + if len(failed) > 0: + print("{} benchmarks failed ".format(len(failed))) + for fail in failed: + print fail + exit(1) \ No newline at end of file From aef6e1e85afc4de8ecb6b8cfd545945305396f5d Mon Sep 17 00:00:00 2001 From: Valdis Date: Sun, 30 Sep 2018 14:48:20 +0200 Subject: [PATCH 029/169] added missing string conversions --- scripts/run.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/run.py b/scripts/run.py index 6a9cd95..e9a548e 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -130,11 +130,11 @@ def generate_choices(direct_choices): suffix = "" if runs != default_runs: - suffix += "-r" + runs + suffix += "-r" + str(runs) if batches != default_batches: - suffix += "-b" + batches + suffix += "-b" + str(batches) if par != default_par: - suffix += "-p" + par + suffix += "-p" + str(par) if args.suffix is not None: suffix += "_" + args.suffix From fa281a0235c4b1bbbe3f7f82325489e1909413af Mon Sep 17 00:00:00 2001 From: Valdis Date: Sun, 30 Sep 2018 15:30:37 +0200 Subject: [PATCH 030/169] added parallel execution --- scripts/run.py | 61 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 43 insertions(+), 18 deletions(-) diff --git a/scripts/run.py b/scripts/run.py index e9a548e..3721c19 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -5,6 +5,7 @@ import subprocess as subp import shutil as sh import argparse +import multiprocessing as mp def mkdir(path): @@ -114,12 +115,35 @@ def generate_choices(direct_choices): return results +def single_run(to_run): + n = to_run["n"] + runs = to_run["runs"] + cmd = to_run["cmd"] + resultsdir = to_run["resultsdir"] + conf = to_run["conf"] + bench = to_run["bench"] + + print('--- run {}/{}'.format(n, runs)) + try: + out = run(cmd) + with open(os.path.join(resultsdir, str(n)), 'w+') as resultfile: + resultfile.write(out) + return [] + except subp.CalledProcessError as err: + out = err.output + print "Failure!"
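The failure handling introduced in PATCH 028 and reused by `single_run` above is easier to read outside the diff. A minimal standalone sketch of the pattern (Python 2, matching the scripts' shebang; the helper name and file layout are assumptions taken from the surrounding code, and the combined stderr capture is only added later, in PATCH 034):

```python
# Hypothetical sketch, not part of the patches themselves.
import os
import subprocess as subp

def run_once(cmd, resultsdir, conf, bench, n):
    try:
        # capture stdout and stderr together, as PATCH 034 later does
        out = subp.check_output(cmd, stderr=subp.STDOUT)
        with open(os.path.join(resultsdir, str(n)), 'w+') as resultfile:
            resultfile.write(out)
        return []  # success: nothing to report
    except subp.CalledProcessError as err:
        # non-zero exit code lands here; err.output holds the command's output
        with open(os.path.join(resultsdir, str(n) + ".failed"), 'w+') as failfile:
            failfile.write(err.output)
        return [dict(conf=conf, bench=bench, run=n)]
```

Returning a list (empty on success) is what lets the parallel path flatten per-run results with `sum(pool.map(single_run, to_run), [])` in the patches that follow.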
+ print out + with open(os.path.join(resultsdir, str(n) + ".failed"), 'w+') as failfile: + failfile.write(out) + return [dict(conf=conf, bench=bench, run=n)] + + if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--suffix", help="suffix added to results") parser.add_argument("--runs", help="number of runs", type=int, default=default_runs) parser.add_argument("--batches", help="number of batches per run", type=int, default=default_batches) - parser.add_argument("--par", help="number of parallel processes per run", type=int, default=default_par) + parser.add_argument("--par", help="number of parallel processes", type=int, default=default_par) parser.add_argument("set", nargs='*', choices=generate_choices(configurations) + ["baseline", "all"], default="all") args = parser.parse_args() @@ -150,6 +174,9 @@ def generate_choices(direct_choices): configurations = all_configs failed = [] + pool = None + if par > 1: + pool = mp.Pool(par) for conf in configurations: for bench in benchmarks: @@ -176,25 +203,23 @@ def generate_choices(direct_choices): resultsdir = os.path.join('results', conf + suffix, bench) mkdir(resultsdir) + cmd = [] + cmd.extend(runcmd) + cmd.extend([str(batches), str(batch_size), input, output]) + + to_run = [] for n in xrange(runs): - print('--- run {}/{}'.format(n, runs)) - - cmd = [] - cmd.extend(runcmd) - cmd.extend([str(batches), str(batch_size), input, output]) - try: - out = run(cmd) - with open(os.path.join(resultsdir, str(n)), 'w+') as resultfile: - resultfile.write(out) - except subp.CalledProcessError as err: - out = err.output - print "Failure!" - print out - with open(os.path.join(resultsdir, str(n) + ".failed"), 'w+') as failfile: - failfile.write(out) - failed += [dict(conf=conf, bench=bench, run=n)] + to_run += [dict(runs=runs, cmd=cmd, resultsdir=resultsdir, conf=conf, bench=bench, n=n)] + + + if par == 1: + for tr in to_run: + failed += single_run(tr) + else: + sum(pool.map(single_run, to_run),[]) + if len(failed) > 0: print("{} benchmarks failed ".format(len(failed))) for fail in failed: print fail - exit(1) \ No newline at end of file + exit(1) From 7e18ca51d11c101094f5e5dd82aa68e66555eef9 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sun, 30 Sep 2018 16:06:08 +0200 Subject: [PATCH 031/169] display the failed statistics for par as well --- scripts/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/run.py b/scripts/run.py index 3721c19..865c33f 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -216,7 +216,7 @@ def single_run(to_run): for tr in to_run: failed += single_run(tr) else: - sum(pool.map(single_run, to_run),[]) + failed += sum(pool.map(single_run, to_run),[]) if len(failed) > 0: print("{} benchmarks failed ".format(len(failed))) From 08828b1e778ce608e2b9222dd460cfc276bdc9bc Mon Sep 17 00:00:00 2001 From: Valdis Date: Sun, 30 Sep 2018 16:06:55 +0200 Subject: [PATCH 032/169] removed unused import --- scripts/run.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/run.py b/scripts/run.py index 865c33f..100c836 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -1,5 +1,4 @@ #!/usr/bin/env python2 -import sys import os import errno import subprocess as subp From e1c34643c2bb26d0db8bf475c3fbf0f607c21b95 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sun, 30 Sep 2018 18:23:07 +0200 Subject: [PATCH 033/169] scan for all successful runs --- scripts/summary.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index 
ce61859..bcaf6ec 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -1,5 +1,5 @@ #!/usr/bin/env python2 -from run import benchmarks, runs, mkdir, expand_wild_cards, generate_choices +from run import benchmarks, mkdir, expand_wild_cards, generate_choices import numpy as np import time @@ -11,8 +11,14 @@ def config_data(bench, conf): + files = next(os.walk("results/{}/{}".format(conf, bench)))[2] + runs = [] + for file in files: + if not file.endswith(".fail"): + runs += [file] + out = [] - for run in xrange(runs): + for run in runs: try: points = [] with open('results/{}/{}/{}'.format(conf, bench, run)) as data: @@ -186,7 +192,7 @@ def write_md_file(rootdir, md_file, configurations): parser = argparse.ArgumentParser() parser.add_argument("--comment", help="comment at the suffix of the report name") - parser.add_argument("comparisons", nargs='*', choices= results + ["all"], + parser.add_argument("comparisons", nargs='*', choices=results + ["all"], default="all") args = parser.parse_args() @@ -207,4 +213,4 @@ def write_md_file(rootdir, md_file, configurations): with open(os.path.join(report_dir, "Readme.md"), 'w+') as md_file: write_md_file(report_dir, md_file, configurations) - print report_dir \ No newline at end of file + print report_dir From 5af8f25e2ac452b4b6a9606ab63dfb11aaf89a5d Mon Sep 17 00:00:00 2001 From: Valdis Date: Sun, 30 Sep 2018 21:51:55 +0200 Subject: [PATCH 034/169] also collect stderr --- scripts/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/run.py b/scripts/run.py index 100c836..f22042f 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -37,7 +37,7 @@ def where(cmd): def run(cmd): print(">>> " + str(cmd)) - return subp.check_output(cmd) + return subp.check_output(cmd, stderr=subp.STDOUT) def compile(bench, compilecmd): From eb30c1e1a3ac6248855d5b30e81a541e5ec2b486 Mon Sep 17 00:00:00 2001 From: Valdis Date: Tue, 2 Oct 2018 11:41:18 +0200 Subject: [PATCH 035/169] readme --- README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/README.md b/README.md index 053e3b9..19a79d5 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,15 @@ Compare the latest `stable` release vs the `latest` snapshot REPORT=$(scripts/summary.py stable latest) ``` +### Specifying the number of runs, batches, and parallel processes +```bash +scripts/run.py --par 4 --runs 50 --batches 3000 stable +# 50 runs (4 in parallel) each with 3000 batches for the stable release. +``` + +These settings impact accuracy, which is why the names of the results folders include them, in this case `scala-native-0.3.8-r50-b3000-p4`. +Note that you can also use `stable-r50-b3000-p4` when using `summary.py`. + ### Comparing an experimental feature with latest from master 1. build `scala-native` from latest master 2.
run the benchmark for it From 6e7bf4a9905e5aa08af377e08b3754444a8e710f Mon Sep 17 00:00:00 2001 From: Valdis Date: Wed, 3 Oct 2018 11:54:32 +0200 Subject: [PATCH 036/169] handle missing data --- scripts/summary.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index bcaf6ec..2c38bd5 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -11,7 +11,7 @@ def config_data(bench, conf): - files = next(os.walk("results/{}/{}".format(conf, bench)))[2] + files = next(os.walk("results/{}/{}".format(conf, bench)), [[],[],[]])[2] runs = [] for file in files: if not file.endswith(".fail"): @@ -100,9 +100,10 @@ def percentiles_chart(plt, configurations, bench, limit=99): plt.cla() for conf in configurations: data = config_data(bench, conf) - percentiles = np.arange(0, limit) - percvalue = np.array([np.percentile(data, perc) for perc in percentiles]) - plt.plot(percentiles, percvalue, label=conf) + if data.size > 0: + percentiles = np.arange(0, limit) + percvalue = np.array([np.percentile(data, perc) for perc in percentiles]) + plt.plot(percentiles, percvalue, label=conf) plt.legend() plt.title(bench) plt.ylim(ymin=0) From bf6afb1c7a516755c146103998b2bb722c854b14 Mon Sep 17 00:00:00 2001 From: Valdis Date: Thu, 4 Oct 2018 23:25:02 +0200 Subject: [PATCH 037/169] just add the environment for GC logging to all --- scripts/run.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/scripts/run.py b/scripts/run.py index f22042f..da574fc 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -35,9 +35,9 @@ def where(cmd): return None -def run(cmd): +def run(cmd, env = None): print(">>> " + str(cmd)) - return subp.check_output(cmd, stderr=subp.STDOUT) + return subp.check_output(cmd, stderr=subp.STDOUT, env = env) def compile(bench, compilecmd): @@ -123,8 +123,10 @@ def single_run(to_run): bench = to_run["bench"] print('--- run {}/{}'.format(n, runs)) + my_env = os.environ.copy() + my_env["SCALANATIVE_GC_STATS_FILE"] = os.path.join(resultsdir, str(n) + ".gc.csv") try: - out = run(cmd) + out = run(cmd, my_env) with open(os.path.join(resultsdir, str(n)), 'w+') as resultfile: resultfile.write(out) return [] From c9ebac5005d4bedcc0975842c44ccd27375e7d8a Mon Sep 17 00:00:00 2001 From: Valdis Date: Fri, 5 Oct 2018 10:59:02 +0200 Subject: [PATCH 038/169] read gc data --- scripts/summary.py | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/scripts/summary.py b/scripts/summary.py index 2c38bd5..3a2f3c6 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -14,7 +14,8 @@ def config_data(bench, conf): files = next(os.walk("results/{}/{}".format(conf, bench)), [[],[],[]])[2] runs = [] for file in files: - if not file.endswith(".fail"): + if "." 
not in file: + # regular benchmark data runs += [file] out = [] @@ -23,6 +24,7 @@ def config_data(bench, conf): points = [] with open('results/{}/{}/{}'.format(conf, bench, run)) as data: for line in data.readlines(): + #in ms points.append(float(line) / 1000000) # take only last 1000 to account for startup out += points[-1000:] @@ -30,6 +32,36 @@ def config_data(bench, conf): pass return np.array(out) +def gc_stats(bench, conf): + files = next(os.walk("results/{}/{}".format(conf, bench)), [[],[],[]])[2] + runs = [] + for file in files: + if file.endswith(".gc.csv"): + # gc stats data + runs += [file] + + timestamps = [], mark_times = [], sweep_times = [], gc_times = [] + for run in runs: + try: + with open('results/{}/{}/{}'.format(conf, bench, run)) as data: + #skip header + #timestamp_us,collection,mark_time_us,sweep_time_us + data.readline() + for line in data.readlines(): + arr = line.split(",") + timestamps.append(int(arr[0])) + # collection = arr[1] + # in ms + mark_time = float(arr[2])/ 1000 + mark_times.append(mark_time) + sweep_time = float(arr[3])/ 1000 + sweep_times.append(sweep_time) + gc_times.append(mark_time + sweep_time) + except IOError: + pass + return np.array(timestamps), np.array(mark_times), np.array(sweep_times), np.array(gc_times) + + def percentile(configurations, percentile): out = [] From ecb3a3e43528a9ba6d2e53316ad97e1d0abbecf7 Mon Sep 17 00:00:00 2001 From: Valdis Date: Fri, 5 Oct 2018 10:59:56 +0200 Subject: [PATCH 039/169] all is actually measured in ms not s --- scripts/summary.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index 3a2f3c6..67aab13 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -122,7 +122,7 @@ def example_run_plot(plt, configurations, bench, run=3): plt.plot(ind, points, label=conf) plt.title("{} run #{}".format(bench, str(run))) plt.xlabel("Iteration") - plt.ylabel("Run time (s)") + plt.ylabel("Run time (ms)") plt.legend() return plt @@ -140,7 +140,7 @@ def percentiles_chart(plt, configurations, bench, limit=99): plt.title(bench) plt.ylim(ymin=0) plt.xlabel("Percentile") - plt.ylabel("Run time (s)") + plt.ylabel("Run time (ms)") return plt @@ -193,7 +193,7 @@ def benchmark_short_name(bench): def write_md_file(rootdir, md_file, configurations): md_file.write("# Summary\n") for p in [50, 90, 99]: - md_file.write("## Benchmark run time (s) at {} percentile \n".format(p)) + md_file.write("## Benchmark run time (ms) at {} percentile \n".format(p)) chart_name = "relative_percentile_" + str(p) + ".png" bar_chart_relative(plt, configurations, p).savefig(rootdir + chart_name) From 4a7ff6cc79ff5f43c1f4b11badbbb419f79d4047 Mon Sep 17 00:00:00 2001 From: Valdis Date: Fri, 5 Oct 2018 11:39:47 +0200 Subject: [PATCH 040/169] fixed some bugs --- scripts/summary.py | 57 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 6 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index 67aab13..e11fef8 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -32,6 +32,7 @@ def config_data(bench, conf): pass return np.array(out) + def gc_stats(bench, conf): files = next(os.walk("results/{}/{}".format(conf, bench)), [[],[],[]])[2] runs = [] @@ -40,7 +41,10 @@ def gc_stats(bench, conf): # gc stats data runs += [file] - timestamps = [], mark_times = [], sweep_times = [], gc_times = [] + timestamps = [] + mark_times = [] + sweep_times = [] + gc_times = [] for run in runs: try: with open('results/{}/{}/{}'.format(conf, bench, run)) as data: @@ 
-106,6 +110,44 @@ def bar_chart_relative(plt, configurations, percentile): return plt +def bar_chart_gc_relative(plt, configurations, percentile): + plt.clf() + plt.cla() + ind = np.arange(len(benchmarks)) + conf_count = len(configurations) + 1 + base = [] + ref = [] + mark_ref = [] + for bench in benchmarks: + try: + _, mark, _, total = gc_stats(bench, configurations[0]) + base.append(np.percentile(total, percentile)) + ref.append(1.0) + mark_ref.append(np.percentile(mark/total, percentile)) + except IndexError: + base.append(0) + ref.append(0.0) + plt.bar(ind * conf_count, ref, label=configurations[0] + "-sweep") # total (look like sweep) + plt.bar(ind * conf_count, mark_ref, label=configurations[0] + "-mark") # mark time + + for i, conf in enumerate(configurations[1:]): + res = [] + mark_res = [] + for bench, base_val in zip(benchmarks, base): + try: + _, mark, _, total = gc_stats(bench, configurations[0]) + res.append(np.percentile(total, percentile) / base_val) + mark_res.append(np.percentile(mark, percentile) / base_val) + except IndexError: + res.append(0) + plt.bar(ind * conf_count + i + 1, res, label=conf + "-sweep") # total (look like sweep) + plt.bar(ind * conf_count + i + 1, mark_res, label=conf + "-mark") # mark time + plt.xticks((ind * conf_count + (conf_count - 1) / 2.0), map(benchmark_short_name, benchmarks)) + plt.title("Relative gc times against " + configurations[0] + " at " + str(percentile) + " percentile") + plt.legend() + return plt + + def example_run_plot(plt, configurations, bench, run=3): plt.clf() plt.cla() @@ -190,17 +232,20 @@ def benchmark_short_name(bench): return bench.split(".")[0] +def chart_md(md_file, plt, rootdir, name): + plt.savefig(rootdir + name) + md_file.write("![Chart]({})\n\n".format(name)) + + def write_md_file(rootdir, md_file, configurations): md_file.write("# Summary\n") for p in [50, 90, 99]: md_file.write("## Benchmark run time (ms) at {} percentile \n".format(p)) - chart_name = "relative_percentile_" + str(p) + ".png" - bar_chart_relative(plt, configurations, p).savefig(rootdir + chart_name) - - md_file.write("![Chart]({})\n\n".format(chart_name)) - + chart_md(md_file, bar_chart_relative(plt, configurations, p), rootdir, "relative_percentile_" + str(p) + ".png") write_md_table(md_file, configurations, percentile(configurations, p)) + chart_md(md_file, bar_chart_gc_relative(plt, configurations, p), rootdir, "relative_gc_percentile_" + str(p) + ".png") + md_file.write("# Individual benchmarks\n") for bench in benchmarks: md_file.write("## ") From d045567f52f99064f9811c45fcbece52443a6ff3 Mon Sep 17 00:00:00 2001 From: Valdis Date: Fri, 5 Oct 2018 11:40:06 +0200 Subject: [PATCH 041/169] removed unused imports --- scripts/summary.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index e11fef8..2ca09ca 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -3,8 +3,6 @@ import numpy as np import time -import sys -import matplotlib import matplotlib.pyplot as plt import os import argparse From 4d599b351f21d0a476e377aa884bd937ae1f6a33 Mon Sep 17 00:00:00 2001 From: Valdis Date: Fri, 5 Oct 2018 12:26:31 +0200 Subject: [PATCH 042/169] some gc plot --- scripts/summary.py | 85 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 73 insertions(+), 12 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index 2ca09ca..cbb4572 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -51,9 +51,9 @@ def gc_stats(bench, conf): data.readline() for line in data.readlines(): arr = 
line.split(",") - timestamps.append(int(arr[0])) - # collection = arr[1] # in ms + timestamps.append(int(arr[0])/ 1000) + # collection = arr[1] mark_time = float(arr[2])/ 1000 mark_times.append(mark_time) sweep_time = float(arr[3])/ 1000 @@ -64,6 +64,29 @@ def gc_stats(bench, conf): return np.array(timestamps), np.array(mark_times), np.array(sweep_times), np.array(gc_times) +def percentile_gc(configurations, percentile): + out_mark = [] + out_sweep = [] + out_total = [] + for bench in benchmarks: + res_mark = [] + res_sweep = [] + res_total = [] + for conf in configurations: + try: + _, mark, sweep, total = gc_stats(bench, conf) + res_mark.append(np.percentile(mark, percentile)) + res_sweep.append(np.percentile(sweep, percentile)) + res_total.append(np.percentile(total, percentile)) + except IndexError: + res_mark.append(0) + res_sweep.append(0) + res_total.append(0) + out_mark.append(res_mark) + out_sweep.append(res_sweep) + out_total.append(res_total) + return out_mark, out_sweep, out_total + def percentile(configurations, percentile): out = [] @@ -167,6 +190,24 @@ def example_run_plot(plt, configurations, bench, run=3): return plt +def example_gc_plot(plt, configurations, bench, run=3): + plt.clf() + plt.cla() + + for conf in configurations: + timestamps, mark, sweep, total = gc_stats(conf,bench) + if len(timestamps) > 0: + ind = timestamps - timestamps[0] + plt.plot(ind, mark, label=conf + "-mark") + plt.plot(ind, sweep, label=conf + "-sweep") + plt.plot(ind, total, label=conf + "-total") + plt.title("{} run #{} garbage collections".format(bench, str(run))) + plt.xlabel("Time since first GC (ms)") + plt.ylabel("Run time (ms)") + plt.legend() + return plt + + def percentiles_chart(plt, configurations, bench, limit=99): plt.clf() plt.cla() @@ -216,6 +257,30 @@ def write_md_table(file, configurations, data): file.write('|\n') +def write_md_table_gc(file, configurations, mark_data, sweep_data, total_data): + header = ['name', ""] + header.append(configurations[0]) + for conf in configurations[1:]: + header.append(conf) + header.append("") + file.write('|') + file.write(' | '.join(header)) + file.write('|\n') + + file.write('|') + for _ in header: + file.write(' -- |') + file.write('\n') + + for bench, mark_res0, sweep_res0, total_res0 in zip(benchmarks, mark_data, sweep_data, total_data): + for name, res0 in zip(["mark", "sweep", "total"], [mark_res0, sweep_res0, total_res0]): + base = res0[0] + res = [("%.4f" % base)] + sum(map(lambda x: cell(x, base), res0[1:]), []) + file.write('|') + file.write('|'.join([benchmark_md_link(bench)] + list([name]) + list(res))) + file.write('|\n') + + def cell(x, base): percent_diff = (float(x) / base - 1) * 100 return [("%.4f" % x), @@ -242,7 +307,10 @@ def write_md_file(rootdir, md_file, configurations): chart_md(md_file, bar_chart_relative(plt, configurations, p), rootdir, "relative_percentile_" + str(p) + ".png") write_md_table(md_file, configurations, percentile(configurations, p)) + md_file.write("## GC time (ms) at {} percentile \n".format(p)) chart_md(md_file, bar_chart_gc_relative(plt, configurations, p), rootdir, "relative_gc_percentile_" + str(p) + ".png") + mark, sweep, total = percentile_gc(configurations, p) + write_md_table_gc(md_file, configurations, mark, sweep, total) md_file.write("# Individual benchmarks\n") for bench in benchmarks: @@ -250,16 +318,9 @@ def write_md_file(rootdir, md_file, configurations): md_file.write(bench) md_file.write("\n") - chart_name = "percentile_" + bench + ".png" - chart_file = rootdir + chart_name - 
percentiles_chart(plt, configurations, bench).savefig(chart_file) - - md_file.write("![Chart]({})\n".format(chart_name)) - - chart_name = "example_run_3_" + bench + ".png" - chart_file = rootdir + chart_name - example_run_plot(plt, configurations, bench).savefig(chart_file) - md_file.write("![Chart]({})\n".format(chart_name)) + chart_md(md_file, percentiles_chart(plt, configurations, bench), rootdir, "percentile_" + bench + ".png") + chart_md(md_file, example_run_plot(plt, configurations, bench), rootdir, "example_run_3_" + bench + ".png") + chart_md(md_file, example_gc_plot(plt, configurations, bench), rootdir, "example_gc_run_3_" + bench + ".png") if __name__ == '__main__': From dd980461ecf7b8bad645796c628da3307a2115b3 Mon Sep 17 00:00:00 2001 From: Valdis Date: Fri, 5 Oct 2018 12:52:18 +0200 Subject: [PATCH 043/169] fixed --- scripts/summary.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index cbb4572..6872260 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -195,9 +195,9 @@ def example_gc_plot(plt, configurations, bench, run=3): plt.cla() for conf in configurations: - timestamps, mark, sweep, total = gc_stats(conf,bench) + timestamps, mark, sweep, total = gc_stats(bench, conf) if len(timestamps) > 0: - ind = timestamps - timestamps[0] + ind = np.array(map(lambda x: x - timestamps[0], timestamps)) plt.plot(ind, mark, label=conf + "-mark") plt.plot(ind, sweep, label=conf + "-sweep") plt.plot(ind, total, label=conf + "-total") @@ -276,8 +276,14 @@ def write_md_table_gc(file, configurations, mark_data, sweep_data, total_data): for name, res0 in zip(["mark", "sweep", "total"], [mark_res0, sweep_res0, total_res0]): base = res0[0] res = [("%.4f" % base)] + sum(map(lambda x: cell(x, base), res0[1:]), []) + + if name == "mark": + link = [benchmark_md_link(bench)] + else: + link = [] + file.write('|') - file.write('|'.join([benchmark_md_link(bench)] + list([name]) + list(res))) + file.write('|'.join(link + list([name]) + list(res))) file.write('|\n') From 635dc05238c07619690e17c5a7efe79d2a630bae Mon Sep 17 00:00:00 2001 From: Valdis Date: Fri, 5 Oct 2018 13:02:48 +0200 Subject: [PATCH 044/169] gc_pause chart --- scripts/summary.py | 39 +++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index 6872260..569f133 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -9,7 +9,7 @@ def config_data(bench, conf): - files = next(os.walk("results/{}/{}".format(conf, bench)), [[],[],[]])[2] + files = next(os.walk("results/{}/{}".format(conf, bench)), [[], [], []])[2] runs = [] for file in files: if "." 
not in file: @@ -22,7 +22,7 @@ def config_data(bench, conf): points = [] with open('results/{}/{}/{}'.format(conf, bench, run)) as data: for line in data.readlines(): - #in ms + # in ms points.append(float(line) / 1000000) # take only last 1000 to account for startup out += points[-1000:] @@ -32,7 +32,7 @@ def config_data(bench, conf): def gc_stats(bench, conf): - files = next(os.walk("results/{}/{}".format(conf, bench)), [[],[],[]])[2] + files = next(os.walk("results/{}/{}".format(conf, bench)), [[], [], []])[2] runs = [] for file in files: if file.endswith(".gc.csv"): @@ -46,17 +46,17 @@ def gc_stats(bench, conf): for run in runs: try: with open('results/{}/{}/{}'.format(conf, bench, run)) as data: - #skip header - #timestamp_us,collection,mark_time_us,sweep_time_us + # skip header + # timestamp_us,collection,mark_time_us,sweep_time_us data.readline() for line in data.readlines(): arr = line.split(",") # in ms - timestamps.append(int(arr[0])/ 1000) + timestamps.append(int(arr[0]) / 1000) # collection = arr[1] - mark_time = float(arr[2])/ 1000 + mark_time = float(arr[2]) / 1000 mark_times.append(mark_time) - sweep_time = float(arr[3])/ 1000 + sweep_time = float(arr[3]) / 1000 sweep_times.append(sweep_time) gc_times.append(mark_time + sweep_time) except IOError: @@ -144,7 +144,7 @@ def bar_chart_gc_relative(plt, configurations, percentile): _, mark, _, total = gc_stats(bench, configurations[0]) base.append(np.percentile(total, percentile)) ref.append(1.0) - mark_ref.append(np.percentile(mark/total, percentile)) + mark_ref.append(np.percentile(mark / total, percentile)) except IndexError: base.append(0) ref.append(0.0) @@ -225,6 +225,23 @@ def percentiles_chart(plt, configurations, bench, limit=99): return plt +def gc_pause_time_chart(plt, configurations, bench, limit=100): + plt.clf() + plt.cla() + for conf in configurations: + _, _, _, pauses = gc_stats(bench, conf) + if pauses.size > 0: + percentiles = np.arange(0, limit) + percvalue = np.array([np.percentile(pauses, perc) for perc in percentiles]) + plt.plot(percentiles, percvalue, label=conf) + plt.legend() + plt.title(bench + ": Garbage Collector Pause Times") + plt.ylim(ymin=0) + plt.xlabel("Percentile") + plt.ylabel("GC pause time (ms)") + return plt + + def print_table(configurations, data): leading = ['name'] for conf in configurations: @@ -314,7 +331,8 @@ def write_md_file(rootdir, md_file, configurations): write_md_table(md_file, configurations, percentile(configurations, p)) md_file.write("## GC time (ms) at {} percentile \n".format(p)) - chart_md(md_file, bar_chart_gc_relative(plt, configurations, p), rootdir, "relative_gc_percentile_" + str(p) + ".png") + chart_md(md_file, bar_chart_gc_relative(plt, configurations, p), rootdir, + "relative_gc_percentile_" + str(p) + ".png") mark, sweep, total = percentile_gc(configurations, p) write_md_table_gc(md_file, configurations, mark, sweep, total) @@ -325,6 +343,7 @@ def write_md_file(rootdir, md_file, configurations): md_file.write("\n") chart_md(md_file, percentiles_chart(plt, configurations, bench), rootdir, "percentile_" + bench + ".png") + chart_md(md_file, gc_pause_time_chart(plt, configurations, bench), rootdir, "gc_pause_times_" + bench + ".png") chart_md(md_file, example_run_plot(plt, configurations, bench), rootdir, "example_run_3_" + bench + ".png") chart_md(md_file, example_gc_plot(plt, configurations, bench), rootdir, "example_gc_run_3_" + bench + ".png") From 6ce9012f2eec5443dd50822a2bdf947d101bf44a Mon Sep 17 00:00:00 2001 From: Valdis Date: Fri, 5 Oct 2018 13:05:28 
+0200 Subject: [PATCH 045/169] fixed bad offset --- scripts/summary.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/summary.py b/scripts/summary.py index 569f133..286d845 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -297,7 +297,7 @@ def write_md_table_gc(file, configurations, mark_data, sweep_data, total_data): if name == "mark": link = [benchmark_md_link(bench)] else: - link = [] + link = [""] file.write('|') file.write('|'.join(link + list([name]) + list(res))) From 4eefb29c12a5cb8f8640a398eb0e407c4c7b5184 Mon Sep 17 00:00:00 2001 From: Valdis Date: Fri, 5 Oct 2018 13:28:10 +0200 Subject: [PATCH 046/169] option to disable gc charts --- scripts/summary.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index 286d845..14c0933 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -323,18 +323,19 @@ def chart_md(md_file, plt, rootdir, name): md_file.write("![Chart]({})\n\n".format(name)) -def write_md_file(rootdir, md_file, configurations): +def write_md_file(rootdir, md_file, configurations, gc_charts = True): md_file.write("# Summary\n") for p in [50, 90, 99]: md_file.write("## Benchmark run time (ms) at {} percentile \n".format(p)) chart_md(md_file, bar_chart_relative(plt, configurations, p), rootdir, "relative_percentile_" + str(p) + ".png") write_md_table(md_file, configurations, percentile(configurations, p)) - md_file.write("## GC time (ms) at {} percentile \n".format(p)) - chart_md(md_file, bar_chart_gc_relative(plt, configurations, p), rootdir, - "relative_gc_percentile_" + str(p) + ".png") - mark, sweep, total = percentile_gc(configurations, p) - write_md_table_gc(md_file, configurations, mark, sweep, total) + if gc_charts: + md_file.write("## GC time (ms) at {} percentile \n".format(p)) + chart_md(md_file, bar_chart_gc_relative(plt, configurations, p), rootdir, + "relative_gc_percentile_" + str(p) + ".png") + mark, sweep, total = percentile_gc(configurations, p) + write_md_table_gc(md_file, configurations, mark, sweep, total) md_file.write("# Individual benchmarks\n") for bench in benchmarks: @@ -343,9 +344,11 @@ def write_md_file(rootdir, md_file, configurations): md_file.write("\n") chart_md(md_file, percentiles_chart(plt, configurations, bench), rootdir, "percentile_" + bench + ".png") - chart_md(md_file, gc_pause_time_chart(plt, configurations, bench), rootdir, "gc_pause_times_" + bench + ".png") + if gc_charts: + chart_md(md_file, gc_pause_time_chart(plt, configurations, bench), rootdir, "gc_pause_times_" + bench + ".png") chart_md(md_file, example_run_plot(plt, configurations, bench), rootdir, "example_run_3_" + bench + ".png") - chart_md(md_file, example_gc_plot(plt, configurations, bench), rootdir, "example_gc_run_3_" + bench + ".png") + if gc_charts: + chart_md(md_file, example_gc_plot(plt, configurations, bench), rootdir, "example_gc_run_3_" + bench + ".png") if __name__ == '__main__': @@ -354,6 +357,7 @@ def write_md_file(rootdir, md_file, configurations): parser = argparse.ArgumentParser() parser.add_argument("--comment", help="comment at the suffix of the report name") + parser.add_argument("--nogc", help="disable charts about garbage collector", action="store_true") parser.add_argument("comparisons", nargs='*', choices=results + ["all"], default="all") args = parser.parse_args() @@ -373,6 +377,6 @@ def write_md_file(rootdir, md_file, configurations): plt.rcParams["figure.figsize"] = [16.0, 12.0] mkdir(report_dir) with 
open(os.path.join(report_dir, "Readme.md"), 'w+') as md_file: - write_md_file(report_dir, md_file, configurations) + write_md_file(report_dir, md_file, configurations, not args.nogc) print report_dir From 07397857c2d0366e38224a159ff018d7f4f115cc Mon Sep 17 00:00:00 2001 From: Valdis Date: Sat, 6 Oct 2018 12:36:46 +0200 Subject: [PATCH 047/169] made the gc examples less cluttered and fixed the relative gc charts --- scripts/summary.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index 14c0933..be2b69c 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -156,7 +156,7 @@ def bar_chart_gc_relative(plt, configurations, percentile): mark_res = [] for bench, base_val in zip(benchmarks, base): try: - _, mark, _, total = gc_stats(bench, configurations[0]) + _, mark, _, total = gc_stats(bench, conf) res.append(np.percentile(total, percentile) / base_val) mark_res.append(np.percentile(mark, percentile) / base_val) except IndexError: @@ -195,12 +195,10 @@ def example_gc_plot(plt, configurations, bench, run=3): plt.cla() for conf in configurations: - timestamps, mark, sweep, total = gc_stats(bench, conf) + timestamps, _, _, total = gc_stats(bench, conf) if len(timestamps) > 0: ind = np.array(map(lambda x: x - timestamps[0], timestamps)) - plt.plot(ind, mark, label=conf + "-mark") - plt.plot(ind, sweep, label=conf + "-sweep") - plt.plot(ind, total, label=conf + "-total") + plt.plot(ind, total, label=conf) plt.title("{} run #{} garbage collections".format(bench, str(run))) plt.xlabel("Time since first GC (ms)") plt.ylabel("Run time (ms)") From 8e2189d651a46db706c7bd1a70bbcdec92a03c85 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sat, 6 Oct 2018 12:53:45 +0200 Subject: [PATCH 048/169] absolute gc bar charts as well --- scripts/summary.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/scripts/summary.py b/scripts/summary.py index be2b69c..304e748 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -169,6 +169,30 @@ def bar_chart_gc_relative(plt, configurations, percentile): return plt +def bar_chart_gc_absolute(plt, configurations, percentile): + plt.clf() + plt.cla() + ind = np.arange(len(benchmarks)) + conf_count = len(configurations) + 1 + + for i, conf in enumerate(configurations): + res = [] + mark_res = [] + for bench in benchmarks: + try: + _, mark, _, total = gc_stats(bench, conf) + res.append(np.percentile(total, percentile)) + mark_res.append(np.percentile(mark, percentile)) + except IndexError: + res.append(0) + plt.bar(ind * conf_count + i + 1, res, label=conf + "-sweep") # total (look like sweep) + plt.bar(ind * conf_count + i + 1, mark_res, label=conf + "-mark") # mark time + plt.xticks((ind * conf_count + (conf_count - 1) / 2.0), map(benchmark_short_name, benchmarks)) + plt.title("Garbage collector pause times at " + str(percentile) + " percentile") + plt.legend() + return plt + + def example_run_plot(plt, configurations, bench, run=3): plt.clf() plt.cla() From 5c9db366fe0f4846ecbd9c3fc09090e5bca49845 Mon Sep 17 00:00:00 2001 From: Valdis Date: Fri, 12 Oct 2018 10:22:12 +0200 Subject: [PATCH 049/169] handle gc-less benchmarks --- scripts/summary.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index 304e748..5f5066f 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -148,6 +148,7 @@ def bar_chart_gc_relative(plt, configurations, percentile): except IndexError: base.append(0) ref.append(0.0) 
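The zero guards added around here protect the relative charts from benchmarks that never trigger a collection. A minimal illustration of the failure mode, using a hypothetical helper that is not part of the patch:

```python
# If a benchmark never collects, its GC stats are empty, the baseline
# percentile ends up 0, and a naive ratio would divide by zero.
def relative_height(value, base):
    return value / base if base > 0 else 0.0  # empty bar for GC-less benchmarks

print(relative_height(12.5, 25.0))  # 0.5: half the baseline pause time
print(relative_height(7.0, 0.0))    # 0.0: no baseline data, draw nothing
```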
+ mark_ref.append(0.0) plt.bar(ind * conf_count, ref, label=configurations[0] + "-sweep") # total (look like sweep) plt.bar(ind * conf_count, mark_ref, label=configurations[0] + "-mark") # mark time @@ -156,11 +157,16 @@ def bar_chart_gc_relative(plt, configurations, percentile): mark_res = [] for bench, base_val in zip(benchmarks, base): try: - _, mark, _, total = gc_stats(bench, conf) - res.append(np.percentile(total, percentile) / base_val) - mark_res.append(np.percentile(mark, percentile) / base_val) + if base_val > 0: + _, mark, _, total = gc_stats(bench, conf) + res.append(np.percentile(total, percentile) / base_val) + mark_res.append(np.percentile(mark, percentile) / base_val) + else: + res.append(0) + mark_res.append(0) except IndexError: res.append(0) + mark_res.append(0) plt.bar(ind * conf_count + i + 1, res, label=conf + "-sweep") # total (look like sweep) plt.bar(ind * conf_count + i + 1, mark_res, label=conf + "-mark") # mark time plt.xticks((ind * conf_count + (conf_count - 1) / 2.0), map(benchmark_short_name, benchmarks)) @@ -327,9 +333,12 @@ def write_md_table_gc(file, configurations, mark_data, sweep_data, total_data): def cell(x, base): - percent_diff = (float(x) / base - 1) * 100 - return [("%.4f" % x), - ("+" if percent_diff > 0 else "__") + ("%.2f" % percent_diff) + "%" + ("" if percent_diff > 0 else "__")] + if base > 0: + percent_diff = (float(x) / base - 1) * 100 + precent_diff_cell = ("+" if percent_diff > 0 else "__") + ("%.2f" % percent_diff) + "%" + ("" if percent_diff > 0 else "__") + else: + precent_diff_cell = "N/A" + return [("%.4f" % x), precent_diff_cell] def benchmark_md_link(bench): From 05d9cc582cd5f10a0b02f4b52e767f9c1fd30c7e Mon Sep 17 00:00:00 2001 From: Valdis Date: Fri, 12 Oct 2018 14:46:23 +0200 Subject: [PATCH 050/169] geometrical mean --- scripts/summary.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/scripts/summary.py b/scripts/summary.py index 5f5066f..1f705d4 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -294,6 +294,7 @@ def write_md_table(file, configurations, data): file.write(' -- |') file.write('\n') + gmul = np.ones(len(configurations)-1) for bench, res0 in zip(benchmarks, data): base = res0[0] res = [("%.4f" % base)] + sum(map(lambda x: cell(x, base), res0[1:]), []) @@ -301,6 +302,18 @@ def write_md_table(file, configurations, data): file.write('|'.join([benchmark_md_link(bench)] + list(res))) file.write('|\n') + for i, d0 in enumerate(res0[1:]): + gmul[i] *= (float(d0) / base) + + file.write('| __Geometrical mean:__|') + for gm in gmul: + file.write('| |') + gmean = float(gm) ** (1.0 / len(configurations)) + percent_diff = (gmean - 1) * 100 + precent_diff_cell = ("+" if percent_diff > 0 else "__") + ("%.2f" % percent_diff) + "%" + ("" if percent_diff > 0 else "__") + file.write(precent_diff_cell) + file.write("|\n") + def write_md_table_gc(file, configurations, mark_data, sweep_data, total_data): header = ['name', ""] From fd22f2b8c1d419fd824ea6fdb8f86bcf9ff5b7e4 Mon Sep 17 00:00:00 2001 From: Valdis Date: Fri, 12 Oct 2018 15:25:46 +0200 Subject: [PATCH 051/169] by default: do not collect gc stats, remove old data do not chart the gc stats --- scripts/run.py | 13 +++++++++++-- scripts/summary.py | 4 ++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/scripts/run.py b/scripts/run.py index da574fc..25981f1 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -121,10 +121,12 @@ def single_run(to_run): resultsdir = to_run["resultsdir"] conf = to_run["conf"] bench = to_run["bench"] + gcstats 
= to_run["gcstats"] print('--- run {}/{}'.format(n, runs)) my_env = os.environ.copy() - my_env["SCALANATIVE_GC_STATS_FILE"] = os.path.join(resultsdir, str(n) + ".gc.csv") + if gcstats: + my_env["SCALANATIVE_GC_STATS_FILE"] = os.path.join(resultsdir, str(n) + ".gc.csv") try: out = run(cmd, my_env) with open(os.path.join(resultsdir, str(n)), 'w+') as resultfile: @@ -145,6 +147,8 @@ def single_run(to_run): parser.add_argument("--runs", help="number of runs", type=int, default=default_runs) parser.add_argument("--batches", help="number of batches per run", type=int, default=default_batches) parser.add_argument("--par", help="number of parallel processes", type=int, default=default_par) + parser.add_argument("--gc", help="gather gc statistics", action="store_true") + parser.add_argument("--append", help="do not delete old data", action="store_true") parser.add_argument("set", nargs='*', choices=generate_choices(configurations) + ["baseline", "all"], default="all") args = parser.parse_args() @@ -160,6 +164,8 @@ def single_run(to_run): suffix += "-b" + str(batches) if par != default_par: suffix += "-p" + str(par) + if args.gc: + suffix += "-gc" if args.suffix is not None: suffix += "_" + args.suffix @@ -180,6 +186,9 @@ def single_run(to_run): pool = mp.Pool(par) for conf in configurations: + if not args.append: + sh.rmtree(os.path.join('results', conf + suffix), ignore_errors=True) + for bench in benchmarks: print('--- conf: {}, bench: {}'.format(conf, bench)) @@ -210,7 +219,7 @@ def single_run(to_run): to_run = [] for n in xrange(runs): - to_run += [dict(runs=runs, cmd=cmd, resultsdir=resultsdir, conf=conf, bench=bench, n=n)] + to_run += [dict(runs=runs, cmd=cmd, resultsdir=resultsdir, conf=conf, bench=bench, n=n, gcstats=args.gc)] if par == 1: diff --git a/scripts/summary.py b/scripts/summary.py index 1f705d4..5cd8535 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -401,7 +401,7 @@ def write_md_file(rootdir, md_file, configurations, gc_charts = True): parser = argparse.ArgumentParser() parser.add_argument("--comment", help="comment at the suffix of the report name") - parser.add_argument("--nogc", help="disable charts about garbage collector", action="store_true") + parser.add_argument("--gc", help="enable charts about garbage collector", action="store_true") parser.add_argument("comparisons", nargs='*', choices=results + ["all"], default="all") args = parser.parse_args() @@ -421,6 +421,6 @@ def write_md_file(rootdir, md_file, configurations, gc_charts = True): plt.rcParams["figure.figsize"] = [16.0, 12.0] mkdir(report_dir) with open(os.path.join(report_dir, "Readme.md"), 'w+') as md_file: - write_md_file(report_dir, md_file, configurations, not args.nogc) + write_md_file(report_dir, md_file, configurations, args.gc) print report_dir From 766d31ace98f7666afc0105424f25d2ecd4a1cbb Mon Sep 17 00:00:00 2001 From: Valdis Date: Fri, 12 Oct 2018 19:17:35 +0200 Subject: [PATCH 052/169] fixed geometrical mean --- scripts/summary.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/summary.py b/scripts/summary.py index 5cd8535..ad89594 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -308,7 +308,7 @@ def write_md_table(file, configurations, data): file.write('| __Geometrical mean:__|') for gm in gmul: file.write('| |') - gmean = float(gm) ** (1.0 / len(configurations)) + gmean = float(gm) ** (1.0 / len(benchmarks)) percent_diff = (gmean - 1) * 100 precent_diff_cell = ("+" if percent_diff > 0 else "__") + ("%.2f" % percent_diff) + "%" + ("" if percent_diff > 
0 else "__") file.write(precent_diff_cell) From 935a69f51ad585ac3c61f6910c2f869194f70b8d Mon Sep 17 00:00:00 2001 From: Valdis Date: Tue, 16 Oct 2018 20:38:56 +0200 Subject: [PATCH 053/169] make thin-lto default --- .../build.sbt | 2 +- .../compile | 0 .../plugins.sbt | 0 .../run | 0 confs/scala-native-0.3.9-SNAPSHOT/build.sbt | 1 + 5 files changed, 2 insertions(+), 1 deletion(-) rename confs/{scala-native-0.3.9-SNAPSHOT-thin-lto => scala-native-0.3.9-SNAPSHOT-no-lto}/build.sbt (86%) rename confs/{scala-native-0.3.9-SNAPSHOT-thin-lto => scala-native-0.3.9-SNAPSHOT-no-lto}/compile (100%) rename confs/{scala-native-0.3.9-SNAPSHOT-thin-lto => scala-native-0.3.9-SNAPSHOT-no-lto}/plugins.sbt (100%) rename confs/{scala-native-0.3.9-SNAPSHOT-thin-lto => scala-native-0.3.9-SNAPSHOT-no-lto}/run (100%) diff --git a/confs/scala-native-0.3.9-SNAPSHOT-thin-lto/build.sbt b/confs/scala-native-0.3.9-SNAPSHOT-no-lto/build.sbt similarity index 86% rename from confs/scala-native-0.3.9-SNAPSHOT-thin-lto/build.sbt rename to confs/scala-native-0.3.9-SNAPSHOT-no-lto/build.sbt index ae87f31..2fc1873 100644 --- a/confs/scala-native-0.3.9-SNAPSHOT-thin-lto/build.sbt +++ b/confs/scala-native-0.3.9-SNAPSHOT-no-lto/build.sbt @@ -3,4 +3,4 @@ enablePlugins(ScalaNativePlugin) nativeLinkStubs := true nativeGC := "immix" nativeMode := "release" -nativeLTO := "thin" +nativeLTO := "none" diff --git a/confs/scala-native-0.3.9-SNAPSHOT-thin-lto/compile b/confs/scala-native-0.3.9-SNAPSHOT-no-lto/compile similarity index 100% rename from confs/scala-native-0.3.9-SNAPSHOT-thin-lto/compile rename to confs/scala-native-0.3.9-SNAPSHOT-no-lto/compile diff --git a/confs/scala-native-0.3.9-SNAPSHOT-thin-lto/plugins.sbt b/confs/scala-native-0.3.9-SNAPSHOT-no-lto/plugins.sbt similarity index 100% rename from confs/scala-native-0.3.9-SNAPSHOT-thin-lto/plugins.sbt rename to confs/scala-native-0.3.9-SNAPSHOT-no-lto/plugins.sbt diff --git a/confs/scala-native-0.3.9-SNAPSHOT-thin-lto/run b/confs/scala-native-0.3.9-SNAPSHOT-no-lto/run similarity index 100% rename from confs/scala-native-0.3.9-SNAPSHOT-thin-lto/run rename to confs/scala-native-0.3.9-SNAPSHOT-no-lto/run diff --git a/confs/scala-native-0.3.9-SNAPSHOT/build.sbt b/confs/scala-native-0.3.9-SNAPSHOT/build.sbt index b4a5690..ae87f31 100644 --- a/confs/scala-native-0.3.9-SNAPSHOT/build.sbt +++ b/confs/scala-native-0.3.9-SNAPSHOT/build.sbt @@ -3,3 +3,4 @@ enablePlugins(ScalaNativePlugin) nativeLinkStubs := true nativeGC := "immix" nativeMode := "release" +nativeLTO := "thin" From 1500000c974e6a2b1a213278d50cf356e9b603fc Mon Sep 17 00:00:00 2001 From: Valdis Date: Tue, 16 Oct 2018 20:40:41 +0200 Subject: [PATCH 054/169] scala-native project building --- README.md | 15 +++++-- scripts/notebook.ipynb | 6 +-- scripts/run.py | 92 +++++++++++++++++++++++++++++------------- 3 files changed, 80 insertions(+), 33 deletions(-) diff --git a/README.md b/README.md index 19a79d5..e19c159 100644 --- a/README.md +++ b/README.md @@ -50,12 +50,21 @@ Note that you can also use `stable-r50-p40-b3000` when using the `summary.py`. ```bash scripts/run.py latest ``` -3. build `scala-native` from your branch -4. specify a suffix to identify it +3. specify a suffix to identify it ```bash NAME=PR9001-adding-a-kitchen-sink +SHA1=adcd124eee ``` -5. run the benchmark and get the summary report +4. run the benchmark and get the summary report + +It will automatically compile the project in `../scala-native` if you provide the SHA1. 
+```bash +SHA1=adcd124eee +scripts/run.py --suffix "$NAME" latest@"$SHA1" && +REPORT=$(scripts/summary.py --comment "$NAME" latest latest@"$SHA1"_"$NAME") +``` + +Alternatively you can build the scala-native project from your branch manually and run ```bash scripts/run.py --suffix "$NAME" latest && REPORT=$(scripts/summary.py --comment "$NAME" latest latest_"$NAME") diff --git a/scripts/notebook.ipynb b/scripts/notebook.ipynb index 75c9347..d667762 100644 --- a/scripts/notebook.ipynb +++ b/scripts/notebook.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "collapsed": false, "scrolled": true @@ -11,7 +11,6 @@ { "ename": "ModuleNotFoundError", "evalue": "No module named 'matplotlib'", - "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", @@ -24,7 +23,8 @@ "\u001b[0;32m/usr/lib/python3/dist-packages/IPython/core/interactiveshell.py\u001b[0m in \u001b[0;36menable_matplotlib\u001b[0;34m(self, gui)\u001b[0m\n\u001b[1;32m 2933\u001b[0m \"\"\"\n\u001b[1;32m 2934\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mIPython\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcore\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mpylabtools\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mpt\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2935\u001b[0;31m \u001b[0mgui\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbackend\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfind_gui_and_backend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgui\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpylab_gui_select\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2936\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2937\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mgui\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;34m'inline'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/lib/python3/dist-packages/IPython/core/pylabtools.py\u001b[0m in \u001b[0;36mfind_gui_and_backend\u001b[0;34m(gui, gui_select)\u001b[0m\n\u001b[1;32m 257\u001b[0m \"\"\"\n\u001b[1;32m 258\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 259\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mmatplotlib\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 260\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 261\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mgui\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mgui\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;34m'auto'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'matplotlib'" - ] + ], + "output_type": "error" } ], "source": [ diff --git a/scripts/run.py b/scripts/run.py index 25981f1..a2e8a81 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -35,13 +35,37 @@ def where(cmd): return None -def run(cmd, env = None): +def run(cmd, env=None, wd=None): print(">>> " + str(cmd)) - return subp.check_output(cmd, stderr=subp.STDOUT, env = env) + if wd == None: + return subp.check_output(cmd, stderr=subp.STDOUT, env=env) + else: + return subp.check_output(cmd, stderr=subp.STDOUT, env=env, cwd=wd) + + +def compile_scala_native(sha1): + scala_native_dir = "../scala-native" + git_fetch = ['git', '--fetch', '--all'] + try: + run(git_fetch, wd = scala_native_dir) + except: + pass + + git_checkout = 
['git', 'checkout', sha1] + print run(git_checkout, wd = scala_native_dir) + + compile_cmd = [sbt, '-no-colors', '-J-Xmx2G', 'clean', 'rebuild', 'sandbox/run'] + compile_env = os.environ.copy() + compile_env["SCALANATIVE_GC"] = "immix" + local_scala_repo_dir = "../scala-2.11.11-only" + if os.path.isdir(local_scala_repo_dir): + compile_env["SCALANATIVE_SCALAREPO"] = local_scala_repo_dir + + run(compile_cmd, compile_env, wd = scala_native_dir) def compile(bench, compilecmd): - cmd = [sbt, '-J-Xmx2G', 'clean'] + cmd = [sbt, '-no-colors', '-J-Xmx2G', 'clean'] cmd.append('set mainClass in Compile := Some("{}")'.format(bench)) cmd.append(compilecmd) return run(cmd) @@ -104,6 +128,14 @@ def expand_wild_cards(arg): return arg +def split_sha1(arg): + parts = arg.split("@") + if len(parts) == 2: + return parts[0], parts[1] + else: + return arg, None + + def generate_choices(direct_choices): results = direct_choices for dir in direct_choices: @@ -149,14 +181,24 @@ def single_run(to_run): parser.add_argument("--par", help="number of parallel processes", type=int, default=default_par) parser.add_argument("--gc", help="gather gc statistics", action="store_true") parser.add_argument("--append", help="do not delete old data", action="store_true") - parser.add_argument("set", nargs='*', choices=generate_choices(configurations) + ["baseline", "all"], - default="all") + parser.add_argument("set", nargs='*', default="all") args = parser.parse_args() runs = args.runs batches = args.batches par = args.par + if args.set != "all": + configurations = [] + for choice in args.set: + expanded = expand_wild_cards(choice) + if expanded == "baseline": + configurations += baseline + else: + configurations += [expanded] + else: + configurations = all_configs + suffix = "" if runs != default_runs: suffix += "-r" + str(runs) @@ -169,23 +211,17 @@ def single_run(to_run): if args.suffix is not None: suffix += "_" + args.suffix - if args.set != "all": - configurations = [] - for choice in args.set: - expanded = expand_wild_cards(choice) - if expanded == "baseline": - configurations += baseline - else: - configurations += [expanded] - else: - configurations = all_configs - failed = [] pool = None if par > 1: pool = mp.Pool(par) for conf in configurations: + conf_name, sha1 = split_sha1(conf) + + if sha1 != None: + compile_scala_native(sha1) + if not args.append: sh.rmtree(os.path.join('results', conf + suffix), ignore_errors=True) @@ -194,17 +230,19 @@ def single_run(to_run): input = slurp(os.path.join('input', bench)) output = slurp(os.path.join('output', bench)) - compilecmd = slurp(os.path.join('confs', conf, 'compile')) - runcmd = slurp(os.path.join('confs', conf, 'run')).replace('$BENCH', bench).replace('$HOME', os.environ[ - 'HOME']).split(' ') - - if os.path.exists(os.path.join('confs', conf, 'build.sbt')): - sh.copyfile(os.path.join('confs', conf, 'build.sbt'), 'build.sbt') + compilecmd = slurp(os.path.join('confs', conf_name, 'compile')) + runcmd = slurp(os.path.join('confs', conf_name, 'run')).replace('$BENCH', bench).replace('$HOME', + os.environ[ + 'HOME']).split( + ' ') + + if os.path.exists(os.path.join('confs', conf_name, 'build.sbt')): + sh.copyfile(os.path.join('confs', conf_name, 'build.sbt'), 'build.sbt') else: os.remove('build.sbt') - if os.path.exists(os.path.join('confs', conf, 'plugins.sbt')): - sh.copyfile(os.path.join('confs', conf, 'plugins.sbt'), 'project/plugins.sbt') + if os.path.exists(os.path.join('confs', conf_name, 'plugins.sbt')): + sh.copyfile(os.path.join('confs', conf_name, 'plugins.sbt'), 
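The `<conf>@<ref>` convention that PATCH 054 and PATCH 055 build on is decided entirely by `split_sha1`. A short usage sketch, with the function copied from PATCH 054 for illustration:

```python
def split_sha1(arg):
    parts = arg.split("@")
    if len(parts) == 2:
        return parts[0], parts[1]
    else:
        return arg, None  # no "@": a plain configuration name

assert split_sha1("latest@adcd124eee") == ("latest", "adcd124eee")
assert split_sha1("latest@master") == ("latest", "master")
assert split_sha1("stable") == ("stable", None)
```

When the second element is not None, run.py checks out that ref in `../scala-native` and rebuilds it before benchmarking; otherwise the configuration is used as-is.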
'project/plugins.sbt') else: os.remove('project/plugins.sbt') @@ -219,14 +257,14 @@ def single_run(to_run): to_run = [] for n in xrange(runs): - to_run += [dict(runs=runs, cmd=cmd, resultsdir=resultsdir, conf=conf, bench=bench, n=n, gcstats=args.gc)] - + to_run += [ + dict(runs=runs, cmd=cmd, resultsdir=resultsdir, conf=conf, bench=bench, n=n, gcstats=args.gc)] if par == 1: for tr in to_run: failed += single_run(tr) else: - failed += sum(pool.map(single_run, to_run),[]) + failed += sum(pool.map(single_run, to_run), []) if len(failed) > 0: print("{} benchmarks failed ".format(len(failed))) From c325f4aba83c28e3daa4329c00dad66c4a7d24f9 Mon Sep 17 00:00:00 2001 From: Valdis Date: Tue, 16 Oct 2018 21:16:54 +0200 Subject: [PATCH 055/169] build scala native when specify @ref --- README.md | 12 ++++-------- scripts/run.py | 40 +++++++++++++++++++++++++++++----------- 2 files changed, 33 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index e19c159..bc560a1 100644 --- a/README.md +++ b/README.md @@ -45,21 +45,17 @@ These settings will impact accuracy, this is why the names of the results folder Note that you can also use `stable-r50-p40-b3000` when using the `summary.py`. ### Comparing an experimental feature with latest from master -1. build `scala-native` from latest master -2. run the benchmark for it -```bash -scripts/run.py latest -``` -3. specify a suffix to identify it +1. specify a suffix to identify it ```bash NAME=PR9001-adding-a-kitchen-sink SHA1=adcd124eee ``` -4. run the benchmark and get the summary report +2. run the benchmark and get the summary report -It will automatically compile the project in `../scala-native` if you provide the SHA1. +It will automatically compile the project in `../scala-native` if you provide the SHA1 or reference. ```bash SHA1=adcd124eee +scripts/run.py latest@master scripts/run.py --suffix "$NAME" latest@"$SHA1" && REPORT=$(scripts/summary.py --comment "$NAME" latest latest@"$SHA1"_"$NAME") ``` diff --git a/scripts/run.py b/scripts/run.py index a2e8a81..95153f6 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -52,16 +52,29 @@ def compile_scala_native(sha1): pass git_checkout = ['git', 'checkout', sha1] - print run(git_checkout, wd = scala_native_dir) + try: + print run(git_checkout, wd = scala_native_dir) + except subp.CalledProcessError as err: + out = err.output + print "Cannot checkout", sha1, "!" + print out + return False - compile_cmd = [sbt, '-no-colors', '-J-Xmx2G', 'clean', 'rebuild', 'sandbox/run'] + compile_cmd = [sbt, '-no-colors', '-J-Xmx2G', 'rebuild', 'sandbox/run'] compile_env = os.environ.copy() compile_env["SCALANATIVE_GC"] = "immix" - local_scala_repo_dir = "../scala-2.11.11-only" + local_scala_repo_dir = os.path.abspath("../scala-2.11.11-only") if os.path.isdir(local_scala_repo_dir): compile_env["SCALANATIVE_SCALAREPO"] = local_scala_repo_dir - run(compile_cmd, compile_env, wd = scala_native_dir) + try: + run(compile_cmd, compile_env, wd = scala_native_dir) + return True + except subp.CalledProcessError as err: + out = err.output + print "Compilation failure!" 
+ print out + return False def compile(bench, compilecmd): @@ -183,6 +196,7 @@ def single_run(to_run): parser.add_argument("--append", help="do not delete old data", action="store_true") parser.add_argument("set", nargs='*', default="all") args = parser.parse_args() + print args runs = args.runs batches = args.batches @@ -190,15 +204,17 @@ def single_run(to_run): if args.set != "all": configurations = [] - for choice in args.set: - expanded = expand_wild_cards(choice) - if expanded == "baseline": - configurations += baseline - else: - configurations += [expanded] + for choice in args.set: + expanded = expand_wild_cards(choice) + if expanded == "baseline": + configurations += baseline + else: + configurations += [expanded] else: configurations = all_configs + print "configurations:", configurations + suffix = "" if runs != default_runs: suffix += "-r" + str(runs) @@ -220,7 +236,9 @@ def single_run(to_run): conf_name, sha1 = split_sha1(conf) if sha1 != None: - compile_scala_native(sha1) + success = compile_scala_native(sha1) + if not success: + continue if not args.append: sh.rmtree(os.path.join('results', conf + suffix), ignore_errors=True) From 9fb6514ea832633272802c0eb2cfa62af1b992ae Mon Sep 17 00:00:00 2001 From: Valdis Date: Tue, 16 Oct 2018 21:23:57 +0200 Subject: [PATCH 056/169] make a ".complete" file when it is done --- scripts/run.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/scripts/run.py b/scripts/run.py index 95153f6..c382654 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -243,16 +243,18 @@ def single_run(to_run): if not args.append: sh.rmtree(os.path.join('results', conf + suffix), ignore_errors=True) + root_dir = os.path.join('results', conf + suffix) + mkdir(root_dir) + for bench in benchmarks: print('--- conf: {}, bench: {}'.format(conf, bench)) input = slurp(os.path.join('input', bench)) output = slurp(os.path.join('output', bench)) compilecmd = slurp(os.path.join('confs', conf_name, 'compile')) - runcmd = slurp(os.path.join('confs', conf_name, 'run')).replace('$BENCH', bench).replace('$HOME', - os.environ[ - 'HOME']).split( - ' ') + runcmd = slurp(os.path.join('confs', conf_name, 'run')) \ + .replace('$BENCH', bench) \ + .replace('$HOME', os.environ['HOME']).split(' ') if os.path.exists(os.path.join('confs', conf_name, 'build.sbt')): sh.copyfile(os.path.join('confs', conf_name, 'build.sbt'), 'build.sbt') @@ -284,6 +286,10 @@ def single_run(to_run): else: failed += sum(pool.map(single_run, to_run), []) + # mark it as complete + open(os.path.join(root_dir, ".complete"), 'w+').close() + + if len(failed) > 0: print("{} benchmarks failed ".format(len(failed))) for fail in failed: From c54b813701b3b1564a1b5c0d6d4943ec7cf4e36c Mon Sep 17 00:00:00 2001 From: Valdis Date: Tue, 16 Oct 2018 21:24:06 +0200 Subject: [PATCH 057/169] default --- build.sbt | 5 +++++ project/plugins.sbt | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/build.sbt b/build.sbt index 9874e27..ae87f31 100644 --- a/build.sbt +++ b/build.sbt @@ -1 +1,6 @@ scalaVersion := "2.11.12" +enablePlugins(ScalaNativePlugin) +nativeLinkStubs := true +nativeGC := "immix" +nativeMode := "release" +nativeLTO := "thin" diff --git a/project/plugins.sbt b/project/plugins.sbt index afc9d5a..c1423b6 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -1 +1 @@ -addSbtPlugin("org.scala-native" % "sbt-scala-native" % "0.3.7") +addSbtPlugin("org.scala-native" % "sbt-scala-native" % "0.3.9-SNAPSHOT") From c783d626f7d3d2a063577b6875fdc8094fbacd74 Mon Sep 17 00:00:00 
2001 From: Valdis Date: Wed, 17 Oct 2018 15:48:06 +0200 Subject: [PATCH 058/169] handle us and ns GC data --- scripts/summary.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index ad89594..8c047bc 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -30,6 +30,8 @@ def config_data(bench, conf): pass return np.array(out) +usecond_header = "timestamp_us,collection,mark_time_us,sweep_time_us" +nanosecond_header = "timestamp_ns,collection,mark_time_ns,sweep_time_ns" def gc_stats(bench, conf): files = next(os.walk("results/{}/{}".format(conf, bench)), [[], [], []])[2] @@ -45,18 +47,25 @@ def gc_stats(bench, conf): gc_times = [] for run in runs: try: - with open('results/{}/{}/{}'.format(conf, bench, run)) as data: - # skip header - # timestamp_us,collection,mark_time_us,sweep_time_us - data.readline() + file = 'results/{}/{}/{}'.format(conf, bench, run) + with open(file) as data: + # header + header = data.readline() + if header == usecond_header: + div_to_ms = 1000 + elif header == nanosecond_header: + div_to_ms = 1000 * 1000 + else: + print "Unknown GC header", header, "at", file + continue for line in data.readlines(): arr = line.split(",") # in ms - timestamps.append(int(arr[0]) / 1000) + timestamps.append(int(arr[0]) / div_to_ms) # collection = arr[1] - mark_time = float(arr[2]) / 1000 + mark_time = float(arr[2]) / div_to_ms mark_times.append(mark_time) - sweep_time = float(arr[3]) / 1000 + sweep_time = float(arr[3]) / div_to_ms sweep_times.append(sweep_time) gc_times.append(mark_time + sweep_time) except IOError: From f20db417e825e9a4deeae570d0af7b87eebd6503 Mon Sep 17 00:00:00 2001 From: Valdis Date: Thu, 18 Oct 2018 11:42:19 +0200 Subject: [PATCH 059/169] header includes the newline --- scripts/summary.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index 8c047bc..4dce398 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -29,9 +29,8 @@ def config_data(bench, conf): except IOError: pass return np.array(out) - -usecond_header = "timestamp_us,collection,mark_time_us,sweep_time_us" -nanosecond_header = "timestamp_ns,collection,mark_time_ns,sweep_time_ns" +usecond_header = "timestamp_us,collection,mark_time_us,sweep_time_us\n" +nanosecond_header = "timestamp_ns,collection,mark_time_ns,sweep_time_ns\n" def gc_stats(bench, conf): files = next(os.walk("results/{}/{}".format(conf, bench)), [[], [], []])[2] From 6393817193199ad4c72e32cd5877569ebbf309aa Mon Sep 17 00:00:00 2001 From: Valdis Date: Fri, 19 Oct 2018 09:47:02 +0200 Subject: [PATCH 060/169] remove gc vs time plot --- scripts/summary.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index 4dce398..edfcd74 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -228,22 +228,6 @@ def example_run_plot(plt, configurations, bench, run=3): return plt -def example_gc_plot(plt, configurations, bench, run=3): - plt.clf() - plt.cla() - - for conf in configurations: - timestamps, _, _, total = gc_stats(bench, conf) - if len(timestamps) > 0: - ind = np.array(map(lambda x: x - timestamps[0], timestamps)) - plt.plot(ind, total, label=conf) - plt.title("{} run #{} garbage collections".format(bench, str(run))) - plt.xlabel("Time since first GC (ms)") - plt.ylabel("Run time (ms)") - plt.legend() - return plt - - def percentiles_chart(plt, configurations, bench, limit=99): plt.clf() plt.cla() @@ -399,8 +383,6 @@ def 
write_md_file(rootdir, md_file, configurations, gc_charts = True): if gc_charts: chart_md(md_file, gc_pause_time_chart(plt, configurations, bench), rootdir, "gc_pause_times_" + bench + ".png") chart_md(md_file, example_run_plot(plt, configurations, bench), rootdir, "example_run_3_" + bench + ".png") - if gc_charts: - chart_md(md_file, example_gc_plot(plt, configurations, bench), rootdir, "example_gc_run_3_" + bench + ".png") if __name__ == '__main__': From 151f901f22893051dc1d4c57ec9cbb3ef5059ac3 Mon Sep 17 00:00:00 2001 From: Valdis Date: Fri, 19 Oct 2018 10:15:07 +0200 Subject: [PATCH 061/169] analyse the header to handle possible format changes and legacy data --- scripts/summary.py | 60 ++++++++++++++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 21 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index edfcd74..1c91f31 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -29,8 +29,7 @@ def config_data(bench, conf): except IOError: pass return np.array(out) -usecond_header = "timestamp_us,collection,mark_time_us,sweep_time_us\n" -nanosecond_header = "timestamp_ns,collection,mark_time_ns,sweep_time_ns\n" + def gc_stats(bench, conf): files = next(os.walk("results/{}/{}".format(conf, bench)), [[], [], []])[2] @@ -40,7 +39,6 @@ def gc_stats(bench, conf): # gc stats data runs += [file] - timestamps = [] mark_times = [] sweep_times = [] gc_times = [] @@ -48,28 +46,48 @@ def gc_stats(bench, conf): try: file = 'results/{}/{}/{}'.format(conf, bench, run) with open(file) as data: - # header - header = data.readline() - if header == usecond_header: - div_to_ms = 1000 - elif header == nanosecond_header: - div_to_ms = 1000 * 1000 - else: - print "Unknown GC header", header, "at", file + # analise header + mark_index = -1 + sweep_index = -1 + mark_to_ms = 0 + sweep_to_ms = 0 + + unit2div = dict(ms = 1, us = 1000, ns = 1000 * 1000) + + header = data.readline().strip() + for i, h in enumerate(header.split(',')): + arr = h.rsplit('_', 1) + if len(arr) != 2: + continue + prefix = arr[0] + unit = arr[1] + + if prefix == "mark_time": + mark_index = i + mark_to_ms = unit2div[unit] + elif prefix == "sweep_time": + sweep_index = i + sweep_to_ms = unit2div[unit] + + + if mark_index == -1: + print "Header does not have mark_time_", header, "at", file + if sweep_index == -1: + print "Header does not have sweep_time_", header, "at", file + if mark_index == -1 or sweep_index == -1: continue + for line in data.readlines(): arr = line.split(",") # in ms - timestamps.append(int(arr[0]) / div_to_ms) - # collection = arr[1] - mark_time = float(arr[2]) / div_to_ms + mark_time = float(arr[mark_index]) / mark_to_ms mark_times.append(mark_time) - sweep_time = float(arr[3]) / div_to_ms + sweep_time = float(arr[sweep_index]) / sweep_to_ms sweep_times.append(sweep_time) gc_times.append(mark_time + sweep_time) except IOError: pass - return np.array(timestamps), np.array(mark_times), np.array(sweep_times), np.array(gc_times) + return np.array(mark_times), np.array(sweep_times), np.array(gc_times) def percentile_gc(configurations, percentile): @@ -82,7 +100,7 @@ def percentile_gc(configurations, percentile): res_total = [] for conf in configurations: try: - _, mark, sweep, total = gc_stats(bench, conf) + mark, sweep, total = gc_stats(bench, conf) res_mark.append(np.percentile(mark, percentile)) res_sweep.append(np.percentile(sweep, percentile)) res_total.append(np.percentile(total, percentile)) @@ -149,7 +167,7 @@ def bar_chart_gc_relative(plt, configurations, percentile): mark_ref = 
[] for bench in benchmarks: try: - _, mark, _, total = gc_stats(bench, configurations[0]) + mark, _, total = gc_stats(bench, configurations[0]) base.append(np.percentile(total, percentile)) ref.append(1.0) mark_ref.append(np.percentile(mark / total, percentile)) @@ -166,7 +184,7 @@ def bar_chart_gc_relative(plt, configurations, percentile): for bench, base_val in zip(benchmarks, base): try: if base_val > 0: - _, mark, _, total = gc_stats(bench, conf) + mark, _, total = gc_stats(bench, conf) res.append(np.percentile(total, percentile) / base_val) mark_res.append(np.percentile(mark, percentile) / base_val) else: @@ -194,7 +212,7 @@ def bar_chart_gc_absolute(plt, configurations, percentile): mark_res = [] for bench in benchmarks: try: - _, mark, _, total = gc_stats(bench, conf) + mark, _, total = gc_stats(bench, conf) res.append(np.percentile(total, percentile)) mark_res.append(np.percentile(mark, percentile)) except IndexError: @@ -249,7 +267,7 @@ def gc_pause_time_chart(plt, configurations, bench, limit=100): plt.clf() plt.cla() for conf in configurations: - _, _, _, pauses = gc_stats(bench, conf) + _, _, pauses = gc_stats(bench, conf) if pauses.size > 0: percentiles = np.arange(0, limit) percvalue = np.array([np.percentile(pauses, perc) for perc in percentiles]) From b3ae6d63dbccf2d846d363ebf9a46e0eb0350cd8 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sun, 21 Oct 2018 20:40:10 +0200 Subject: [PATCH 062/169] it is `git fetch --all`, not `git --fetch --all` --- scripts/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/run.py b/scripts/run.py index c382654..deebef3 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -45,7 +45,7 @@ def run(cmd, env=None, wd=None): def compile_scala_native(sha1): scala_native_dir = "../scala-native" - git_fetch = ['git', '--fetch', '--all'] + git_fetch = ['git', 'fetch', '--all'] try: run(git_fetch, wd = scala_native_dir) except: From e6a44a7a924304cef9f33692a18ae060d85fce7a Mon Sep 17 00:00:00 2001 From: Valdis Date: Wed, 24 Oct 2018 16:39:51 +0200 Subject: [PATCH 063/169] --new : do not overwrite previous results --- scripts/run.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scripts/run.py b/scripts/run.py index c382654..93c1128 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -193,6 +193,7 @@ def single_run(to_run): parser.add_argument("--batches", help="number of batches per run", type=int, default=default_batches) parser.add_argument("--par", help="number of parallel processes", type=int, default=default_par) parser.add_argument("--gc", help="gather gc statistics", action="store_true") + parser.add_argument("--new", help="do not override old results", action="store_true") parser.add_argument("--append", help="do not delete old data", action="store_true") parser.add_argument("set", nargs='*', default="all") args = parser.parse_args() @@ -235,6 +236,10 @@ def single_run(to_run): for conf in configurations: conf_name, sha1 = split_sha1(conf) + root_dir = os.path.join('results', conf + suffix) + if args.new and os.path.isfile(os.path.join(root_dir,".complete")): + print root_dir, "already complete, skipping" + continue if sha1 != None: success = compile_scala_native(sha1) if not success: @@ -243,7 +248,6 @@ def single_run(to_run): if not args.append: sh.rmtree(os.path.join('results', conf + suffix), ignore_errors=True) - root_dir = os.path.join('results', conf + suffix) mkdir(root_dir) for bench in benchmarks:
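
A note on the header analysis in PATCH 061 above: the GC logs changed their time unit between Scala Native builds, so rather than matching whole header strings the script now sniffs each column's unit suffix and derives a divisor to milliseconds. A minimal standalone sketch of that scheme (hypothetical helper name, not code from scripts/summary.py; runs under both Python 2 and 3):

    UNIT_TO_MS = {"ms": 1, "us": 1000, "ns": 1000 * 1000}

    def sniff_gc_header(header_line):
        # Locate the mark/sweep columns and the divisor that converts
        # each column's unit to milliseconds.
        mark = sweep = (-1, 1)
        for i, column in enumerate(header_line.strip().split(',')):
            name, _, unit = column.rpartition('_')
            if unit not in UNIT_TO_MS:
                continue  # e.g. "collection" carries no unit suffix
            if name == "mark_time":
                mark = (i, UNIT_TO_MS[unit])
            elif name == "sweep_time":
                sweep = (i, UNIT_TO_MS[unit])
        return mark, sweep

    # Both the legacy microsecond files and the newer nanosecond files resolve:
    # sniff_gc_header("timestamp_us,collection,mark_time_us,sweep_time_us")
    #   -> ((2, 1000), (3, 1000))
    # sniff_gc_header("timestamp_ns,collection,mark_time_ns,sweep_time_ns")
    #   -> ((2, 1000000), (3, 1000000))

From 4ca474622922d854c40e8a4d16cae00a1d9c31db Mon Sep 17 00:00:00 2001 From: Valdis Date: Wed,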
24 Oct 2018 16:46:26 +0200 Subject: [PATCH 064/169] better default setting --- scripts/run.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/scripts/run.py b/scripts/run.py index 93c1128..68ec5f9 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -110,6 +110,7 @@ def compile(bench, compilecmd): 'jvm', stable, ] +default = baseline + [latest] confs_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) + "/confs" @@ -195,7 +196,7 @@ def single_run(to_run): parser.add_argument("--gc", help="gather gc statistics", action="store_true") parser.add_argument("--new", help="do not override old results", action="store_true") parser.add_argument("--append", help="do not delete old data", action="store_true") - parser.add_argument("set", nargs='*', default="all") + parser.add_argument("set", nargs='*', default="default") args = parser.parse_args() print args @@ -203,16 +204,16 @@ def single_run(to_run): batches = args.batches par = args.par - if args.set != "all": - configurations = [] - for choice in args.set: - expanded = expand_wild_cards(choice) - if expanded == "baseline": - configurations += baseline - else: - configurations += [expanded] - else: - configurations = all_configs + configurations = [] + for choice in args.set: + expanded = expand_wild_cards(choice) + if expanded == "baseline": + configurations += baseline + elif expanded == "default": + configurations = default + else: + configurations += [expanded] + print "configurations:", configurations From 3917386bfa87730694542e5f0ac730c1d8623759 Mon Sep 17 00:00:00 2001 From: Valdis Date: Wed, 24 Oct 2018 17:17:44 +0200 Subject: [PATCH 065/169] handling branch@remote, resolving refs, doing only 1 fetch --- scripts/run.py | 56 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 49 insertions(+), 7 deletions(-) diff --git a/scripts/run.py b/scripts/run.py index 68ec5f9..0140476 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -43,14 +43,28 @@ def run(cmd, env=None, wd=None): return subp.check_output(cmd, stderr=subp.STDOUT, env=env, cwd=wd) -def compile_scala_native(sha1): - scala_native_dir = "../scala-native" - git_fetch = ['git', '--fetch', '--all'] +scala_native_dir = "../scala-native" + + +def fetch(): + git_fetch = ['git', 'fetch', '--all'] try: run(git_fetch, wd = scala_native_dir) except: pass +def get_ref(ref): + git_rev_parse = ['git', 'rev-parse', ref] + try: + return run(git_rev_parse, wd = scala_native_dir).strip() + except subp.CalledProcessError as err: + out = err.output + print "Cannot find", ref, "!" 
+ print out + return None def compile_scala_native(sha1): git_checkout = ['git', 'checkout', sha1] try: print run(git_checkout, wd = scala_native_dir) @@ -142,9 +156,11 @@ def expand_wild_cards(arg): return arg -def split_sha1(arg): +def ref_parse(arg): parts = arg.split("@") - if len(parts) == 2: + if len(parts) == 3: + return parts[0], (parts[2] + "/" + parts[1]) + elif len(parts) == 2: return parts[0], parts[1] else: return arg, None @@ -217,6 +233,15 @@ def single_run(to_run): print "configurations:", configurations + should_fetch = False + for conf in configurations: + if '@' in conf: + should_fetch = True + break + + if should_fetch: + fetch() + suffix = "" if runs != default_runs: suffix += "-r" + str(runs) @@ -230,17 +255,30 @@ def single_run(to_run): suffix += "_" + args.suffix failed = [] + result_dirs = [] pool = None if par > 1: pool = mp.Pool(par) for conf in configurations: - conf_name, sha1 = split_sha1(conf) + conf_name, ref = ref_parse(conf) + + if ref == None: + sha1 = None + root_dir = os.path.join('results', conf_name + suffix) + else: + sha1 = get_ref(ref) + if sha1 == None: + continue + root_dir = os.path.join('results', conf + "." + sha1 + "." + suffix) + + - root_dir = os.path.join('results', conf + suffix) if args.new and os.path.isfile(os.path.join(root_dir,".complete")): print root_dir, "already complete, skipping" continue + + if sha1 != None: success = compile_scala_native(sha1) if not success: @@ -293,7 +331,11 @@ def single_run(to_run): # mark it as complete open(os.path.join(root_dir, ".complete"), 'w+').close() + result_dirs += [root_dir] + print "results:" + for dir in result_dirs: + print dir if len(failed) > 0: print("{} benchmarks failed ".format(len(failed))) for fail in failed: From f2b7135b844a8b2a22bfda30b9364244dafd3e4c Mon Sep 17 00:00:00 2001 From: Valdis Date: Wed, 24 Oct 2018 17:20:33 +0200 Subject: [PATCH 066/169] use short sha1 --- scripts/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/run.py b/scripts/run.py index 0140476..c3f3068 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -54,7 +54,7 @@ def fetch(): pass def get_ref(ref): - git_rev_parse = ['git', 'rev-parse', ref] + git_rev_parse = ['git', 'rev-parse', '--short' , ref] try: return run(git_rev_parse, wd = scala_native_dir).strip() except subp.CalledProcessError as err: From 2a238443aa3aff33c3a9f40b4cff380bbf09eb16 Mon Sep 17 00:00:00 2001 From: Valdis Date: Wed, 24 Oct 2018 18:08:15 +0200 Subject: [PATCH 067/169] ability to choose individual benchmarks --- scripts/run.py | 32 +++++++++++++++++++------------- scripts/summary.py | 38 +++++++++++++++++++------------------- 2 files changed, 38 insertions(+), 32 deletions(-) diff --git a/scripts/run.py b/scripts/run.py index c3f3068..af7e2ab 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -46,17 +46,18 @@ def run(cmd, env=None, wd=None): scala_native_dir = "../scala-native" -def fetch(): +def fetch(): git_fetch = ['git', 'fetch', '--all'] try: - run(git_fetch, wd = scala_native_dir) + run(git_fetch, wd=scala_native_dir) except: pass -def get_ref(ref): +def get_ref(ref): git_rev_parse = ['git', 'rev-parse', '--short', ref] try: - return run(git_rev_parse, wd = scala_native_dir).strip() + return run(git_rev_parse, wd=scala_native_dir).strip() except subp.CalledProcessError as err: out = err.output print "Cannot find", ref, "!"
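
A note on PATCHes 065 and 066 above: a configuration argument may now carry a git reference, either conf@ref or conf@branch@remote, which is resolved to a short sha1 inside the ../scala-native checkout. A minimal sketch of the grammar and the lookup (a standalone restatement with hypothetical names; the real logic lives in ref_parse and get_ref in scripts/run.py):

    import subprocess

    def parse_conf(arg):
        parts = arg.split("@")
        if len(parts) == 3:     # conf@branch@remote -> look up remote/branch
            return parts[0], parts[2] + "/" + parts[1]
        elif len(parts) == 2:   # conf@ref (sha1, tag or local branch)
            return parts[0], parts[1]
        return arg, None        # plain conf, nothing to resolve

    def resolve_short_sha1(ref, repo="../scala-native"):
        # Equivalent of running: git rev-parse --short <ref> inside the repo.
        return subprocess.check_output(['git', 'rev-parse', '--short', ref],
                                       cwd=repo).strip()

    # parse_conf("scala-native-0.3.9-SNAPSHOT@master@origin")
    #   -> ("scala-native-0.3.9-SNAPSHOT", "origin/master")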
@@ -67,7 +68,7 @@ def get_ref(ref): def compile_scala_native(sha1): git_checkout = ['git', 'checkout', sha1] try: - print run(git_checkout, wd = scala_native_dir) + print run(git_checkout, wd=scala_native_dir) except subp.CalledProcessError as err: out = err.output print "Cannot checkout", sha1, "!" @@ -82,7 +83,7 @@ def compile_scala_native(sha1): compile_env["SCALANATIVE_SCALAREPO"] = local_scala_repo_dir try: - run(compile_cmd, compile_env, wd = scala_native_dir) + run(compile_cmd, compile_env, wd=scala_native_dir) return True except subp.CalledProcessError as err: out = err.output @@ -100,7 +101,7 @@ def compile(bench, compilecmd): sbt = where('sbt') -benchmarks = [ +all_benchmarks = [ 'bounce.BounceBenchmark', 'list.ListBenchmark', 'richards.RichardsBenchmark', @@ -208,6 +209,7 @@ def single_run(to_run): parser.add_argument("--suffix", help="suffix added to results") parser.add_argument("--runs", help="number of runs", type=int, default=default_runs) parser.add_argument("--batches", help="number of batches per run", type=int, default=default_batches) + parser.add_argument("--benchmark", help="number of batches per run", action='append') parser.add_argument("--par", help="number of parallel processes", type=int, default=default_par) parser.add_argument("--gc", help="gather gc statistics", action="store_true") parser.add_argument("--new", help="do not override old results", action="store_true") @@ -220,6 +222,13 @@ def single_run(to_run): batches = args.batches par = args.par + if len(args.benchmark) > 0: + benchmarks = [] + for b in args.benchmark: + benchmarks += filter( lambda s: s.startswith(b), all_benchmarks) + else: + benchmarks = all_benchmarks + configurations = [] for choice in args.set: expanded = expand_wild_cards(choice) @@ -230,8 +239,8 @@ def single_run(to_run): else: configurations += [expanded] - print "configurations:", configurations + print "benchmarks:", benchmarks should_fetch = False for conf in configurations: @@ -272,13 +281,10 @@ def single_run(to_run): continue root_dir = os.path.join('results', conf + "." + sha1 + "." 
+ suffix) - - - if args.new and os.path.isfile(os.path.join(root_dir,".complete")): + if args.new and os.path.isfile(os.path.join(root_dir, ".complete")): print root_dir, "already complete, skipping" continue - if sha1 != None: success = compile_scala_native(sha1) if not success: diff --git a/scripts/summary.py b/scripts/summary.py index 1c91f31..77ce8df 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -1,5 +1,5 @@ #!/usr/bin/env python2 -from run import benchmarks, mkdir, expand_wild_cards, generate_choices +from run import all_benchmarks, mkdir, expand_wild_cards, generate_choices import numpy as np import time @@ -94,7 +94,7 @@ def percentile_gc(configurations, percentile): out_mark = [] out_sweep = [] out_total = [] - for bench in benchmarks: + for bench in all_benchmarks: res_mark = [] res_sweep = [] res_total = [] @@ -116,7 +116,7 @@ def percentile_gc(configurations, percentile): def percentile(configurations, percentile): out = [] - for bench in benchmarks: + for bench in all_benchmarks: res = [] for conf in configurations: try: @@ -130,11 +130,11 @@ def percentile(configurations, percentile): def bar_chart_relative(plt, configurations, percentile): plt.clf() plt.cla() - ind = np.arange(len(benchmarks)) + ind = np.arange(len(all_benchmarks)) conf_count = len(configurations) + 1 base = [] ref = [] - for bench in benchmarks: + for bench in all_benchmarks: try: base.append(np.percentile(config_data(bench, configurations[0]), percentile)) ref.append(1.0) @@ -145,13 +145,13 @@ def bar_chart_relative(plt, configurations, percentile): for i, conf in enumerate(configurations[1:]): res = [] - for bench, base_val in zip(benchmarks, base): + for bench, base_val in zip(all_benchmarks, base): try: res.append(np.percentile(config_data(bench, conf), percentile) / base_val) except IndexError: res.append(0) plt.bar(ind * conf_count + i + 1, res, label=conf) - plt.xticks((ind * conf_count + (conf_count - 1) / 2.0), map(benchmark_short_name, benchmarks)) + plt.xticks((ind * conf_count + (conf_count - 1) / 2.0), map(benchmark_short_name, all_benchmarks)) plt.title("Relative test execution times against " + configurations[0] + " at " + str(percentile) + " percentile") plt.legend() return plt @@ -160,12 +160,12 @@ def bar_chart_relative(plt, configurations, percentile): def bar_chart_gc_relative(plt, configurations, percentile): plt.clf() plt.cla() - ind = np.arange(len(benchmarks)) + ind = np.arange(len(all_benchmarks)) conf_count = len(configurations) + 1 base = [] ref = [] mark_ref = [] - for bench in benchmarks: + for bench in all_benchmarks: try: mark, _, total = gc_stats(bench, configurations[0]) base.append(np.percentile(total, percentile)) @@ -181,7 +181,7 @@ def bar_chart_gc_relative(plt, configurations, percentile): for i, conf in enumerate(configurations[1:]): res = [] mark_res = [] - for bench, base_val in zip(benchmarks, base): + for bench, base_val in zip(all_benchmarks, base): try: if base_val > 0: mark, _, total = gc_stats(bench, conf) @@ -195,7 +195,7 @@ def bar_chart_gc_relative(plt, configurations, percentile): mark_res.append(0) plt.bar(ind * conf_count + i + 1, res, label=conf + "-sweep") # total (look like sweep) plt.bar(ind * conf_count + i + 1, mark_res, label=conf + "-mark") # mark time - plt.xticks((ind * conf_count + (conf_count - 1) / 2.0), map(benchmark_short_name, benchmarks)) + plt.xticks((ind * conf_count + (conf_count - 1) / 2.0), map(benchmark_short_name, all_benchmarks)) plt.title("Relative gc times against " + configurations[0] + " at " + str(percentile) + " 
percentile") plt.legend() return plt @@ -204,13 +204,13 @@ def bar_chart_gc_relative(plt, configurations, percentile): def bar_chart_gc_absolute(plt, configurations, percentile): plt.clf() plt.cla() - ind = np.arange(len(benchmarks)) + ind = np.arange(len(all_benchmarks)) conf_count = len(configurations) + 1 for i, conf in enumerate(configurations): res = [] mark_res = [] - for bench in benchmarks: + for bench in all_benchmarks: try: mark, _, total = gc_stats(bench, conf) res.append(np.percentile(total, percentile)) @@ -219,7 +219,7 @@ def bar_chart_gc_absolute(plt, configurations, percentile): res.append(0) plt.bar(ind * conf_count + i + 1, res, label=conf + "-sweep") # total (look like sweep) plt.bar(ind * conf_count + i + 1, mark_res, label=conf + "-mark") # mark time - plt.xticks((ind * conf_count + (conf_count - 1) / 2.0), map(benchmark_short_name, benchmarks)) + plt.xticks((ind * conf_count + (conf_count - 1) / 2.0), map(benchmark_short_name, all_benchmarks)) plt.title("Garbage collector pause times at " + str(percentile) + " percentile") plt.legend() return plt @@ -285,7 +285,7 @@ def print_table(configurations, data): for conf in configurations: leading.append(conf) print ','.join(leading) - for bench, res in zip(benchmarks, data): + for bench, res in zip(all_benchmarks, data): print ','.join([bench] + list(map(str, res))) @@ -305,7 +305,7 @@ def write_md_table(file, configurations, data): file.write('\n') gmul = np.ones(len(configurations)-1) - for bench, res0 in zip(benchmarks, data): + for bench, res0 in zip(all_benchmarks, data): base = res0[0] res = [("%.4f" % base)] + sum(map(lambda x: cell(x, base), res0[1:]), []) file.write('|') @@ -318,7 +318,7 @@ def write_md_table(file, configurations, data): file.write('| __Geometrical mean:__|') for gm in gmul: file.write('| |') - gmean = float(gm) ** (1.0 / len(benchmarks)) + gmean = float(gm) ** (1.0 / len(all_benchmarks)) percent_diff = (gmean - 1) * 100 precent_diff_cell = ("+" if percent_diff > 0 else "__") + ("%.2f" % percent_diff) + "%" + ("" if percent_diff > 0 else "__") file.write(precent_diff_cell) @@ -340,7 +340,7 @@ def write_md_table_gc(file, configurations, mark_data, sweep_data, total_data): file.write(' -- |') file.write('\n') - for bench, mark_res0, sweep_res0, total_res0 in zip(benchmarks, mark_data, sweep_data, total_data): + for bench, mark_res0, sweep_res0, total_res0 in zip(all_benchmarks, mark_data, sweep_data, total_data): for name, res0 in zip(["mark", "sweep", "total"], [mark_res0, sweep_res0, total_res0]): base = res0[0] res = [("%.4f" % base)] + sum(map(lambda x: cell(x, base), res0[1:]), []) @@ -392,7 +392,7 @@ def write_md_file(rootdir, md_file, configurations, gc_charts = True): write_md_table_gc(md_file, configurations, mark, sweep, total) md_file.write("# Individual benchmarks\n") - for bench in benchmarks: + for bench in all_benchmarks: md_file.write("## ") md_file.write(bench) md_file.write("\n") From 1bf4856181339a88c86ac6c489d9bbed3a606ea6 Mon Sep 17 00:00:00 2001 From: Valdis Date: Wed, 24 Oct 2018 21:18:28 +0200 Subject: [PATCH 068/169] usable normally --- scripts/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/run.py b/scripts/run.py index af7e2ab..d40e514 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -222,7 +222,7 @@ def single_run(to_run): batches = args.batches par = args.par - if len(args.benchmark) > 0: + if args.benchmark != None: benchmarks = [] for b in args.benchmark: benchmarks += filter( lambda s: s.startswith(b), all_benchmarks) From 
dda14885364f6f53eff221a706581c433aa79e4e Mon Sep 17 00:00:00 2001 From: Valdis Date: Thu, 25 Oct 2018 07:18:42 +0200 Subject: [PATCH 069/169] fix: results not in root_dir --- scripts/run.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/run.py b/scripts/run.py index d40e514..10f35af 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -291,7 +291,7 @@ def single_run(to_run): continue if not args.append: - sh.rmtree(os.path.join('results', conf + suffix), ignore_errors=True) + sh.rmtree(root_dir, ignore_errors=True) mkdir(root_dir) @@ -317,7 +317,7 @@ def single_run(to_run): compile(bench, compilecmd) - resultsdir = os.path.join('results', conf + suffix, bench) + resultsdir = os.path.join(root_dir, bench) mkdir(resultsdir) cmd = [] From 2175c0fa96d6b7ed2aeb76b2ec8701c91bba8b86 Mon Sep 17 00:00:00 2001 From: Valdis Date: Thu, 25 Oct 2018 19:35:48 +0200 Subject: [PATCH 070/169] @HEAD option to avoid checkout --- scripts/run.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/scripts/run.py b/scripts/run.py index 10f35af..189bf39 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -65,15 +65,16 @@ def get_ref(ref): return None -def compile_scala_native(sha1): - git_checkout = ['git', 'checkout', sha1] - try: - print run(git_checkout, wd=scala_native_dir) - except subp.CalledProcessError as err: - out = err.output - print "Cannot checkout", sha1, "!" - print out - return False +def compile_scala_native(ref, sha1): + if ref != "HEAD": + git_checkout = ['git', 'checkout', sha1] + try: + print run(git_checkout, wd=scala_native_dir) + except subp.CalledProcessError as err: + out = err.output + print "Cannot checkout", sha1, "!" + print out + return False compile_cmd = [sbt, '-no-colors', '-J-Xmx2G', 'rebuild', 'sandbox/run'] compile_env = os.environ.copy() @@ -286,7 +287,7 @@ def single_run(to_run): continue if sha1 != None: - success = compile_scala_native(sha1) + success = compile_scala_native(ref, sha1) if not success: continue From b9bb31bf3796dca5bf00d0769cbcd65533e689aa Mon Sep 17 00:00:00 2001 From: Valdis Date: Thu, 25 Oct 2018 19:54:34 +0200 Subject: [PATCH 071/169] update to handle changes in scala-native/scala-native#1364 --- scripts/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/run.py b/scripts/run.py index 189bf39..e361ba5 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -190,7 +190,7 @@ def single_run(to_run): print('--- run {}/{}'.format(n, runs)) my_env = os.environ.copy() if gcstats: - my_env["SCALANATIVE_GC_STATS_FILE"] = os.path.join(resultsdir, str(n) + ".gc.csv") + my_env["SCALANATIVE_STATS_FILE"] = os.path.join(resultsdir, str(n) + ".gc.csv") try: out = run(cmd, my_env) with open(os.path.join(resultsdir, str(n)), 'w+') as resultfile: From 00191290af9884955f3465914eaafc29d8453617 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sat, 27 Oct 2018 11:09:09 +0200 Subject: [PATCH 072/169] selecting the size --- scripts/run.py | 125 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 83 insertions(+), 42 deletions(-) diff --git a/scripts/run.py b/scripts/run.py index e361ba5..fd34755 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -168,6 +168,15 @@ def ref_parse(arg): return arg, None +def size_parse(arg): + parts = arg.split(":") + if len(parts) == 1: + return [arg, arg] + else: + return parts + + + def generate_choices(direct_choices): results = direct_choices for dir in direct_choices: @@ -186,11 +195,24 @@ def single_run(to_run): conf = to_run["conf"] bench = 
to_run["bench"] gcstats = to_run["gcstats"] + minsize = to_run["size"][0] + maxsize = to_run["size"][1] print('--- run {}/{}'.format(n, runs)) my_env = os.environ.copy() if gcstats: my_env["SCALANATIVE_STATS_FILE"] = os.path.join(resultsdir, str(n) + ".gc.csv") + + if minsize != "default": + my_env["SCALANATIVE_MIN_HEAP_SIZE"] = minsize + elif "SCALANATIVE_MIN_HEAP_SIZE" in my_env: + del my_env["SCALANATIVE_MIN_HEAP_SIZE"] + + if maxsize != "default": + my_env["SCALANATIVE_MAX_HEAP_SIZE"] = maxsize + elif "SCALANATIVE_MAX_HEAP_SIZE" in my_env: + del my_env["SCALANATIVE_MAX_HEAP_SIZE"] + try: out = run(cmd, my_env) with open(os.path.join(resultsdir, str(n)), 'w+') as resultfile: @@ -210,10 +232,11 @@ def single_run(to_run): parser.add_argument("--suffix", help="suffix added to results") parser.add_argument("--runs", help="number of runs", type=int, default=default_runs) parser.add_argument("--batches", help="number of batches per run", type=int, default=default_batches) - parser.add_argument("--benchmark", help="number of batches per run", action='append') + parser.add_argument("--benchmark", help="benchmarks to run", action='append') + parser.add_argument("--size", help="different size settings to use", action='append') parser.add_argument("--par", help="number of parallel processes", type=int, default=default_par) parser.add_argument("--gc", help="gather gc statistics", action="store_true") - parser.add_argument("--new", help="do not override old results", action="store_true") + parser.add_argument("--overwrite", help="overwrite old results", action="store_true") parser.add_argument("--append", help="do not delete old data", action="store_true") parser.add_argument("set", nargs='*', default="default") args = parser.parse_args() @@ -230,6 +253,13 @@ def single_run(to_run): else: benchmarks = all_benchmarks + if args.size != None: + sizes = [] + for size_str in args.size: + sizes += [size_parse(size_str)] + else: + sizes = [["default", "default"]] + configurations = [] for choice in args.set: expanded = expand_wild_cards(choice) @@ -242,6 +272,7 @@ def single_run(to_run): print "configurations:", configurations print "benchmarks:", benchmarks + print "heap sizes:", sizes should_fetch = False for conf in configurations: @@ -282,63 +313,73 @@ def single_run(to_run): continue root_dir = os.path.join('results', conf + "." + sha1 + "." 
+ suffix) - if args.new and os.path.isfile(os.path.join(root_dir, ".complete")): - print root_dir, "already complete, skipping" - continue if sha1 != None: success = compile_scala_native(ref, sha1) if not success: continue - if not args.append: - sh.rmtree(root_dir, ignore_errors=True) + for size in sizes: - mkdir(root_dir) + if size == ["default","default"] : + sized_dir = root_dir + else: + size_str = "size_" + size[0] + "-" + size[1] + sized_dir = os.path.join(root_dir, size_str) - for bench in benchmarks: - print('--- conf: {}, bench: {}'.format(conf, bench)) + if not args.overwrite and os.path.isfile(os.path.join(sized_dir, ".complete")): + print sized_dir, "already complete, skipping" + continue - input = slurp(os.path.join('input', bench)) - output = slurp(os.path.join('output', bench)) - compilecmd = slurp(os.path.join('confs', conf_name, 'compile')) - runcmd = slurp(os.path.join('confs', conf_name, 'run')) \ - .replace('$BENCH', bench) \ - .replace('$HOME', os.environ['HOME']).split(' ') - if os.path.exists(os.path.join('confs', conf_name, 'build.sbt')): - sh.copyfile(os.path.join('confs', conf_name, 'build.sbt'), 'build.sbt') - else: - os.remove('build.sbt') + if not args.append: + sh.rmtree(sized_dir, ignore_errors=True) - if os.path.exists(os.path.join('confs', conf_name, 'plugins.sbt')): - sh.copyfile(os.path.join('confs', conf_name, 'plugins.sbt'), 'project/plugins.sbt') - else: - os.remove('project/plugins.sbt') + mkdir(sized_dir) - compile(bench, compilecmd) + for bench in benchmarks: + print('--- conf: {}, bench: {}'.format(conf, bench)) - resultsdir = os.path.join(root_dir, bench) - mkdir(resultsdir) + input = slurp(os.path.join('input', bench)) + output = slurp(os.path.join('output', bench)) + compilecmd = slurp(os.path.join('confs', conf_name, 'compile')) + runcmd = slurp(os.path.join('confs', conf_name, 'run')) \ + .replace('$BENCH', bench) \ + .replace('$HOME', os.environ['HOME']).split(' ') - cmd = [] - cmd.extend(runcmd) - cmd.extend([str(batches), str(batch_size), input, output]) + if os.path.exists(os.path.join('confs', conf_name, 'build.sbt')): + sh.copyfile(os.path.join('confs', conf_name, 'build.sbt'), 'build.sbt') + else: + os.remove('build.sbt') - to_run = [] - for n in xrange(runs): - to_run += [ - dict(runs=runs, cmd=cmd, resultsdir=resultsdir, conf=conf, bench=bench, n=n, gcstats=args.gc)] + if os.path.exists(os.path.join('confs', conf_name, 'plugins.sbt')): + sh.copyfile(os.path.join('confs', conf_name, 'plugins.sbt'), 'project/plugins.sbt') + else: + os.remove('project/plugins.sbt') - if par == 1: - for tr in to_run: - failed += single_run(tr) - else: - failed += sum(pool.map(single_run, to_run), []) + compile(bench, compilecmd) + + resultsdir = os.path.join(sized_dir, bench) + mkdir(resultsdir) + + cmd = [] + cmd.extend(runcmd) + cmd.extend([str(batches), str(batch_size), input, output]) + + to_run = [] + for n in xrange(runs): + to_run += [ + dict(runs=runs, cmd=cmd, resultsdir=resultsdir, conf=conf, bench=bench, n=n, gcstats=args.gc, size = size)] + + if par == 1: + for tr in to_run: + failed += single_run(tr) + else: + failed += sum(pool.map(single_run, to_run), []) - # mark it as complete - open(os.path.join(root_dir, ".complete"), 'w+').close() - result_dirs += [root_dir] + # mark it as complete + open(os.path.join(sized_dir, ".complete"), 'w+').close() + result_dirs += [sized_dir] print "results:" for dir in result_dirs: From 4a6c4ceaea23956a0b3b770d44d99882b5fd934c Mon Sep 17 00:00:00 2001 From: Valdis Date: Sat, 27 Oct 2018 11:18:40 +0200 
Subject: [PATCH 073/169] allow comparing with sized --- scripts/summary.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/scripts/summary.py b/scripts/summary.py index 77ce8df..292bc1d 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -405,6 +405,14 @@ def write_md_file(rootdir, md_file, configurations, gc_charts = True): if __name__ == '__main__': all_configs = next(os.walk("results"))[1] + # added size_ + for conf in all_configs: + folder = os.path.join("results", conf) + subfolders = next(os.walk(folder))[1] + for size in subfolders: + if size.startswith("size_"): + all_configs += [os.path.join(conf, size)] + results = generate_choices(all_configs) parser = argparse.ArgumentParser() From 168c734d1495fc539a38e2cb264ec680b7174257 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sat, 27 Oct 2018 11:32:33 +0200 Subject: [PATCH 074/169] allow comparing with sized --- scripts/run.py | 10 ++++------ scripts/summary.py | 27 ++++++++++++++++----------- 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/scripts/run.py b/scripts/run.py index fd34755..06bc6fe 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -176,7 +176,6 @@ def size_parse(arg): return parts - def generate_choices(direct_choices): results = direct_choices for dir in direct_choices: @@ -249,7 +248,7 @@ def single_run(to_run): if args.benchmark != None: benchmarks = [] for b in args.benchmark: - benchmarks += filter( lambda s: s.startswith(b), all_benchmarks) + benchmarks += filter(lambda s: s.startswith(b), all_benchmarks) else: benchmarks = all_benchmarks @@ -313,7 +312,6 @@ def single_run(to_run): continue root_dir = os.path.join('results', conf + "." + sha1 + "." + suffix) - if sha1 != None: success = compile_scala_native(ref, sha1) if not success: @@ -321,7 +319,7 @@ def single_run(to_run): for size in sizes: - if size == ["default","default"] : + if size == ["default", "default"]: sized_dir = root_dir else: size_str = "size_" + size[0] + "-" + size[1] @@ -331,7 +329,6 @@ def single_run(to_run): print sized_dir, "already complete, skipping" continue - if not args.append: sh.rmtree(sized_dir, ignore_errors=True) @@ -369,7 +366,8 @@ def single_run(to_run): to_run = [] for n in xrange(runs): to_run += [ - dict(runs=runs, cmd=cmd, resultsdir=resultsdir, conf=conf, bench=bench, n=n, gcstats=args.gc, size = size)] + dict(runs=runs, cmd=cmd, resultsdir=resultsdir, conf=conf, bench=bench, n=n, gcstats=args.gc, + size=size)] if par == 1: for tr in to_run: diff --git a/scripts/summary.py b/scripts/summary.py index 292bc1d..a6a50bc 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -52,7 +52,7 @@ def gc_stats(bench, conf): mark_to_ms = 0 sweep_to_ms = 0 - unit2div = dict(ms = 1, us = 1000, ns = 1000 * 1000) + unit2div = dict(ms=1, us=1000, ns=1000 * 1000) header = data.readline().strip() for i, h in enumerate(header.split(',')): @@ -69,7 +69,6 @@ def gc_stats(bench, conf): sweep_index = i sweep_to_ms = unit2div[unit] - if mark_index == -1: print "Header does not have mark_time_", header, "at", file if sweep_index == -1: @@ -304,7 +303,8 @@ def write_md_table(file, configurations, data): file.write(' -- |') file.write('\n') - gmul = np.ones(len(configurations)-1) + gmul = np.ones(len(configurations) - 1) + gcount = np.zeros(len(configurations) - 1) for bench, res0 in zip(all_benchmarks, data): base = res0[0] res = [("%.4f" % base)] + sum(map(lambda x: cell(x, base), res0[1:]), []) @@ -313,14 +313,17 @@ def write_md_table(file, configurations, data): file.write('|\n') for i, d0 in enumerate(res0[1:]): 
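
A note on the change just below: a benchmark with no data contributes a 0, and a single zero used to wipe out the whole geometric-mean product, forcing the summary row to -100%. The fix multiplies only non-zero ratios and takes the root over the number actually counted. A worked standalone sketch (hypothetical helper, not code from scripts/summary.py):

    def geomean_percent_diff(ratios):
        # ratios: per-benchmark run time relative to the baseline configuration
        product, count = 1.0, 0
        for r in ratios:
            if r > 0:  # skip benchmarks that produced no data
                product *= r
                count += 1
        if count == 0:
            return None
        gmean = product ** (1.0 / count)
        return (gmean - 1) * 100

    # geomean_percent_diff([1.10, 0.90, 0.0]) -> about -0.50 (percent),
    # instead of the -100% that a zero ratio would previously have forced.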
- gmul[i] *= (float(d0) / base) + if d0 != 0 and base != 0: + gmul[i] *= (float(d0) / base) + gcount[i] += 1 file.write('| __Geometrical mean:__|') - for gm in gmul: + for gm, count in zip(gmul, gcount): file.write('| |') - gmean = float(gm) ** (1.0 / len(all_benchmarks)) + gmean = float(gm) ** (1.0 / count) percent_diff = (gmean - 1) * 100 - precent_diff_cell = ("+" if percent_diff > 0 else "__") + ("%.2f" % percent_diff) + "%" + ("" if percent_diff > 0 else "__") + precent_diff_cell = ("+" if percent_diff > 0 else "__") + ("%.2f" % percent_diff) + "%" + ( + "" if percent_diff > 0 else "__") file.write(precent_diff_cell) file.write("|\n") @@ -358,7 +361,8 @@ def write_md_table_gc(file, configurations, mark_data, sweep_data, total_data): def cell(x, base): if base > 0: percent_diff = (float(x) / base - 1) * 100 - precent_diff_cell = ("+" if percent_diff > 0 else "__") + ("%.2f" % percent_diff) + "%" + ("" if percent_diff > 0 else "__") + precent_diff_cell = ("+" if percent_diff > 0 else "__") + ("%.2f" % percent_diff) + "%" + ( + "" if percent_diff > 0 else "__") else: precent_diff_cell = "N/A" return [("%.4f" % x), precent_diff_cell] @@ -377,7 +381,7 @@ def chart_md(md_file, plt, rootdir, name): md_file.write("![Chart]({})\n\n".format(name)) -def write_md_file(rootdir, md_file, configurations, gc_charts = True): +def write_md_file(rootdir, md_file, configurations, gc_charts=True): md_file.write("# Summary\n") for p in [50, 90, 99]: md_file.write("## Benchmark run time (ms) at {} percentile \n".format(p)) @@ -399,7 +403,8 @@ def write_md_file(rootdir, md_file, configurations, gc_charts = True): chart_md(md_file, percentiles_chart(plt, configurations, bench), rootdir, "percentile_" + bench + ".png") if gc_charts: - chart_md(md_file, gc_pause_time_chart(plt, configurations, bench), rootdir, "gc_pause_times_" + bench + ".png") + chart_md(md_file, gc_pause_time_chart(plt, configurations, bench), rootdir, + "gc_pause_times_" + bench + ".png") chart_md(md_file, example_run_plot(plt, configurations, bench), rootdir, "example_run_3_" + bench + ".png") @@ -429,7 +434,7 @@ def write_md_file(rootdir, md_file, configurations, gc_charts = True): for arg in args.comparisons: configurations += [expand_wild_cards(arg)] - comment = "_vs_".join(configurations) + comment = "_vs_".join(configurations).replace(os.sep, "_") if args.comment is not None: comment = args.comment From effbc5515df9ed31f24ccc984b9019def5f43738 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sat, 27 Oct 2018 11:43:02 +0200 Subject: [PATCH 075/169] log skipped and failed compilation at the end --- scripts/run.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/scripts/run.py b/scripts/run.py index 06bc6fe..ce6b139 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -295,6 +295,8 @@ def single_run(to_run): suffix += "_" + args.suffix failed = [] + skipped = [] + compile_fail = [] result_dirs = [] pool = None if par > 1: @@ -309,12 +311,14 @@ def single_run(to_run): else: sha1 = get_ref(ref) if sha1 == None: + compile_fail += [conf] continue root_dir = os.path.join('results', conf + "." + sha1 + "." 
+ suffix) if sha1 != None: success = compile_scala_native(ref, sha1) if not success: + compile_fail += [conf] continue for size in sizes: @@ -327,6 +331,7 @@ def single_run(to_run): if not args.overwrite and os.path.isfile(os.path.join(sized_dir, ".complete")): print sized_dir, "already complete, skipping" + skipped += [sized_dir] continue if not args.append: @@ -383,8 +388,20 @@ def single_run(to_run): for dir in result_dirs: print dir + if len(compile_fail) > 0: + print("{} compilation failed ".format(len(failed))) + for skip in compile_fail: + print skip + + if len(skipped) > 0: + print("{} benchmarks skipped ".format(len(failed))) + for skip in skipped: + print skip + if len(failed) > 0: print("{} benchmarks failed ".format(len(failed))) for fail in failed: print fail + + if len(compile_fail) > 0 or len(failed) > 0: exit(1) From 18c4d08c90961c1c261ec2f929a2422e9af38c52 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sat, 27 Oct 2018 12:42:13 +0200 Subject: [PATCH 076/169] create symlinks for more general names --- scripts/run.py | 44 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/scripts/run.py b/scripts/run.py index ce6b139..eaf0fdf 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -255,7 +255,11 @@ def single_run(to_run): if args.size != None: sizes = [] for size_str in args.size: - sizes += [size_parse(size_str)] + parsed = size_parse(size_str) + if parsed == ["default", "default"]: + sizes = [parsed] + sizes + else: + sizes += [parsed] else: sizes = [["default", "default"]] @@ -279,8 +283,8 @@ def single_run(to_run): should_fetch = True break - if should_fetch: - fetch() + # if should_fetch: + # fetch() suffix = "" if runs != default_runs: @@ -298,6 +302,8 @@ def single_run(to_run): skipped = [] compile_fail = [] result_dirs = [] + symlinks = [] + pool = None if par > 1: pool = mp.Pool(par) @@ -305,9 +311,11 @@ def single_run(to_run): for conf in configurations: conf_name, ref = ref_parse(conf) + + generalized_dir = os.path.join('results', conf + suffix) if ref == None: sha1 = None - root_dir = os.path.join('results', conf_name + suffix) + root_dir = generalized_dir else: sha1 = get_ref(ref) if sha1 == None: @@ -315,11 +323,20 @@ def single_run(to_run): continue root_dir = os.path.join('results', conf + "." + sha1 + "." 
+ suffix) - if sha1 != None: - success = compile_scala_native(ref, sha1) - if not success: - compile_fail += [conf] - continue + # if sha1 != None: + # success = compile_scala_native(ref, sha1) + # if not success: + # compile_fail += [conf] + # continue + + if generalized_dir != root_dir: + try: + os.unlink(generalized_dir) + except: + pass + print "creating symlink", generalized_dir, "->", root_dir + symlinks += [[generalized_dir,root_dir]] + os.symlink(os.path.split(root_dir)[1], generalized_dir) for size in sizes: @@ -340,7 +357,7 @@ def single_run(to_run): mkdir(sized_dir) for bench in benchmarks: - print('--- conf: {}, bench: {}'.format(conf, bench)) + print('--- heap size: {} conf: {}, bench: {}'.format(size, conf, bench)) input = slurp(os.path.join('input', bench)) output = slurp(os.path.join('output', bench)) @@ -362,6 +379,7 @@ def single_run(to_run): compile(bench, compilecmd) resultsdir = os.path.join(sized_dir, bench) + print "results in", resultsdir mkdir(resultsdir) cmd = [] @@ -388,6 +406,12 @@ def single_run(to_run): for dir in result_dirs: print dir + if len(symlinks) > 0: + print("{} symlinks ".format(len(symlinks))) + for symlink in symlinks: + print symlink[0], "->", symlink[1] + + if len(compile_fail) > 0: print("{} compilation failed ".format(len(failed))) for skip in compile_fail: From 20ddae88107c75e37d20011376db6ca41a2ebe18 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sat, 27 Oct 2018 12:56:31 +0200 Subject: [PATCH 077/169] set JVM heap sizes --- confs/jvm/run | 2 +- confs/native-image-pgo/run | 2 +- confs/native-image/run | 2 +- scripts/run.py | 13 ++++++++++++- 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/confs/jvm/run b/confs/jvm/run index a4d5cce..d8bf304 100644 --- a/confs/jvm/run +++ b/confs/jvm/run @@ -1 +1 @@ -java -Xmx1024M -Xms1024M -classpath target/scala-2.11/classes:$HOME/.ivy2/cache/org.scala-lang/scala-library/jars/scala-library-2.11.12.jar $BENCH +java $JAVA_SIZE_ARGS -classpath target/scala-2.11/classes:$HOME/.ivy2/cache/org.scala-lang/scala-library/jars/scala-library-2.11.12.jar $BENCH diff --git a/confs/native-image-pgo/run b/confs/native-image-pgo/run index a6b6dfc..baed793 100644 --- a/confs/native-image-pgo/run +++ b/confs/native-image-pgo/run @@ -1,2 +1,2 @@ -target/native-image-pgo-bench -Xmx1g -Xmx1g +target/native-image-pgo-bench $JAVA_SIZE_ARGS diff --git a/confs/native-image/run b/confs/native-image/run index 456cc5a..99bbc6e 100644 --- a/confs/native-image/run +++ b/confs/native-image/run @@ -1 +1 @@ -target/native-image-bench -Xmx1g -Xmx1g +target/native-image-bench $JAVA_SIZE_ARGS diff --git a/scripts/run.py b/scripts/run.py index eaf0fdf..e74c126 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -189,7 +189,7 @@ def generate_choices(direct_choices): def single_run(to_run): n = to_run["n"] runs = to_run["runs"] - cmd = to_run["cmd"] + unexpanded_cmd = to_run["cmd"] resultsdir = to_run["resultsdir"] conf = to_run["conf"] bench = to_run["bench"] @@ -212,6 +212,17 @@ def single_run(to_run): elif "SCALANATIVE_MAX_HEAP_SIZE" in my_env: del my_env["SCALANATIVE_MAX_HEAP_SIZE"] + + cmd = [] + for token in unexpanded_cmd: + if token == "$JAVA_SIZE_ARGS": + if minsize != "default": + cmd += ["-Xms" + minsize] + if maxsize != "default": + cmd += ["-Xmx" + maxsize] + else: + cmd += [token] + try: out = run(cmd, my_env) with open(os.path.join(resultsdir, str(n)), 'w+') as resultfile: From 340cd3817210c3f059fcc6e2c2428ce7b32b9ff2 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sat, 27 Oct 2018 13:29:30 +0200 Subject: [PATCH 078/169] 
it should fetch --- scripts/run.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/run.py b/scripts/run.py index e74c126..b3a23df 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -294,8 +294,8 @@ def single_run(to_run): should_fetch = True break - # if should_fetch: - # fetch() + if should_fetch: + fetch() suffix = "" if runs != default_runs: From e42afdacef2954d947fe33c6ae9d98c5b6f9128a Mon Sep 17 00:00:00 2001 From: Valdis Date: Sun, 28 Oct 2018 10:41:37 +0100 Subject: [PATCH 079/169] handle low --runs --- scripts/summary.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/scripts/summary.py b/scripts/summary.py index a6a50bc..9bd6649 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -405,7 +405,24 @@ def write_md_file(rootdir, md_file, configurations, gc_charts=True): if gc_charts: chart_md(md_file, gc_pause_time_chart(plt, configurations, bench), rootdir, "gc_pause_times_" + bench + ".png") - chart_md(md_file, example_run_plot(plt, configurations, bench), rootdir, "example_run_3_" + bench + ".png") + + run = 3 + while run >= 0 and not any_run_exists(bench, configurations, run): + run -= 1 + + if run >= 0: + chart_md(md_file, example_run_plot(plt, configurations, bench), rootdir, + "example_run_" + str(run) + "_" + bench + ".png") + + +def any_run_exists(bench, configurations, run): + exits = False + for conf in configurations: + file = 'results/{}/{}/{}'.format(conf, bench, run) + if os.path.exists(file): + exits = True + break + return exits if __name__ == '__main__': From 4233b25ef51b9dbb40fce96cc1f08812104ff25d Mon Sep 17 00:00:00 2001 From: Valdis Date: Sun, 28 Oct 2018 10:43:41 +0100 Subject: [PATCH 080/169] skip benchmarks that were not run --- scripts/summary.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/summary.py b/scripts/summary.py index 9bd6649..66306fa 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -397,6 +397,9 @@ def write_md_file(rootdir, md_file, configurations, gc_charts=True): md_file.write("# Individual benchmarks\n") for bench in all_benchmarks: + if not any_run_exists(bench, configurations, 0): + continue + md_file.write("## ") md_file.write(bench) md_file.write("\n") From 5b514ff4322dee06c79e69941466cc4820ba8c0b Mon Sep 17 00:00:00 2001 From: Valdis Date: Sun, 28 Oct 2018 11:51:50 +0100 Subject: [PATCH 081/169] run time vs heap size --- scripts/summary.py | 97 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 83 insertions(+), 14 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index 66306fa..5555da1 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -9,7 +9,8 @@ def config_data(bench, conf): - files = next(os.walk("results/{}/{}".format(conf, bench)), [[], [], []])[2] + benchmark_dir = os.path.join("results", conf, bench) + files = next(os.walk(benchmark_dir), [[], [], []])[2] runs = [] for file in files: if "." 
not in file: @@ -20,7 +21,7 @@ def config_data(bench, conf): for run in runs: try: points = [] - with open('results/{}/{}/{}'.format(conf, bench, run)) as data: + with open(os.path.join("results", conf, bench, run)) as data: for line in data.readlines(): # in ms points.append(float(line) / 1000000) @@ -32,7 +33,8 @@ def config_data(bench, conf): def gc_stats(bench, conf): - files = next(os.walk("results/{}/{}".format(conf, bench)), [[], [], []])[2] + benchmark_dir = os.path.join("results", conf, bench) + files = next(os.walk(benchmark_dir), [[], [], []])[2] runs = [] for file in files: if file.endswith(".gc.csv"): @@ -44,7 +46,7 @@ def gc_stats(bench, conf): gc_times = [] for run in runs: try: - file = 'results/{}/{}/{}'.format(conf, bench, run) + file = os.path.join("results", conf, bench, run) with open(file) as data: # analise header mark_index = -1 @@ -113,19 +115,23 @@ def percentile_gc(configurations, percentile): return out_mark, out_sweep, out_total -def percentile(configurations, percentile): +def percentile(configurations, p): out = [] for bench in all_benchmarks: - res = [] - for conf in configurations: - try: - res.append(np.percentile(config_data(bench, conf), percentile)) - except IndexError: - res.append(0) - out.append(res) + out += [percentile_bench(configurations, bench, p)] return out +def percentile_bench(configurations, bench, p): + res = [] + for conf in configurations: + try: + res += [np.percentile(config_data(bench, conf), p)] + except IndexError: + res += [0] + return res + + def bar_chart_relative(plt, configurations, percentile): plt.clf() plt.cla() @@ -245,6 +251,55 @@ def example_run_plot(plt, configurations, bench, run=3): return plt +def to_gb(size_str): + if size_str[-1] == "k" or size_str[-1] == "K": + return float(size_str[:-1]) / 1024 / 1024 + elif size_str[-1] == "m" or size_str[-1] == "M": + return float(size_str[:-1]) / 1024 + elif size_str[-1] == "g" or size_str[-1] == "G": + return float(size_str[:-1]) + else: + # bytes + return float(size_str) / 1024 / 1024 / 1024 + + +def sizes_per_conf(parent_configuration): + parent_folder = os.path.join("results", parent_configuration) + min_sizes = [] + max_sizes = [] + child_confs = [] + folders = next(os.walk(parent_folder))[1] + for f in folders: + if f.startswith("size_"): + parts = f[len("size_"):].split("-") + min_sizes += [to_gb(parts[0])] + max_sizes += [to_gb(parts[1])] + child_confs += [os.path.join(parent_configuration,f)] + return min_sizes, max_sizes, child_confs + + +def size_compare_chart(plt, parent_configurations, bench, p): + plt.clf() + plt.cla() + for parent_conf in parent_configurations: + min_sizes, max_sizes, child_confs = sizes_per_conf(parent_conf) + equal_sizes = [] + equal_confs = [] + for min_size, max_size, child_conf in zip(min_sizes, max_sizes, child_confs): + if min_size == max_size: + equal_sizes += [min_size] + equal_confs += [child_conf] + + percentiles = percentile_bench(equal_confs, bench, p) + plt.plot(np.array(equal_sizes), percentiles, label=parent_conf) + plt.legend() + plt.title("{} at {} percentile".format(bench, p)) + plt.ylim(ymin=0) + plt.xlabel("Heap Size (GB)") + plt.ylabel("Run time (ms)") + return plt + + def percentiles_chart(plt, configurations, bench, limit=99): plt.clf() plt.cla() @@ -381,7 +436,7 @@ def chart_md(md_file, plt, rootdir, name): md_file.write("![Chart]({})\n\n".format(name)) -def write_md_file(rootdir, md_file, configurations, gc_charts=True): +def write_md_file(rootdir, md_file, parent_configurations, configurations, gc_charts=True): 
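
A note on the size handling in PATCH 081 above: per-heap-size results live in sub-folders named size_<min>-<max>, and to_gb normalises the k/m/g suffixes (or a bare byte count) to GB for the x-axis of the size charts. A standalone sketch of that conversion together with the min:max splitting from PATCH 072 (hypothetical helper names):

    def parse_size_range(arg):
        # "512m:1g" -> ("512m", "1g"); "1g" -> ("1g", "1g")
        parts = arg.split(":")
        return (parts[0], parts[0]) if len(parts) == 1 else (parts[0], parts[1])

    def size_to_gb(size_str):
        divisors = {"k": 1024.0 ** 2, "m": 1024.0, "g": 1.0}
        suffix = size_str[-1].lower()
        if suffix in divisors:
            return float(size_str[:-1]) / divisors[suffix]
        return float(size_str) / 1024 ** 3  # no suffix: value is in bytes

    # size_to_gb("512m") -> 0.5; size_to_gb("64k") -> ~6.1e-05; size_to_gb("2G") -> 2.0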
md_file.write("# Summary\n") for p in [50, 90, 99]: md_file.write("## Benchmark run time (ms) at {} percentile \n".format(p)) @@ -409,6 +464,10 @@ def write_md_file(rootdir, md_file, configurations, gc_charts=True): chart_md(md_file, gc_pause_time_chart(plt, configurations, bench), rootdir, "gc_pause_times_" + bench + ".png") + for p in [50, 90, 99]: + chart_md(md_file, size_compare_chart(plt, parent_configurations, bench, p), rootdir, + "size_chart_" + bench + "percentile_" + str(p) + ".png") + run = 3 while run >= 0 and not any_run_exists(bench, configurations, run): run -= 1 @@ -458,10 +517,20 @@ def any_run_exists(bench, configurations, run): if args.comment is not None: comment = args.comment + + parent_configurations = [] + for conf in configurations: + if os.sep in conf: + parent = os.path.split(conf)[0] + else: + parent = conf + if parent not in parent_configurations: + parent_configurations += [parent] + report_dir = "reports/summary_" + time.strftime('%Y%m%d_%H%M%S') + "_" + comment + "/" plt.rcParams["figure.figsize"] = [16.0, 12.0] mkdir(report_dir) with open(os.path.join(report_dir, "Readme.md"), 'w+') as md_file: - write_md_file(report_dir, md_file, configurations, args.gc) + write_md_file(report_dir, md_file, parent_configurations, configurations, args.gc) print report_dir From 382eb7578754247a89a86600bdb30b6abdbb06f6 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sun, 28 Oct 2018 12:38:43 +0100 Subject: [PATCH 082/169] allow selecting benchmarks to compare --- scripts/summary.py | 71 +++++++++++++++++++++++++--------------------- 1 file changed, 39 insertions(+), 32 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index 5555da1..6740c3d 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -91,11 +91,11 @@ def gc_stats(bench, conf): return np.array(mark_times), np.array(sweep_times), np.array(gc_times) -def percentile_gc(configurations, percentile): +def percentile_gc(configurations, benchmarks, percentile): out_mark = [] out_sweep = [] out_total = [] - for bench in all_benchmarks: + for bench in benchmarks: res_mark = [] res_sweep = [] res_total = [] @@ -115,9 +115,9 @@ def percentile_gc(configurations, percentile): return out_mark, out_sweep, out_total -def percentile(configurations, p): +def percentile(configurations, benchmarks, p): out = [] - for bench in all_benchmarks: + for bench in benchmarks: out += [percentile_bench(configurations, bench, p)] return out @@ -132,14 +132,14 @@ def percentile_bench(configurations, bench, p): return res -def bar_chart_relative(plt, configurations, percentile): +def bar_chart_relative(plt, configurations, benchmarks, percentile): plt.clf() plt.cla() - ind = np.arange(len(all_benchmarks)) + ind = np.arange(len(benchmarks)) conf_count = len(configurations) + 1 base = [] ref = [] - for bench in all_benchmarks: + for bench in benchmarks: try: base.append(np.percentile(config_data(bench, configurations[0]), percentile)) ref.append(1.0) @@ -150,27 +150,27 @@ def bar_chart_relative(plt, configurations, percentile): for i, conf in enumerate(configurations[1:]): res = [] - for bench, base_val in zip(all_benchmarks, base): + for bench, base_val in zip(benchmarks, base): try: res.append(np.percentile(config_data(bench, conf), percentile) / base_val) except IndexError: res.append(0) plt.bar(ind * conf_count + i + 1, res, label=conf) - plt.xticks((ind * conf_count + (conf_count - 1) / 2.0), map(benchmark_short_name, all_benchmarks)) + plt.xticks((ind * conf_count + (conf_count - 1) / 2.0), map(benchmark_short_name, benchmarks)) 
plt.title("Relative test execution times against " + configurations[0] + " at " + str(percentile) + " percentile") plt.legend() return plt -def bar_chart_gc_relative(plt, configurations, percentile): +def bar_chart_gc_relative(plt, configurations, benchmarks, percentile): plt.clf() plt.cla() - ind = np.arange(len(all_benchmarks)) + ind = np.arange(len(benchmarks)) conf_count = len(configurations) + 1 base = [] ref = [] mark_ref = [] - for bench in all_benchmarks: + for bench in benchmarks: try: mark, _, total = gc_stats(bench, configurations[0]) base.append(np.percentile(total, percentile)) @@ -186,7 +186,7 @@ def bar_chart_gc_relative(plt, configurations, percentile): for i, conf in enumerate(configurations[1:]): res = [] mark_res = [] - for bench, base_val in zip(all_benchmarks, base): + for bench, base_val in zip(benchmarks, base): try: if base_val > 0: mark, _, total = gc_stats(bench, conf) @@ -200,22 +200,22 @@ def bar_chart_gc_relative(plt, configurations, percentile): mark_res.append(0) plt.bar(ind * conf_count + i + 1, res, label=conf + "-sweep") # total (look like sweep) plt.bar(ind * conf_count + i + 1, mark_res, label=conf + "-mark") # mark time - plt.xticks((ind * conf_count + (conf_count - 1) / 2.0), map(benchmark_short_name, all_benchmarks)) + plt.xticks((ind * conf_count + (conf_count - 1) / 2.0), map(benchmark_short_name, benchmarks)) plt.title("Relative gc times against " + configurations[0] + " at " + str(percentile) + " percentile") plt.legend() return plt -def bar_chart_gc_absolute(plt, configurations, percentile): +def bar_chart_gc_absolute(plt, configurations, benchmarks, percentile): plt.clf() plt.cla() - ind = np.arange(len(all_benchmarks)) + ind = np.arange(len(benchmarks)) conf_count = len(configurations) + 1 for i, conf in enumerate(configurations): res = [] mark_res = [] - for bench in all_benchmarks: + for bench in benchmarks: try: mark, _, total = gc_stats(bench, conf) res.append(np.percentile(total, percentile)) @@ -224,7 +224,7 @@ def bar_chart_gc_absolute(plt, configurations, percentile): res.append(0) plt.bar(ind * conf_count + i + 1, res, label=conf + "-sweep") # total (look like sweep) plt.bar(ind * conf_count + i + 1, mark_res, label=conf + "-mark") # mark time - plt.xticks((ind * conf_count + (conf_count - 1) / 2.0), map(benchmark_short_name, all_benchmarks)) + plt.xticks((ind * conf_count + (conf_count - 1) / 2.0), map(benchmark_short_name, benchmarks)) plt.title("Garbage collector pause times at " + str(percentile) + " percentile") plt.legend() return plt @@ -334,16 +334,16 @@ def gc_pause_time_chart(plt, configurations, bench, limit=100): return plt -def print_table(configurations, data): +def print_table(configurations, benchmarks, data): leading = ['name'] for conf in configurations: leading.append(conf) print ','.join(leading) - for bench, res in zip(all_benchmarks, data): + for bench, res in zip(benchmarks, data): print ','.join([bench] + list(map(str, res))) -def write_md_table(file, configurations, data): +def write_md_table(file, configurations, benchmarks, data): header = ['name'] header.append(configurations[0]) for conf in configurations[1:]: @@ -360,7 +360,7 @@ def write_md_table(file, configurations, data): gmul = np.ones(len(configurations) - 1) gcount = np.zeros(len(configurations) - 1) - for bench, res0 in zip(all_benchmarks, data): + for bench, res0 in zip(benchmarks, data): base = res0[0] res = [("%.4f" % base)] + sum(map(lambda x: cell(x, base), res0[1:]), []) file.write('|') @@ -383,7 +383,7 @@ def write_md_table(file, 
configurations, data): file.write("|\n") -def write_md_table_gc(file, configurations, mark_data, sweep_data, total_data): +def write_md_table_gc(file, configurations, benchmarks, mark_data, sweep_data, total_data): header = ['name', ""] header.append(configurations[0]) for conf in configurations[1:]: @@ -398,7 +398,7 @@ def write_md_table_gc(file, configurations, mark_data, sweep_data, total_data): file.write(' -- |') file.write('\n') - for bench, mark_res0, sweep_res0, total_res0 in zip(all_benchmarks, mark_data, sweep_data, total_data): + for bench, mark_res0, sweep_res0, total_res0 in zip(benchmarks, mark_data, sweep_data, total_data): for name, res0 in zip(["mark", "sweep", "total"], [mark_res0, sweep_res0, total_res0]): base = res0[0] res = [("%.4f" % base)] + sum(map(lambda x: cell(x, base), res0[1:]), []) @@ -436,22 +436,22 @@ def chart_md(md_file, plt, rootdir, name): md_file.write("![Chart]({})\n\n".format(name)) -def write_md_file(rootdir, md_file, parent_configurations, configurations, gc_charts=True): +def write_md_file(rootdir, md_file, parent_configurations, configurations, benchmarks, gc_charts=True): md_file.write("# Summary\n") for p in [50, 90, 99]: md_file.write("## Benchmark run time (ms) at {} percentile \n".format(p)) - chart_md(md_file, bar_chart_relative(plt, configurations, p), rootdir, "relative_percentile_" + str(p) + ".png") - write_md_table(md_file, configurations, percentile(configurations, p)) + chart_md(md_file, bar_chart_relative(plt, configurations, benchmarks, p), rootdir, "relative_percentile_" + str(p) + ".png") + write_md_table(md_file, configurations, benchmarks, percentile(configurations, benchmarks, p)) if gc_charts: md_file.write("## GC time (ms) at {} percentile \n".format(p)) - chart_md(md_file, bar_chart_gc_relative(plt, configurations, p), rootdir, + chart_md(md_file, bar_chart_gc_relative(plt, configurations, benchmarks, p), rootdir, "relative_gc_percentile_" + str(p) + ".png") - mark, sweep, total = percentile_gc(configurations, p) - write_md_table_gc(md_file, configurations, mark, sweep, total) + mark, sweep, total = percentile_gc(configurations, benchmarks, p) + write_md_table_gc(md_file, configurations, benchmarks, mark, sweep, total) md_file.write("# Individual benchmarks\n") - for bench in all_benchmarks: + for bench in benchmarks: if not any_run_exists(bench, configurations, 0): continue @@ -502,6 +502,7 @@ def any_run_exists(bench, configurations, run): parser = argparse.ArgumentParser() parser.add_argument("--comment", help="comment at the suffix of the report name") parser.add_argument("--gc", help="enable charts about garbage collector", action="store_true") + parser.add_argument("--benchmark", help="benchmarks use in comparision", action='append') parser.add_argument("comparisons", nargs='*', choices=results + ["all"], default="all") args = parser.parse_args() @@ -517,6 +518,12 @@ def any_run_exists(bench, configurations, run): if args.comment is not None: comment = args.comment + if args.benchmark != None: + benchmarks = [] + for b in args.benchmark: + benchmarks += filter(lambda s: s.startswith(b), all_benchmarks) + else: + benchmarks = all_benchmarks parent_configurations = [] for conf in configurations: @@ -531,6 +538,6 @@ def any_run_exists(bench, configurations, run): plt.rcParams["figure.figsize"] = [16.0, 12.0] mkdir(report_dir) with open(os.path.join(report_dir, "Readme.md"), 'w+') as md_file: - write_md_file(report_dir, md_file, parent_configurations, configurations, args.gc) + write_md_file(report_dir, md_file, 
parent_configurations, configurations, benchmarks, args.gc)

    print report_dir

From c2c4d0d01492caea0b08b60f6d932ec980a5a3ff Mon Sep 17 00:00:00 2001
From: Valdis
Date: Sun, 28 Oct 2018 12:49:50 +0100
Subject: [PATCH 083/169] discover all existing benchmarks

---
 scripts/summary.py | 36 +++++++++++++++++++++++++++---------
 1 file changed, 27 insertions(+), 9 deletions(-)

diff --git a/scripts/summary.py b/scripts/summary.py
index 6740c3d..189ff9c 100755
--- a/scripts/summary.py
+++ b/scripts/summary.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python2
-from run import all_benchmarks, mkdir, expand_wild_cards, generate_choices
+from run import mkdir, expand_wild_cards, generate_choices

 import numpy as np
 import time
@@ -487,6 +487,22 @@ def any_run_exists(bench, configurations, run):
     return exits


+def discover_benchmarks(configurations):
+    benchmarks = []
+    for conf in configurations:
+        parent_folders = next(os.walk(os.path.join("results", conf)))[1]
+        for pf in parent_folders:
+            if pf.startswith("size_"):
+                for child in next(os.walk(os.path.join("results", conf, pf)))[1]:
+                    if child not in benchmarks:
+                        benchmarks += [child]
+            else:
+                if pf not in benchmarks:
+                    benchmarks += [pf]
+
+    return benchmarks
+
+
 if __name__ == '__main__':
     all_configs = next(os.walk("results"))[1]
     # added size_
@@ -502,7 +518,7 @@ def any_run_exists(bench, configurations, run):
     parser = argparse.ArgumentParser()
     parser.add_argument("--comment", help="comment at the suffix of the report name")
     parser.add_argument("--gc", help="enable charts about garbage collector", action="store_true")
-    parser.add_argument("--benchmark", help="benchmarks use in comparision", action='append')
+    parser.add_argument("--benchmark", help="benchmarks to use in comparison", action='append')
     parser.add_argument("comparisons", nargs='*', choices=results + ["all"], default="all")

     args = parser.parse_args()
@@ -518,13 +534,6 @@ def any_run_exists(bench, configurations, run):
     if args.comment is not None:
         comment = args.comment

-    if args.benchmark != None:
-        benchmarks = []
-        for b in args.benchmark:
-            benchmarks += filter(lambda s: s.startswith(b), all_benchmarks)
-    else:
-        benchmarks = all_benchmarks
-
     parent_configurations = []
     for conf in configurations:
         if os.sep in conf:
@@ -534,6 +543,15 @@ def any_run_exists(bench, configurations, run):
         if parent not in parent_configurations:
             parent_configurations += [parent]

+    all_benchmarks = discover_benchmarks(parent_configurations)
+
+    if args.benchmark != None:
+        benchmarks = []
+        for b in args.benchmark:
+            benchmarks += filter(lambda s: s.startswith(b), all_benchmarks)
+    else:
+        benchmarks = all_benchmarks
+
     report_dir = "reports/summary_" + time.strftime('%Y%m%d_%H%M%S') + "_" + comment + "/"
     plt.rcParams["figure.figsize"] = [16.0, 12.0]
     mkdir(report_dir)

From 939e1a16d88666296fbd63074fd7b669087b8259 Mon Sep 17 00:00:00 2001
From: Valdis
Date: Sun, 28 Oct 2018 13:12:52 +0100
Subject: [PATCH 084/169] do compile

---
 scripts/run.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/scripts/run.py b/scripts/run.py
index b3a23df..b9fe32c 100755
--- a/scripts/run.py
+++ b/scripts/run.py
@@ -334,11 +334,11 @@ def single_run(to_run):
                 continue

         root_dir = os.path.join('results', conf + "." + sha1 + "."
+ suffix) - # if sha1 != None: - # success = compile_scala_native(ref, sha1) - # if not success: - # compile_fail += [conf] - # continue + if sha1 != None: + success = compile_scala_native(ref, sha1) + if not success: + compile_fail += [conf] + continue if generalized_dir != root_dir: try: From 88a18f6af6b6280394eb7562d4a493af8dd41a41 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sun, 28 Oct 2018 14:08:43 +0100 Subject: [PATCH 085/169] gc pause time charts --- scripts/summary.py | 131 ++++++++++++++++++++++++++++++++------------- 1 file changed, 93 insertions(+), 38 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index 189ff9c..8bf62b9 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -33,6 +33,7 @@ def config_data(bench, conf): def gc_stats(bench, conf): + print bench, conf benchmark_dir = os.path.join("results", conf, bench) files = next(os.walk(benchmark_dir), [[], [], []])[2] runs = [] @@ -91,30 +92,56 @@ def gc_stats(bench, conf): return np.array(mark_times), np.array(sweep_times), np.array(gc_times) +def gc_stats_total(bench, conf): + _, _, total = gc_stats(bench, conf) + return total + + + def percentile_gc(configurations, benchmarks, percentile): out_mark = [] out_sweep = [] out_total = [] for bench in benchmarks: - res_mark = [] - res_sweep = [] - res_total = [] - for conf in configurations: - try: - mark, sweep, total = gc_stats(bench, conf) - res_mark.append(np.percentile(mark, percentile)) - res_sweep.append(np.percentile(sweep, percentile)) - res_total.append(np.percentile(total, percentile)) - except IndexError: - res_mark.append(0) - res_sweep.append(0) - res_total.append(0) - out_mark.append(res_mark) - out_sweep.append(res_sweep) - out_total.append(res_total) + res_mark, res_sweep, res_total = percentile_gc_bench(configurations, bench, percentile) + out_mark += [res_mark] + out_sweep += [res_sweep] + out_total += [res_total] return out_mark, out_sweep, out_total +def percentile_gc_bench(configurations, bench, p): + res_mark = [] + res_sweep = [] + res_total = [] + for conf in configurations: + try: + mark, sweep, total = gc_stats(bench, conf) + res_mark += [np.percentile(mark, p)] + res_sweep += [np.percentile(sweep, p)] + res_total += [np.percentile(total, p)] + except IndexError: + res_mark += [0] + res_sweep += [0] + res_total += [0] + return res_mark, res_sweep, res_total + + +def percentile_gc_bench_mark(configurations, bench, p): + mark, _, _ = percentile_gc_bench(configurations, bench, p) + return mark + + +def percentile_gc_bench_sweep(configurations, bench, p): + _, sweep, _ = percentile_gc_bench(configurations, bench, p) + return sweep + + +def percentile_gc_bench_total(configurations, bench, p): + _, _, total = percentile_gc_bench(configurations, bench, p) + return total + + def percentile(configurations, benchmarks, p): out = [] for bench in benchmarks: @@ -278,7 +305,7 @@ def sizes_per_conf(parent_configuration): return min_sizes, max_sizes, child_confs -def size_compare_chart(plt, parent_configurations, bench, p): +def size_compare_chart_generic(plt, parent_configurations, bench, get_percentile, p): plt.clf() plt.cla() for parent_conf in parent_configurations: @@ -290,50 +317,70 @@ def size_compare_chart(plt, parent_configurations, bench, p): equal_sizes += [min_size] equal_confs += [child_conf] - percentiles = percentile_bench(equal_confs, bench, p) + percentiles = get_percentile(equal_confs, bench, p) plt.plot(np.array(equal_sizes), percentiles, label=parent_conf) plt.legend() - plt.title("{} at {} percentile".format(bench, p)) + 
plt.xlim(xmin=0) plt.ylim(ymin=0) plt.xlabel("Heap Size (GB)") + + return plt + + +def size_compare_chart(plt, parent_configurations, bench, p): + plt = size_compare_chart_generic(plt, parent_configurations, bench, percentile_bench, p) + plt.title("{} at {} percentile".format(bench, p)) plt.ylabel("Run time (ms)") + return plt + + +def size_compare_chart_gc(plt, parent_configurations, bench, p): + plt = size_compare_chart_generic(plt, parent_configurations, bench, percentile_gc_bench_total, p) + plt.title("{}: GC pause time at {} percentile".format(bench, p)) + plt.ylabel("GC pause time (ms)") return plt -def percentiles_chart(plt, configurations, bench, limit=99): +def size_compare_chart_gc_mark(plt, parent_configurations, bench, p): + plt = size_compare_chart_generic(plt, parent_configurations, bench, percentile_gc_bench_mark, p) + plt.title("{}: GC mark time at {} percentile".format(bench, p)) + plt.ylabel("GC mark time (ms)") + return plt + +def size_compare_chart_gc_sweep(plt, parent_configurations, bench, p): + plt = size_compare_chart_generic(plt, parent_configurations, bench, percentile_gc_bench_sweep, p) + plt.title("{}: GC sweep time at {} percentile".format(bench, p)) + plt.ylabel("GC sweep time (ms)") + return plt + + +def percentiles_chart_generic(plt, configurations, bench, get_data, limit): plt.clf() plt.cla() for conf in configurations: - data = config_data(bench, conf) + data = get_data(bench, conf) if data.size > 0: percentiles = np.arange(0, limit) percvalue = np.array([np.percentile(data, perc) for perc in percentiles]) plt.plot(percentiles, percvalue, label=conf) plt.legend() - plt.title(bench) plt.ylim(ymin=0) plt.xlabel("Percentile") - plt.ylabel("Run time (ms)") return plt +def percentiles_chart(plt, configurations, bench, limit=99): + plt = percentiles_chart_generic(plt, configurations, bench, config_data, limit) + plt.title(bench) + plt.ylabel("Run time (ms)") + return plt + def gc_pause_time_chart(plt, configurations, bench, limit=100): - plt.clf() - plt.cla() - for conf in configurations: - _, _, pauses = gc_stats(bench, conf) - if pauses.size > 0: - percentiles = np.arange(0, limit) - percvalue = np.array([np.percentile(pauses, perc) for perc in percentiles]) - plt.plot(percentiles, percvalue, label=conf) - plt.legend() + plt = percentiles_chart_generic(plt, configurations, bench, gc_stats_total, limit) plt.title(bench + ": Garbage Collector Pause Times") - plt.ylim(ymin=0) - plt.xlabel("Percentile") plt.ylabel("GC pause time (ms)") return plt - def print_table(configurations, benchmarks, data): leading = ['name'] for conf in configurations: @@ -437,8 +484,9 @@ def chart_md(md_file, plt, rootdir, name): def write_md_file(rootdir, md_file, parent_configurations, configurations, benchmarks, gc_charts=True): + interesting_percentiles = [50, 90, 99] md_file.write("# Summary\n") - for p in [50, 90, 99]: + for p in interesting_percentiles: md_file.write("## Benchmark run time (ms) at {} percentile \n".format(p)) chart_md(md_file, bar_chart_relative(plt, configurations, benchmarks, p), rootdir, "relative_percentile_" + str(p) + ".png") write_md_table(md_file, configurations, benchmarks, percentile(configurations, benchmarks, p)) @@ -463,8 +511,15 @@ def write_md_file(rootdir, md_file, parent_configurations, configurations, bench if gc_charts: chart_md(md_file, gc_pause_time_chart(plt, configurations, bench), rootdir, "gc_pause_times_" + bench + ".png") - - for p in [50, 90, 99]: + for p in interesting_percentiles: + chart_md(md_file, size_compare_chart_gc(plt, 
parent_configurations, bench, p), rootdir, + "gc_size_chart" + bench + "percentile_" + str(p) + ".png") + chart_md(md_file, size_compare_chart_gc_mark(plt, parent_configurations, bench, p), rootdir, + "gc_size_chart_mark" + bench + "percentile_" + str(p) + ".png") + chart_md(md_file, size_compare_chart_gc_sweep(plt, parent_configurations, bench, p), rootdir, + "gc_size_chart_sweep" + bench + "percentile_" + str(p) + ".png") + + for p in interesting_percentiles: chart_md(md_file, size_compare_chart(plt, parent_configurations, bench, p), rootdir, "size_chart_" + bench + "percentile_" + str(p) + ".png") From eeb7bc4663385b0eeb2883b46b6f1cb157be6ddc Mon Sep 17 00:00:00 2001 From: Valdis Date: Sun, 28 Oct 2018 14:16:58 +0100 Subject: [PATCH 086/169] combined size chart for gc --- scripts/summary.py | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index 8bf62b9..f2ff1f9 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -33,7 +33,6 @@ def config_data(bench, conf): def gc_stats(bench, conf): - print bench, conf benchmark_dir = os.path.join("results", conf, bench) files = next(os.walk(benchmark_dir), [[], [], []])[2] runs = [] @@ -327,6 +326,31 @@ def size_compare_chart_generic(plt, parent_configurations, bench, get_percentile return plt +def size_compare_chart_gc_combined(plt, parent_configurations, bench, p): + plt.clf() + plt.cla() + for parent_conf in parent_configurations: + min_sizes, max_sizes, child_confs = sizes_per_conf(parent_conf) + equal_sizes = [] + equal_confs = [] + for min_size, max_size, child_conf in zip(min_sizes, max_sizes, child_confs): + if min_size == max_size: + equal_sizes += [min_size] + equal_confs += [child_conf] + + mark, _, total = percentile_gc_bench(equal_confs, bench, p) + plt.plot(np.array(equal_sizes), total, label=parent_conf + "-sweep") # total (look like sweep) + plt.plot(np.array(equal_sizes), mark, label=parent_conf + "-mark") # mark time + plt.legend() + plt.xlim(xmin=0) + plt.ylim(ymin=0) + plt.xlabel("Heap Size (GB)") + plt.title("{}: GC times at {} percentile".format(bench, p)) + plt.ylabel("Time (ms)") + + return plt + + def size_compare_chart(plt, parent_configurations, bench, p): plt = size_compare_chart_generic(plt, parent_configurations, bench, percentile_bench, p) plt.title("{} at {} percentile".format(bench, p)) @@ -512,12 +536,8 @@ def write_md_file(rootdir, md_file, parent_configurations, configurations, bench chart_md(md_file, gc_pause_time_chart(plt, configurations, bench), rootdir, "gc_pause_times_" + bench + ".png") for p in interesting_percentiles: - chart_md(md_file, size_compare_chart_gc(plt, parent_configurations, bench, p), rootdir, + chart_md(md_file, size_compare_chart_gc_combined(plt, parent_configurations, bench, p), rootdir, "gc_size_chart" + bench + "percentile_" + str(p) + ".png") - chart_md(md_file, size_compare_chart_gc_mark(plt, parent_configurations, bench, p), rootdir, - "gc_size_chart_mark" + bench + "percentile_" + str(p) + ".png") - chart_md(md_file, size_compare_chart_gc_sweep(plt, parent_configurations, bench, p), rootdir, - "gc_size_chart_sweep" + bench + "percentile_" + str(p) + ".png") for p in interesting_percentiles: chart_md(md_file, size_compare_chart(plt, parent_configurations, bench, p), rootdir, From f6e4cd5d9b8b08a9c5b00d38e3f074343b2d0017 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sun, 28 Oct 2018 14:43:45 +0100 Subject: [PATCH 087/169] geometrical means for gc --- scripts/summary.py | 51 
++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 45 insertions(+), 6 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index f2ff1f9..bce637d 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -446,11 +446,14 @@ def write_md_table(file, configurations, benchmarks, data): file.write('| __Geometrical mean:__|') for gm, count in zip(gmul, gcount): file.write('| |') - gmean = float(gm) ** (1.0 / count) - percent_diff = (gmean - 1) * 100 - precent_diff_cell = ("+" if percent_diff > 0 else "__") + ("%.2f" % percent_diff) + "%" + ( - "" if percent_diff > 0 else "__") - file.write(precent_diff_cell) + if count > 0: + gmean = float(gm) ** (1.0 / count) + percent_diff = (gmean - 1) * 100 + precent_diff_cell = ("+" if percent_diff > 0 else "__") + ("%.2f" % percent_diff) + "%" + ( + "" if percent_diff > 0 else "__") + file.write(precent_diff_cell) + else: + file.write(" ") file.write("|\n") @@ -469,8 +472,16 @@ def write_md_table_gc(file, configurations, benchmarks, mark_data, sweep_data, t file.write(' -- |') file.write('\n') + mark_gmul = np.ones(len(configurations) - 1) + mark_gcount = np.zeros(len(configurations) - 1) + sweep_gmul = np.ones(len(configurations) - 1) + sweep_gcount = np.zeros(len(configurations) - 1) + total_gmul = np.ones(len(configurations) - 1) + total_gcount = np.zeros(len(configurations) - 1) for bench, mark_res0, sweep_res0, total_res0 in zip(benchmarks, mark_data, sweep_data, total_data): - for name, res0 in zip(["mark", "sweep", "total"], [mark_res0, sweep_res0, total_res0]): + for name, res0, gmul, gcount in zip(["mark", "sweep", "total"], [mark_res0, sweep_res0, total_res0], + [mark_gmul, sweep_gmul, total_gmul], + [mark_gcount, sweep_gcount, total_gcount]): base = res0[0] res = [("%.4f" % base)] + sum(map(lambda x: cell(x, base), res0[1:]), []) @@ -483,6 +494,34 @@ def write_md_table_gc(file, configurations, benchmarks, mark_data, sweep_data, t file.write('|'.join(link + list([name]) + list(res))) file.write('|\n') + for i, d0 in enumerate(res0[1:]): + if d0 != 0 and base != 0: + gmul[i] *= (float(d0) / base) + gcount[i] += 1 + + for name, gmul, gcount in zip(["mark", "sweep", "total"], + [mark_gmul, sweep_gmul, total_gmul], + [mark_gcount, sweep_gcount, total_gcount]): + if name == "mark": + link = "__Geometrical mean:__" + else: + link = "" + + file.write('|' + link + '|' + name + '|') + for gm, count in zip(gmul, gcount): + file.write('| |') + if count > 0: + gmean = float(gm) ** (1.0 / count) + percent_diff = (gmean - 1) * 100 + precent_diff_cell = ("+" if percent_diff > 0 else "__") + ("%.2f" % percent_diff) + "%" + ( + "" if percent_diff > 0 else "__") + file.write(precent_diff_cell) + else: + file.write(" ") + file.write("|\n") + + + def cell(x, base): if base > 0: From b3b719448ca1020793475bdd2f66d2bdd7f1f250 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sun, 28 Oct 2018 14:55:11 +0100 Subject: [PATCH 088/169] option to enable charts that are based on heapsize --- scripts/summary.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index bce637d..15855bd 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -546,7 +546,7 @@ def chart_md(md_file, plt, rootdir, name): md_file.write("![Chart]({})\n\n".format(name)) -def write_md_file(rootdir, md_file, parent_configurations, configurations, benchmarks, gc_charts=True): +def write_md_file(rootdir, md_file, parent_configurations, configurations, benchmarks, gc_charts=False, size_charts = False): 
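A standalone sketch of the geometric-mean summary that patch 087 adds to these tables, with an assumed per-benchmark ratios layout; zero entries stand for benchmarks without data and are skipped, which is the case the count > 0 guard above protects:

def geometric_mean_percent_diff(ratios):
    # ratios: per-benchmark (configuration / baseline) values; 0.0 means no data
    valid = [r for r in ratios if r > 0]
    if not valid:
        return None  # corresponds to the blank cell written when count == 0
    product = 1.0
    for r in valid:
        product *= r
    gmean = product ** (1.0 / len(valid))
    return (gmean - 1) * 100  # positive means slower than the baseline

# geometric_mean_percent_diff([1.1, 0.9, 0.0, 1.2]) -> ~5.9, i.e. about 5.9%
# slower than the baseline on average across the benchmarks that have data.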
interesting_percentiles = [50, 90, 99] md_file.write("# Summary\n") for p in interesting_percentiles: @@ -574,13 +574,15 @@ def write_md_file(rootdir, md_file, parent_configurations, configurations, bench if gc_charts: chart_md(md_file, gc_pause_time_chart(plt, configurations, bench), rootdir, "gc_pause_times_" + bench + ".png") - for p in interesting_percentiles: - chart_md(md_file, size_compare_chart_gc_combined(plt, parent_configurations, bench, p), rootdir, - "gc_size_chart" + bench + "percentile_" + str(p) + ".png") + if size_charts: + for p in interesting_percentiles: + chart_md(md_file, size_compare_chart_gc_combined(plt, parent_configurations, bench, p), rootdir, + "gc_size_chart" + bench + "percentile_" + str(p) + ".png") - for p in interesting_percentiles: - chart_md(md_file, size_compare_chart(plt, parent_configurations, bench, p), rootdir, - "size_chart_" + bench + "percentile_" + str(p) + ".png") + if size_charts: + for p in interesting_percentiles: + chart_md(md_file, size_compare_chart(plt, parent_configurations, bench, p), rootdir, + "size_chart_" + bench + "percentile_" + str(p) + ".png") run = 3 while run >= 0 and not any_run_exists(bench, configurations, run): @@ -632,6 +634,7 @@ def discover_benchmarks(configurations): parser = argparse.ArgumentParser() parser.add_argument("--comment", help="comment at the suffix of the report name") parser.add_argument("--gc", help="enable charts about garbage collector", action="store_true") + parser.add_argument("--vssize", help="enable charts against heap size", action="store_true") parser.add_argument("--benchmark", help="benchmarks to use in comparision", action='append') parser.add_argument("comparisons", nargs='*', choices=results + ["all"], default="all") @@ -670,6 +673,6 @@ def discover_benchmarks(configurations): plt.rcParams["figure.figsize"] = [16.0, 12.0] mkdir(report_dir) with open(os.path.join(report_dir, "Readme.md"), 'w+') as md_file: - write_md_file(report_dir, md_file, parent_configurations, configurations, benchmarks, args.gc) + write_md_file(report_dir, md_file, parent_configurations, configurations, benchmarks, args.gc, args.vssize) print report_dir From d378776a1c16932d0122614b986c82a7b01066ec Mon Sep 17 00:00:00 2001 From: Valdis Date: Tue, 30 Oct 2018 08:23:32 +0100 Subject: [PATCH 089/169] sort by size when plotting --- scripts/summary.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scripts/summary.py b/scripts/summary.py index 15855bd..1f7dbdf 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -316,6 +316,8 @@ def size_compare_chart_generic(plt, parent_configurations, bench, get_percentile equal_sizes += [min_size] equal_confs += [child_conf] + # sorts all by size in GB + equal_sizes, equal_confs = zip(*[(x,y) for x,y in sorted(zip(equal_sizes,equal_confs))]) percentiles = get_percentile(equal_confs, bench, p) plt.plot(np.array(equal_sizes), percentiles, label=parent_conf) plt.legend() @@ -338,6 +340,9 @@ def size_compare_chart_gc_combined(plt, parent_configurations, bench, p): equal_sizes += [min_size] equal_confs += [child_conf] + # sorts all by size in GB + equal_sizes, equal_confs = zip(*[(x,y) for x,y in sorted(zip(equal_sizes,equal_confs))]) + mark, _, total = percentile_gc_bench(equal_confs, bench, p) plt.plot(np.array(equal_sizes), total, label=parent_conf + "-sweep") # total (look like sweep) plt.plot(np.array(equal_sizes), mark, label=parent_conf + "-mark") # mark time From 0c2e24247e4835ecbd05a7bfbad0b507502ff542 Mon Sep 17 00:00:00 2001 From: Valdis Date: Tue, 30 Oct 2018 
17:22:19 +0100 Subject: [PATCH 090/169] make root dir --- scripts/run.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/run.py b/scripts/run.py index b9fe32c..7314bc6 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -334,6 +334,8 @@ def single_run(to_run): continue root_dir = os.path.join('results', conf + "." + sha1 + "." + suffix) + mkdir(root_dir) + if sha1 != None: success = compile_scala_native(ref, sha1) if not success: From 9bb344989168a8acec2c28c1fdc65261477b6b11 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sat, 3 Nov 2018 17:34:07 +0100 Subject: [PATCH 091/169] refactored bar_chart --- scripts/summary.py | 57 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 47 insertions(+), 10 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index 1f7dbdf..082c427 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -106,9 +106,22 @@ def percentile_gc(configurations, benchmarks, percentile): out_mark += [res_mark] out_sweep += [res_sweep] out_total += [res_total] + + return out_mark, out_sweep, out_total + +def total_gc(configurations, benchmarks): + out_mark = [] + out_sweep = [] + out_total = [] + for bench in benchmarks: + res_mark, res_sweep, res_total = total_gc_bench(configurations, bench) + out_mark += [res_mark] + out_sweep += [res_sweep] + out_total += [res_total] return out_mark, out_sweep, out_total + def percentile_gc_bench(configurations, bench, p): res_mark = [] res_sweep = [] @@ -126,6 +139,23 @@ def percentile_gc_bench(configurations, bench, p): return res_mark, res_sweep, res_total +def total_gc_bench(configurations, bench): + res_mark = [] + res_sweep = [] + res_total = [] + for conf in configurations: + try: + mark, sweep, total = gc_stats(bench, conf) + res_mark += [np.sum(mark)] + res_sweep += [np.sum(sweep)] + res_total += [np.sum(total)] + except IndexError: + res_mark += [0] + res_sweep += [0] + res_total += [0] + return res_mark, res_sweep, res_total + + def percentile_gc_bench_mark(configurations, bench, p): mark, _, _ = percentile_gc_bench(configurations, bench, p) return mark @@ -158,16 +188,16 @@ def percentile_bench(configurations, bench, p): return res -def bar_chart_relative(plt, configurations, benchmarks, percentile): +def bar_chart_relative(plt, configurations, benchmarks, percentile, data): plt.clf() plt.cla() ind = np.arange(len(benchmarks)) conf_count = len(configurations) + 1 base = [] ref = [] - for bench in benchmarks: + for bench_data in data: try: - base.append(np.percentile(config_data(bench, configurations[0]), percentile)) + base.append(bench_data[0]) ref.append(1.0) except IndexError: base.append(0) @@ -175,13 +205,14 @@ def bar_chart_relative(plt, configurations, benchmarks, percentile): plt.bar(ind * conf_count, ref, label=configurations[0]) for i, conf in enumerate(configurations[1:]): + conf_idx = i + 1 res = [] - for bench, base_val in zip(benchmarks, base): + for bench_idx, (bench, base_val) in enumerate(zip(benchmarks, base)): try: - res.append(np.percentile(config_data(bench, conf), percentile) / base_val) + res.append(data[bench_idx][conf_idx] / base_val) except IndexError: res.append(0) - plt.bar(ind * conf_count + i + 1, res, label=conf) + plt.bar(ind * conf_count + conf_idx, res, label=conf) plt.xticks((ind * conf_count + (conf_count - 1) / 2.0), map(benchmark_short_name, benchmarks)) plt.title("Relative test execution times against " + configurations[0] + " at " + str(percentile) + " percentile") plt.legend() @@ -556,11 +587,17 @@ def write_md_file(rootdir, md_file, 
parent_configurations, configurations, bench md_file.write("# Summary\n") for p in interesting_percentiles: md_file.write("## Benchmark run time (ms) at {} percentile \n".format(p)) - chart_md(md_file, bar_chart_relative(plt, configurations, benchmarks, p), rootdir, "relative_percentile_" + str(p) + ".png") - write_md_table(md_file, configurations, benchmarks, percentile(configurations, benchmarks, p)) + data = percentile(configurations, benchmarks, p) + chart_md(md_file, bar_chart_relative(plt, configurations, benchmarks, p, data), rootdir, "relative_percentile_" + str(p) + ".png") + write_md_table(md_file, configurations, benchmarks, data) - if gc_charts: - md_file.write("## GC time (ms) at {} percentile \n".format(p)) + if gc_charts: + md_file.write("## Total GC time (ms) \n") + mark, sweep, total = total_gc(configurations, benchmarks) + write_md_table_gc(md_file, configurations, benchmarks, mark, sweep, total) + + for p in interesting_percentiles: + md_file.write("## GC pause time (ms) at {} percentile \n".format(p)) chart_md(md_file, bar_chart_gc_relative(plt, configurations, benchmarks, p), rootdir, "relative_gc_percentile_" + str(p) + ".png") mark, sweep, total = percentile_gc(configurations, benchmarks, p) From bf1a45f7245a268393cedaf2b534c7053eddedc0 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sat, 3 Nov 2018 18:14:03 +0100 Subject: [PATCH 092/169] total and pause time charts --- scripts/summary.py | 73 ++++++++++++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 28 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index 082c427..00242f4 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -188,16 +188,16 @@ def percentile_bench(configurations, bench, p): return res -def bar_chart_relative(plt, configurations, benchmarks, percentile, data): +def bar_chart_relative(plt, configurations, benchmarks, data): plt.clf() plt.cla() ind = np.arange(len(benchmarks)) conf_count = len(configurations) + 1 base = [] ref = [] - for bench_data in data: + for bench_idx, bench in enumerate(benchmarks): try: - base.append(bench_data[0]) + base.append(data[bench_idx][0]) ref.append(1.0) except IndexError: base.append(0) @@ -209,17 +209,31 @@ def bar_chart_relative(plt, configurations, benchmarks, percentile, data): res = [] for bench_idx, (bench, base_val) in enumerate(zip(benchmarks, base)): try: - res.append(data[bench_idx][conf_idx] / base_val) + if(base_val > 0): + res.append(data[bench_idx][conf_idx] / base_val) + else: + res.append(0.0) except IndexError: res.append(0) plt.bar(ind * conf_count + conf_idx, res, label=conf) plt.xticks((ind * conf_count + (conf_count - 1) / 2.0), map(benchmark_short_name, benchmarks)) - plt.title("Relative test execution times against " + configurations[0] + " at " + str(percentile) + " percentile") plt.legend() return plt -def bar_chart_gc_relative(plt, configurations, benchmarks, percentile): +def relative_execution_times(plt, configurations, benchmarks, data): + plt = bar_chart_relative(plt, configurations, benchmarks, data) + plt.title("Relative test execution times against " + configurations[0] + " at " + str(percentile) + " percentile") + return plt + + +def relative_gc_pauses(plt, configurations, benchmarks, data): + plt = bar_chart_relative(plt, configurations, benchmarks, data) + plt.title("Relative GC pauses against " + configurations[0] + " at " + str(percentile) + " percentile") + return plt + + +def bar_chart_gc_relative(plt, configurations, benchmarks, mark_data, total_data): plt.clf() plt.cla() ind = 
np.arange(len(benchmarks)) @@ -227,13 +241,14 @@ def bar_chart_gc_relative(plt, configurations, benchmarks, percentile): base = [] ref = [] mark_ref = [] - for bench in benchmarks: - try: - mark, _, total = gc_stats(bench, configurations[0]) - base.append(np.percentile(total, percentile)) + for bench_idx, bench in enumerate(benchmarks): + mark = mark_data[bench_idx][0] + total = total_data[bench_idx][0] + if total > 0: + base.append(total) ref.append(1.0) - mark_ref.append(np.percentile(mark / total, percentile)) - except IndexError: + mark_ref.append(mark / total) + else: base.append(0) ref.append(0.0) mark_ref.append(0.0) @@ -241,24 +256,23 @@ def bar_chart_gc_relative(plt, configurations, benchmarks, percentile): plt.bar(ind * conf_count, mark_ref, label=configurations[0] + "-mark") # mark time for i, conf in enumerate(configurations[1:]): + conf_idx = i + 1 res = [] mark_res = [] - for bench, base_val in zip(benchmarks, base): - try: - if base_val > 0: - mark, _, total = gc_stats(bench, conf) - res.append(np.percentile(total, percentile) / base_val) - mark_res.append(np.percentile(mark, percentile) / base_val) - else: - res.append(0) - mark_res.append(0) - except IndexError: + for bench_idx, (bench, base_val) in enumerate(zip(benchmarks, base)): + if base_val > 0: + mark, _, total = gc_stats(bench, conf) + mark = mark_data[bench_idx][conf_idx] + total = total_data[bench_idx][conf_idx] + res.append(np.array(total) / base_val) + mark_res.append(np.array(mark) / base_val) + else: res.append(0) mark_res.append(0) plt.bar(ind * conf_count + i + 1, res, label=conf + "-sweep") # total (look like sweep) plt.bar(ind * conf_count + i + 1, mark_res, label=conf + "-mark") # mark time plt.xticks((ind * conf_count + (conf_count - 1) / 2.0), map(benchmark_short_name, benchmarks)) - plt.title("Relative gc times against " + configurations[0] + " at " + str(percentile) + " percentile") + plt.title("Relative gc times against " + configurations[0]) plt.legend() return plt @@ -582,26 +596,29 @@ def chart_md(md_file, plt, rootdir, name): md_file.write("![Chart]({})\n\n".format(name)) -def write_md_file(rootdir, md_file, parent_configurations, configurations, benchmarks, gc_charts=False, size_charts = False): +def write_md_file(rootdir, md_file, parent_configurations, configurations, benchmarks, gc_charts=False, size_charts=False): interesting_percentiles = [50, 90, 99] md_file.write("# Summary\n") for p in interesting_percentiles: md_file.write("## Benchmark run time (ms) at {} percentile \n".format(p)) data = percentile(configurations, benchmarks, p) - chart_md(md_file, bar_chart_relative(plt, configurations, benchmarks, p, data), rootdir, "relative_percentile_" + str(p) + ".png") + chart_md(md_file, relative_execution_times(plt, configurations, benchmarks, data), rootdir, + "relative_percentile_" + str(p) + ".png") write_md_table(md_file, configurations, benchmarks, data) if gc_charts: md_file.write("## Total GC time (ms) \n") mark, sweep, total = total_gc(configurations, benchmarks) + chart_md(md_file, bar_chart_gc_relative(plt, configurations, benchmarks, mark, total), rootdir, + "relative_gc_total.png") write_md_table_gc(md_file, configurations, benchmarks, mark, sweep, total) for p in interesting_percentiles: md_file.write("## GC pause time (ms) at {} percentile \n".format(p)) - chart_md(md_file, bar_chart_gc_relative(plt, configurations, benchmarks, p), rootdir, + _, _, total = percentile_gc(configurations, benchmarks, p) + chart_md(md_file, relative_gc_pauses(plt, configurations, benchmarks, total), 
rootdir,
                     "relative_gc_percentile_" + str(p) + ".png")
-            mark, sweep, total = percentile_gc(configurations, benchmarks, p)
-            write_md_table_gc(md_file, configurations, benchmarks, mark, sweep, total)
+            write_md_table(md_file, configurations, benchmarks, total)

     md_file.write("# Individual benchmarks\n")
     for bench in benchmarks:

From e29635b4a613279dc0b81831842639a93023138d Mon Sep 17 00:00:00 2001
From: Valdis
Date: Sat, 3 Nov 2018 19:17:03 +0100
Subject: [PATCH 093/169] more size charts

---
 scripts/summary.py | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/scripts/summary.py b/scripts/summary.py
index 00242f4..568a8af 100755
--- a/scripts/summary.py
+++ b/scripts/summary.py
@@ -373,7 +373,7 @@ def size_compare_chart_generic(plt, parent_configurations, bench, get_percentile
             equal_sizes += [min_size]
             equal_confs += [child_conf]

-def size_compare_chart_gc_combined(plt, parent_configurations, bench, p):
+def size_compare_chart_gc_combined(plt, parent_configurations, bench):
     plt.clf()
     plt.cla()
     for parent_conf in parent_configurations:
@@ -388,14 +388,14 @@ def size_compare_chart_gc_combined(plt, parent_configurations, bench, p):
         # sorts all by size in GB
         equal_sizes, equal_confs = zip(*[(x,y) for x,y in sorted(zip(equal_sizes,equal_confs))])

-        mark, _, total = percentile_gc_bench(equal_confs, bench, p)
+        mark, _, total = total_gc_bench(equal_confs, bench)
         plt.plot(np.array(equal_sizes), total, label=parent_conf + "-sweep")  # total (look like sweep)
         plt.plot(np.array(equal_sizes), mark, label=parent_conf + "-mark")  # mark time
     plt.legend()
     plt.xlim(xmin=0)
     plt.ylim(ymin=0)
     plt.xlabel("Heap Size (GB)")
-    plt.title("{}: GC times at {} percentile".format(bench, p))
+    plt.title("{}: GC total time".format(bench))
     plt.ylabel("Time (ms)")

     return plt
@@ -417,13 +417,13 @@ def size_compare_chart_gc(plt, parent_configurations, bench, p):

 def size_compare_chart_gc_mark(plt, parent_configurations, bench, p):
     plt = size_compare_chart_generic(plt, parent_configurations, bench, percentile_gc_bench_mark, p)
-    plt.title("{}: GC mark time at {} percentile".format(bench, p))
+    plt.title("{}: GC mark pause time at {} percentile".format(bench, p))
     plt.ylabel("GC mark time (ms)")
     return plt

 def size_compare_chart_gc_sweep(plt, parent_configurations, bench, p):
     plt = size_compare_chart_generic(plt, parent_configurations, bench, percentile_gc_bench_sweep, p)
-    plt.title("{}: GC sweep time at {} percentile".format(bench, p))
+    plt.title("{}: GC sweep pause time at {} percentile".format(bench, p))
     plt.ylabel("GC sweep time (ms)")
     return plt

@@ -635,8 +635,16 @@ def write_md_file(rootdir, md_file, parent_configurations, configurations, bench
                          "gc_pause_times_" + bench + ".png")
             if size_charts:
                 for p in interesting_percentiles:
-                    chart_md(md_file, size_compare_chart_gc_combined(plt, parent_configurations, bench, p), rootdir,
-                             "gc_size_chart" + bench + "percentile_" + str(p) + ".png")
+                    chart_md(md_file, size_compare_chart_gc_mark(plt, parent_configurations, bench, p), rootdir,
+                             "gc_size_chart" + bench + "percentile_" + str(p) + "_mark.png")
+                for p in interesting_percentiles:
+                    chart_md(md_file, size_compare_chart_gc_sweep(plt, parent_configurations, bench, p), rootdir,
+                             "gc_size_chart" + bench + "percentile_" + str(p) + "_sweep.png")
+                for p in interesting_percentiles:
+                    chart_md(md_file, size_compare_chart_gc(plt, parent_configurations, bench, p), rootdir,
+                             "gc_size_chart" + bench + "percentile_" + str(p) + "_total.png")
+                chart_md(md_file, size_compare_chart_gc_combined(plt, parent_configurations, bench),
rootdir, + "gc_size_chart_total" + bench + ".png") if size_charts: for p in interesting_percentiles: From c271137e87f4a3b43097331f0882e976116972e9 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sat, 3 Nov 2018 19:19:33 +0100 Subject: [PATCH 094/169] ignore empty bars --- scripts/summary.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index 568a8af..20c5e7b 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -197,10 +197,15 @@ def bar_chart_relative(plt, configurations, benchmarks, data): ref = [] for bench_idx, bench in enumerate(benchmarks): try: - base.append(data[bench_idx][0]) - ref.append(1.0) + base_val = data[bench_idx][0] + if base_val > 0: + base.append(base_val) + ref.append(1.0) + else: + base.append(0.0) + ref.append(0.0) except IndexError: - base.append(0) + base.append(0.0) ref.append(0.0) plt.bar(ind * conf_count, ref, label=configurations[0]) @@ -209,7 +214,7 @@ def bar_chart_relative(plt, configurations, benchmarks, data): res = [] for bench_idx, (bench, base_val) in enumerate(zip(benchmarks, base)): try: - if(base_val > 0): + if base_val > 0: res.append(data[bench_idx][conf_idx] / base_val) else: res.append(0.0) From 52dbcf5018df4fed03e44434239d2f9f20ca35fe Mon Sep 17 00:00:00 2001 From: Valdis Date: Sat, 3 Nov 2018 20:54:31 +0100 Subject: [PATCH 095/169] fix chart titles --- scripts/summary.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index 20c5e7b..0c5f23a 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -226,15 +226,15 @@ def bar_chart_relative(plt, configurations, benchmarks, data): return plt -def relative_execution_times(plt, configurations, benchmarks, data): +def relative_execution_times(plt, configurations, benchmarks, data, p): plt = bar_chart_relative(plt, configurations, benchmarks, data) - plt.title("Relative test execution times against " + configurations[0] + " at " + str(percentile) + " percentile") + plt.title("Relative test execution times against " + configurations[0] + " at " + str(p) + " percentile") return plt -def relative_gc_pauses(plt, configurations, benchmarks, data): +def relative_gc_pauses(plt, configurations, benchmarks, data, p): plt = bar_chart_relative(plt, configurations, benchmarks, data) - plt.title("Relative GC pauses against " + configurations[0] + " at " + str(percentile) + " percentile") + plt.title("Relative GC pauses against " + configurations[0] + " at " + str(p) + " percentile") return plt @@ -607,7 +607,7 @@ def write_md_file(rootdir, md_file, parent_configurations, configurations, bench for p in interesting_percentiles: md_file.write("## Benchmark run time (ms) at {} percentile \n".format(p)) data = percentile(configurations, benchmarks, p) - chart_md(md_file, relative_execution_times(plt, configurations, benchmarks, data), rootdir, + chart_md(md_file, relative_execution_times(plt, configurations, benchmarks, data, p), rootdir, "relative_percentile_" + str(p) + ".png") write_md_table(md_file, configurations, benchmarks, data) @@ -621,7 +621,7 @@ def write_md_file(rootdir, md_file, parent_configurations, configurations, bench for p in interesting_percentiles: md_file.write("## GC pause time (ms) at {} percentile \n".format(p)) _, _, total = percentile_gc(configurations, benchmarks, p) - chart_md(md_file, relative_gc_pauses(plt, configurations, benchmarks, total), rootdir, + chart_md(md_file, relative_gc_pauses(plt, configurations, benchmarks, total, p), rootdir, 
"relative_gc_percentile_" + str(p) + ".png") write_md_table(md_file, configurations, benchmarks, total) From 1c89529f2ea2a09d35cc0bedb5ee2d8e868abb8f Mon Sep 17 00:00:00 2001 From: Valdis Date: Sat, 3 Nov 2018 21:53:22 +0100 Subject: [PATCH 096/169] parse gc events --- scripts/summary.py | 117 ++++++++++++++++++++++++++++++--------------- 1 file changed, 79 insertions(+), 38 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index 0c5f23a..81bad67 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -48,49 +48,90 @@ def gc_stats(bench, conf): try: file = os.path.join("results", conf, bench, run) with open(file) as data: - # analise header - mark_index = -1 - sweep_index = -1 - mark_to_ms = 0 - sweep_to_ms = 0 - - unit2div = dict(ms=1, us=1000, ns=1000 * 1000) - - header = data.readline().strip() - for i, h in enumerate(header.split(',')): - arr = h.rsplit('_', 1) - if len(arr) != 2: - continue - prefix = arr[0] - unit = arr[1] - - if prefix == "mark_time": - mark_index = i - mark_to_ms = unit2div[unit] - elif prefix == "sweep_time": - sweep_index = i - sweep_to_ms = unit2div[unit] - - if mark_index == -1: - print "Header does not have mark_time_", header, "at", file - if sweep_index == -1: - print "Header does not have sweep_time_", header, "at", file - if mark_index == -1 or sweep_index == -1: - continue - - for line in data.readlines(): - arr = line.split(",") - # in ms - mark_time = float(arr[mark_index]) / mark_to_ms - mark_times.append(mark_time) - sweep_time = float(arr[sweep_index]) / sweep_to_ms - sweep_times.append(sweep_time) - gc_times.append(mark_time + sweep_time) + mark, sweep, total = gc_parse_file(data, file) + mark_times += mark + sweep_times += sweep + gc_times += total except IOError: pass return np.array(mark_times), np.array(sweep_times), np.array(gc_times) +def gc_parse_file(data, file): + header = data.readline().strip() + if header.startswith("event_type,"): + return parse_gc_events(data, file, header) + else: + return parse_gc_tabular(data, file, header) + + +def parse_gc_events(data, file, header): + mark_times = [] + sweep_times = [] + gc_times = [] + event_type_index = 0 + time_ns_index = -1 + ns_to_ms_div = 1000 * 1000 + for i, h in enumerate(header.split(',')): + if h == "time_ns": + time_ns_index = i + if time_ns_index == -1: + print "Header does not have time_ns", header, "at", file + return mark_times, sweep_times, gc_times + + for line in data.readlines(): + arr = line.split(",") + event = arr[event_type_index] + time = float(arr[time_ns_index]) / ns_to_ms_div + if event == "mark": + mark_times += [time] + elif event == "sweep": + sweep_times += [time] + gc_times += [time] + + return mark_times, sweep_times, gc_times + + +def parse_gc_tabular(data, file, header): + mark_times = [] + sweep_times = [] + gc_times = [] + # analise header + mark_index = -1 + sweep_index = -1 + mark_to_ms = 0 + sweep_to_ms = 0 + unit2div = dict(ms=1, us=1000, ns=1000 * 1000) + for i, h in enumerate(header.split(',')): + arr = h.rsplit('_', 1) + if len(arr) != 2: + continue + prefix = arr[0] + unit = arr[1] + + if prefix == "mark_time": + mark_index = i + mark_to_ms = unit2div[unit] + elif prefix == "sweep_time": + sweep_index = i + sweep_to_ms = unit2div[unit] + if mark_index == -1: + print "Header does not have mark_time_", header, "at", file + if sweep_index == -1: + print "Header does not have sweep_time_", header, "at", file + if mark_index == -1 or sweep_index == -1: + return mark_times, sweep_times, gc_times + for line in data.readlines(): + arr = 
line.split(",") + # in ms + mark_time = float(arr[mark_index]) / mark_to_ms + mark_times.append(mark_time) + sweep_time = float(arr[sweep_index]) / sweep_to_ms + sweep_times.append(sweep_time) + gc_times.append(mark_time + sweep_time) + return mark_times, sweep_times, gc_times + + def gc_stats_total(bench, conf): _, _, total = gc_stats(bench, conf) return total From 413dc23f4896599a8d0470073f422bd1f9b3f429 Mon Sep 17 00:00:00 2001 From: Valdis Date: Mon, 5 Nov 2018 09:44:51 +0100 Subject: [PATCH 097/169] use array.append for better performance --- scripts/summary.py | 76 +++++++++++++++++++++++----------------------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index 81bad67..eb3ef40 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -15,7 +15,7 @@ def config_data(bench, conf): for file in files: if "." not in file: # regular benchmark data - runs += [file] + runs.append(file) out = [] for run in runs: @@ -39,7 +39,7 @@ def gc_stats(bench, conf): for file in files: if file.endswith(".gc.csv"): # gc stats data - runs += [file] + runs.append(file) mark_times = [] sweep_times = [] @@ -84,10 +84,10 @@ def parse_gc_events(data, file, header): event = arr[event_type_index] time = float(arr[time_ns_index]) / ns_to_ms_div if event == "mark": - mark_times += [time] + mark_times.append(time) elif event == "sweep": - sweep_times += [time] - gc_times += [time] + sweep_times.append(time) + gc_times.append(time) return mark_times, sweep_times, gc_times @@ -144,9 +144,9 @@ def percentile_gc(configurations, benchmarks, percentile): out_total = [] for bench in benchmarks: res_mark, res_sweep, res_total = percentile_gc_bench(configurations, bench, percentile) - out_mark += [res_mark] - out_sweep += [res_sweep] - out_total += [res_total] + out_mark.append(res_mark) + out_sweep.append(res_sweep) + out_total.append(res_total) return out_mark, out_sweep, out_total @@ -156,9 +156,9 @@ def total_gc(configurations, benchmarks): out_total = [] for bench in benchmarks: res_mark, res_sweep, res_total = total_gc_bench(configurations, bench) - out_mark += [res_mark] - out_sweep += [res_sweep] - out_total += [res_total] + out_mark.append(res_mark) + out_sweep.append(res_sweep) + out_total.append(res_total) return out_mark, out_sweep, out_total @@ -170,13 +170,13 @@ def percentile_gc_bench(configurations, bench, p): for conf in configurations: try: mark, sweep, total = gc_stats(bench, conf) - res_mark += [np.percentile(mark, p)] - res_sweep += [np.percentile(sweep, p)] - res_total += [np.percentile(total, p)] + res_mark.append(np.percentile(mark, p)) + res_sweep.append(np.percentile(sweep, p)) + res_total.append(np.percentile(total, p)) except IndexError: - res_mark += [0] - res_sweep += [0] - res_total += [0] + res_mark.append(0) + res_sweep.append(0) + res_total.append(0) return res_mark, res_sweep, res_total @@ -187,13 +187,13 @@ def total_gc_bench(configurations, bench): for conf in configurations: try: mark, sweep, total = gc_stats(bench, conf) - res_mark += [np.sum(mark)] - res_sweep += [np.sum(sweep)] - res_total += [np.sum(total)] + res_mark.append(np.sum(mark)) + res_sweep.append(np.sum(sweep)) + res_total.append(np.sum(total)) except IndexError: - res_mark += [0] - res_sweep += [0] - res_total += [0] + res_mark.append(0) + res_sweep.append(0) + res_total.append(0) return res_mark, res_sweep, res_total @@ -215,7 +215,7 @@ def percentile_gc_bench_total(configurations, bench, p): def percentile(configurations, benchmarks, p): out = [] for bench in 
benchmarks: - out += [percentile_bench(configurations, bench, p)] + out.append(percentile_bench(configurations, bench, p)) return out @@ -223,9 +223,9 @@ def percentile_bench(configurations, bench, p): res = [] for conf in configurations: try: - res += [np.percentile(config_data(bench, conf), p)] + res.append(np.percentile(config_data(bench, conf), p)) except IndexError: - res += [0] + res.append(0) return res @@ -389,9 +389,9 @@ def sizes_per_conf(parent_configuration): for f in folders: if f.startswith("size_"): parts = f[len("size_"):].split("-") - min_sizes += [to_gb(parts[0])] - max_sizes += [to_gb(parts[1])] - child_confs += [os.path.join(parent_configuration,f)] + min_sizes.append(to_gb(parts[0])) + max_sizes.append(to_gb(parts[1])) + child_confs.append(os.path.join(parent_configuration,f)) return min_sizes, max_sizes, child_confs @@ -404,8 +404,8 @@ def size_compare_chart_generic(plt, parent_configurations, bench, get_percentile equal_confs = [] for min_size, max_size, child_conf in zip(min_sizes, max_sizes, child_confs): if min_size == max_size: - equal_sizes += [min_size] - equal_confs += [child_conf] + equal_sizes.append(min_size) + equal_confs.append(child_conf) # sorts all by size in GB equal_sizes, equal_confs = zip(*[(x,y) for x,y in sorted(zip(equal_sizes,equal_confs))]) @@ -428,8 +428,8 @@ def size_compare_chart_gc_combined(plt, parent_configurations, bench): equal_confs = [] for min_size, max_size, child_conf in zip(min_sizes, max_sizes, child_confs): if min_size == max_size: - equal_sizes += [min_size] - equal_confs += [child_conf] + equal_sizes.append(min_size) + equal_confs.append(child_conf) # sorts all by size in GB equal_sizes, equal_confs = zip(*[(x,y) for x,y in sorted(zip(equal_sizes,equal_confs))]) @@ -724,10 +724,10 @@ def discover_benchmarks(configurations): if pf.startswith("size_"): for child in next(os.walk(os.path.join("results", conf, pf)))[1]: if child not in benchmarks: - benchmarks += [child] + benchmarks.append(child) else: if pf not in benchmarks: - benchmarks += [pf] + benchmarks.append(pf) return benchmarks @@ -740,7 +740,7 @@ def discover_benchmarks(configurations): subfolders = next(os.walk(folder))[1] for size in subfolders: if size.startswith("size_"): - all_configs += [os.path.join(conf, size)] + all_configs.append(os.path.join(conf, size)) results = generate_choices(all_configs) @@ -758,7 +758,7 @@ def discover_benchmarks(configurations): configurations = all_configs else: for arg in args.comparisons: - configurations += [expand_wild_cards(arg)] + configurations.append(expand_wild_cards(arg)) comment = "_vs_".join(configurations).replace(os.sep, "_") if args.comment is not None: @@ -771,7 +771,7 @@ def discover_benchmarks(configurations): else: parent = conf if parent not in parent_configurations: - parent_configurations += [parent] + parent_configurations.append(parent) all_benchmarks = discover_benchmarks(parent_configurations) From 05f2ec7afc892e324525cd1d869b096e2f154b96 Mon Sep 17 00:00:00 2001 From: Valdis Date: Thu, 8 Nov 2018 12:08:01 +0100 Subject: [PATCH 098/169] rename to JAVA_ARGS --- confs/jvm/run | 2 +- scripts/run.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/confs/jvm/run b/confs/jvm/run index d8bf304..3700501 100644 --- a/confs/jvm/run +++ b/confs/jvm/run @@ -1 +1 @@ -java $JAVA_SIZE_ARGS -classpath target/scala-2.11/classes:$HOME/.ivy2/cache/org.scala-lang/scala-library/jars/scala-library-2.11.12.jar $BENCH +java $JAVA_ARGS -classpath 
target/scala-2.11/classes:$HOME/.ivy2/cache/org.scala-lang/scala-library/jars/scala-library-2.11.12.jar $BENCH diff --git a/scripts/run.py b/scripts/run.py index 7314bc6..91537d6 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -215,7 +215,7 @@ def single_run(to_run): cmd = [] for token in unexpanded_cmd: - if token == "$JAVA_SIZE_ARGS": + if token == "$JAVA_ARGS": if minsize != "default": cmd += ["-Xms" + minsize] if maxsize != "default": From 149f40504955aef89a54abcc3ff9ab22208627ab Mon Sep 17 00:00:00 2001 From: Valdis Date: Thu, 8 Nov 2018 12:14:33 +0100 Subject: [PATCH 099/169] log java GC pause times with "--gc" --- scripts/run.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/run.py b/scripts/run.py index 91537d6..7f63b1b 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -220,6 +220,8 @@ def single_run(to_run): cmd += ["-Xms" + minsize] if maxsize != "default": cmd += ["-Xmx" + maxsize] + if gcstats: + cmd += ["-XX:+PrintGCApplicationStoppedTime", "-Xloggc:" + os.path.join(resultsdir, str(n) + ".gc.txt")] else: cmd += [token] From 5fcf02fb93a26bc3892e1b0d6cf20f27e24294fc Mon Sep 17 00:00:00 2001 From: Valdis Date: Thu, 15 Nov 2018 09:17:14 +0100 Subject: [PATCH 100/169] trace and debug options for the gc --- scripts/run.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/scripts/run.py b/scripts/run.py index 7f63b1b..def80fd 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -93,9 +93,14 @@ def compile_scala_native(ref, sha1): return False -def compile(bench, compilecmd): +def compile(conf, bench, compilecmd, debug, trace): cmd = [sbt, '-no-colors', '-J-Xmx2G', 'clean'] cmd.append('set mainClass in Compile := Some("{}")'.format(bench)) + if conf.startswith("scala-native"): + if debug or trace: + cmd.append('set nativeCompileOptions ++= Seq("-g", "-DDEBUG_ASSERT")') + if trace: + cmd.append('set nativeCompileOptions +="-DDEBUG_PRINT"') cmd.append(compilecmd) return run(cmd) @@ -250,6 +255,8 @@ def single_run(to_run): parser.add_argument("--gc", help="gather gc statistics", action="store_true") parser.add_argument("--overwrite", help="overwrite old results", action="store_true") parser.add_argument("--append", help="do not delete old data", action="store_true") + parser.add_argument("--gcdebug", help="enable debug for GCs", action="store_true") + parser.add_argument("--gctrace", help="verbose logging for GCs to stdout", action="store_true") parser.add_argument("set", nargs='*', default="default") args = parser.parse_args() print args @@ -308,6 +315,10 @@ def single_run(to_run): suffix += "-p" + str(par) if args.gc: suffix += "-gc" + if args.gcdebug: + suffix += "-gcdebug" + if args.gctrace: + suffix += "-gctrace" if args.suffix is not None: suffix += "_" + args.suffix @@ -391,7 +402,7 @@ def single_run(to_run): else: os.remove('project/plugins.sbt') - compile(bench, compilecmd) + compile(conf, bench, compilecmd, args.gcdebug, args.gctrace) resultsdir = os.path.join(sized_dir, bench) print "results in", resultsdir From 33a38493c3a97af51f800bb492ee9bc6e13228b9 Mon Sep 17 00:00:00 2001 From: Valdis Date: Wed, 21 Nov 2018 11:01:03 +0100 Subject: [PATCH 101/169] gave figures x2 resolution in each direction --- scripts/summary.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/summary.py b/scripts/summary.py index eb3ef40..5d3e9ae 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -783,7 +783,8 @@ def discover_benchmarks(configurations): benchmarks = all_benchmarks report_dir = "reports/summary_" + 
time.strftime('%Y%m%d_%H%M%S') + "_" + comment + "/" - plt.rcParams["figure.figsize"] = [16.0, 12.0] + plt.rcParams["figure.figsize"] = [32.0, 24.0] + plt.rcParams["font.size"] = 20.0 mkdir(report_dir) with open(os.path.join(report_dir, "Readme.md"), 'w+') as md_file: write_md_file(report_dir, md_file, parent_configurations, configurations, benchmarks, args.gc, args.vssize) From 9982a53ca2edc91c99c776e0c95944b89b4a2041 Mon Sep 17 00:00:00 2001 From: Valdis Date: Fri, 23 Nov 2018 09:08:03 +0100 Subject: [PATCH 102/169] do not fetch on @HEAD --- scripts/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/run.py b/scripts/run.py index def80fd..d8ecb84 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -299,7 +299,7 @@ def single_run(to_run): should_fetch = False for conf in configurations: - if '@' in conf: + if '@' in conf and not conf.endswith("@HEAD"): should_fetch = True break From 87a6b55b470b98166439eb60b560ad6d59005fc0 Mon Sep 17 00:00:00 2001 From: Valdis Date: Fri, 23 Nov 2018 09:09:30 +0100 Subject: [PATCH 103/169] gc threads setting --- scripts/run.py | 55 +++++++++++++++++++++++++++++++--------------- scripts/summary.py | 8 +++---- 2 files changed, 41 insertions(+), 22 deletions(-) diff --git a/scripts/run.py b/scripts/run.py index d8ecb84..fdb0256 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -5,6 +5,8 @@ import shutil as sh import argparse import multiprocessing as mp +import itertools + def mkdir(path): @@ -201,6 +203,7 @@ def single_run(to_run): gcstats = to_run["gcstats"] minsize = to_run["size"][0] maxsize = to_run["size"][1] + gcThreads = to_run["gcThreads"] print('--- run {}/{}'.format(n, runs)) my_env = os.environ.copy() @@ -217,6 +220,10 @@ def single_run(to_run): elif "SCALANATIVE_MAX_HEAP_SIZE" in my_env: del my_env["SCALANATIVE_MAX_HEAP_SIZE"] + if gcThreads != "default": + my_env["SCALANATIVE_GC_THREADS"] = gcThreads + elif "SCALANATIVE_GC_THREADS" in my_env: + del my_env["SCALANATIVE_GC_THREADS"] cmd = [] for token in unexpanded_cmd: @@ -251,6 +258,7 @@ def single_run(to_run): parser.add_argument("--batches", help="number of batches per run", type=int, default=default_batches) parser.add_argument("--benchmark", help="benchmarks to run", action='append') parser.add_argument("--size", help="different size settings to use", action='append') + parser.add_argument("--gcthreads", help="different number of garbage collection threads to use", action='append') parser.add_argument("--par", help="number of parallel processes", type=int, default=default_par) parser.add_argument("--gc", help="gather gc statistics", action="store_true") parser.add_argument("--overwrite", help="overwrite old results", action="store_true") @@ -274,8 +282,8 @@ def single_run(to_run): if args.size != None: sizes = [] - for size_str in args.size: - parsed = size_parse(size_str) + for subconf_str in args.size: + parsed = size_parse(subconf_str) if parsed == ["default", "default"]: sizes = [parsed] + sizes else: @@ -283,6 +291,11 @@ def single_run(to_run): else: sizes = [["default", "default"]] + if args.gcthreads != None: + gcThreadCounts = args.gcthreads + else: + gcThreadCounts = ["default"] + configurations = [] for choice in args.set: expanded = expand_wild_cards(choice) @@ -296,6 +309,7 @@ def single_run(to_run): print "configurations:", configurations print "benchmarks:", benchmarks print "heap sizes:", sizes + print "GC thread counts:", gcThreadCounts should_fetch = False for conf in configurations: @@ -364,26 +378,31 @@ def single_run(to_run): symlinks += 
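The per-run environment handling above follows one pattern for every knob: either export the value to the child process, or explicitly delete it so a setting left over from a previous run cannot leak into the next. A condensed sketch of the same logic:

    import os

    def child_env(gc_threads):
        env = os.environ.copy()
        if gc_threads != "default":
            env["SCALANATIVE_GC_THREADS"] = gc_threads
        else:
            env.pop("SCALANATIVE_GC_THREADS", None)  # same effect as the del above
        return env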
[[generalized_dir,root_dir]] os.symlink(os.path.split(root_dir)[1], generalized_dir) - for size in sizes: + # subconfigurations + for (size, gcThreads) in itertools.product(sizes, gcThreadCounts): - if size == ["default", "default"]: - sized_dir = root_dir + if size == ["default", "default"] and gcThreads == "default": + subconfig_dir = root_dir else: - size_str = "size_" + size[0] + "-" + size[1] - sized_dir = os.path.join(root_dir, size_str) - - if not args.overwrite and os.path.isfile(os.path.join(sized_dir, ".complete")): - print sized_dir, "already complete, skipping" - skipped += [sized_dir] + subconf_str = "" + if size != ["default", "default"] : + subconf_str += "size_" + size[0] + "-" + size[1] + if gcThreads != "default": + subconf_str += "gcthreads_" + gcThreads + subconfig_dir = os.path.join(root_dir, subconf_str) + + if not args.overwrite and os.path.isfile(os.path.join(subconfig_dir, ".complete")): + print subconfig_dir, "already complete, skipping" + skipped += [subconfig_dir] continue if not args.append: - sh.rmtree(sized_dir, ignore_errors=True) + sh.rmtree(subconfig_dir, ignore_errors=True) - mkdir(sized_dir) + mkdir(subconfig_dir) for bench in benchmarks: - print('--- heap size: {} conf: {}, bench: {}'.format(size, conf, bench)) + print('--- heap size: {} GC threads: {} conf: {}, bench: {}'.format(size, gcThreads, conf, bench)) input = slurp(os.path.join('input', bench)) output = slurp(os.path.join('output', bench)) @@ -404,7 +423,7 @@ def single_run(to_run): compile(conf, bench, compilecmd, args.gcdebug, args.gctrace) - resultsdir = os.path.join(sized_dir, bench) + resultsdir = os.path.join(subconfig_dir, bench) print "results in", resultsdir mkdir(resultsdir) @@ -416,7 +435,7 @@ def single_run(to_run): for n in xrange(runs): to_run += [ dict(runs=runs, cmd=cmd, resultsdir=resultsdir, conf=conf, bench=bench, n=n, gcstats=args.gc, - size=size)] + size=size, gcThreads=gcThreads)] if par == 1: for tr in to_run: @@ -425,8 +444,8 @@ def single_run(to_run): failed += sum(pool.map(single_run, to_run), []) # mark it as complete - open(os.path.join(sized_dir, ".complete"), 'w+').close() - result_dirs += [sized_dir] + open(os.path.join(subconfig_dir, ".complete"), 'w+').close() + result_dirs += [subconfig_dir] print "results:" for dir in result_dirs: diff --git a/scripts/summary.py b/scripts/summary.py index 5d3e9ae..8b2282e 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -734,13 +734,13 @@ def discover_benchmarks(configurations): if __name__ == '__main__': all_configs = next(os.walk("results"))[1] - # added size_ + # added subconfigurations for conf in all_configs: folder = os.path.join("results", conf) subfolders = next(os.walk(folder))[1] - for size in subfolders: - if size.startswith("size_"): - all_configs.append(os.path.join(conf, size)) + for subconf in subfolders: + if subconf.startswith("size_") or subconf.startswith("gcthreads_"): + all_configs.append(os.path.join(conf, subconf)) results = generate_choices(all_configs) From cffa5420d4063bfc7b9a9bb20da9acb695e045ab Mon Sep 17 00:00:00 2001 From: Valdis Date: Fri, 23 Nov 2018 11:02:32 +0100 Subject: [PATCH 104/169] gc threads setting: better naming --- scripts/run.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/scripts/run.py b/scripts/run.py index fdb0256..c1f480b 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -384,11 +384,13 @@ def single_run(to_run): if size == ["default", "default"] and gcThreads == "default": subconfig_dir = root_dir else: - subconf_str = "" + size_str = 
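The subconfiguration loop above runs every heap-size setting against every GC thread count, so each combination gets its own results directory. A worked example of what itertools.product iterates (values illustrative):

    import itertools

    sizes = [["default", "default"], ["1G", "1G"]]   # parsed --size pairs
    gcThreadCounts = ["default", "4"]                # --gcthreads values
    combos = list(itertools.product(sizes, gcThreadCounts))
    # [(['default', 'default'], 'default'), (['default', 'default'], '4'),
    #  (['1G', '1G'], 'default'), (['1G', '1G'], '4')]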
"" if size != ["default", "default"] : - subconf_str += "size_" + size[0] + "-" + size[1] + size_str = "size_" + size[0] + "-" + size[1] + gcThreads_str = "" if gcThreads != "default": - subconf_str += "gcthreads_" + gcThreads + gcThreads_str += "gcthreads_" + gcThreads + subconf_str = "_".join(size_str, gcThreads_str) subconfig_dir = os.path.join(root_dir, subconf_str) if not args.overwrite and os.path.isfile(os.path.join(subconfig_dir, ".complete")): From 5ee578d4a0be29425b107033642e43f2384b7a24 Mon Sep 17 00:00:00 2001 From: Valdis Date: Fri, 23 Nov 2018 11:54:20 +0100 Subject: [PATCH 105/169] embedded uploading via git or just copy --- scripts/run.py | 101 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 80 insertions(+), 21 deletions(-) diff --git a/scripts/run.py b/scripts/run.py index c1f480b..0543a7f 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -37,6 +37,15 @@ def where(cmd): return None +def try_run(cmd, env=None, wd=None): + try: + print run(cmd, env, wd) + return True + except subp.CalledProcessError as err: + print err.output + return False + + def run(cmd, env=None, wd=None): print(">>> " + str(cmd)) if wd == None: @@ -45,15 +54,31 @@ def run(cmd, env=None, wd=None): return subp.check_output(cmd, stderr=subp.STDOUT, env=env, cwd=wd) -scala_native_dir = "../scala-native" +scala_native_dir = os.path.join("..", "scala-native") +upload_dir = os.path.abspath(os.path.join("..", "scala-native-benchmark-results")) +local_scala_repo_dir = os.path.abspath(os.path.join("..", "scala-2.11.11-only")) -def fetch(): - git_fetch = ['git', 'fetch', '--all'] - try: - run(git_fetch, wd=scala_native_dir) - except: - pass +def git_add(dir, *items): + return try_run(["git", "add"] + list(items), wd=dir) + + +def git_commit(dir, msg): + return try_run(["git", "commit", "-m", msg], wd=dir) + + +def git_pull(dir): + my_env = os.environ.copy() + my_env["GIT_MERGE_AUTOEDIT"] = "no" + return try_run(["git", "pull"], env=my_env, wd=dir) + + +def git_push(dir): + return try_run(['git', 'push'], wd=dir) + + +def git_fetch(dir): + return try_run(['git', 'fetch', '--all'], wd=dir) def get_ref(ref): @@ -81,7 +106,6 @@ def compile_scala_native(ref, sha1): compile_cmd = [sbt, '-no-colors', '-J-Xmx2G', 'rebuild', 'sandbox/run'] compile_env = os.environ.copy() compile_env["SCALANATIVE_GC"] = "immix" - local_scala_repo_dir = os.path.abspath("../scala-2.11.11-only") if os.path.isdir(local_scala_repo_dir): compile_env["SCALANATIVE_SCALAREPO"] = local_scala_repo_dir @@ -251,6 +275,37 @@ def single_run(to_run): return [dict(conf=conf, bench=bench, run=n)] +def upload(subconfig_dir, symlink, use_git, overwrite): + if os.path.isdir(upload_dir): + target = os.path.join(upload_dir, subconfig_dir) + targetComplete = os.path.isfile(os.path.join(target, ".complete")) + targetExisted = os.path.isdir(target) + if (targetComplete and overwrite) or targetExisted: + mkdir(os.path.join("..", target)) + sh.rmtree(target, ignore_errors=True) + if not targetExisted or overwrite: + sh.copytree(subconfig_dir, target, symlinks=True) + if use_git: + if symlink != None: + git_add(upload_dir, symlink) + if git_add(upload_dir, target) \ + and git_commit(upload_dir, "automated commit " + subconfig_dir) \ + and git_pull(upload_dir) \ + and git_push(upload_dir): + pass + else: + print "WARN", upload_dir, "does not exist!" 
+ + +def create_symlink(generalized_dir, root_dir): + try: + os.unlink(generalized_dir) + except: + pass + print "creating symlink", generalized_dir, "->", root_dir + os.symlink(os.path.split(root_dir)[1], generalized_dir) + + if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--suffix", help="suffix added to results") @@ -261,6 +316,8 @@ def single_run(to_run): parser.add_argument("--gcthreads", help="different number of garbage collection threads to use", action='append') parser.add_argument("--par", help="number of parallel processes", type=int, default=default_par) parser.add_argument("--gc", help="gather gc statistics", action="store_true") + parser.add_argument("--upload", help="copy the results to ../scala-native-benchmark-results", action="store_true") + parser.add_argument("--gitupload", help="copy the results to ../scala-native-benchmark-results and commit and push to git", action="store_true") parser.add_argument("--overwrite", help="overwrite old results", action="store_true") parser.add_argument("--append", help="do not delete old data", action="store_true") parser.add_argument("--gcdebug", help="enable debug for GCs", action="store_true") @@ -318,7 +375,7 @@ def single_run(to_run): break if should_fetch: - fetch() + git_fetch(scala_native_dir) suffix = "" if runs != default_runs: @@ -369,14 +426,13 @@ def single_run(to_run): compile_fail += [conf] continue + symlink = None if generalized_dir != root_dir: - try: - os.unlink(generalized_dir) - except: - pass - print "creating symlink", generalized_dir, "->", root_dir - symlinks += [[generalized_dir,root_dir]] - os.symlink(os.path.split(root_dir)[1], generalized_dir) + create_symlink(generalized_dir, root_dir) + symlinks += [[generalized_dir, root_dir]] + symlink = generalized_dir + if args.upload or args.gitupload: + create_symlink(os.path.join(upload_dir, generalized_dir), root_dir) # subconfigurations for (size, gcThreads) in itertools.product(sizes, gcThreadCounts): @@ -384,13 +440,13 @@ def single_run(to_run): if size == ["default", "default"] and gcThreads == "default": subconfig_dir = root_dir else: - size_str = "" + size_str = [] if size != ["default", "default"] : - size_str = "size_" + size[0] + "-" + size[1] - gcThreads_str = "" + size_str = ["size_" + size[0] + "-" + size[1]] + gcThreads_str = [] if gcThreads != "default": - gcThreads_str += "gcthreads_" + gcThreads - subconf_str = "_".join(size_str, gcThreads_str) + gcThreads_str = ["gcthreads_" + gcThreads] + subconf_str = "_".join(size_str + gcThreads_str) subconfig_dir = os.path.join(root_dir, subconf_str) if not args.overwrite and os.path.isfile(os.path.join(subconfig_dir, ".complete")): @@ -449,6 +505,9 @@ def single_run(to_run): open(os.path.join(subconfig_dir, ".complete"), 'w+').close() result_dirs += [subconfig_dir] + if args.upload or args.gitupload: + upload(subconfig_dir, symlink, args.gitupload, args.overwrite) + print "results:" for dir in result_dirs: print dir From c37e1f5251dda655e02877e4ac0bbdddd0792593 Mon Sep 17 00:00:00 2001 From: Valdis Date: Fri, 23 Nov 2018 12:35:06 +0100 Subject: [PATCH 106/169] calculate total time --- scripts/summary.py | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/scripts/summary.py b/scripts/summary.py index 8b2282e..78d313f 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -229,6 +229,23 @@ def percentile_bench(configurations, bench, p): return res +def totals(configurations, benchmarks): + out = [] + for bench in benchmarks: + 
out.append(totals_bench(configurations, bench))
+    return out
+
+
+def totals_bench(configurations, bench):
+    res = []
+    for conf in configurations:
+        try:
+            res.append(np.sum(config_data(bench, conf)))
+        except IndexError:
+            res.append(0)
+    return res
+
+
 def bar_chart_relative(plt, configurations, benchmarks, data):
     plt.clf()
     plt.cla()
@@ -267,6 +284,12 @@ def bar_chart_relative(plt, configurations, benchmarks, data):
     return plt
 
 
+def total_execution_times(plt, configurations, benchmarks, data):
+    plt = bar_chart_relative(plt, configurations, benchmarks, data)
+    plt.title("Total test execution times against " + configurations[0])
+    return plt
+
+
 def relative_execution_times(plt, configurations, benchmarks, data, p):
     plt = bar_chart_relative(plt, configurations, benchmarks, data)
     plt.title("Relative test execution times against " + configurations[0] + " at " + str(p) + " percentile")
@@ -652,8 +675,14 @@ def write_md_file(rootdir, md_file, parent_configurations, configurations, bench
                  "relative_percentile_" + str(p) + ".png")
         write_md_table(md_file, configurations, benchmarks, data)
 
+    md_file.write("## Benchmark total run time (ms) \n")
+    data = totals(configurations, benchmarks)
+    chart_md(md_file, total_execution_times(plt, configurations, benchmarks, data), rootdir,
+             "relative_total.png")
+    write_md_table(md_file, configurations, benchmarks, data)
+
     if gc_charts:
-        md_file.write("## Total GC time (ms) \n")
+        md_file.write("## Total GC time on Application thread (ms) \n")
         mark, sweep, total = total_gc(configurations, benchmarks)
         chart_md(md_file, bar_chart_gc_relative(plt, configurations, benchmarks, mark, total), rootdir,
                  "relative_gc_total.png")

From 4d1754c85e5a8dddd47ea0852d125e05674278ec Mon Sep 17 00:00:00 2001
From: Valdis
Date: Mon, 26 Nov 2018 23:20:49 +0100
Subject: [PATCH 107/169] fix subconfig detection

---
 scripts/summary.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/scripts/summary.py b/scripts/summary.py
index 78d313f..7b012fb 100755
--- a/scripts/summary.py
+++ b/scripts/summary.py
@@ -750,7 +750,7 @@ def discover_benchmarks(configurations):
     for conf in configurations:
         parent_folders = next(os.walk(os.path.join("results", conf)))[1]
         for pf in parent_folders:
-            if pf.startswith("size_"):
+            if is_subconfig(pf):
                 for child in next(os.walk(os.path.join("results", conf, pf)))[1]:
                     if child not in benchmarks:
                         benchmarks.append(child)
             else:
                 if pf not in benchmarks:
                     benchmarks.append(pf)
     return benchmarks
 
 
+def is_subconfig(subconf):
+    return subconf.startswith("size_") or subconf.startswith("gcthreads_")
+
+
 if __name__ == '__main__':
     all_configs = next(os.walk("results"))[1]
     # added subconfigurations
     for conf in all_configs:
         folder = os.path.join("results", conf)
         subfolders = next(os.walk(folder))[1]
         for subconf in subfolders:
-            if subconf.startswith("size_") or subconf.startswith("gcthreads_"):
+            if is_subconfig(subconf):
                 all_configs.append(os.path.join(conf, subconf))
 
     results = generate_choices(all_configs)

From 895af48317f5ae7a522c8706da11dd9d7e059ef1 Mon Sep 17 00:00:00 2001
From: Valdis
Date: Wed, 28 Nov 2018 10:50:48 +0100
Subject: [PATCH 108/169] fix example charts with runs < 4

---
 scripts/summary.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/summary.py b/scripts/summary.py
index 78d313f..2c23215 100755
--- a/scripts/summary.py
+++ b/scripts/summary.py
@@ -731,7 +731,7 @@ def write_md_file(rootdir, md_file, parent_configurations, configurations, bench
         run -= 1
     if run
>= 0: - chart_md(md_file, example_run_plot(plt, configurations, bench), rootdir, + chart_md(md_file, example_run_plot(plt, configurations, bench, run), rootdir, "example_run_" + str(run) + "_" + bench + ".png") From 0ae9864c16fdd4c7d4b9a4bbb7a0299e28192185 Mon Sep 17 00:00:00 2001 From: Valdis Date: Wed, 28 Nov 2018 22:04:52 +0100 Subject: [PATCH 109/169] gc pause should be only from non-concurrent times --- scripts/summary.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/summary.py b/scripts/summary.py index 792578d..01f5487 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -87,7 +87,8 @@ def parse_gc_events(data, file, header): mark_times.append(time) elif event == "sweep": sweep_times.append(time) - gc_times.append(time) + if event == "mark" or event == "sweep": + gc_times.append(time) return mark_times, sweep_times, gc_times From 0c2227f7ee326ac565c1280570c424c9ec812edf Mon Sep 17 00:00:00 2001 From: Valdis Date: Sat, 15 Dec 2018 13:24:51 +0100 Subject: [PATCH 110/169] parse the different events --- scripts/summary.py | 74 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 65 insertions(+), 9 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index 01f5487..52c3bb7 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -32,7 +32,7 @@ def config_data(bench, conf): return np.array(out) -def gc_stats(bench, conf): +def gc_pauses_main_thread(bench, conf): benchmark_dir = os.path.join("results", conf, bench) files = next(os.walk(benchmark_dir), [[], [], []])[2] runs = [] @@ -60,12 +60,12 @@ def gc_stats(bench, conf): def gc_parse_file(data, file): header = data.readline().strip() if header.startswith("event_type,"): - return parse_gc_events(data, file, header) + return parse_gc_pause_events(data, file, header) else: return parse_gc_tabular(data, file, header) -def parse_gc_events(data, file, header): +def parse_gc_pause_events(data, file, header): mark_times = [] sweep_times = [] gc_times = [] @@ -133,12 +133,68 @@ def parse_gc_tabular(data, file, header): return mark_times, sweep_times, gc_times +def append_or_create(dict, key, value): + if dict.has_key(key): + dict[key].append(value) + else: + dict[key] = [value] + + +# event = [type, start, end] +def parse_events(data, file, header, timeFilter = (lambda t: True)): + collection_types = ["collection"] + phase_types = ["mark", "sweep", "concmark", "concsweep"] + batch_types = ["mark_batch", "sweep_batch", "coalesce_batch"] + + collection_events = [] + phase_events_by_thread = dict() + batch_events_by_thread = dict() + + event_type_index = 0 + start_ns_index = -1 + time_ns_index = -1 + thread_index = -1 + ns_to_ms_div = 1000 * 1000 + for i, h in enumerate(header.split(',')): + if h == "start_ns": + start_ns_index = i + if h == "time_ns": + time_ns_index = i + if h == "gc_thread": + thread_index = i + + if start_ns_index == -1: + print "Header does not have start_ns", header, "at", file + if time_ns_index == -1: + print "Header does not have time_ns", header, "at", file + if thread_index == -1: + print "Header does not have gc_thread", header, "at", file + if start_ns_index == -1 or time_ns_index == -1 or thread_index == -1: + return collection_events, phase_events_by_thread, batch_events_by_thread + + for line in data.readlines(): + arr = line.split(",") + event = arr[event_type_index] + start = float(arr[start_ns_index]) / ns_to_ms_div + if not timeFilter(start): + continue + time = float(arr[time_ns_index]) / ns_to_ms_div + thread = arr[thread_index] + if event in 
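parse_events above locates columns by name from the CSV header rather than assuming a fixed order, and reports any missing column before bailing out. The same idea in isolation (the header layout is an assumed example; only the column names are taken from the code above):

    header = "event_type,gc_thread,start_ns,time_ns"
    index = dict((h, i) for i, h in enumerate(header.split(',')))
    arr = "mark_batch,2,1200000,300000".split(',')
    ns_to_ms_div = 1000 * 1000
    start_ms = float(arr[index["start_ns"]]) / ns_to_ms_div  # 1.2
    time_ms = float(arr[index["time_ns"]]) / ns_to_ms_div    # 0.3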
collection_types: + collection_events.append([event, start, time]) + elif event in phase_types: + append_or_create(phase_events_by_thread, thread, [event, start, time]) + elif event in batch_types: + append_or_create(batch_events_by_thread, thread, [event, start, time]) + + return collection_events, phase_events_by_thread, batch_events_by_thread + + def gc_stats_total(bench, conf): - _, _, total = gc_stats(bench, conf) + _, _, total = gc_pauses_main_thread(bench, conf) return total - def percentile_gc(configurations, benchmarks, percentile): out_mark = [] out_sweep = [] @@ -170,7 +226,7 @@ def percentile_gc_bench(configurations, bench, p): res_total = [] for conf in configurations: try: - mark, sweep, total = gc_stats(bench, conf) + mark, sweep, total = gc_pauses_main_thread(bench, conf) res_mark.append(np.percentile(mark, p)) res_sweep.append(np.percentile(sweep, p)) res_total.append(np.percentile(total, p)) @@ -187,7 +243,7 @@ def total_gc_bench(configurations, bench): res_total = [] for conf in configurations: try: - mark, sweep, total = gc_stats(bench, conf) + mark, sweep, total = gc_pauses_main_thread(bench, conf) res_mark.append(np.sum(mark)) res_sweep.append(np.sum(sweep)) res_total.append(np.sum(total)) @@ -331,7 +387,7 @@ def bar_chart_gc_relative(plt, configurations, benchmarks, mark_data, total_data mark_res = [] for bench_idx, (bench, base_val) in enumerate(zip(benchmarks, base)): if base_val > 0: - mark, _, total = gc_stats(bench, conf) + mark, _, total = gc_pauses_main_thread(bench, conf) mark = mark_data[bench_idx][conf_idx] total = total_data[bench_idx][conf_idx] res.append(np.array(total) / base_val) @@ -358,7 +414,7 @@ def bar_chart_gc_absolute(plt, configurations, benchmarks, percentile): mark_res = [] for bench in benchmarks: try: - mark, _, total = gc_stats(bench, conf) + mark, _, total = gc_pauses_main_thread(bench, conf) res.append(np.percentile(total, percentile)) mark_res.append(np.percentile(mark, percentile)) except IndexError: From 7c4a3f60ed004e9e270ce24f1051a7bb28c8b4b6 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sat, 15 Dec 2018 13:49:41 +0100 Subject: [PATCH 111/169] read all relevant files --- scripts/summary.py | 54 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/scripts/summary.py b/scripts/summary.py index 52c3bb7..9bfd239 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -133,6 +133,58 @@ def parse_gc_tabular(data, file, header): return mark_times, sweep_times, gc_times +def merge_or_create(dict1, dict2): + for key1 in dict1.keys(): + if dict2.has_key(key1): + dict1[key1].append(dict2[key1]) + for key2 in dict2.keys(): + if not dict1.has_key(key2): + dict1[key2] = dict2[key2] + + +def gc_events_for_last_n_collections(bench, conf, run=3, n=2): + benchmark_dir = os.path.join("results", conf, bench) + files = next(os.walk(benchmark_dir), [[], [], []])[2] + main_file_name = str(run) + ".gc.csv" + main_file = os.path.join("results", conf, bench, main_file_name) + parts = [] + for file in files: + if file.startswith(main_file_name): + parts.append(file) + + try: + with open(main_file) as data: + header = data.readline().strip() + collection_events, _, _ = parse_events(data, main_file, header) + except IOError: + print "run does not exist", main_file + return [], [], [] + + last_events = collection_events[-n:] + if len(last_events) == 0: + return [], [], [] + + min_time = last_events[0][1] + time_filter = (lambda t: t < min_time) + + phase_events_by_thread = dict() + batch_events_by_thread = 
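One detail worth flagging in merge_or_create above: when a key exists in both dicts, `dict1[key1].append(dict2[key1])` nests dict2's entire event list as one element instead of concatenating the two lists. The consumers below iterate these lists as flat [event, start, time] triples, so if the same thread can appear in two part files, extend appears to be the intended call:

    a = {"1": [["mark", 0.0, 5.0]]}
    b = {"1": [["sweep", 5.0, 3.0]]}
    a["1"].append(b["1"])    # [['mark', 0.0, 5.0], [['sweep', 5.0, 3.0]]]  nested
    c = {"1": [["mark", 0.0, 5.0]]}
    c["1"].extend(b["1"])    # [['mark', 0.0, 5.0], ['sweep', 5.0, 3.0]]   flat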
dict() + + for part in parts: + try: + file = os.path.join("results", conf, bench, part) + with open(file) as data: + header = data.readline().strip() + # no collection events on other threads + _, phase_events_by_thread0, batch_events_by_thread0 = parse_events(data, file, header, time_filter) + merge_or_create(phase_events_by_thread,phase_events_by_thread0) + merge_or_create(batch_events_by_thread,batch_events_by_thread0) + except IOError: + pass + + return collection_events, phase_events_by_thread, batch_events_by_thread + + def append_or_create(dict, key, value): if dict.has_key(key): dict[key].append(value) @@ -141,7 +193,7 @@ def append_or_create(dict, key, value): # event = [type, start, end] -def parse_events(data, file, header, timeFilter = (lambda t: True)): +def parse_events(data, file, header, timeFilter=(lambda t: True)): collection_types = ["collection"] phase_types = ["mark", "sweep", "concmark", "concsweep"] batch_types = ["mark_batch", "sweep_batch", "coalesce_batch"] From 6236a0eb9d266bac20611c674a66880d467af209 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sat, 15 Dec 2018 14:30:52 +0100 Subject: [PATCH 112/169] start plotting --- scripts/summary.py | 46 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 40 insertions(+), 6 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index 9bfd239..b207a5f 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -177,8 +177,8 @@ def gc_events_for_last_n_collections(bench, conf, run=3, n=2): header = data.readline().strip() # no collection events on other threads _, phase_events_by_thread0, batch_events_by_thread0 = parse_events(data, file, header, time_filter) - merge_or_create(phase_events_by_thread,phase_events_by_thread0) - merge_or_create(batch_events_by_thread,batch_events_by_thread0) + merge_or_create(phase_events_by_thread, phase_events_by_thread0) + merge_or_create(batch_events_by_thread, batch_events_by_thread0) except IOError: pass @@ -192,11 +192,13 @@ def append_or_create(dict, key, value): dict[key] = [value] +phase_event_types = ["mark", "sweep", "concmark", "concsweep"] +batch_events_types = ["mark_batch", "sweep_batch", "coalesce_batch"] + + # event = [type, start, end] def parse_events(data, file, header, timeFilter=(lambda t: True)): collection_types = ["collection"] - phase_types = ["mark", "sweep", "concmark", "concsweep"] - batch_types = ["mark_batch", "sweep_batch", "coalesce_batch"] collection_events = [] phase_events_by_thread = dict() @@ -234,9 +236,9 @@ def parse_events(data, file, header, timeFilter=(lambda t: True)): thread = arr[thread_index] if event in collection_types: collection_events.append([event, start, time]) - elif event in phase_types: + elif event in phase_event_types: append_or_create(phase_events_by_thread, thread, [event, start, time]) - elif event in batch_types: + elif event in batch_events_types: append_or_create(batch_events_by_thread, thread, [event, start, time]) return collection_events, phase_events_by_thread, batch_events_by_thread @@ -642,6 +644,34 @@ def print_table(configurations, benchmarks, data): print ','.join([bench] + list(map(str, res))) +def gc_gantt_chart(plt, conf, bench, data): + plt.clf() + plt.cla() + + labels = [conf + "-Collections"] + collection_events, phase_events_by_thread, batch_events_by_thread = data + + values = [] + for e in collection_events: + # [event, start, time] => (start, times) + values.append((e[1], e[2])) + plt.broken_barh(values, (0, 1)) + + # threads = phase_events_by_thread.keys() + # for thread in threads: + # 
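Note the direction of the time filter used above: `lambda t: t < min_time` keeps events that start before the last collections, i.e. it filters out exactly the window being plotted. PATCH 114 below flips the comparison to `t > min_time`; schematically:

    min_time = 1500.0                        # start of the last collection, ms
    keep = (lambda t: t < min_time)          # original: drops the window's events
    keep_fixed = (lambda t: t > min_time)    # PATCH 114: keeps them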
labels.append(conf + "-thread_" + str(thread) + "-Phases") + # for e_t in phase_event_types: + # plt.broken_barh() + # for thread in threads: + # labels.append(conf + "-thread_" + str(thread) + "-Batches") + # for e_t in batch_events_types: + + + + return plt + + + def write_md_table(file, configurations, benchmarks, data): header = ['name'] header.append(configurations[0]) @@ -842,6 +872,10 @@ def write_md_file(rootdir, md_file, parent_configurations, configurations, bench if run >= 0: chart_md(md_file, example_run_plot(plt, configurations, bench, run), rootdir, "example_run_" + str(run) + "_" + bench + ".png") + if gc_charts: + for conf in configurations: + chart_md(md_file, gc_gantt_chart(plt, conf, bench, gc_events_for_last_n_collections(bench, conf, run)), rootdir, + "example_gc_last_2_" + str(run) + "_" + bench + ".png") def any_run_exists(bench, configurations, run): From 8cd81fb21c7a496fa5470ad93e70cd9e2f140254 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sat, 15 Dec 2018 14:36:39 +0100 Subject: [PATCH 113/169] limit the collections events as well --- scripts/summary.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index b207a5f..a90ef30 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -160,11 +160,11 @@ def gc_events_for_last_n_collections(bench, conf, run=3, n=2): print "run does not exist", main_file return [], [], [] - last_events = collection_events[-n:] - if len(last_events) == 0: + collection_events = collection_events[-n:] + if len(collection_events) == 0: return [], [], [] - min_time = last_events[0][1] + min_time = collection_events[0][1] time_filter = (lambda t: t < min_time) phase_events_by_thread = dict() From d634813cb02830bdb265a5f0e1f186cfaa75a261 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sat, 15 Dec 2018 15:56:39 +0100 Subject: [PATCH 114/169] both batches and phases --- scripts/summary.py | 108 +++++++++++++++++++++++++++------------------ 1 file changed, 65 insertions(+), 43 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index a90ef30..1d93c5c 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -165,7 +165,7 @@ def gc_events_for_last_n_collections(bench, conf, run=3, n=2): return [], [], [] min_time = collection_events[0][1] - time_filter = (lambda t: t < min_time) + time_filter = (lambda t: t > min_time) phase_events_by_thread = dict() batch_events_by_thread = dict() @@ -648,26 +648,48 @@ def gc_gantt_chart(plt, conf, bench, data): plt.clf() plt.cla() - labels = [conf + "-Collections"] + labels = ["Collections"] collection_events, phase_events_by_thread, batch_events_by_thread = data values = [] for e in collection_events: - # [event, start, time] => (start, times) + # [event, start, time] => (start, time) values.append((e[1], e[2])) - plt.broken_barh(values, (0, 1)) - - # threads = phase_events_by_thread.keys() - # for thread in threads: - # labels.append(conf + "-thread_" + str(thread) + "-Phases") - # for e_t in phase_event_types: - # plt.broken_barh() - # for thread in threads: - # labels.append(conf + "-thread_" + str(thread) + "-Batches") - # for e_t in batch_events_types: - - - + plt.broken_barh(values, (0, 1), color="black") + event_type_to_color = { + "mark": "red", "sweep": "blue", "concmark": "red", "concsweep": "blue", + "mark_batch": "red", "sweep_batch": "blue", "coalesce_batch": "green" + } + + for thread in sorted(phase_events_by_thread.keys()): + labels.append("Phases" + str(thread)) + raw_values = phase_events_by_thread[thread] + for et in 
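The gantt rendering here is built on matplotlib's broken_barh, which draws a row of horizontal spans from a list of (start, width) pairs plus a (ymin, height) row position, matching the (start, time) tuples collected above. A minimal standalone example:

    import matplotlib
    matplotlib.use("Agg")   # headless backend, as when batch-generating reports
    import matplotlib.pyplot as plt

    plt.broken_barh([(0, 5), (8, 3)], (0, 1), facecolors="black")  # collections
    plt.broken_barh([(1, 2), (9, 1)], (2, 1), facecolors="red")    # mark events
    plt.xlabel("Time since start (ms)")
    plt.savefig("gantt_sketch.png")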
phase_event_types: + values = [] + for e in raw_values: + event = e[0] + start = e[1] + time = e[2] + if event == et: + values.append((start, time)) + plt.broken_barh(values, (int(thread) + 2, 1), color = event_type_to_color[et]) + + tlen = len(labels) + for thread in sorted(batch_events_by_thread.keys()): + labels.append("Batches" + str(thread)) + raw_values = batch_events_by_thread[thread] + for et in batch_events_types: + values = [] + for e in raw_values: + event = e[0] + start = e[1] + time = e[2] + if event == et: + values.append((start, time)) + plt.broken_barh(values, (int(thread) + 1 + tlen, 1), color = event_type_to_color[et]) + + plt.yticks(np.arange(len(labels)), labels) + plt.legend() return plt @@ -806,33 +828,33 @@ def chart_md(md_file, plt, rootdir, name): def write_md_file(rootdir, md_file, parent_configurations, configurations, benchmarks, gc_charts=False, size_charts=False): interesting_percentiles = [50, 90, 99] - md_file.write("# Summary\n") - for p in interesting_percentiles: - md_file.write("## Benchmark run time (ms) at {} percentile \n".format(p)) - data = percentile(configurations, benchmarks, p) - chart_md(md_file, relative_execution_times(plt, configurations, benchmarks, data, p), rootdir, - "relative_percentile_" + str(p) + ".png") - write_md_table(md_file, configurations, benchmarks, data) - - md_file.write("## Benchmark total run time (ms) \n") - data = totals(configurations, benchmarks) - chart_md(md_file, total_execution_times(plt, configurations, benchmarks, data), rootdir, - "relative_total.png") - write_md_table(md_file, configurations, benchmarks, data) - - if gc_charts: - md_file.write("## Total GC time on Application thread (ms) \n") - mark, sweep, total = total_gc(configurations, benchmarks) - chart_md(md_file, bar_chart_gc_relative(plt, configurations, benchmarks, mark, total), rootdir, - "relative_gc_total.png") - write_md_table_gc(md_file, configurations, benchmarks, mark, sweep, total) - - for p in interesting_percentiles: - md_file.write("## GC pause time (ms) at {} percentile \n".format(p)) - _, _, total = percentile_gc(configurations, benchmarks, p) - chart_md(md_file, relative_gc_pauses(plt, configurations, benchmarks, total, p), rootdir, - "relative_gc_percentile_" + str(p) + ".png") - write_md_table(md_file, configurations, benchmarks, total) + # md_file.write("# Summary\n") + # for p in interesting_percentiles: + # md_file.write("## Benchmark run time (ms) at {} percentile \n".format(p)) + # data = percentile(configurations, benchmarks, p) + # chart_md(md_file, relative_execution_times(plt, configurations, benchmarks, data, p), rootdir, + # "relative_percentile_" + str(p) + ".png") + # write_md_table(md_file, configurations, benchmarks, data) + # + # md_file.write("## Benchmark total run time (ms) \n") + # data = totals(configurations, benchmarks) + # chart_md(md_file, total_execution_times(plt, configurations, benchmarks, data), rootdir, + # "relative_total.png") + # write_md_table(md_file, configurations, benchmarks, data) + # + # if gc_charts: + # md_file.write("## Total GC time on Application thread (ms) \n") + # mark, sweep, total = total_gc(configurations, benchmarks) + # chart_md(md_file, bar_chart_gc_relative(plt, configurations, benchmarks, mark, total), rootdir, + # "relative_gc_total.png") + # write_md_table_gc(md_file, configurations, benchmarks, mark, sweep, total) + # + # for p in interesting_percentiles: + # md_file.write("## GC pause time (ms) at {} percentile \n".format(p)) + # _, _, total = percentile_gc(configurations, 
benchmarks, p) + # chart_md(md_file, relative_gc_pauses(plt, configurations, benchmarks, total, p), rootdir, + # "relative_gc_percentile_" + str(p) + ".png") + # write_md_table(md_file, configurations, benchmarks, total) md_file.write("# Individual benchmarks\n") for bench in benchmarks: From db6b52132ce633f8702f924a3aaf6c708de79fbf Mon Sep 17 00:00:00 2001 From: Valdis Date: Sat, 15 Dec 2018 16:01:54 +0100 Subject: [PATCH 115/169] only the last GC --- scripts/summary.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/summary.py b/scripts/summary.py index 1d93c5c..e11ec48 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -142,7 +142,7 @@ def merge_or_create(dict1, dict2): dict1[key2] = dict2[key2] -def gc_events_for_last_n_collections(bench, conf, run=3, n=2): +def gc_events_for_last_n_collections(bench, conf, run=3, n=1): benchmark_dir = os.path.join("results", conf, bench) files = next(os.walk(benchmark_dir), [[], [], []])[2] main_file_name = str(run) + ".gc.csv" From b0483d0622fc70a49bbaddcf407c5617671ad5b5 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sat, 15 Dec 2018 18:07:27 +0100 Subject: [PATCH 116/169] looking nicer --- scripts/summary.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index e11ec48..3700fa6 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -158,11 +158,11 @@ def gc_events_for_last_n_collections(bench, conf, run=3, n=1): collection_events, _, _ = parse_events(data, main_file, header) except IOError: print "run does not exist", main_file - return [], [], [] + return [], dict(), dict() collection_events = collection_events[-n:] if len(collection_events) == 0: - return [], [], [] + return [], dict(), dict() min_time = collection_events[0][1] time_filter = (lambda t: t > min_time) @@ -657,11 +657,12 @@ def gc_gantt_chart(plt, conf, bench, data): values.append((e[1], e[2])) plt.broken_barh(values, (0, 1), color="black") event_type_to_color = { - "mark": "red", "sweep": "blue", "concmark": "red", "concsweep": "blue", - "mark_batch": "red", "sweep_batch": "blue", "coalesce_batch": "green" + "mark": ("red", "darkred"), "sweep": ("blue", "darkblue"), "concmark": ("red", "darkred"), "concsweep": ("blue", "darkblue"), + "mark_batch": ("red", "darkred"), "sweep_batch": ("blue", "darkblue"), "coalesce_batch": ("green", "darkgreen") } for thread in sorted(phase_events_by_thread.keys()): + end = len(labels) labels.append("Phases" + str(thread)) raw_values = phase_events_by_thread[thread] for et in phase_event_types: @@ -672,10 +673,10 @@ def gc_gantt_chart(plt, conf, bench, data): time = e[2] if event == et: values.append((start, time)) - plt.broken_barh(values, (int(thread) + 2, 1), color = event_type_to_color[et]) + plt.broken_barh(values, (end, 1), color = event_type_to_color[et]) - tlen = len(labels) for thread in sorted(batch_events_by_thread.keys()): + end = len(labels) labels.append("Batches" + str(thread)) raw_values = batch_events_by_thread[thread] for et in batch_events_types: @@ -686,9 +687,11 @@ def gc_gantt_chart(plt, conf, bench, data): time = e[2] if event == et: values.append((start, time)) - plt.broken_barh(values, (int(thread) + 1 + tlen, 1), color = event_type_to_color[et]) + plt.broken_barh(values, (end, 1), facecolors=event_type_to_color[et]) plt.yticks(np.arange(len(labels)), labels) + plt.xlabel("Time since start (ms)") + plt.title(conf + " last garbage collection") plt.legend() return plt From ed46eb465fcb5814b821445994e140e55cc20d80 
Mon Sep 17 00:00:00 2001 From: Valdis Date: Sat, 15 Dec 2018 20:09:18 +0100 Subject: [PATCH 117/169] handle multiple configurations --- scripts/summary.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/summary.py b/scripts/summary.py index 3700fa6..9516a14 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -900,7 +900,7 @@ def write_md_file(rootdir, md_file, parent_configurations, configurations, bench if gc_charts: for conf in configurations: chart_md(md_file, gc_gantt_chart(plt, conf, bench, gc_events_for_last_n_collections(bench, conf, run)), rootdir, - "example_gc_last_2_" + str(run) + "_" + bench + ".png") + "example_gc_last_" + "_conf" + str(configurations.index(conf)) +"_" + str(run) + "_" + bench + ".png") def any_run_exists(bench, configurations, run): From 4f6a89c641e3d34cfe7cc22f996c73039e4552ad Mon Sep 17 00:00:00 2001 From: Valdis Date: Sat, 15 Dec 2018 21:03:35 +0100 Subject: [PATCH 118/169] cleanup --- scripts/summary.py | 69 +++++++++++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 32 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index 9516a14..340675e 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -4,6 +4,7 @@ import numpy as np import time import matplotlib.pyplot as plt +import matplotlib.patches as mpatches import os import argparse @@ -655,7 +656,7 @@ def gc_gantt_chart(plt, conf, bench, data): for e in collection_events: # [event, start, time] => (start, time) values.append((e[1], e[2])) - plt.broken_barh(values, (0, 1), color="black") + plt.broken_barh(values, (0, 1), color="black", label = "collection") event_type_to_color = { "mark": ("red", "darkred"), "sweep": ("blue", "darkblue"), "concmark": ("red", "darkred"), "concsweep": ("blue", "darkblue"), "mark_batch": ("red", "darkred"), "sweep_batch": ("blue", "darkblue"), "coalesce_batch": ("green", "darkgreen") @@ -673,7 +674,7 @@ def gc_gantt_chart(plt, conf, bench, data): time = e[2] if event == et: values.append((start, time)) - plt.broken_barh(values, (end, 1), color = event_type_to_color[et]) + plt.broken_barh(values, (end, 1), facecolors = event_type_to_color[et], label = et) for thread in sorted(batch_events_by_thread.keys()): end = len(labels) @@ -687,12 +688,16 @@ def gc_gantt_chart(plt, conf, bench, data): time = e[2] if event == et: values.append((start, time)) - plt.broken_barh(values, (end, 1), facecolors=event_type_to_color[et]) + plt.broken_barh(values, (end, 1), facecolors=event_type_to_color[et], label = et) plt.yticks(np.arange(len(labels)), labels) plt.xlabel("Time since start (ms)") - plt.title(conf + " last garbage collection") - plt.legend() + plt.title(conf + " " + bench + " last garbage collection") + plt.legend(handles=[(mpatches.Patch(color='black', label='collection')), + (mpatches.Patch(color='red', label='mark')), + (mpatches.Patch(color='blue', label='sweep')), + (mpatches.Patch(color='green', label='coalesce'))]) + return plt @@ -831,33 +836,33 @@ def chart_md(md_file, plt, rootdir, name): def write_md_file(rootdir, md_file, parent_configurations, configurations, benchmarks, gc_charts=False, size_charts=False): interesting_percentiles = [50, 90, 99] - # md_file.write("# Summary\n") - # for p in interesting_percentiles: - # md_file.write("## Benchmark run time (ms) at {} percentile \n".format(p)) - # data = percentile(configurations, benchmarks, p) - # chart_md(md_file, relative_execution_times(plt, configurations, benchmarks, data, p), rootdir, - # "relative_percentile_" + str(p) + ".png") - # 
write_md_table(md_file, configurations, benchmarks, data) - # - # md_file.write("## Benchmark total run time (ms) \n") - # data = totals(configurations, benchmarks) - # chart_md(md_file, total_execution_times(plt, configurations, benchmarks, data), rootdir, - # "relative_total.png") - # write_md_table(md_file, configurations, benchmarks, data) - # - # if gc_charts: - # md_file.write("## Total GC time on Application thread (ms) \n") - # mark, sweep, total = total_gc(configurations, benchmarks) - # chart_md(md_file, bar_chart_gc_relative(plt, configurations, benchmarks, mark, total), rootdir, - # "relative_gc_total.png") - # write_md_table_gc(md_file, configurations, benchmarks, mark, sweep, total) - # - # for p in interesting_percentiles: - # md_file.write("## GC pause time (ms) at {} percentile \n".format(p)) - # _, _, total = percentile_gc(configurations, benchmarks, p) - # chart_md(md_file, relative_gc_pauses(plt, configurations, benchmarks, total, p), rootdir, - # "relative_gc_percentile_" + str(p) + ".png") - # write_md_table(md_file, configurations, benchmarks, total) + md_file.write("# Summary\n") + for p in interesting_percentiles: + md_file.write("## Benchmark run time (ms) at {} percentile \n".format(p)) + data = percentile(configurations, benchmarks, p) + chart_md(md_file, relative_execution_times(plt, configurations, benchmarks, data, p), rootdir, + "relative_percentile_" + str(p) + ".png") + write_md_table(md_file, configurations, benchmarks, data) + + md_file.write("## Benchmark total run time (ms) \n") + data = totals(configurations, benchmarks) + chart_md(md_file, total_execution_times(plt, configurations, benchmarks, data), rootdir, + "relative_total.png") + write_md_table(md_file, configurations, benchmarks, data) + + if gc_charts: + md_file.write("## Total GC time on Application thread (ms) \n") + mark, sweep, total = total_gc(configurations, benchmarks) + chart_md(md_file, bar_chart_gc_relative(plt, configurations, benchmarks, mark, total), rootdir, + "relative_gc_total.png") + write_md_table_gc(md_file, configurations, benchmarks, mark, sweep, total) + + for p in interesting_percentiles: + md_file.write("## GC pause time (ms) at {} percentile \n".format(p)) + _, _, total = percentile_gc(configurations, benchmarks, p) + chart_md(md_file, relative_gc_pauses(plt, configurations, benchmarks, total, p), rootdir, + "relative_gc_percentile_" + str(p) + ".png") + write_md_table(md_file, configurations, benchmarks, total) md_file.write("# Individual benchmarks\n") for bench in benchmarks: From ab777a40416a7015d031401a2cfb08a3bd0b4785 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sat, 15 Dec 2018 21:44:58 +0100 Subject: [PATCH 119/169] stretch gantt horizontally --- scripts/summary.py | 52 ++++++++++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index 340675e..b7c9dd8 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -262,6 +262,7 @@ def percentile_gc(configurations, benchmarks, percentile): return out_mark, out_sweep, out_total + def total_gc(configurations, benchmarks): out_mark = [] out_sweep = [] @@ -274,7 +275,6 @@ def total_gc(configurations, benchmarks): return out_mark, out_sweep, out_total - def percentile_gc_bench(configurations, bench, p): res_mark = [] res_sweep = [] @@ -526,7 +526,7 @@ def sizes_per_conf(parent_configuration): parts = f[len("size_"):].split("-") min_sizes.append(to_gb(parts[0])) max_sizes.append(to_gb(parts[1])) - 
child_confs.append(os.path.join(parent_configuration,f)) + child_confs.append(os.path.join(parent_configuration, f)) return min_sizes, max_sizes, child_confs @@ -543,7 +543,7 @@ def size_compare_chart_generic(plt, parent_configurations, bench, get_percentile equal_confs.append(child_conf) # sorts all by size in GB - equal_sizes, equal_confs = zip(*[(x,y) for x,y in sorted(zip(equal_sizes,equal_confs))]) + equal_sizes, equal_confs = zip(*[(x, y) for x, y in sorted(zip(equal_sizes, equal_confs))]) percentiles = get_percentile(equal_confs, bench, p) plt.plot(np.array(equal_sizes), percentiles, label=parent_conf) plt.legend() @@ -567,11 +567,11 @@ def size_compare_chart_gc_combined(plt, parent_configurations, bench): equal_confs.append(child_conf) # sorts all by size in GB - equal_sizes, equal_confs = zip(*[(x,y) for x,y in sorted(zip(equal_sizes,equal_confs))]) + equal_sizes, equal_confs = zip(*[(x, y) for x, y in sorted(zip(equal_sizes, equal_confs))]) mark, _, total = total_gc_bench(equal_confs, bench) - plt.plot(np.array(equal_sizes), total, label=parent_conf + "-sweep") # total (look like sweep) - plt.plot(np.array(equal_sizes), mark, label=parent_conf + "-mark") # mark time + plt.plot(np.array(equal_sizes), total, label=parent_conf + "-sweep") # total (look like sweep) + plt.plot(np.array(equal_sizes), mark, label=parent_conf + "-mark") # mark time plt.legend() plt.xlim(xmin=0) plt.ylim(ymin=0) @@ -586,7 +586,7 @@ def size_compare_chart(plt, parent_configurations, bench, p): plt = size_compare_chart_generic(plt, parent_configurations, bench, percentile_bench, p) plt.title("{} at {} percentile".format(bench, p)) plt.ylabel("Run time (ms)") - return plt + return plt def size_compare_chart_gc(plt, parent_configurations, bench, p): @@ -602,6 +602,7 @@ def size_compare_chart_gc_mark(plt, parent_configurations, bench, p): plt.ylabel("GC mark time (ms)") return plt + def size_compare_chart_gc_sweep(plt, parent_configurations, bench, p): plt = size_compare_chart_generic(plt, parent_configurations, bench, percentile_gc_bench_sweep, p) plt.title("{}: GC sweep pause time at {} percentile".format(bench, p)) @@ -630,12 +631,14 @@ def percentiles_chart(plt, configurations, bench, limit=99): plt.ylabel("Run time (ms)") return plt + def gc_pause_time_chart(plt, configurations, bench, limit=100): plt = percentiles_chart_generic(plt, configurations, bench, gc_stats_total, limit) plt.title(bench + ": Garbage Collector Pause Times") plt.ylabel("GC pause time (ms)") return plt + def print_table(configurations, benchmarks, data): leading = ['name'] for conf in configurations: @@ -648,7 +651,7 @@ def print_table(configurations, benchmarks, data): def gc_gantt_chart(plt, conf, bench, data): plt.clf() plt.cla() - + plt.figure(figsize=(100, 24)) labels = ["Collections"] collection_events, phase_events_by_thread, batch_events_by_thread = data @@ -656,9 +659,10 @@ def gc_gantt_chart(plt, conf, bench, data): for e in collection_events: # [event, start, time] => (start, time) values.append((e[1], e[2])) - plt.broken_barh(values, (0, 1), color="black", label = "collection") + plt.broken_barh(values, (0, 1), color="black", label="collection") event_type_to_color = { - "mark": ("red", "darkred"), "sweep": ("blue", "darkblue"), "concmark": ("red", "darkred"), "concsweep": ("blue", "darkblue"), + "mark": ("red", "darkred"), "sweep": ("blue", "darkblue"), "concmark": ("red", "darkred"), + "concsweep": ("blue", "darkblue"), "mark_batch": ("red", "darkred"), "sweep_batch": ("blue", "darkblue"), "coalesce_batch": ("green", 
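Since the broken_barh collections above would otherwise contribute duplicate or unhelpful legend entries, PATCH 118 builds the legend from proxy artists; the pattern in isolation:

    import matplotlib.patches as mpatches
    import matplotlib.pyplot as plt

    plt.legend(handles=[mpatches.Patch(color="black", label="collection"),
                        mpatches.Patch(color="red", label="mark"),
                        mpatches.Patch(color="blue", label="sweep"),
                        mpatches.Patch(color="green", label="coalesce")])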
"darkgreen") } @@ -674,7 +678,7 @@ def gc_gantt_chart(plt, conf, bench, data): time = e[2] if event == et: values.append((start, time)) - plt.broken_barh(values, (end, 1), facecolors = event_type_to_color[et], label = et) + plt.broken_barh(values, (end, 1), facecolors=event_type_to_color[et], label=et) for thread in sorted(batch_events_by_thread.keys()): end = len(labels) @@ -688,7 +692,7 @@ def gc_gantt_chart(plt, conf, bench, data): time = e[2] if event == et: values.append((start, time)) - plt.broken_barh(values, (end, 1), facecolors=event_type_to_color[et], label = et) + plt.broken_barh(values, (end, 1), facecolors=event_type_to_color[et], label=et) plt.yticks(np.arange(len(labels)), labels) plt.xlabel("Time since start (ms)") @@ -701,7 +705,6 @@ def gc_gantt_chart(plt, conf, bench, data): return plt - def write_md_table(file, configurations, benchmarks, data): header = ['name'] header.append(configurations[0]) @@ -809,8 +812,6 @@ def write_md_table_gc(file, configurations, benchmarks, mark_data, sweep_data, t file.write("|\n") - - def cell(x, base): if base > 0: percent_diff = (float(x) / base - 1) * 100 @@ -830,11 +831,13 @@ def benchmark_short_name(bench): def chart_md(md_file, plt, rootdir, name): - plt.savefig(rootdir + name) + plt.savefig(rootdir + name, pad_inches=0, bbox_inches='tight') + plt.figure(figsize=(32, 24)) md_file.write("![Chart]({})\n\n".format(name)) -def write_md_file(rootdir, md_file, parent_configurations, configurations, benchmarks, gc_charts=False, size_charts=False): +def write_md_file(rootdir, md_file, parent_configurations, configurations, benchmarks, gc_charts=False, + size_charts=False): interesting_percentiles = [50, 90, 99] md_file.write("# Summary\n") for p in interesting_percentiles: @@ -904,8 +907,11 @@ def write_md_file(rootdir, md_file, parent_configurations, configurations, bench "example_run_" + str(run) + "_" + bench + ".png") if gc_charts: for conf in configurations: - chart_md(md_file, gc_gantt_chart(plt, conf, bench, gc_events_for_last_n_collections(bench, conf, run)), rootdir, - "example_gc_last_" + "_conf" + str(configurations.index(conf)) +"_" + str(run) + "_" + bench + ".png") + chart_md(md_file, + gc_gantt_chart(plt, conf, bench, gc_events_for_last_n_collections(bench, conf, run)), + rootdir, + "example_gc_last_" + "_conf" + str(configurations.index(conf)) + "_" + str( + run) + "_" + bench + ".png") def any_run_exists(bench, configurations, run): @@ -924,9 +930,9 @@ def discover_benchmarks(configurations): parent_folders = next(os.walk(os.path.join("results", conf)))[1] for pf in parent_folders: if is_subconfig(pf): - for child in next(os.walk(os.path.join("results", conf, pf)))[1]: - if child not in benchmarks: - benchmarks.append(child) + for child in next(os.walk(os.path.join("results", conf, pf)))[1]: + if child not in benchmarks: + benchmarks.append(child) else: if pf not in benchmarks: benchmarks.append(pf) @@ -989,7 +995,7 @@ def is_subconfig(subconf): benchmarks = all_benchmarks report_dir = "reports/summary_" + time.strftime('%Y%m%d_%H%M%S') + "_" + comment + "/" - plt.rcParams["figure.figsize"] = [32.0, 24.0] + plt.figure(figsize=(32, 24)) plt.rcParams["font.size"] = 20.0 mkdir(report_dir) with open(os.path.join(report_dir, "Readme.md"), 'w+') as md_file: From cdbf845783011933c94ea2369411e6c87a8b1a8c Mon Sep 17 00:00:00 2001 From: Valdis Date: Sun, 16 Dec 2018 10:13:59 +0100 Subject: [PATCH 120/169] clear the figures --- scripts/summary.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/summary.py b/scripts/summary.py 
index b7c9dd8..72a5a2a 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -832,6 +832,7 @@ def benchmark_short_name(bench): def chart_md(md_file, plt, rootdir, name): plt.savefig(rootdir + name, pad_inches=0, bbox_inches='tight') + plt.close("all") plt.figure(figsize=(32, 24)) md_file.write("![Chart]({})\n\n".format(name)) From 84d4cd8b905f0be08e01afd374523358c1c21b49 Mon Sep 17 00:00:00 2001 From: Valdis Date: Mon, 24 Dec 2018 18:18:44 +0100 Subject: [PATCH 121/169] exclude list benchmark from summary --- scripts/summary.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/summary.py b/scripts/summary.py index 72a5a2a..e187806 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -993,7 +993,8 @@ def is_subconfig(subconf): for b in args.benchmark: benchmarks += filter(lambda s: s.startswith(b), all_benchmarks) else: - benchmarks = all_benchmarks + excluded_benchmarks = ['list.ListBenchmark'] + benchmarks = [x for x in all_benchmarks if x not in excluded_benchmarks] report_dir = "reports/summary_" + time.strftime('%Y%m%d_%H%M%S') + "_" + comment + "/" plt.figure(figsize=(32, 24)) From 8b5668f9ede60eb64e96d9f5d0175543a97c0110 Mon Sep 17 00:00:00 2001 From: Valdis Date: Fri, 28 Dec 2018 10:10:42 +0100 Subject: [PATCH 122/169] option for commix gc --- confs/scala-native-0.3.9-SNAPSHOT-commix/build.sbt | 6 ++++++ confs/scala-native-0.3.9-SNAPSHOT-commix/compile | 1 + confs/scala-native-0.3.9-SNAPSHOT-commix/plugins.sbt | 1 + confs/scala-native-0.3.9-SNAPSHOT-commix/run | 1 + 4 files changed, 9 insertions(+) create mode 100644 confs/scala-native-0.3.9-SNAPSHOT-commix/build.sbt create mode 100644 confs/scala-native-0.3.9-SNAPSHOT-commix/compile create mode 100644 confs/scala-native-0.3.9-SNAPSHOT-commix/plugins.sbt create mode 100644 confs/scala-native-0.3.9-SNAPSHOT-commix/run diff --git a/confs/scala-native-0.3.9-SNAPSHOT-commix/build.sbt b/confs/scala-native-0.3.9-SNAPSHOT-commix/build.sbt new file mode 100644 index 0000000..2104b7a --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT-commix/build.sbt @@ -0,0 +1,6 @@ +scalaVersion := "2.11.12" +enablePlugins(ScalaNativePlugin) +nativeLinkStubs := true +nativeGC := "commix" +nativeMode := "release" +nativeLTO := "thin" diff --git a/confs/scala-native-0.3.9-SNAPSHOT-commix/compile b/confs/scala-native-0.3.9-SNAPSHOT-commix/compile new file mode 100644 index 0000000..2f3f09f --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT-commix/compile @@ -0,0 +1 @@ +nativeLink diff --git a/confs/scala-native-0.3.9-SNAPSHOT-commix/plugins.sbt b/confs/scala-native-0.3.9-SNAPSHOT-commix/plugins.sbt new file mode 100644 index 0000000..c1423b6 --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT-commix/plugins.sbt @@ -0,0 +1 @@ +addSbtPlugin("org.scala-native" % "sbt-scala-native" % "0.3.9-SNAPSHOT") diff --git a/confs/scala-native-0.3.9-SNAPSHOT-commix/run b/confs/scala-native-0.3.9-SNAPSHOT-commix/run new file mode 100644 index 0000000..ae89e34 --- /dev/null +++ b/confs/scala-native-0.3.9-SNAPSHOT-commix/run @@ -0,0 +1 @@ +target/scala-2.11/scala-native-benchmarks-out From 24d31e5c25e49110dc804a26fee85b858cbc669d Mon Sep 17 00:00:00 2001 From: Valdis Date: Wed, 2 Jan 2019 13:13:16 +0100 Subject: [PATCH 123/169] print compile errors when compiling the test --- scripts/run.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/scripts/run.py b/scripts/run.py index 0543a7f..d9f6674 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -128,7 +128,7 @@ def compile(conf, bench, compilecmd, debug, 
trace): if trace: cmd.append('set nativeCompileOptions +="-DDEBUG_PRINT"') cmd.append(compilecmd) - return run(cmd) + return try_run(cmd) sbt = where('sbt') @@ -406,7 +406,6 @@ def create_symlink(generalized_dir, root_dir): for conf in configurations: conf_name, ref = ref_parse(conf) - generalized_dir = os.path.join('results', conf + suffix) if ref == None: sha1 = None @@ -479,7 +478,10 @@ def create_symlink(generalized_dir, root_dir): else: os.remove('project/plugins.sbt') - compile(conf, bench, compilecmd, args.gcdebug, args.gctrace) + compile_success = compile(conf, bench, compilecmd, args.gcdebug, args.gctrace) + if not compile_success: + compile_fail += [conf] + break resultsdir = os.path.join(subconfig_dir, bench) print "results in", resultsdir From dae1ae7d5646d5cdcd309b1150c135132bb00d97 Mon Sep 17 00:00:00 2001 From: Valdis Date: Wed, 2 Jan 2019 13:44:22 +0100 Subject: [PATCH 124/169] only on errors --- scripts/run.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/scripts/run.py b/scripts/run.py index d9f6674..23d99f1 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -46,6 +46,15 @@ def try_run(cmd, env=None, wd=None): return False +def try_run_silent(cmd, env=None, wd=None): + try: + run(cmd, env, wd) + return True + except subp.CalledProcessError as err: + print err.output + return False + + def run(cmd, env=None, wd=None): print(">>> " + str(cmd)) if wd == None: @@ -128,7 +137,7 @@ def compile(conf, bench, compilecmd, debug, trace): if trace: cmd.append('set nativeCompileOptions +="-DDEBUG_PRINT"') cmd.append(compilecmd) - return try_run(cmd) + return try_run_silent(cmd) sbt = where('sbt') From 03445786b3fb528b5bb2f1c5bfaca9f7aa7d6298 Mon Sep 17 00:00:00 2001 From: Valdis Date: Fri, 11 Jan 2019 13:00:42 +0100 Subject: [PATCH 125/169] improve readability --- scripts/summary.py | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index e187806..f744b55 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -648,12 +648,27 @@ def print_table(configurations, benchmarks, data): print ','.join([bench] + list(map(str, res))) -def gc_gantt_chart(plt, conf, bench, data): +def thread_id_tostring(n): + if int(n) < 0: + return "mutator" + n + else: + return n + + +def gc_gantt_chart(plt, conf, bench, data, only_batches = False): plt.clf() plt.cla() plt.figure(figsize=(100, 24)) - labels = ["Collections"] - collection_events, phase_events_by_thread, batch_events_by_thread = data + if only_batches: + labels = [] + else: + labels = ["Collections"] + if only_batches: + _, _, batch_events_by_thread = data + collection_events = dict() + phase_events_by_thread = dict() + else: + collection_events, phase_events_by_thread, batch_events_by_thread = data values = [] for e in collection_events: @@ -668,7 +683,7 @@ def gc_gantt_chart(plt, conf, bench, data): for thread in sorted(phase_events_by_thread.keys()): end = len(labels) - labels.append("Phases" + str(thread)) + labels.append("Phases " + thread_id_tostring(thread)) raw_values = phase_events_by_thread[thread] for et in phase_event_types: values = [] @@ -682,7 +697,7 @@ def gc_gantt_chart(plt, conf, bench, data): for thread in sorted(batch_events_by_thread.keys()): end = len(labels) - labels.append("Batches" + str(thread)) + labels.append("Batches " + thread_id_tostring(thread)) raw_values = batch_events_by_thread[thread] for et in batch_events_types: values = [] @@ -908,11 +923,17 @@ def write_md_file(rootdir, md_file, 
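try_run and try_run_silent above both rely on subprocess.check_output raising CalledProcessError on a non-zero exit, with the child's combined stdout/stderr kept on the exception, so a failed sbt invocation can be reported without killing the whole batch. The pattern in isolation (Python 2, matching the scripts):

    import subprocess as subp

    def try_cmd(cmd):
        try:
            subp.check_output(cmd, stderr=subp.STDOUT)
            return True
        except subp.CalledProcessError as err:
            print err.output   # everything the child wrote, via the STDOUT redirect
            return False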
parent_configurations, configurations, bench "example_run_" + str(run) + "_" + bench + ".png") if gc_charts: for conf in configurations: + gc_data = gc_events_for_last_n_collections(bench, conf, run) chart_md(md_file, - gc_gantt_chart(plt, conf, bench, gc_events_for_last_n_collections(bench, conf, run)), + gc_gantt_chart(plt, conf, bench, gc_data), rootdir, "example_gc_last_" + "_conf" + str(configurations.index(conf)) + "_" + str( run) + "_" + bench + ".png") + chart_md(md_file, + gc_gantt_chart(plt, conf, bench, gc_data, only_batches=True), + rootdir, + "example_gc_last_batches" + "_conf" + str(configurations.index(conf)) + "_" + str( + run) + "_" + bench + ".png") def any_run_exists(bench, configurations, run): From 58230f12fb66c6f092a7be47c8deedeac7fcbb59 Mon Sep 17 00:00:00 2001 From: Valdis Date: Fri, 11 Jan 2019 18:55:04 +0100 Subject: [PATCH 126/169] scripts for using it as a cronjob --- .gitignore | 4 +++- scripts/start.sh | 8 ++++++++ scripts/stop.sh | 8 ++++++++ 3 files changed, 19 insertions(+), 1 deletion(-) create mode 100755 scripts/start.sh create mode 100755 scripts/stop.sh diff --git a/.gitignore b/.gitignore index dfc26b9..1b650a7 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,6 @@ bin/.scalafmt* results/ reports/ *.iprof -.idea \ No newline at end of file +.idea +*.pid +jobs.sh \ No newline at end of file diff --git a/scripts/start.sh b/scripts/start.sh new file mode 100755 index 0000000..4b718bf --- /dev/null +++ b/scripts/start.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +echo $$ | tee .pid + +if [ -f jobs.sh ]; then + ./jobs.sh +fi + +rm .pid \ No newline at end of file diff --git a/scripts/stop.sh b/scripts/stop.sh new file mode 100755 index 0000000..71ca1f3 --- /dev/null +++ b/scripts/stop.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +if [ -f .pid ]; then + PID=$(cat .pid) + kill -9 $PID + rm .pid +fi + From a56373d1232dcb813785a24abbc7e7c52c2d0e29 Mon Sep 17 00:00:00 2001 From: Valdis Date: Fri, 11 Jan 2019 20:55:09 +0100 Subject: [PATCH 127/169] write logs --- .gitignore | 3 ++- scripts/start.sh | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 1b650a7..8542310 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,5 @@ reports/ *.iprof .idea *.pid -jobs.sh \ No newline at end of file +jobs.sh +logs/ \ No newline at end of file diff --git a/scripts/start.sh b/scripts/start.sh index 4b718bf..3c50eca 100755 --- a/scripts/start.sh +++ b/scripts/start.sh @@ -2,7 +2,8 @@ echo $$ | tee .pid if [ -f jobs.sh ]; then - ./jobs.sh + mkdir -p logs + ./jobs.sh | tee logs/job_$(date +%Y%m%d_%H%M%S).log fi rm .pid \ No newline at end of file From 1931fdaff175cea16bd49b6bb79ecfc0f7745344 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sat, 12 Jan 2019 12:05:39 +0100 Subject: [PATCH 128/169] refactor: split compile and directory creation --- scripts/run.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/scripts/run.py b/scripts/run.py index 23d99f1..da398b8 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -321,6 +321,8 @@ def create_symlink(generalized_dir, root_dir): parser.add_argument("--runs", help="number of runs", type=int, default=default_runs) parser.add_argument("--batches", help="number of batches per run", type=int, default=default_batches) parser.add_argument("--benchmark", help="benchmarks to run", action='append') + parser.add_argument("--argnames", help="compile arguments to set, mark flags with a '?' 
at the end, split with ','", type=str) + parser.add_argument("--argv", help="argument values, split with ',', booleans as true or false", action='append') parser.add_argument("--size", help="different size settings to use", action='append') parser.add_argument("--gcthreads", help="different number of garbage collection threads to use", action='append') parser.add_argument("--par", help="number of parallel processes", type=int, default=default_par) @@ -415,18 +417,13 @@ def create_symlink(generalized_dir, root_dir): for conf in configurations: conf_name, ref = ref_parse(conf) - generalized_dir = os.path.join('results', conf + suffix) if ref == None: sha1 = None - root_dir = generalized_dir else: sha1 = get_ref(ref) if sha1 == None: compile_fail += [conf] continue - root_dir = os.path.join('results', conf + "." + sha1 + "." + suffix) - - mkdir(root_dir) if sha1 != None: success = compile_scala_native(ref, sha1) @@ -434,6 +431,15 @@ def create_symlink(generalized_dir, root_dir): compile_fail += [conf] continue + + # derived configurations + generalized_dir = os.path.join('results', conf + suffix) + if sha1 == None: + root_dir = generalized_dir + else: + root_dir = os.path.join('results', conf + "." + sha1 + "." + suffix) + + mkdir(root_dir) symlink = None if generalized_dir != root_dir: create_symlink(generalized_dir, root_dir) From 22058d781032469fc30b655ec0363cc4907f6b52 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sat, 12 Jan 2019 12:19:55 +0100 Subject: [PATCH 129/169] empty derived configs --- scripts/run.py | 182 +++++++++++++++++++++++++------------------------ 1 file changed, 92 insertions(+), 90 deletions(-) diff --git a/scripts/run.py b/scripts/run.py index da398b8..6dee51a 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -385,6 +385,8 @@ def create_symlink(generalized_dir, root_dir): should_fetch = True break + derived_configs = [("", dict())] + if should_fetch: git_fetch(scala_native_dir) @@ -431,99 +433,99 @@ def create_symlink(generalized_dir, root_dir): compile_fail += [conf] continue - # derived configurations - generalized_dir = os.path.join('results', conf + suffix) - if sha1 == None: - root_dir = generalized_dir - else: - root_dir = os.path.join('results', conf + "." + sha1 + "." 
+ suffix) - - mkdir(root_dir) - symlink = None - if generalized_dir != root_dir: - create_symlink(generalized_dir, root_dir) - symlinks += [[generalized_dir, root_dir]] - symlink = generalized_dir - if args.upload or args.gitupload: - create_symlink(os.path.join(upload_dir, generalized_dir), root_dir) - - # subconfigurations - for (size, gcThreads) in itertools.product(sizes, gcThreadCounts): - - if size == ["default", "default"] and gcThreads == "default": - subconfig_dir = root_dir + for (der_suffix, extra_args) in derived_configs: + generalized_dir = os.path.join('results', conf + suffix + der_suffix) + if sha1 == None: + root_dir = generalized_dir + der_suffix else: - size_str = [] - if size != ["default", "default"] : - size_str = ["size_" + size[0] + "-" + size[1]] - gcThreads_str = [] - if gcThreads != "default": - gcThreads_str = ["gcthreads_" + gcThreads] - subconf_str = "_".join(size_str + gcThreads_str) - subconfig_dir = os.path.join(root_dir, subconf_str) - - if not args.overwrite and os.path.isfile(os.path.join(subconfig_dir, ".complete")): - print subconfig_dir, "already complete, skipping" - skipped += [subconfig_dir] - continue - - if not args.append: - sh.rmtree(subconfig_dir, ignore_errors=True) - - mkdir(subconfig_dir) - - for bench in benchmarks: - print('--- heap size: {} GC threads: {} conf: {}, bench: {}'.format(size, gcThreads, conf, bench)) - - input = slurp(os.path.join('input', bench)) - output = slurp(os.path.join('output', bench)) - compilecmd = slurp(os.path.join('confs', conf_name, 'compile')) - runcmd = slurp(os.path.join('confs', conf_name, 'run')) \ - .replace('$BENCH', bench) \ - .replace('$HOME', os.environ['HOME']).split(' ') - - if os.path.exists(os.path.join('confs', conf_name, 'build.sbt')): - sh.copyfile(os.path.join('confs', conf_name, 'build.sbt'), 'build.sbt') - else: - os.remove('build.sbt') - - if os.path.exists(os.path.join('confs', conf_name, 'plugins.sbt')): - sh.copyfile(os.path.join('confs', conf_name, 'plugins.sbt'), 'project/plugins.sbt') + root_dir = os.path.join('results', conf + "." + sha1 + "." 
+ suffix + der_suffix) + + mkdir(root_dir) + symlink = None + if generalized_dir != root_dir: + create_symlink(generalized_dir, root_dir) + symlinks += [[generalized_dir, root_dir]] + symlink = generalized_dir + if args.upload or args.gitupload: + create_symlink(os.path.join(upload_dir, generalized_dir), root_dir) + + # subconfigurations + for (size, gcThreads) in itertools.product(sizes, gcThreadCounts): + + if size == ["default", "default"] and gcThreads == "default": + subconfig_dir = root_dir else: - os.remove('project/plugins.sbt') - - compile_success = compile(conf, bench, compilecmd, args.gcdebug, args.gctrace) - if not compile_success: - compile_fail += [conf] - break - - resultsdir = os.path.join(subconfig_dir, bench) - print "results in", resultsdir - mkdir(resultsdir) - - cmd = [] - cmd.extend(runcmd) - cmd.extend([str(batches), str(batch_size), input, output]) - - to_run = [] - for n in xrange(runs): - to_run += [ - dict(runs=runs, cmd=cmd, resultsdir=resultsdir, conf=conf, bench=bench, n=n, gcstats=args.gc, - size=size, gcThreads=gcThreads)] - - if par == 1: - for tr in to_run: - failed += single_run(tr) - else: - failed += sum(pool.map(single_run, to_run), []) - - # mark it as complete - open(os.path.join(subconfig_dir, ".complete"), 'w+').close() - result_dirs += [subconfig_dir] - - if args.upload or args.gitupload: - upload(subconfig_dir, symlink, args.gitupload, args.overwrite) + size_str = [] + if size != ["default", "default"] : + size_str = ["size_" + size[0] + "-" + size[1]] + gcThreads_str = [] + if gcThreads != "default": + gcThreads_str = ["gcthreads_" + gcThreads] + subconf_str = "_".join(size_str + gcThreads_str) + subconfig_dir = os.path.join(root_dir, subconf_str) + + if not args.overwrite and os.path.isfile(os.path.join(subconfig_dir, ".complete")): + print subconfig_dir, "already complete, skipping" + skipped += [subconfig_dir] + continue + + if not args.append: + sh.rmtree(subconfig_dir, ignore_errors=True) + + mkdir(subconfig_dir) + + for bench in benchmarks: + print('--- heap size: {} GC threads: {} conf: {}, bench: {}'.format(size, gcThreads, conf, bench)) + + input = slurp(os.path.join('input', bench)) + output = slurp(os.path.join('output', bench)) + compilecmd = slurp(os.path.join('confs', conf_name, 'compile')) + runcmd = slurp(os.path.join('confs', conf_name, 'run')) \ + .replace('$BENCH', bench) \ + .replace('$HOME', os.environ['HOME']).split(' ') + + if os.path.exists(os.path.join('confs', conf_name, 'build.sbt')): + sh.copyfile(os.path.join('confs', conf_name, 'build.sbt'), 'build.sbt') + else: + os.remove('build.sbt') + + if os.path.exists(os.path.join('confs', conf_name, 'plugins.sbt')): + sh.copyfile(os.path.join('confs', conf_name, 'plugins.sbt'), 'project/plugins.sbt') + else: + os.remove('project/plugins.sbt') + + compile_success = compile(conf, bench, compilecmd, args.gcdebug, args.gctrace) + if not compile_success: + compile_fail += [conf] + break + + resultsdir = os.path.join(subconfig_dir, bench) + print "results in", resultsdir + mkdir(resultsdir) + + cmd = [] + cmd.extend(runcmd) + cmd.extend([str(batches), str(batch_size), input, output]) + + to_run = [] + for n in xrange(runs): + to_run += [ + dict(runs=runs, cmd=cmd, resultsdir=resultsdir, conf=conf, bench=bench, n=n, gcstats=args.gc, + size=size, gcThreads=gcThreads)] + + if par == 1: + for tr in to_run: + failed += single_run(tr) + else: + failed += sum(pool.map(single_run, to_run), []) + + # mark it as complete + open(os.path.join(subconfig_dir, ".complete"), 'w+').close() + 
result_dirs += [subconfig_dir] + + if args.upload or args.gitupload: + upload(subconfig_dir, symlink, args.gitupload, args.overwrite) print "results:" for dir in result_dirs: From 17897b68df3798fe4c1a62e5db46595eca51953f Mon Sep 17 00:00:00 2001 From: Valdis Date: Sat, 12 Jan 2019 12:39:53 +0100 Subject: [PATCH 130/169] take extra compile arguments --- scripts/run.py | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/scripts/run.py b/scripts/run.py index 6dee51a..d5646cf 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -128,7 +128,7 @@ def compile_scala_native(ref, sha1): return False -def compile(conf, bench, compilecmd, debug, trace): +def compile(conf, bench, compilecmd, debug, trace, extra_args): cmd = [sbt, '-no-colors', '-J-Xmx2G', 'clean'] cmd.append('set mainClass in Compile := Some("{}")'.format(bench)) if conf.startswith("scala-native"): @@ -136,6 +136,11 @@ def compile(conf, bench, compilecmd, debug, trace): cmd.append('set nativeCompileOptions ++= Seq("-g", "-DDEBUG_ASSERT")') if trace: cmd.append('set nativeCompileOptions +="-DDEBUG_PRINT"') + for k,v in extra_args.iteritems(): + if k.endswith("?"): + cmd.append('set nativeCompileOptions +="-D{}"'.format(k[:-1])) + else: + cmd.append('set nativeCompileOptions +="-D{}={}"'.format(k,v)) cmd.append(compilecmd) return try_run_silent(cmd) @@ -385,7 +390,22 @@ def create_symlink(generalized_dir, root_dir): should_fetch = True break - derived_configs = [("", dict())] + if args.argnames != None and args.argv != None: + derived_configs = [] + argnames = args.argnames.split(",") + for valset in args.argv : + values = valset.split(",") + suffix = "-".join(values) + extra_args = dict() + for (name, value) in zip(argnames, values): + if name.endswith("?"): + if value in ["1", "true", "TRUE", "True"]: + extra_args[name[:-1]] = True + else: + extra_args[name] = value + derived_configs.append((suffix, extra_args)) + else: + derived_configs = [("", dict())] if should_fetch: git_fetch(scala_native_dir) @@ -495,7 +515,7 @@ def create_symlink(generalized_dir, root_dir): else: os.remove('project/plugins.sbt') - compile_success = compile(conf, bench, compilecmd, args.gcdebug, args.gctrace) + compile_success = compile(conf, bench, compilecmd, args.gcdebug, args.gctrace, extra_args) if not compile_success: compile_fail += [conf] break From 2f88b74d6651cfe172053c01234082d3afbe10fa Mon Sep 17 00:00:00 2001 From: Valdis Date: Sat, 12 Jan 2019 13:00:21 +0100 Subject: [PATCH 131/169] a for arguments --- scripts/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/run.py b/scripts/run.py index d5646cf..9081169 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -395,7 +395,7 @@ def create_symlink(generalized_dir, root_dir): argnames = args.argnames.split(",") for valset in args.argv : values = valset.split(",") - suffix = "-".join(values) + suffix = "-a" + ("-".join(values)) extra_args = dict() for (name, value) in zip(argnames, values): if name.endswith("?"): From 5d3e36cce2c9860bb61c677e588d4d9c4a289b70 Mon Sep 17 00:00:00 2001 From: Valdis Date: Mon, 14 Jan 2019 18:41:30 +0100 Subject: [PATCH 132/169] customize benchmark inputs --- scripts/run.py | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/scripts/run.py b/scripts/run.py index 9081169..f290daf 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -203,6 +203,14 @@ def expand_wild_cards(arg): return arg +def benchmark_parse(arg): + parts = arg.split("@") + if len(parts) == 2: + 
return parts[0], parts[1] + else: + return arg, None + + def ref_parse(arg): parts = arg.split("@") if len(parts) == 3: @@ -349,7 +357,15 @@ def create_symlink(generalized_dir, root_dir): if args.benchmark != None: benchmarks = [] for b in args.benchmark: - benchmarks += filter(lambda s: s.startswith(b), all_benchmarks) + if b == "default": + benchmarks += all_benchmarks + else: + bname, bargs = benchmark_parse(b) + matching = filter(lambda s: s.startswith(bname), all_benchmarks) + if bargs != None: + benchmarks += map(lambda x: (x, bargs), matching) + else: + benchmarks += matching else: benchmarks = all_benchmarks @@ -495,10 +511,16 @@ def create_symlink(generalized_dir, root_dir): mkdir(subconfig_dir) - for bench in benchmarks: - print('--- heap size: {} GC threads: {} conf: {}, bench: {}'.format(size, gcThreads, conf, bench)) + for bconf in benchmarks: + if type(bconf) is tuple: + bench, input = bconf + bfullname = bench + "@" + input + else: + bench = bconf + input = slurp(os.path.join('input', bench)) + bfullname = bench + print('--- heap size: {} GC threads: {} conf: {}, bench: {}'.format(size, gcThreads, conf, bfullname)) - input = slurp(os.path.join('input', bench)) output = slurp(os.path.join('output', bench)) compilecmd = slurp(os.path.join('confs', conf_name, 'compile')) runcmd = slurp(os.path.join('confs', conf_name, 'run')) \ @@ -520,7 +542,7 @@ def create_symlink(generalized_dir, root_dir): compile_fail += [conf] break - resultsdir = os.path.join(subconfig_dir, bench) + resultsdir = os.path.join(subconfig_dir, bfullname) print "results in", resultsdir mkdir(resultsdir) From 34c345c93075d4c472ca13bf7103eaa12477a612 Mon Sep 17 00:00:00 2001 From: Valdis Date: Mon, 14 Jan 2019 19:45:35 +0100 Subject: [PATCH 133/169] histogram benchmark --- input/histogram.Histogram | 1 + output/histogram.Histogram | 1 + scripts/run.py | 1 + src/main/scala/histogram/Histogram.scala | 21 +++++++++++++++++++++ 4 files changed, 24 insertions(+) create mode 100644 input/histogram.Histogram create mode 100644 output/histogram.Histogram create mode 100644 src/main/scala/histogram/Histogram.scala diff --git a/input/histogram.Histogram b/input/histogram.Histogram new file mode 100644 index 0000000..3365996 --- /dev/null +++ b/input/histogram.Histogram @@ -0,0 +1 @@ +300000,100000 \ No newline at end of file diff --git a/output/histogram.Histogram b/output/histogram.Histogram new file mode 100644 index 0000000..f32a580 --- /dev/null +++ b/output/histogram.Histogram @@ -0,0 +1 @@ +true \ No newline at end of file diff --git a/scripts/run.py b/scripts/run.py index f290daf..3cc684a 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -163,6 +163,7 @@ def compile(conf, bench, compilecmd, debug, trace, extra_args): 'mandelbrot.MandelbrotBenchmark', 'nbody.NbodyBenchmark', 'sudoku.SudokuBenchmark', + 'histogram.Histogram', ] stable = 'scala-native-0.3.8' diff --git a/src/main/scala/histogram/Histogram.scala b/src/main/scala/histogram/Histogram.scala new file mode 100644 index 0000000..b5b4e54 --- /dev/null +++ b/src/main/scala/histogram/Histogram.scala @@ -0,0 +1,21 @@ +package histogram + +import scala.util.Random + +object Histogram extends communitybench.Benchmark{ + override def run(input: String): Any = { + val Array(items, k) = input.split(",").map(_.toInt) + var histogram = Map.empty[Int, Int] + val random = new Random(13371337) + (1 to items).foreach { + _ => + val key = random.nextInt(k) + val newValue = histogram.getOrElse(key, 0) + 1 + histogram += key -> newValue + } + histogram.values.sum == items 
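+      // The Boolean result doubles as a sanity check: every generated key
+      // was counted exactly once, so the bucket totals must sum back to
+      // `items`; the expected-output file pins this result to "true".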
+ } + + override def main(args: Array[String]): Unit = + super.main(args) +} From 9b7236cead190d1b03e32b6543160f0276ce7d35 Mon Sep 17 00:00:00 2001 From: Valdis Date: Mon, 14 Jan 2019 21:23:19 +0100 Subject: [PATCH 134/169] kill all subprocesses --- scripts/stop.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/stop.sh b/scripts/stop.sh index 71ca1f3..b6deb96 100755 --- a/scripts/stop.sh +++ b/scripts/stop.sh @@ -2,7 +2,7 @@ if [ -f .pid ]; then PID=$(cat .pid) - kill -9 $PID + kill -9 $(ps -s $PID -o pid=) rm .pid fi From 93db2d80a95599d6251ffec4032f8f4f615b272a Mon Sep 17 00:00:00 2001 From: Valdis Date: Mon, 14 Jan 2019 21:38:10 +0100 Subject: [PATCH 135/169] use session ids --- scripts/start.sh | 9 +-------- scripts/start0.sh | 9 +++++++++ 2 files changed, 10 insertions(+), 8 deletions(-) create mode 100755 scripts/start0.sh diff --git a/scripts/start.sh b/scripts/start.sh index 3c50eca..d6c4d46 100755 --- a/scripts/start.sh +++ b/scripts/start.sh @@ -1,9 +1,2 @@ #!/usr/bin/env bash -echo $$ | tee .pid - -if [ -f jobs.sh ]; then - mkdir -p logs - ./jobs.sh | tee logs/job_$(date +%Y%m%d_%H%M%S).log -fi - -rm .pid \ No newline at end of file +setsid scripts/start0.sh \ No newline at end of file diff --git a/scripts/start0.sh b/scripts/start0.sh new file mode 100755 index 0000000..3c50eca --- /dev/null +++ b/scripts/start0.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +echo $$ | tee .pid + +if [ -f jobs.sh ]; then + mkdir -p logs + ./jobs.sh | tee logs/job_$(date +%Y%m%d_%H%M%S).log +fi + +rm .pid \ No newline at end of file From e9704105ff37070508e08f3ea4a26cc70c0bbe03 Mon Sep 17 00:00:00 2001 From: Valdis Date: Wed, 16 Jan 2019 14:02:23 +0100 Subject: [PATCH 136/169] do not run histogram by default --- scripts/run.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/scripts/run.py b/scripts/run.py index 3cc684a..b71fbee 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -147,7 +147,7 @@ def compile(conf, bench, compilecmd, debug, trace, extra_args): sbt = where('sbt') -all_benchmarks = [ +default_benchmarks = [ 'bounce.BounceBenchmark', 'list.ListBenchmark', 'richards.RichardsBenchmark', @@ -163,6 +163,9 @@ def compile(conf, bench, compilecmd, debug, trace, extra_args): 'mandelbrot.MandelbrotBenchmark', 'nbody.NbodyBenchmark', 'sudoku.SudokuBenchmark', +] + +all_benchmarks = default_benchmarks + [ 'histogram.Histogram', ] @@ -359,7 +362,7 @@ def create_symlink(generalized_dir, root_dir): benchmarks = [] for b in args.benchmark: if b == "default": - benchmarks += all_benchmarks + benchmarks += default_benchmarks else: bname, bargs = benchmark_parse(b) matching = filter(lambda s: s.startswith(bname), all_benchmarks) @@ -368,7 +371,7 @@ def create_symlink(generalized_dir, root_dir): else: benchmarks += matching else: - benchmarks = all_benchmarks + benchmarks = default_benchmarks if args.size != None: sizes = [] From 32f54989178a56e9ceed3d6611df1920f823b76f Mon Sep 17 00:00:00 2001 From: Valdis Date: Thu, 17 Jan 2019 07:37:57 +0100 Subject: [PATCH 137/169] do not clip the chart --- scripts/summary.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/summary.py b/scripts/summary.py index f744b55..36af7e2 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -625,7 +625,7 @@ def percentiles_chart_generic(plt, configurations, bench, get_data, limit): return plt -def percentiles_chart(plt, configurations, bench, limit=99): +def percentiles_chart(plt, configurations, bench, limit=100): plt = 
percentiles_chart_generic(plt, configurations, bench, config_data, limit) plt.title(bench) plt.ylabel("Run time (ms)") From d48a5e9f928cb6776d4725bb864daa22bc4f4efd Mon Sep 17 00:00:00 2001 From: Valdis Date: Sat, 19 Jan 2019 19:54:12 +0100 Subject: [PATCH 138/169] handle internal events --- scripts/summary.py | 55 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 17 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index 36af7e2..28ba486 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -156,7 +156,7 @@ def gc_events_for_last_n_collections(bench, conf, run=3, n=1): try: with open(main_file) as data: header = data.readline().strip() - collection_events, _, _ = parse_events(data, main_file, header) + collection_events, _, _, _ = parse_events(data, main_file, header) except IOError: print "run does not exist", main_file return [], dict(), dict() @@ -170,6 +170,7 @@ def gc_events_for_last_n_collections(bench, conf, run=3, n=1): phase_events_by_thread = dict() batch_events_by_thread = dict() + internal_events_by_thread = dict() for part in parts: try: @@ -177,13 +178,14 @@ def gc_events_for_last_n_collections(bench, conf, run=3, n=1): with open(file) as data: header = data.readline().strip() # no collection events on other threads - _, phase_events_by_thread0, batch_events_by_thread0 = parse_events(data, file, header, time_filter) + _, phase_events_by_thread0, batch_events_by_thread0, internal_events_by_thread0 = parse_events(data, file, header, time_filter) merge_or_create(phase_events_by_thread, phase_events_by_thread0) merge_or_create(batch_events_by_thread, batch_events_by_thread0) + merge_or_create(internal_events_by_thread, internal_events_by_thread0) except IOError: pass - return collection_events, phase_events_by_thread, batch_events_by_thread + return collection_events, phase_events_by_thread, batch_events_by_thread, internal_events_by_thread def append_or_create(dict, key, value): @@ -195,6 +197,7 @@ def append_or_create(dict, key, value): phase_event_types = ["mark", "sweep", "concmark", "concsweep"] batch_events_types = ["mark_batch", "sweep_batch", "coalesce_batch"] +internal_events_types = ["mark_waiting", "sync"] # event = [type, start, end] @@ -204,6 +207,7 @@ def parse_events(data, file, header, timeFilter=(lambda t: True)): collection_events = [] phase_events_by_thread = dict() batch_events_by_thread = dict() + internal_events_by_thread = dict() event_type_index = 0 start_ns_index = -1 @@ -241,8 +245,10 @@ def parse_events(data, file, header, timeFilter=(lambda t: True)): append_or_create(phase_events_by_thread, thread, [event, start, time]) elif event in batch_events_types: append_or_create(batch_events_by_thread, thread, [event, start, time]) + elif event in internal_events_types: + append_or_create(internal_events_by_thread, thread, [event, start, time]) - return collection_events, phase_events_by_thread, batch_events_by_thread + return collection_events, phase_events_by_thread, batch_events_by_thread, internal_events_by_thread def gc_stats_total(bench, conf): @@ -659,26 +665,20 @@ def gc_gantt_chart(plt, conf, bench, data, only_batches = False): plt.clf() plt.cla() plt.figure(figsize=(100, 24)) + labels = [] if only_batches: - labels = [] - else: - labels = ["Collections"] - if only_batches: - _, _, batch_events_by_thread = data + _, _, batch_events_by_thread, internal_events_by_thread = data collection_events = dict() phase_events_by_thread = dict() else: - collection_events, phase_events_by_thread, 
batch_events_by_thread = data + collection_events, phase_events_by_thread, batch_events_by_thread, internal_events_by_thread = data values = [] - for e in collection_events: - # [event, start, time] => (start, time) - values.append((e[1], e[2])) - plt.broken_barh(values, (0, 1), color="black", label="collection") event_type_to_color = { "mark": ("red", "darkred"), "sweep": ("blue", "darkblue"), "concmark": ("red", "darkred"), "concsweep": ("blue", "darkblue"), - "mark_batch": ("red", "darkred"), "sweep_batch": ("blue", "darkblue"), "coalesce_batch": ("green", "darkgreen") + "mark_batch": ("red", "darkred"), "sweep_batch": ("blue", "darkblue"), "coalesce_batch": ("green", "darkgreen"), + "mark_waiting": ("grey", "dimgrey"), "sync": ("yellow", "gold"), } for thread in sorted(phase_events_by_thread.keys()): @@ -695,10 +695,18 @@ def gc_gantt_chart(plt, conf, bench, data, only_batches = False): values.append((start, time)) plt.broken_barh(values, (end, 1), facecolors=event_type_to_color[et], label=et) + for e in collection_events: + # [event, start, time] => (start, time) + values.append((e[1], e[2])) + plt.broken_barh(values, (len(labels), 1), color="black", label="collection") + if not only_batches: + labels += ["Collections"] + for thread in sorted(batch_events_by_thread.keys()): end = len(labels) labels.append("Batches " + thread_id_tostring(thread)) raw_values = batch_events_by_thread[thread] + raw_internal_values = internal_events_by_thread[thread] for et in batch_events_types: values = [] for e in raw_values: @@ -707,7 +715,17 @@ def gc_gantt_chart(plt, conf, bench, data, only_batches = False): time = e[2] if event == et: values.append((start, time)) - plt.broken_barh(values, (end, 1), facecolors=event_type_to_color[et], label=et) + plt.broken_barh(values, (end, 0.5), facecolors=event_type_to_color[et], label=et) + for et in internal_events_types: + values = [] + for e in raw_internal_values: + event = e[0] + start = e[1] + time = e[2] + if event == et: + values.append((start, time)) + plt.broken_barh(values, (end + 0.5, 0.5), facecolors=event_type_to_color[et], label=et) + plt.yticks(np.arange(len(labels)), labels) plt.xlabel("Time since start (ms)") @@ -715,7 +733,10 @@ def gc_gantt_chart(plt, conf, bench, data, only_batches = False): plt.legend(handles=[(mpatches.Patch(color='black', label='collection')), (mpatches.Patch(color='red', label='mark')), (mpatches.Patch(color='blue', label='sweep')), - (mpatches.Patch(color='green', label='coalesce'))]) + (mpatches.Patch(color='green', label='coalesce')), + (mpatches.Patch(color='grey', label='mark waiting')), + (mpatches.Patch(color='yellow', label='sync')), + ]) return plt From ed55b3f79555119cafad97e4240cebfa4111f116 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sat, 19 Jan 2019 21:13:43 +0100 Subject: [PATCH 139/169] the cake charts --- scripts/summary.py | 68 ++++++++++++++++++++++------------------------ 1 file changed, 33 insertions(+), 35 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index 28ba486..5c42748 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -666,12 +666,7 @@ def gc_gantt_chart(plt, conf, bench, data, only_batches = False): plt.cla() plt.figure(figsize=(100, 24)) labels = [] - if only_batches: - _, _, batch_events_by_thread, internal_events_by_thread = data - collection_events = dict() - phase_events_by_thread = dict() - else: - collection_events, phase_events_by_thread, batch_events_by_thread, internal_events_by_thread = data + collection_events, phase_events_by_thread, 
batch_events_by_thread, internal_events_by_thread = data values = [] event_type_to_color = { @@ -681,51 +676,54 @@ def gc_gantt_chart(plt, conf, bench, data, only_batches = False): "mark_waiting": ("grey", "dimgrey"), "sync": ("yellow", "gold"), } - for thread in sorted(phase_events_by_thread.keys()): - end = len(labels) - labels.append("Phases " + thread_id_tostring(thread)) - raw_values = phase_events_by_thread[thread] - for et in phase_event_types: - values = [] - for e in raw_values: - event = e[0] - start = e[1] - time = e[2] - if event == et: - values.append((start, time)) - plt.broken_barh(values, (end, 1), facecolors=event_type_to_color[et], label=et) - - for e in collection_events: - # [event, start, time] => (start, time) - values.append((e[1], e[2])) - plt.broken_barh(values, (len(labels), 1), color="black", label="collection") - if not only_batches: - labels += ["Collections"] + all_keys = phase_events_by_thread.keys() + batch_events_by_thread.keys() + internal_events_by_thread.keys() + all_threads = sorted(list(set(all_keys))) - for thread in sorted(batch_events_by_thread.keys()): + for thread in all_threads: end = len(labels) - labels.append("Batches " + thread_id_tostring(thread)) - raw_values = batch_events_by_thread[thread] - raw_internal_values = internal_events_by_thread[thread] + labels.append(thread_id_tostring(thread)) + phase_values = phase_events_by_thread.get(thread, []) + batch_values = batch_events_by_thread.get(thread, []) + internal_values = internal_events_by_thread.get(thread, []) + if not only_batches: + values = [] + for e in collection_events: + # [event, start, time] => (start, time) + values.append((e[1], e[2])) + plt.broken_barh(values, (end, 0.25), color="black", label="collection") + for et in phase_event_types: + values = [] + for e in phase_values: + event = e[0] + start = e[1] + time = e[2] + if event == et: + values.append((start, time)) + plt.broken_barh(values, (end + 0.25, 0.25), facecolors=event_type_to_color[et], label=et) for et in batch_events_types: values = [] - for e in raw_values: + for e in batch_values: event = e[0] start = e[1] time = e[2] if event == et: values.append((start, time)) - plt.broken_barh(values, (end, 0.5), facecolors=event_type_to_color[et], label=et) + if only_batches: + plt.broken_barh(values, (end, 0.5), facecolors=event_type_to_color[et], label=et) + else: + plt.broken_barh(values, (end + 0.50, 0.25), facecolors=event_type_to_color[et], label=et) for et in internal_events_types: values = [] - for e in raw_internal_values: + for e in internal_values: event = e[0] start = e[1] time = e[2] if event == et: values.append((start, time)) - plt.broken_barh(values, (end + 0.5, 0.5), facecolors=event_type_to_color[et], label=et) - + if only_batches: + plt.broken_barh(values, (end + 0.5, 0.5), facecolors=event_type_to_color[et], label=et) + else: + plt.broken_barh(values, (end + 0.75, 0.25), facecolors=event_type_to_color[et], label=et) plt.yticks(np.arange(len(labels)), labels) plt.xlabel("Time since start (ms)") From 56b636a028baaf289f3147858ea1551d066f1770 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sat, 19 Jan 2019 21:46:53 +0100 Subject: [PATCH 140/169] move mark_waiting one level lower --- scripts/summary.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index 5c42748..d0c6095 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -196,8 +196,8 @@ def append_or_create(dict, key, value): phase_event_types = ["mark", "sweep", "concmark", "concsweep"] 
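+# Reclassifying "mark_waiting" as a batch event makes it render in the
+# per-thread batch lane of the Gantt chart instead of a separate internal
+# lane; only "sync" remains an internal event after this change.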
-batch_events_types = ["mark_batch", "sweep_batch", "coalesce_batch"] -internal_events_types = ["mark_waiting", "sync"] +batch_events_types = ["mark_batch", "sweep_batch", "coalesce_batch", "mark_waiting"] +internal_events_types = ["sync"] # event = [type, start, end] From 7c5bded1477c2d22b9db10ad6fd6035619ac8a52 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sun, 20 Jan 2019 15:04:46 +0100 Subject: [PATCH 141/169] move mark_waiting one level lower --- scripts/summary.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index d0c6095..aefe872 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -159,11 +159,11 @@ def gc_events_for_last_n_collections(bench, conf, run=3, n=1): collection_events, _, _, _ = parse_events(data, main_file, header) except IOError: print "run does not exist", main_file - return [], dict(), dict() + return [], dict(), dict(), dict() collection_events = collection_events[-n:] if len(collection_events) == 0: - return [], dict(), dict() + return [], dict(), dict(), dict() min_time = collection_events[0][1] time_filter = (lambda t: t > min_time) From e11515606032dda8db5ae34cdbd676c1d9a53d47 Mon Sep 17 00:00:00 2001 From: Valdis Date: Wed, 23 Jan 2019 20:30:55 +0100 Subject: [PATCH 142/169] support different levels of GC stat verbosity --- scripts/run.py | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/scripts/run.py b/scripts/run.py index b71fbee..d1b8d83 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -128,7 +128,7 @@ def compile_scala_native(ref, sha1): return False -def compile(conf, bench, compilecmd, debug, trace, extra_args): +def compile(conf, bench, compilecmd, gcstats, debug, trace, extra_args): cmd = [sbt, '-no-colors', '-J-Xmx2G', 'clean'] cmd.append('set mainClass in Compile := Some("{}")'.format(bench)) if conf.startswith("scala-native"): @@ -136,6 +136,8 @@ def compile(conf, bench, compilecmd, debug, trace, extra_args): cmd.append('set nativeCompileOptions ++= Seq("-g", "-DDEBUG_ASSERT")') if trace: cmd.append('set nativeCompileOptions +="-DDEBUG_PRINT"') + if gcstats != None: + cmd.append('set nativeCompileOptions +="-DENABLE_GC_STATS{}"'.format(gcstats)) for k,v in extra_args.iteritems(): if k.endswith("?"): cmd.append('set nativeCompileOptions +="-D{}"'.format(k[:-1])) @@ -257,7 +259,7 @@ def single_run(to_run): print('--- run {}/{}'.format(n, runs)) my_env = os.environ.copy() - if gcstats: + if gcstats != None: my_env["SCALANATIVE_STATS_FILE"] = os.path.join(resultsdir, str(n) + ".gc.csv") if minsize != "default": @@ -344,6 +346,8 @@ def create_symlink(generalized_dir, root_dir): parser.add_argument("--gcthreads", help="different number of garbage collection threads to use", action='append') parser.add_argument("--par", help="number of parallel processes", type=int, default=default_par) parser.add_argument("--gc", help="gather gc statistics", action="store_true") + parser.add_argument("--gcv", help="gather gc statistics verbose - batches", action="store_true") + parser.add_argument("--gcvv", help="gather gc statistics very verbose - sync events", action="store_true") parser.add_argument("--upload", help="copy the results to ../scala-native-benchmark-results", action="store_true") parser.add_argument("--gitupload", help="copy the results to ../scala-native-benchmark-results and commit and push to git", action="store_true") parser.add_argument("--overwrite", help="overwrite old results", action="store_true") @@ -437,8 +441,20 @@ def 
create_symlink(generalized_dir, root_dir): suffix += "-b" + str(batches) if par != default_par: suffix += "-p" + str(par) - if args.gc: + + + if args.gcvv: + suffix += "-gcvv" + gcstats = "_SYNC" + elif args.gcv: + suffix += "-gcv" + gcstats = "_BATCHES" + elif args.gc: suffix += "-gc" + gcstats = "" + else: + gcstats = None + if args.gcdebug: suffix += "-gcdebug" if args.gctrace: @@ -541,7 +557,7 @@ def create_symlink(generalized_dir, root_dir): else: os.remove('project/plugins.sbt') - compile_success = compile(conf, bench, compilecmd, args.gcdebug, args.gctrace, extra_args) + compile_success = compile(conf, bench, compilecmd, gcstats, args.gcdebug, args.gctrace, extra_args) if not compile_success: compile_fail += [conf] break @@ -557,7 +573,7 @@ def create_symlink(generalized_dir, root_dir): to_run = [] for n in xrange(runs): to_run += [ - dict(runs=runs, cmd=cmd, resultsdir=resultsdir, conf=conf, bench=bench, n=n, gcstats=args.gc, + dict(runs=runs, cmd=cmd, resultsdir=resultsdir, conf=conf, bench=bench, n=n, gcstats=gcstats, size=size, gcThreads=gcThreads)] if par == 1: From 3d5bd0246c99ece1153f86255d4475bf16707191 Mon Sep 17 00:00:00 2001 From: Valdis Date: Wed, 23 Jan 2019 22:02:57 +0100 Subject: [PATCH 143/169] records perf data --- scripts/run.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/scripts/run.py b/scripts/run.py index d1b8d83..4d2120e 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -256,6 +256,7 @@ def single_run(to_run): minsize = to_run["size"][0] maxsize = to_run["size"][1] gcThreads = to_run["gcThreads"] + perf = to_run["perf"] print('--- run {}/{}'.format(n, runs)) my_env = os.environ.copy() @@ -277,7 +278,13 @@ def single_run(to_run): elif "SCALANATIVE_GC_THREADS" in my_env: del my_env["SCALANATIVE_GC_THREADS"] - cmd = [] + if perf == "sudo": + myuser = os.environ.get('USER') + cmd = ["sudo","perf", "record", "-o", os.path.join(resultsdir, str(n) + ".perf"), "-g", "-F", "25000", "--", "sudo" , "-u", str(myuser)] + elif perf == "normal": + cmd = ["perf", "record", "-o", os.path.join(resultsdir, str(n) + ".perf"), "-g", "-F", "25000", "--"] + else: + cmd = [] for token in unexpanded_cmd: if token == "$JAVA_ARGS": if minsize != "default": @@ -337,6 +344,8 @@ def create_symlink(generalized_dir, root_dir): if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--suffix", help="suffix added to results") + parser.add_argument("--perf", help="records perf data", action="store_true") + parser.add_argument("--sperf", help="records perf data using sudo rights", action="store_true") parser.add_argument("--runs", help="number of runs", type=int, default=default_runs) parser.add_argument("--batches", help="number of batches per run", type=int, default=default_batches) parser.add_argument("--benchmark", help="benchmarks to run", action='append') @@ -442,6 +451,12 @@ def create_symlink(generalized_dir, root_dir): if par != default_par: suffix += "-p" + str(par) + if args.sperf: + perf = "sudo" + suffix +="-Perf" + elif args.perf: + perf = "normal" + suffix +="-Perf" if args.gcvv: suffix += "-gcvv" @@ -574,7 +589,7 @@ def create_symlink(generalized_dir, root_dir): for n in xrange(runs): to_run += [ dict(runs=runs, cmd=cmd, resultsdir=resultsdir, conf=conf, bench=bench, n=n, gcstats=gcstats, - size=size, gcThreads=gcThreads)] + size=size, gcThreads=gcThreads, perf = perf)] if par == 1: for tr in to_run: From e8baaedff8b8cac088d02963dc60c83f6b1e7a1b Mon Sep 17 00:00:00 2001 From: Valdis Date: Thu, 24 Jan 2019 
23:05:46 +0100 Subject: [PATCH 144/169] just warn about bad lines --- scripts/summary.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index aefe872..1b23ab9 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -24,8 +24,11 @@ def config_data(bench, conf): points = [] with open(os.path.join("results", conf, bench, run)) as data: for line in data.readlines(): - # in ms - points.append(float(line) / 1000000) + try: + # in ms + points.append(float(line) / 1000000) + except Exception as e: + print e # take only last 1000 to account for startup out += points[-1000:] except IOError: @@ -83,13 +86,16 @@ def parse_gc_pause_events(data, file, header): for line in data.readlines(): arr = line.split(",") event = arr[event_type_index] - time = float(arr[time_ns_index]) / ns_to_ms_div - if event == "mark": - mark_times.append(time) - elif event == "sweep": - sweep_times.append(time) - if event == "mark" or event == "sweep": - gc_times.append(time) + try: + time = float(arr[time_ns_index]) / ns_to_ms_div + if event == "mark": + mark_times.append(time) + elif event == "sweep": + sweep_times.append(time) + if event == "mark" or event == "sweep": + gc_times.append(time) + except Exception as e: + print e return mark_times, sweep_times, gc_times @@ -497,7 +503,10 @@ def example_run_plot(plt, configurations, bench, run=3): try: with open('results/{}/{}/{}'.format(conf, bench, run)) as data: for line in data.readlines(): - points.append(float(line) / 1000000) + try: + points.append(float(line) / 1000000) + except Exception as e: + print e except IOError: pass ind = np.arange(len(points)) From a5d6b3d83fe6fd3d7395a21b66e7c7eead015cef Mon Sep 17 00:00:00 2001 From: Valdis Date: Thu, 24 Jan 2019 23:31:10 +0100 Subject: [PATCH 145/169] fix perf option --- scripts/run.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/run.py b/scripts/run.py index 4d2120e..9200a82 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -457,6 +457,8 @@ def create_symlink(generalized_dir, root_dir): elif args.perf: perf = "normal" suffix +="-Perf" + else: + perf = None if args.gcvv: suffix += "-gcvv" From 9a11c957950143df28d0c802884d2e9be8287d21 Mon Sep 17 00:00:00 2001 From: Valdis Date: Fri, 25 Jan 2019 12:20:17 +0100 Subject: [PATCH 146/169] perf reports make no sense without the binary --- scripts/run.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/scripts/run.py b/scripts/run.py index 9200a82..5148cff 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -583,6 +583,13 @@ def create_symlink(generalized_dir, root_dir): print "results in", resultsdir mkdir(resultsdir) + if conf.startswith("scala-native") and perf in ["sudo", "normal"]: + # perf needs the original binary for the reports to make any sense + runnable = runcmd[0] + saved_binary = os.path.join(resultsdir, "binary") + sh.copyfile(runnable, saved_binary) + runcmd = [saved_binary] + runcmd[1:] + cmd = [] cmd.extend(runcmd) cmd.extend([str(batches), str(batch_size), input, output]) From c656f7604597d22d5650f7a98794ac09aab3b753 Mon Sep 17 00:00:00 2001 From: Valdis Date: Fri, 25 Jan 2019 13:00:57 +0100 Subject: [PATCH 147/169] make it executable --- scripts/run.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/run.py b/scripts/run.py index 5148cff..50a3069 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -588,6 +588,7 @@ def create_symlink(generalized_dir, root_dir): runnable = runcmd[0] saved_binary = os.path.join(resultsdir, "binary") 
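+            # shutil.copyfile copies file contents but not permission bits,
+            # so the saved copy would lose its execute bit; the chmod below
+            # restores it so the preserved binary can still be run.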
sh.copyfile(runnable, saved_binary) + os.chmod(saved_binary, 0775) runcmd = [saved_binary] + runcmd[1:] cmd = [] From 29fd1a6126ab9874e90af85529f587d8ccc3ef58 Mon Sep 17 00:00:00 2001 From: Valdis Date: Fri, 25 Jan 2019 14:26:02 +0100 Subject: [PATCH 148/169] lower sample rate due to failures --- scripts/run.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/run.py b/scripts/run.py index 50a3069..30d67ef 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -280,9 +280,9 @@ def single_run(to_run): if perf == "sudo": myuser = os.environ.get('USER') - cmd = ["sudo","perf", "record", "-o", os.path.join(resultsdir, str(n) + ".perf"), "-g", "-F", "25000", "--", "sudo" , "-u", str(myuser)] + cmd = ["sudo","perf", "record", "-o", os.path.join(resultsdir, str(n) + ".perf"), "-g", "-F", "15000", "--", "sudo" , "-u", str(myuser)] elif perf == "normal": - cmd = ["perf", "record", "-o", os.path.join(resultsdir, str(n) + ".perf"), "-g", "-F", "25000", "--"] + cmd = ["perf", "record", "-o", os.path.join(resultsdir, str(n) + ".perf"), "-g", "-F", "15000", "--"] else: cmd = [] for token in unexpanded_cmd: From 740d7a89b92ee5bd9b3689863bffa89a97304c1f Mon Sep 17 00:00:00 2001 From: Valdis Date: Fri, 25 Jan 2019 15:57:09 +0100 Subject: [PATCH 149/169] preserve the environment --- scripts/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/run.py b/scripts/run.py index 30d67ef..1246fbb 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -280,7 +280,7 @@ def single_run(to_run): if perf == "sudo": myuser = os.environ.get('USER') - cmd = ["sudo","perf", "record", "-o", os.path.join(resultsdir, str(n) + ".perf"), "-g", "-F", "15000", "--", "sudo" , "-u", str(myuser)] + cmd = ["sudo", "-E", "perf", "record", "-o", os.path.join(resultsdir, str(n) + ".perf"), "-g", "-F", "15000", "--", "sudo", "-E", "-u", str(myuser)] elif perf == "normal": cmd = ["perf", "record", "-o", os.path.join(resultsdir, str(n) + ".perf"), "-g", "-F", "15000", "--"] else: From 2f4fc488d95c489c888a759747af7c39f561ae55 Mon Sep 17 00:00:00 2001 From: Valdis Date: Sat, 26 Jan 2019 16:58:59 +0100 Subject: [PATCH 150/169] handle collection events from other threads --- scripts/summary.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index 1b23ab9..18d7591 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -153,19 +153,21 @@ def gc_events_for_last_n_collections(bench, conf, run=3, n=1): benchmark_dir = os.path.join("results", conf, bench) files = next(os.walk(benchmark_dir), [[], [], []])[2] main_file_name = str(run) + ".gc.csv" - main_file = os.path.join("results", conf, bench, main_file_name) parts = [] for file in files: if file.startswith(main_file_name): parts.append(file) - try: - with open(main_file) as data: - header = data.readline().strip() - collection_events, _, _, _ = parse_events(data, main_file, header) - except IOError: - print "run does not exist", main_file - return [], dict(), dict(), dict() + collection_events = [] + for part in parts: + try: + file = os.path.join("results", conf, bench, part) + with open(file) as data: + header = data.readline().strip() + collection_events0, _, _, _ = parse_events(data, file, header) + collection_events += collection_events0 + except IOError: + pass collection_events = collection_events[-n:] if len(collection_events) == 0: From aeeea2fe739706bdc649d7cdcfb98da22739321f Mon Sep 17 00:00:00 2001 From: Valdis Date: Mon, 28 Jan 2019 14:00:04 +0100 
Subject: [PATCH 151/169] zoom in into last 1000 runs --- scripts/summary.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index 18d7591..7df1f13 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -496,22 +496,29 @@ def bar_chart_gc_absolute(plt, configurations, benchmarks, percentile): return plt -def example_run_plot(plt, configurations, bench, run=3): +def example_run_plot(plt, configurations, bench, run=3, lastn=-1): plt.clf() plt.cla() for conf in configurations: - points = [] + rawpoints = [] try: with open('results/{}/{}/{}'.format(conf, bench, run)) as data: for line in data.readlines(): try: - points.append(float(line) / 1000000) + rawpoints.append(float(line) / 1000000) except Exception as e: print e except IOError: pass - ind = np.arange(len(points)) + + total_len = len(rawpoints) + if lastn != -1: + first = total_len - lastn + else: + first = 0 + ind = np.arange(first, total_len) + points = rawpoints[first:] plt.plot(ind, points, label=conf) plt.title("{} run #{}".format(bench, str(run))) plt.xlabel("Iteration") @@ -949,8 +956,10 @@ def write_md_file(rootdir, md_file, parent_configurations, configurations, bench run -= 1 if run >= 0: + chart_md(md_file, example_run_plot(plt, configurations, bench, run, 1000), rootdir, + "example_run_last1000_" + str(run) + "_" + bench + ".png") chart_md(md_file, example_run_plot(plt, configurations, bench, run), rootdir, - "example_run_" + str(run) + "_" + bench + ".png") + "example_run_full_" + str(run) + "_" + bench + ".png") if gc_charts: for conf in configurations: gc_data = gc_events_for_last_n_collections(bench, conf, run) From 52a113749ec3cf883bb0933ebc87ab3cb9d15dd1 Mon Sep 17 00:00:00 2001 From: Valdis Date: Mon, 28 Jan 2019 14:56:07 +0100 Subject: [PATCH 152/169] higher resolution --- scripts/summary.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index 7df1f13..2037a22 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -640,7 +640,8 @@ def percentiles_chart_generic(plt, configurations, bench, get_data, limit): for conf in configurations: data = get_data(bench, conf) if data.size > 0: - percentiles = np.arange(0, limit) + step = 0.1 + percentiles = np.arange(0, limit + step, step) percvalue = np.array([np.percentile(data, perc) for perc in percentiles]) plt.plot(percentiles, percvalue, label=conf) plt.legend() @@ -891,7 +892,7 @@ def chart_md(md_file, plt, rootdir, name): def write_md_file(rootdir, md_file, parent_configurations, configurations, benchmarks, gc_charts=False, size_charts=False): - interesting_percentiles = [50, 90, 99] + interesting_percentiles = [50, 90, 99, 99.9] md_file.write("# Summary\n") for p in interesting_percentiles: md_file.write("## Benchmark run time (ms) at {} percentile \n".format(p)) From 8a0d50c15b833c33a4aa7fc8387ee40ad2634859 Mon Sep 17 00:00:00 2001 From: Valdis Date: Mon, 28 Jan 2019 22:23:44 +0100 Subject: [PATCH 153/169] zoomed in at 80+ percentile --- scripts/summary.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index 2037a22..c701e1f 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -634,14 +634,14 @@ def size_compare_chart_gc_sweep(plt, parent_configurations, bench, p): return plt -def percentiles_chart_generic(plt, configurations, bench, get_data, limit): +def percentiles_chart_generic(plt, configurations, bench, get_data, first , last): 
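+    # `first` and `last` bound the percentile window, so the same helper can
+    # draw the full 0-100 curve or zoom into the high-percentile tail (e.g.
+    # first=80) where the configurations diverge the most.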
plt.clf() plt.cla() for conf in configurations: data = get_data(bench, conf) if data.size > 0: step = 0.1 - percentiles = np.arange(0, limit + step, step) + percentiles = np.arange(first, last + step, step) percvalue = np.array([np.percentile(data, perc) for perc in percentiles]) plt.plot(percentiles, percvalue, label=conf) plt.legend() @@ -650,15 +650,15 @@ def percentiles_chart_generic(plt, configurations, bench, get_data, limit): return plt -def percentiles_chart(plt, configurations, bench, limit=100): - plt = percentiles_chart_generic(plt, configurations, bench, config_data, limit) +def percentiles_chart(plt, configurations, bench, first=0, last=100): + plt = percentiles_chart_generic(plt, configurations, bench, config_data, first, last) plt.title(bench) plt.ylabel("Run time (ms)") return plt -def gc_pause_time_chart(plt, configurations, bench, limit=100): - plt = percentiles_chart_generic(plt, configurations, bench, gc_stats_total, limit) +def gc_pause_time_chart(plt, configurations, bench, first=0, last=100): + plt = percentiles_chart_generic(plt, configurations, bench, gc_stats_total, first, last) plt.title(bench + ": Garbage Collector Pause Times") plt.ylabel("GC pause time (ms)") return plt @@ -931,9 +931,12 @@ def write_md_file(rootdir, md_file, parent_configurations, configurations, bench md_file.write("\n") chart_md(md_file, percentiles_chart(plt, configurations, bench), rootdir, "percentile_" + bench + ".png") + chart_md(md_file, percentiles_chart(plt, configurations, bench, first=80), rootdir, "percentile_80plus_" + bench + ".png") if gc_charts: chart_md(md_file, gc_pause_time_chart(plt, configurations, bench), rootdir, "gc_pause_times_" + bench + ".png") + chart_md(md_file, gc_pause_time_chart(plt, configurations, bench, first=80), rootdir, + "gc_pause_times_80plus_" + bench + ".png") if size_charts: for p in interesting_percentiles: chart_md(md_file, size_compare_chart_gc_mark(plt, parent_configurations, bench, p), rootdir, From 1eafdfe7101538bc117e87be20d673dd3c0eed4e Mon Sep 17 00:00:00 2001 From: Valdis Date: Fri, 1 Feb 2019 20:39:48 +0100 Subject: [PATCH 154/169] higher precision for smaller range for high percentiles --- scripts/summary.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/scripts/summary.py b/scripts/summary.py index c701e1f..b97f157 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -634,13 +634,12 @@ def size_compare_chart_gc_sweep(plt, parent_configurations, bench, p): return plt -def percentiles_chart_generic(plt, configurations, bench, get_data, first , last): +def percentiles_chart_generic(plt, configurations, bench, get_data, first, last, step): plt.clf() plt.cla() for conf in configurations: data = get_data(bench, conf) if data.size > 0: - step = 0.1 percentiles = np.arange(first, last + step, step) percvalue = np.array([np.percentile(data, perc) for perc in percentiles]) plt.plot(percentiles, percvalue, label=conf) @@ -650,15 +649,15 @@ def percentiles_chart_generic(plt, configurations, bench, get_data, first , last return plt -def percentiles_chart(plt, configurations, bench, first=0, last=100): - plt = percentiles_chart_generic(plt, configurations, bench, config_data, first, last) +def percentiles_chart(plt, configurations, bench, first=0, last=100, step=0.1): + plt = percentiles_chart_generic(plt, configurations, bench, config_data, first, last, step) plt.title(bench) plt.ylabel("Run time (ms)") return plt -def gc_pause_time_chart(plt, configurations, bench, first=0, last=100): - plt = 
percentiles_chart_generic(plt, configurations, bench, gc_stats_total, first, last)
+def gc_pause_time_chart(plt, configurations, bench, first=0, last=100, step=0.1):
+    plt = percentiles_chart_generic(plt, configurations, bench, gc_stats_total, first, last, step)
     plt.title(bench + ": Garbage Collector Pause Times")
     plt.ylabel("GC pause time (ms)")
     return plt
@@ -931,11 +930,11 @@ def write_md_file(rootdir, md_file, parent_configurations, configurations, bench
         md_file.write("\n")
 
         chart_md(md_file, percentiles_chart(plt, configurations, bench), rootdir, "percentile_" + bench + ".png")
-        chart_md(md_file, percentiles_chart(plt, configurations, bench, first=80), rootdir, "percentile_80plus_" + bench + ".png")
+        chart_md(md_file, percentiles_chart(plt, configurations, bench, first=95, step=0.01), rootdir, "percentile_80plus_" + bench + ".png")
         if gc_charts:
             chart_md(md_file, gc_pause_time_chart(plt, configurations, bench), rootdir,
                      "gc_pause_times_" + bench + ".png")
-            chart_md(md_file, gc_pause_time_chart(plt, configurations, bench, first=80), rootdir,
+            chart_md(md_file, gc_pause_time_chart(plt, configurations, bench, first=95, step=0.01), rootdir,
                      "gc_pause_times_80plus_" + bench + ".png")
         if size_charts:
             for p in interesting_percentiles:

From 79996163fb810640773f804ebdf0da7f2863143f Mon Sep 17 00:00:00 2001
From: Valdis
Date: Wed, 6 Feb 2019 13:34:48 +0100
Subject: [PATCH 155/169] introduce warmup

---
 scripts/summary.py | 47 ++++++++++++++++++++++++----------------------
 1 file changed, 25 insertions(+), 22 deletions(-)

diff --git a/scripts/summary.py b/scripts/summary.py
index b97f157..36e267b 100755
--- a/scripts/summary.py
+++ b/scripts/summary.py
@@ -9,7 +9,7 @@
 import argparse
 
 
-def config_data(bench, conf):
+def config_data(bench, conf, warmup):
     benchmark_dir = os.path.join("results", conf, bench)
     files = next(os.walk(benchmark_dir), [[], [], []])[2]
     runs = []
@@ -29,8 +29,7 @@
                     points.append(float(line) / 1000000)
                 except Exception as e:
                     print e
-            # take only last 1000 to account for startup
-            out += points[-1000:]
+            out += points[warmup:]
         except IOError:
             pass
     return np.array(out)
@@ -338,35 +337,35 @@ def percentile_gc_bench_total(configurations, bench, p):
     return total
 
 
-def percentile(configurations, benchmarks, p):
+def percentile(configurations, benchmarks, warmup, p):
     out = []
     for bench in benchmarks:
-        out.append(percentile_bench(configurations, bench, p))
+        out.append(percentile_bench(configurations, warmup, bench, p))
     return out
 
 
-def percentile_bench(configurations, bench, p):
+def percentile_bench(configurations, bench, warmup, p):
     res = []
     for conf in configurations:
         try:
-            res.append(np.percentile(config_data(bench, conf), p))
+            res.append(np.percentile(config_data(bench, conf, warmup), p))
         except IndexError:
             res.append(0)
     return res
 
 
-def totals(configurations, benchmarks):
+def totals(configurations, benchmarks, warmup):
     out = []
     for bench in benchmarks:
-        out.append(totals_bench(configurations, bench))
+        out.append(totals_bench(configurations, bench, warmup))
     return out
 
 
-def totals_bench(configurations, bench):
+def totals_bench(configurations, bench, warmup):
     res = []
     for conf in configurations:
         try:
-            res.append(np.sum(config_data(bench, conf)))
+            res.append(np.sum(config_data(bench, conf, warmup)))
         except IndexError:
             res.append(0)
     return res
@@ -606,8 +605,8 @@ def size_compare_chart_gc_combined(plt, parent_configurations, bench):
     return plt
 
 
-def size_compare_chart(plt, parent_configurations, bench, p):
-    plt = size_compare_chart_generic(plt, parent_configurations, bench, percentile_bench, p)
+def size_compare_chart(plt, parent_configurations, bench, warmup, p):
+    plt = size_compare_chart_generic(plt, parent_configurations, bench, lambda configurations, benchmark, p: percentile_bench(configurations, benchmark, warmup, p), p)
     plt.title("{} at {} percentile".format(bench, p))
     plt.ylabel("Run time (ms)")
     return plt
@@ -649,8 +648,8 @@ def percentiles_chart_generic(plt, configurations, bench, get_data, first, last,
     return plt
 
 
-def percentiles_chart(plt, configurations, bench, first=0, last=100, step=0.1):
-    plt = percentiles_chart_generic(plt, configurations, bench, config_data, first, last, step)
+def percentiles_chart(plt, configurations, bench, warmup, first=0, last=100, step=0.1):
+    plt = percentiles_chart_generic(plt, configurations, bench, lambda bench, conf : config_data(bench, conf, warmup), first, last, step)
     plt.title(bench)
     plt.ylabel("Run time (ms)")
     return plt
@@ -889,19 +888,19 @@ def chart_md(md_file, plt, rootdir, name):
     md_file.write("![Chart]({})\n\n".format(name))
 
 
-def write_md_file(rootdir, md_file, parent_configurations, configurations, benchmarks, gc_charts=False,
+def write_md_file(rootdir, md_file, parent_configurations, configurations, benchmarks, warmup, gc_charts=False,
                   size_charts=False):
     interesting_percentiles = [50, 90, 99, 99.9]
     md_file.write("# Summary\n")
     for p in interesting_percentiles:
         md_file.write("## Benchmark run time (ms) at {} percentile \n".format(p))
-        data = percentile(configurations, benchmarks, p)
+        data = percentile(configurations, benchmarks, warmup, p)
         chart_md(md_file, relative_execution_times(plt, configurations, benchmarks, data, p), rootdir,
                  "relative_percentile_" + str(p) + ".png")
         write_md_table(md_file, configurations, benchmarks, data)
 
     md_file.write("## Benchmark total run time (ms) \n")
-    data = totals(configurations, benchmarks)
+    data = totals(configurations, benchmarks, warmup)
     chart_md(md_file, total_execution_times(plt, configurations, benchmarks, data), rootdir, "relative_total.png")
     write_md_table(md_file, configurations, benchmarks, data)
 
@@ -929,8 +928,8 @@ def write_md_file(rootdir, md_file, parent_configurations, configurations, bench
             md_file.write(bench)
             md_file.write("\n")
 
-            chart_md(md_file, percentiles_chart(plt, configurations, bench), rootdir, "percentile_" + bench + ".png")
-            chart_md(md_file, percentiles_chart(plt, configurations, bench, first=95, step=0.01), rootdir, "percentile_80plus_" + bench + ".png")
+            chart_md(md_file, percentiles_chart(plt, configurations, bench, warmup), rootdir, "percentile_" + bench + ".png")
+            chart_md(md_file, percentiles_chart(plt, configurations, bench, warmup, first=95, step=0.01), rootdir, "percentile_80plus_" + bench + ".png")
             if gc_charts:
                 chart_md(md_file, gc_pause_time_chart(plt, configurations, bench), rootdir,
                          "gc_pause_times_" + bench + ".png")
@@ -951,7 +950,7 @@ def write_md_file(rootdir, md_file, parent_configurations, configurations, bench
 
         if size_charts:
             for p in interesting_percentiles:
-                chart_md(md_file, size_compare_chart(plt, parent_configurations, bench, p), rootdir,
+                chart_md(md_file, size_compare_chart(plt, parent_configurations, bench, warmup, p), rootdir,
                          "size_chart_" + bench + "percentile_" + str(p) + ".png")
 
 run = 3
@@ -1008,6 +1007,9 @@ def is_subconfig(subconf):
     return subconf.startswith("size_") or subconf.startswith("gcthreads_")
 
 
+default_warmup = 2000
+
+
 if __name__ == '__main__':
     all_configs = next(os.walk("results"))[1]
     # added subconfigurations
@@ -1024,6 +1026,7 @@ def is_subconfig(subconf):
     parser.add_argument("--comment", help="comment at the suffix of the report name")
     parser.add_argument("--gc", help="enable charts about garbage collector", action="store_true")
     parser.add_argument("--vssize", help="enable charts against heap size", action="store_true")
+    parser.add_argument("--warmup", help="number of iterations to skip before calculating percentiles", type=int, default=default_warmup)
     parser.add_argument("--benchmark", help="benchmarks to use in comparision", action='append')
     parser.add_argument("comparisons", nargs='*', choices=results + ["all"], default="all")
@@ -1064,6 +1067,6 @@ def is_subconfig(subconf):
     plt.rcParams["font.size"] = 20.0
     mkdir(report_dir)
     with open(os.path.join(report_dir, "Readme.md"), 'w+') as md_file:
-        write_md_file(report_dir, md_file, parent_configurations, configurations, benchmarks, args.gc, args.vssize)
+        write_md_file(report_dir, md_file, parent_configurations, configurations, benchmarks, args.warmup, args.gc, args.vssize)
 
     print report_dir

From 78427025fc0899d4934de210f1389853e1d271f5 Mon Sep 17 00:00:00 2001
From: Valdis
Date: Wed, 6 Feb 2019 13:40:22 +0100
Subject: [PATCH 156/169] do not use xwindows when generating reports

---
 scripts/summary.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/scripts/summary.py b/scripts/summary.py
index 36e267b..0b83875 100755
--- a/scripts/summary.py
+++ b/scripts/summary.py
@@ -1,10 +1,12 @@
 #!/usr/bin/env python2
 
 from run import mkdir, expand_wild_cards, generate_choices
-import numpy as np
-import time
+import matplotlib
+matplotlib.use('Agg')
 import matplotlib.pyplot as plt
 import matplotlib.patches as mpatches
+import numpy as np
+import time
 import os
 import argparse

From 147a3bcfb5f26b89f0b24c35e29142db5d0b99bd Mon Sep 17 00:00:00 2001
From: Valdis
Date: Wed, 6 Feb 2019 17:50:56 +0100
Subject: [PATCH 157/169] deal with numpy handling integer and floating point ranges differently

---
 scripts/summary.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/summary.py b/scripts/summary.py
index b97f157..528c1c5 100755
--- a/scripts/summary.py
+++ b/scripts/summary.py
@@ -640,7 +640,7 @@ def percentiles_chart_generic(plt, configurations, bench, get_data, first, last,
     for conf in configurations:
         data = get_data(bench, conf)
         if data.size > 0:
-            percentiles = np.arange(first, last + step, step)
+            percentiles = filter(lambda x: 0 <= x <= 100, np.arange(first, last + step, step))
             percvalue = np.array([np.percentile(data, perc) for perc in percentiles])
             plt.plot(percentiles, percvalue, label=conf)
     plt.legend()

From fec83f10fe09d2f8208dbd168f9573fffd42f834 Mon Sep 17 00:00:00 2001
From: Valdis
Date: Thu, 7 Feb 2019 12:08:18 +0100
Subject: [PATCH 158/169] mark and sweep batch charts

---
 scripts/summary.py | 87 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 87 insertions(+)

diff --git a/scripts/summary.py b/scripts/summary.py
index 528c1c5..89f6dbe 100755
--- a/scripts/summary.py
+++ b/scripts/summary.py
@@ -259,11 +259,81 @@ def parse_events(data, file, header, timeFilter=(lambda t: True)):
     return collection_events, phase_events_by_thread, batch_events_by_thread, internal_events_by_thread
 
 
+def parse_batch_times(data, file, header):
+
+    mark_batches = []
+    sweep_batches = []
+
+    event_type_index = 0
+    start_ns_index = -1
+    time_ns_index = -1
+    thread_index = -1
+    ns_to_ms_div = 1000 * 1000
+    for i, h in enumerate(header.split(',')):
+        if h == "start_ns":
+            start_ns_index = i
+        if h == "time_ns":
+            time_ns_index = i
+        if h == "gc_thread":
+            thread_index = i
+
+    if start_ns_index == -1:
+        print "Header does not have start_ns", header, "at", file
+    if time_ns_index == -1:
+        print "Header does not have time_ns", header, "at", file
+    if thread_index == -1:
+        print "Header does not have gc_thread", header, "at", file
+    if start_ns_index == -1 or time_ns_index == -1 or thread_index == -1:
+        return mark_batches, sweep_batches
+
+    for line in data.readlines():
+        arr = line.split(",")
+        event = arr[event_type_index]
+        time = float(arr[time_ns_index]) / ns_to_ms_div
+        if event == "mark_batch":
+            mark_batches.append(time)
+        elif event == "sweep_batch":
+            sweep_batches.append(time)
+
+    return mark_batches, sweep_batches
+
+
+def gc_batch_times(bench, conf):
+    benchmark_dir = os.path.join("results", conf, bench)
+    files = next(os.walk(benchmark_dir), [[], [], []])[2]
+    parts = []
+    for file in files:
+        if ".gc.csv" in file:
+            parts.append(file)
+    sweep_batches = []
+    mark_batches = []
+    for part in parts:
+        try:
+            file = os.path.join("results", conf, bench, part)
+            with open(file) as data:
+                header = data.readline().strip()
+                mark_batches0, sweep_batches0 = parse_batch_times(data, file, header)
+                mark_batches += mark_batches0
+                sweep_batches += sweep_batches0
+        except IOError:
+            pass
+
+    return mark_batches, sweep_batches
+
+
 def gc_stats_total(bench, conf):
     _, _, total = gc_pauses_main_thread(bench, conf)
     return total
 
 
+def gc_stats_mark_batches(bench, conf):
+    mark,_ = gc_batch_times(bench, conf)
+    return mark
+
+def gc_stats_sweep_batches(bench, conf):
+    _, sweep = gc_batch_times(bench, conf)
+    return sweep
+
 def percentile_gc(configurations, benchmarks, percentile):
     out_mark = []
     out_sweep = []
@@ -663,6 +733,19 @@ def gc_pause_time_chart(plt, configurations, bench, first=0, last=100, step=0.1)
     return plt
 
 
+def gc_mark_batch_time_chart(plt, configurations, bench, first=0, last=100, step=0.1):
+    plt = percentiles_chart_generic(plt, configurations, bench, gc_stats_mark_batches, first, last, step)
+    plt.title(bench + ": Mark Batch Times")
+    plt.ylabel("Mark Batch Time (ms)")
+    return plt
+
+def gc_sweep_batch_time_chart(plt, configurations, bench, first=0, last=100, step=0.1):
+    plt = percentiles_chart_generic(plt, configurations, bench, gc_stats_sweep_batches, first, last, step)
+    plt.title(bench + ": Sweep Batch Times")
+    plt.ylabel("Sweep Batch Time (ms)")
+    return plt
+
+
 def print_table(configurations, benchmarks, data):
     leading = ['name']
     for conf in configurations:
@@ -936,6 +1019,10 @@ def write_md_file(rootdir, md_file, parent_configurations, configurations, bench
                          "gc_pause_times_" + bench + ".png")
                 chart_md(md_file, gc_pause_time_chart(plt, configurations, bench, first=95, step=0.01), rootdir,
                          "gc_pause_times_80plus_" + bench + ".png")
+                chart_md(md_file, gc_mark_batch_time_chart(plt, configurations, bench), rootdir,
+                         "gc_mark_batches_" + bench + ".png")
+                chart_md(md_file, gc_sweep_batch_time_chart(plt, configurations, bench), rootdir,
+                         "gc_sweep_batches_" + bench + ".png")
             if size_charts:
                 for p in interesting_percentiles:
                     chart_md(md_file, size_compare_chart_gc_mark(plt, parent_configurations, bench, p), rootdir,

From a09313750c7df15c7143b86d4f25385a005c16de Mon Sep 17 00:00:00 2001
From: Valdis
Date: Thu, 7 Feb 2019 12:27:30 +0100
Subject: [PATCH 159/169] they are supposed to be np arrays

---
 scripts/summary.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/scripts/summary.py b/scripts/summary.py
index 89f6dbe..56c8792 100755
--- a/scripts/summary.py
+++ b/scripts/summary.py
@@ -328,11 +328,13 @@ def gc_stats_total(bench, conf):
 
 def gc_stats_mark_batches(bench, conf):
     mark,_ = gc_batch_times(bench, conf)
-    return mark
+    return np.array(mark)
+
 
 def gc_stats_sweep_batches(bench, conf):
     _, sweep = gc_batch_times(bench, conf)
-    return sweep
+    return np.array(sweep)
+
 
 def percentile_gc(configurations, benchmarks, percentile):
     out_mark = []
     out_sweep = []

From d717bce69f9906f79581d4e0351f33dcb1be5209 Mon Sep 17 00:00:00 2001
From: Valdis
Date: Thu, 7 Feb 2019 17:10:10 +0100
Subject: [PATCH 160/169] zoom in

---
 scripts/summary.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/scripts/summary.py b/scripts/summary.py
index 56c8792..36e66a8 100755
--- a/scripts/summary.py
+++ b/scripts/summary.py
@@ -1015,16 +1015,20 @@ def write_md_file(rootdir, md_file, parent_configurations, configurations, bench
             md_file.write("\n")
 
             chart_md(md_file, percentiles_chart(plt, configurations, bench), rootdir, "percentile_" + bench + ".png")
-            chart_md(md_file, percentiles_chart(plt, configurations, bench, first=95, step=0.01), rootdir, "percentile_80plus_" + bench + ".png")
+            chart_md(md_file, percentiles_chart(plt, configurations, bench, first=95, step=0.01), rootdir, "percentile_95plus_" + bench + ".png")
             if gc_charts:
                 chart_md(md_file, gc_pause_time_chart(plt, configurations, bench), rootdir,
                          "gc_pause_times_" + bench + ".png")
                 chart_md(md_file, gc_pause_time_chart(plt, configurations, bench, first=95, step=0.01), rootdir,
-                         "gc_pause_times_80plus_" + bench + ".png")
+                         "gc_pause_times_95plus_" + bench + ".png")
                 chart_md(md_file, gc_mark_batch_time_chart(plt, configurations, bench), rootdir,
                          "gc_mark_batches_" + bench + ".png")
+                chart_md(md_file, gc_mark_batch_time_chart(plt, configurations, bench, first=95, step=0.01), rootdir,
+                         "gc_mark_batches_95plus_" + bench + ".png")
                 chart_md(md_file, gc_sweep_batch_time_chart(plt, configurations, bench), rootdir,
                          "gc_sweep_batches_" + bench + ".png")
+                chart_md(md_file, gc_sweep_batch_time_chart(plt, configurations, bench, first=95, step=0.01), rootdir,
+                         "gc_sweep_batches_95plus_" + bench + ".png")
             if size_charts:
                 for p in interesting_percentiles:
                     chart_md(md_file, size_compare_chart_gc_mark(plt, parent_configurations, bench, p), rootdir,

From 96c52c4bc5a3044668d1d1a954c60e2b354f6757 Mon Sep 17 00:00:00 2001
From: Valdis
Date: Mon, 11 Feb 2019 19:42:17 +0100
Subject: [PATCH 161/169] fix gc logging when there are no events

---
 scripts/summary.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/summary.py b/scripts/summary.py
index 36e66a8..f0acce3 100755
--- a/scripts/summary.py
+++ b/scripts/summary.py
@@ -237,7 +237,7 @@ def parse_events(data, file, header, timeFilter=(lambda t: True)):
     if thread_index == -1:
         print "Header does not have gc_thread", header, "at", file
     if start_ns_index == -1 or time_ns_index == -1 or thread_index == -1:
-        return collection_events, phase_events_by_thread, batch_events_by_thread
+        return collection_events, phase_events_by_thread, batch_events_by_thread, internal_events_by_thread
 
     for line in data.readlines():
         arr = line.split(",")

From 9be470dcf978f06ba7d5d616ebd548078797f327 Mon Sep 17 00:00:00 2001
From: Valdis
Date: Tue, 12 Feb 2019 18:30:33 +0100
Subject: [PATCH 162/169] fix warmup

---
 scripts/summary.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/summary.py b/scripts/summary.py
index b3a0b48..606a958 100755
--- a/scripts/summary.py
+++ b/scripts/summary.py
@@ -414,7 +414,7 @@ def percentile_gc_bench_total(configurations, bench, p):
 def percentile(configurations, benchmarks, warmup, p):
     out = []
     for bench in benchmarks:
-        out.append(percentile_bench(configurations, warmup, bench, p))
+        out.append(percentile_bench(configurations, bench, warmup, p))
     return out

From 198684dcbce20d0b3f893f82db84002fb85f1654 Mon Sep 17 00:00:00 2001
From: Valdis
Date: Thu, 14 Feb 2019 23:57:47 +0100
Subject: [PATCH 163/169] 0.4.0

---
 confs/scala-native-0.4.0-SNAPSHOT-commix/build.sbt | 6 ++++++
 confs/scala-native-0.4.0-SNAPSHOT-commix/compile | 1 +
 confs/scala-native-0.4.0-SNAPSHOT-commix/plugins.sbt | 1 +
 confs/scala-native-0.4.0-SNAPSHOT-commix/run | 1 +
 confs/scala-native-0.4.0-SNAPSHOT/build.sbt | 6 ++++++
 confs/scala-native-0.4.0-SNAPSHOT/compile | 1 +
 confs/scala-native-0.4.0-SNAPSHOT/plugins.sbt | 1 +
 confs/scala-native-0.4.0-SNAPSHOT/run | 1 +
 project/plugins.sbt | 2 +-
 scripts/run.py | 2 +-
 10 files changed, 20 insertions(+), 2 deletions(-)
 create mode 100644 confs/scala-native-0.4.0-SNAPSHOT-commix/build.sbt
 create mode 100644 confs/scala-native-0.4.0-SNAPSHOT-commix/compile
 create mode 100644 confs/scala-native-0.4.0-SNAPSHOT-commix/plugins.sbt
 create mode 100644 confs/scala-native-0.4.0-SNAPSHOT-commix/run
 create mode 100644 confs/scala-native-0.4.0-SNAPSHOT/build.sbt
 create mode 100644 confs/scala-native-0.4.0-SNAPSHOT/compile
 create mode 100644 confs/scala-native-0.4.0-SNAPSHOT/plugins.sbt
 create mode 100644 confs/scala-native-0.4.0-SNAPSHOT/run

diff --git a/confs/scala-native-0.4.0-SNAPSHOT-commix/build.sbt b/confs/scala-native-0.4.0-SNAPSHOT-commix/build.sbt
new file mode 100644
index 0000000..2104b7a
--- /dev/null
+++ b/confs/scala-native-0.4.0-SNAPSHOT-commix/build.sbt
@@ -0,0 +1,6 @@
+scalaVersion := "2.11.12"
+enablePlugins(ScalaNativePlugin)
+nativeLinkStubs := true
+nativeGC := "commix"
+nativeMode := "release"
+nativeLTO := "thin"
diff --git a/confs/scala-native-0.4.0-SNAPSHOT-commix/compile b/confs/scala-native-0.4.0-SNAPSHOT-commix/compile
new file mode 100644
index 0000000..2f3f09f
--- /dev/null
+++ b/confs/scala-native-0.4.0-SNAPSHOT-commix/compile
@@ -0,0 +1 @@
+nativeLink
diff --git a/confs/scala-native-0.4.0-SNAPSHOT-commix/plugins.sbt b/confs/scala-native-0.4.0-SNAPSHOT-commix/plugins.sbt
new file mode 100644
index 0000000..2a63bf0
--- /dev/null
+++ b/confs/scala-native-0.4.0-SNAPSHOT-commix/plugins.sbt
@@ -0,0 +1 @@
+addSbtPlugin("org.scala-native" % "sbt-scala-native" % "0.4.0-SNAPSHOT")
diff --git a/confs/scala-native-0.4.0-SNAPSHOT-commix/run b/confs/scala-native-0.4.0-SNAPSHOT-commix/run
new file mode 100644
index 0000000..ae89e34
--- /dev/null
+++ b/confs/scala-native-0.4.0-SNAPSHOT-commix/run
@@ -0,0 +1 @@
+target/scala-2.11/scala-native-benchmarks-out
diff --git a/confs/scala-native-0.4.0-SNAPSHOT/build.sbt b/confs/scala-native-0.4.0-SNAPSHOT/build.sbt
new file mode 100644
index 0000000..ae87f31
--- /dev/null
+++ b/confs/scala-native-0.4.0-SNAPSHOT/build.sbt
@@ -0,0 +1,6 @@
+scalaVersion := "2.11.12"
+enablePlugins(ScalaNativePlugin)
+nativeLinkStubs := true
+nativeGC := "immix"
+nativeMode := "release"
+nativeLTO := "thin"
diff --git a/confs/scala-native-0.4.0-SNAPSHOT/compile b/confs/scala-native-0.4.0-SNAPSHOT/compile
new file mode 100644
index 0000000..2f3f09f
--- /dev/null
+++ b/confs/scala-native-0.4.0-SNAPSHOT/compile
@@ -0,0 +1 @@
+nativeLink
diff --git a/confs/scala-native-0.4.0-SNAPSHOT/plugins.sbt b/confs/scala-native-0.4.0-SNAPSHOT/plugins.sbt
new file mode 100644
index 0000000..2a63bf0
--- /dev/null
+++ b/confs/scala-native-0.4.0-SNAPSHOT/plugins.sbt
@@ -0,0 +1 @@
+addSbtPlugin("org.scala-native" % "sbt-scala-native" % "0.4.0-SNAPSHOT")
diff --git a/confs/scala-native-0.4.0-SNAPSHOT/run b/confs/scala-native-0.4.0-SNAPSHOT/run
new file mode 100644
index 0000000..ae89e34
--- /dev/null
+++ b/confs/scala-native-0.4.0-SNAPSHOT/run
@@ -0,0 +1 @@
+target/scala-2.11/scala-native-benchmarks-out
diff --git a/project/plugins.sbt b/project/plugins.sbt
index c1423b6..2a63bf0 100644
--- a/project/plugins.sbt
+++ b/project/plugins.sbt
@@ -1 +1 @@
-addSbtPlugin("org.scala-native" % "sbt-scala-native" % "0.3.9-SNAPSHOT")
+addSbtPlugin("org.scala-native" % "sbt-scala-native" % "0.4.0-SNAPSHOT")
diff --git a/scripts/run.py b/scripts/run.py
index 1246fbb..9985f88 100755
--- a/scripts/run.py
+++ b/scripts/run.py
@@ -172,7 +172,7 @@ def compile(conf, bench, compilecmd, gcstats, debug, trace, extra_args):
 ]
 
 stable = 'scala-native-0.3.8'
-latest = 'scala-native-0.3.9-SNAPSHOT'
+latest = 'scala-native-0.4.0-SNAPSHOT'
 
 baseline = [
     'jvm',
     stable,

From 4c45a55d97e4aaf6e9be990e94882f2c2935a432 Mon Sep 17 00:00:00 2001
From: Valdis
Date: Mon, 18 Feb 2019 11:26:23 +0100
Subject: [PATCH 164/169] compare runs in each configuration

---
 scripts/summary.py | 100 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 98 insertions(+), 2 deletions(-)

diff --git a/scripts/summary.py b/scripts/summary.py
index 606a958..8aef89d 100755
--- a/scripts/summary.py
+++ b/scripts/summary.py
@@ -37,6 +37,23 @@ def config_data(bench, conf, warmup):
     return np.array(out)
 
 
+def config_data_run(bench, conf, run, warmup):
+    out = []
+    try:
+        points = []
+        with open(os.path.join("results", conf, bench, str(run))) as data:
+            for line in data.readlines():
+                try:
+                    # in ms
+                    points.append(float(line) / 1000000)
+                except Exception as e:
+                    print e
+        out = points[warmup:]
+    except IOError:
+        pass
+    return np.array(out)
+
+
 def gc_pauses_main_thread(bench, conf):
     benchmark_dir = os.path.join("results", conf, bench)
     files = next(os.walk(benchmark_dir), [[], [], []])[2]
@@ -600,6 +617,40 @@ def example_run_plot(plt, configurations, bench, run=3, lastn=-1):
     return plt
 
 
+def example_all_runs_plot(plt, conf, bench, lastn=-1):
+    plt.clf()
+    plt.cla()
+    max_run = find_last_run(conf, bench)
+
+    for run in np.arange(0, max_run + 1):
+        rawpoints = []
+        try:
+            with open('results/{}/{}/{}'.format(conf, bench, run)) as data:
+                for line in data.readlines():
+                    try:
+                        rawpoints.append(float(line) / 1000000)
+                    except Exception as e:
+                        print e
+        except IOError:
+            pass
+
+        total_len = len(rawpoints)
+        if total_len == 0:
+            continue
+        if lastn != -1:
+            first = total_len - lastn
+        else:
+            first = 0
+        ind = np.arange(first, total_len)
+        points = rawpoints[first:]
+        plt.plot(ind, points, label=run)
+    plt.title("{} all runs for {}".format(bench, conf))
+    plt.xlabel("Iteration")
+    plt.ylabel("Run time (ms)")
+    plt.legend()
+    return plt
+
+
 def to_gb(size_str):
     if size_str[-1] == "k" or size_str[-1] == "K":
         return float(size_str[:-1]) / 1024 / 1024
@@ -707,6 +758,23 @@ def size_compare_chart_gc_sweep(plt, parent_configurations, bench, p):
     return plt
 
 
+def percentiles_chart_generic_runs(plt, conf, bench, get_data, first, last, step):
+    plt.clf()
+    plt.cla()
+    max_run = find_last_run(conf, bench)
+
+    for run in np.arange(0, max_run + 1):
+        data = get_data(bench, conf, run)
+        if data.size > 0:
+            percentiles = filter(lambda x: 0 <= x <= 100, np.arange(first, last + step, step))
+            percvalue = np.array([np.percentile(data, perc) for perc in percentiles])
+            plt.plot(percentiles, percvalue, label=run)
+    plt.legend()
+    plt.ylim(ymin=0)
+    plt.xlabel("Percentile")
+    return plt
+
+
 def percentiles_chart_generic(plt, configurations, bench, get_data, first, last, step):
     plt.clf()
     plt.cla()
@@ -729,6 +797,15 @@ def percentiles_chart(plt, configurations, bench, warmup, first=0, last=100, ste
     return plt
 
 
+def percentiles_chart_runs(plt, conf, bench, warmup, first=0, last=100, step=0.1):
+    plt = percentiles_chart_generic_runs(plt, conf, bench,
+                                         lambda bench, conf, run: config_data_run(bench, conf, run, warmup), first,
+                                         last, step)
+    plt.title(bench + " " + conf)
+    plt.ylabel("Run time (ms)")
+    return plt
+
+
 def gc_pause_time_chart(plt, configurations, bench, first=0, last=100, step=0.1):
     plt = percentiles_chart_generic(plt, configurations, bench, gc_stats_total, first, last, step)
     plt.title(bench + ": Garbage Collector Pause Times")
@@ -1057,8 +1134,14 @@ def write_md_file(rootdir, md_file, parent_configurations, configurations, bench
                          "example_run_last1000_" + str(run) + "_" + bench + ".png")
                 chart_md(md_file, example_run_plot(plt, configurations, bench, run), rootdir,
                          "example_run_full_" + str(run) + "_" + bench + ".png")
-        if gc_charts:
-            for conf in configurations:
+        for conf in configurations:
+            chart_md(md_file, percentiles_chart_runs(plt, conf, bench, warmup), rootdir, "percentile_" + bench + "_conf" + str(configurations.index(conf))+ ".png")
+            chart_md(md_file, percentiles_chart_runs(plt, conf, bench, warmup, first=95, step=0.01), rootdir, "percentile_95plus_" + bench + "_conf" + str(configurations.index(conf))+ ".png")
+            chart_md(md_file, example_all_runs_plot(plt, conf, bench, 1000), rootdir,
+                     "example_allruns_last1000_conf" + str(configurations.index(conf)) + "_" + bench + ".png")
+            chart_md(md_file, example_all_runs_plot(plt, conf, bench), rootdir,
+                     "example_allruns_full_conf" + str(configurations.index(conf)) + "_" + bench + ".png")
+            if gc_charts:
                 gc_data = gc_events_for_last_n_collections(bench, conf, run)
                 chart_md(md_file,
                          gc_gantt_chart(plt, conf, bench, gc_data),
@@ -1082,6 +1165,19 @@ def any_run_exists(bench, configurations, run):
     return exits
 
 
+def find_last_run(conf, bench):
+    max_run = 0
+    while True:
+        file = 'results/{}/{}/{}'.format(conf, bench, max_run)
+        if not os.path.exists(file):
+            break
+        max_run += 1
+    max_run -= 1
+
+    return max_run
+
+
+
 def discover_benchmarks(configurations):
     benchmarks = []
     for conf in configurations:

From f83b407569e6399db4cc78fdc73984ee6ef1cb72 Mon Sep 17 00:00:00 2001
From: Valdis
Date: Mon, 18 Feb 2019 11:28:11 +0100
Subject: [PATCH 165/169] too many charts

---
 scripts/summary.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/scripts/summary.py b/scripts/summary.py
index 8aef89d..74b3daa 100755
--- a/scripts/summary.py
+++ b/scripts/summary.py
@@ -1130,17 +1130,17 @@ def write_md_file(rootdir, md_file, parent_configurations, configurations, bench
                 run -= 1
 
             if run >= 0:
-                chart_md(md_file, example_run_plot(plt, configurations, bench, run, 1000), rootdir,
-                         "example_run_last1000_" + str(run) + "_" + bench + ".png")
+                # chart_md(md_file, example_run_plot(plt, configurations, bench, run, 1000), rootdir,
+                #          "example_run_last1000_" + str(run) + "_" + bench + ".png")
                 chart_md(md_file, example_run_plot(plt, configurations, bench, run), rootdir,
                          "example_run_full_" + str(run) + "_" + bench + ".png")
         for conf in configurations:
             chart_md(md_file, percentiles_chart_runs(plt, conf, bench, warmup), rootdir, "percentile_" + bench + "_conf" + str(configurations.index(conf))+ ".png")
             chart_md(md_file, percentiles_chart_runs(plt, conf, bench, warmup, first=95, step=0.01), rootdir, "percentile_95plus_" + bench + "_conf" + str(configurations.index(conf))+ ".png")
-            chart_md(md_file, example_all_runs_plot(plt, conf, bench, 1000), rootdir,
-                     "example_allruns_last1000_conf" + str(configurations.index(conf)) + "_" + bench + ".png")
-            chart_md(md_file, example_all_runs_plot(plt, conf, bench), rootdir,
-                     "example_allruns_full_conf" + str(configurations.index(conf)) + "_" + bench + ".png")
+            # chart_md(md_file, example_all_runs_plot(plt, conf, bench, 1000), rootdir,
+            #          "example_allruns_last1000_conf" + str(configurations.index(conf)) + "_" + bench + ".png")
+            # chart_md(md_file, example_all_runs_plot(plt, conf, bench), rootdir,
+            #          "example_allruns_full_conf" + str(configurations.index(conf)) + "_" + bench + ".png")
             if gc_charts:
                 gc_data = gc_events_for_last_n_collections(bench, conf, run)
                 chart_md(md_file,

From 057c3f591d9ea0e96fd988a065dd76fb2bda4c75 Mon Sep 17 00:00:00 2001
From: Valdis
Date: Sat, 23 Feb 2019 08:37:23 +0100
Subject: [PATCH 166/169] discard 20% of the worst runs

---
 scripts/summary.py | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/scripts/summary.py b/scripts/summary.py
index 74b3daa..1cd2006 100755
--- a/scripts/summary.py
+++ b/scripts/summary.py
@@ -11,7 +11,7 @@
 import argparse
 
 
-def config_data(bench, conf, warmup):
+def config_data_goodruns(bench, conf, warmup):
     benchmark_dir = os.path.join("results", conf, bench)
     files = next(os.walk(benchmark_dir), [[], [], []])[2]
     runs = []
@@ -20,21 +20,28 @@
         # regular benchmark data
         runs.append(file)
 
-    out = []
+    points_with_50percentile = []
     for run in runs:
         try:
-            points = []
+            raw_points = []
             with open(os.path.join("results", conf, bench, run)) as data:
                 for line in data.readlines():
                     try:
                         # in ms
-                        points.append(float(line) / 1000000)
+                        raw_points.append(float(line) / 1000000)
                     except Exception as e:
                         print e
-            out += points[warmup:]
+            points = raw_points[warmup:]
+            points_with_50percentile += [(points, np.percentile(points, 50))]
         except IOError:
             pass
-    return np.array(out)
+    to_discard = int(0.2 * len(points_with_50percentile))
+    if to_discard > 0:
+        sorted_arr = sorted(points_with_50percentile, key=lambda x: -x[1])
+        out = map(lambda x: x[0], sorted_arr[to_discard:])
+    else:
+        out = map(lambda x: x[0], points_with_50percentile)
+    return np.array(sum(out, []))
 
 
 def config_data_run(bench, conf, run, warmup):
@@ -439,7 +446,7 @@ def percentile_bench(configurations, bench, warmup, p):
     res = []
     for conf in configurations:
         try:
-            res.append(np.percentile(config_data(bench, conf, warmup), p))
+            res.append(np.percentile(config_data_goodruns(bench, conf, warmup), p))
         except IndexError:
             res.append(0)
     return res
@@ -456,7 +463,7 @@ def totals_bench(configurations, bench, warmup):
     res = []
     for conf in configurations:
         try:
-            res.append(np.sum(config_data(bench, conf, warmup)))
+            res.append(np.sum(config_data_goodruns(bench, conf, warmup)))
         except IndexError:
             res.append(0)
     return res
@@ -791,7 +798,7 @@ def percentiles_chart(plt, configurations, bench, warmup, first=0, last=100, ste
-    plt = percentiles_chart_generic(plt, configurations, bench, lambda bench, conf : config_data(bench, conf, warmup), first, last, step)
+    plt = percentiles_chart_generic(plt, configurations, bench, lambda bench, conf : config_data_goodruns(bench, conf, warmup), first, last, step)
     plt.title(bench)
     plt.ylabel("Run time (ms)")
     return plt

From efd21aa758dfd73747d1b4de6e4fb45475e067e8 Mon Sep 17 00:00:00 2001
From: Valdis
Date: Sun, 24 Feb 2019 08:20:12 +0100
Subject: [PATCH 167/169] exclude list and mandelbrot from running by default

---
 scripts/run.py | 4 ++--
 scripts/summary.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/run.py b/scripts/run.py
index 9985f88..41c8702 100755
--- a/scripts/run.py
+++ b/scripts/run.py
@@ -151,7 +151,6 @@ def compile(conf, bench, compilecmd, gcstats, debug, trace, extra_args):
 
 default_benchmarks = [
     'bounce.BounceBenchmark',
-    'list.ListBenchmark',
     'richards.RichardsBenchmark',
     'queens.QueensBenchmark',
     'permute.PermuteBenchmark',
     'deltablue.DeltaBlueBenchmark',
     'tracer.TracerBenchmark',
     'brainfuck.BrainfuckBenchmark',
     'json.JsonBenchmark',
     'cd.CDBenchmark',
     'kmeans.KmeansBenchmark',
     'gcbench.GCBenchBenchmark',
-    'mandelbrot.MandelbrotBenchmark',
     'nbody.NbodyBenchmark',
     'sudoku.SudokuBenchmark',
 ]
 
 all_benchmarks = default_benchmarks + [
     'histogram.Histogram',
+    'list.ListBenchmark',
+    'mandelbrot.MandelbrotBenchmark',
 ]
 
 stable = 'scala-native-0.3.8'
diff --git a/scripts/summary.py b/scripts/summary.py
index 1cd2006..de04c3c 100755
--- a/scripts/summary.py
+++ b/scripts/summary.py
@@ -1257,7 +1257,7 @@ def is_subconfig(subconf):
         for b in args.benchmark:
             benchmarks += filter(lambda s: s.startswith(b), all_benchmarks)
     else:
-        excluded_benchmarks = ['list.ListBenchmark']
+        excluded_benchmarks = ['list.ListBenchmark', 'mandelbrot.MandelbrotBenchmark']
         benchmarks = [x for x in all_benchmarks if x not in excluded_benchmarks]
 
 report_dir = "reports/summary_" + time.strftime('%Y%m%d_%H%M%S') + "_" + comment + "/"

From 798ecf1a5370f3ffedd26fc29856a14356fada79 Mon Sep 17 00:00:00 2001
From: Valdis
Date: Thu, 28 Feb 2019 10:27:19 +0100
Subject: [PATCH 168/169] handle new heap sizing names

---
 scripts/run.py | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/scripts/run.py b/scripts/run.py
index 41c8702..bac70cf 100755
--- a/scripts/run.py
+++ b/scripts/run.py
@@ -265,13 +265,24 @@ def single_run(to_run):
 
     if minsize != "default":
         my_env["SCALANATIVE_MIN_HEAP_SIZE"] = minsize
-    elif "SCALANATIVE_MIN_HEAP_SIZE" in my_env:
-        del my_env["SCALANATIVE_MIN_HEAP_SIZE"]
+        # in 0.4.0 the heap settings names changed.
+        my_env["SCALANATIVE_MIN_SIZE"] = minsize
+    else:
+        if "SCALANATIVE_MIN_HEAP_SIZE" in my_env:
+            del my_env["SCALANATIVE_MIN_HEAP_SIZE"]
+        if "SCALANATIVE_MIN_SIZE" in my_env:
+            del my_env["SCALANATIVE_MIN_SIZE"]
+
     if maxsize != "default":
         my_env["SCALANATIVE_MAX_HEAP_SIZE"] = maxsize
-    elif "SCALANATIVE_MAX_HEAP_SIZE" in my_env:
-        del my_env["SCALANATIVE_MAX_HEAP_SIZE"]
+        # in 0.4.0 the heap settings names changed.
+        my_env["SCALANATIVE_MAX_SIZE"] = maxsize
+    else:
+        if "SCALANATIVE_MAX_HEAP_SIZE" in my_env:
+            del my_env["SCALANATIVE_MAX_HEAP_SIZE"]
+        if "SCALANATIVE_MAX_SIZE" in my_env:
+            del my_env["SCALANATIVE_MAX_SIZE"]
 
     if gcThreads != "default":
         my_env["SCALANATIVE_GC_THREADS"] = gcThreads

From 7308640bdda77d68f30bec0db8bf6df7e8e2223a Mon Sep 17 00:00:00 2001
From: Valdis
Date: Sun, 3 Mar 2019 11:12:57 +0100
Subject: [PATCH 169/169] remove outliers from the non-50 percentiles

---
 scripts/summary.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/summary.py b/scripts/summary.py
index de04c3c..cec2423 100755
--- a/scripts/summary.py
+++ b/scripts/summary.py
@@ -11,7 +11,7 @@
 import argparse
 
 
-def config_data_goodruns(bench, conf, warmup):
+def config_data_goodruns(bench, conf, warmup, p=50):
     benchmark_dir = os.path.join("results", conf, bench)
     files = next(os.walk(benchmark_dir), [[], [], []])[2]
     runs = []
@@ -32,7 +32,7 @@
                     except Exception as e:
                         print e
             points = raw_points[warmup:]
-            points_with_50percentile += [(points, np.percentile(points, 50))]
+            points_with_50percentile += [(points, np.percentile(points, p))]
         except IOError:
             pass
     to_discard = int(0.2 * len(points_with_50percentile))
@@ -446,7 +446,7 @@ def percentile_bench(configurations, bench, warmup, p):
     res = []
     for conf in configurations:
         try:
-            res.append(np.percentile(config_data_goodruns(bench, conf, warmup), p))
+            res.append(np.percentile(config_data_goodruns(bench, conf, warmup, p), p))
         except IndexError:
             res.append(0)
     return res
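Taken together, PATCH 155 (skip warmup iterations), PATCH 166 (discard the slowest 20% of runs by their median) and PATCH 169 (rank runs by the reported percentile rather than always by the median) define the data-cleaning pipeline behind every percentile in the summary reports. The following is a minimal, self-contained sketch of that scheme; the helper name good_runs and the fabricated timings are illustrative only and do not appear in scripts/summary.py:

import numpy as np

def good_runs(runs, warmup, p=50, discard_fraction=0.2):
    # Drop the warmup iterations of each run; ignore runs that never get past warmup.
    trimmed = [run[warmup:] for run in runs if len(run) > warmup]
    # Score each surviving run by its p-th percentile so the slowest runs can be ranked.
    scored = [(points, np.percentile(points, p)) for points in trimmed]
    # Discard the worst-scoring fraction, mirroring config_data_goodruns in PATCH 166.
    to_discard = int(discard_fraction * len(scored))
    if to_discard > 0:
        scored = sorted(scored, key=lambda x: -x[1])[to_discard:]
    if not scored:
        return np.array([])
    # Pool the remaining iterations into a single sample for percentile queries.
    return np.concatenate([points for points, _ in scored])

# Fabricated example: nine well-behaved runs and one pathological run.
runs = [list(np.random.uniform(10.0, 12.0, 1000)) for _ in range(9)]
runs.append(list(np.random.uniform(50.0, 60.0, 1000)))
sample = good_runs(runs, warmup=200, p=99)
print(np.percentile(sample, 99))

Scoring runs by the percentile that will actually be reported (the PATCH 169 change) keeps a run with a quiet median but a noisy tail from slipping past the filter when high percentiles such as 99.9 are being summarized.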