From 013fc7f282c03d780b2d3c2da4917a0e7ad0a9b9 Mon Sep 17 00:00:00 2001 From: Pavol Vaskovic Date: Sat, 15 Apr 2017 01:46:47 +0200 Subject: [PATCH 01/10] Fix: SR-4597 Benchmark results have wrong MEAN, MEDIAN and SD --- benchmark/scripts/Benchmark_Driver | 44 ++++++------------------------ 1 file changed, 8 insertions(+), 36 deletions(-) diff --git a/benchmark/scripts/Benchmark_Driver b/benchmark/scripts/Benchmark_Driver index 22cb3e87ea25c..a54a622efe540 100755 --- a/benchmark/scripts/Benchmark_Driver +++ b/benchmark/scripts/Benchmark_Driver @@ -81,42 +81,14 @@ def submit_to_lnt(data, url): def instrument_test(driver_path, test, num_samples): """Run a test and instrument its peak memory use""" - test_outputs = [] - for _ in range(num_samples): - test_output_raw = subprocess.check_output( - ['time', '-lp', driver_path, test], - stderr=subprocess.STDOUT - ) - peak_memory = re.match('\s*(\d+)\s*maximum resident set size', - test_output_raw.split('\n')[-15]).group(1) - test_outputs.append(test_output_raw.split()[1].split(',') + - [peak_memory]) - - # Average sample results - num_samples_index = 2 - min_index = 3 - max_index = 4 - avg_start_index = 5 - - # TODO: Correctly take stdev - avg_test_output = test_outputs[0] - avg_test_output[avg_start_index:] = map(int, - avg_test_output[avg_start_index:]) - for test_output in test_outputs[1:]: - for i in range(avg_start_index, len(test_output)): - avg_test_output[i] += int(test_output[i]) - for i in range(avg_start_index, len(avg_test_output)): - avg_test_output[i] = int(round(avg_test_output[i] / - float(len(test_outputs)))) - avg_test_output[num_samples_index] = num_samples - avg_test_output[min_index] = min( - test_outputs, key=lambda x: int(x[min_index]))[min_index] - avg_test_output[max_index] = max( - test_outputs, key=lambda x: int(x[max_index]))[max_index] - avg_test_output = map(str, avg_test_output) - - return avg_test_output - + test_output_raw = subprocess.check_output( + ['time', '-lp', driver_path, test, '--num-samples='+str(num_samples)], + stderr=subprocess.STDOUT + ) + peak_memory = re.match('\s*(\d+)\s*maximum resident set size', + test_output_raw.split('\n')[-15]).group(1) + test_output = (test_output_raw.split()[1].split(',') + [peak_memory]) + return test_output def get_tests(driver_path): """Return a list of available performance tests""" From eff45e29acc56479d7a26a1bc5e9d9c3a84ee9e3 Mon Sep 17 00:00:00 2001 From: Pavol Vaskovic Date: Sat, 15 Apr 2017 05:06:13 +0200 Subject: [PATCH 02/10] Fix SR-4598 Add option to run subset of benchmarks matching a prefix --- benchmark/scripts/Benchmark_Driver | 85 +++++++++++++++++------------- 1 file changed, 49 insertions(+), 36 deletions(-) diff --git a/benchmark/scripts/Benchmark_Driver b/benchmark/scripts/Benchmark_Driver index a54a622efe540..246815d8e47b3 100755 --- a/benchmark/scripts/Benchmark_Driver +++ b/benchmark/scripts/Benchmark_Driver @@ -90,9 +90,17 @@ def instrument_test(driver_path, test, num_samples): test_output = (test_output_raw.split()[1].split(',') + [peak_memory]) return test_output -def get_tests(driver_path): + +def get_tests(driver_path, args): """Return a list of available performance tests""" - return subprocess.check_output([driver_path, '--list']).split()[2:] + tests = subprocess.check_output([driver_path, '--list']).split()[2:] + if args.filter: + prefix = args.filter + tests = filter(lambda name: name.startswith(prefix), tests) + elif args.benchmark: + benchmarks = set(args.benchmark) + tests = sorted(list(set(tests).intersection(benchmarks))) + return tests 
def get_current_git_branch(git_repo_path): @@ -140,9 +148,7 @@ def run_benchmarks(driver, benchmarks=[], num_samples=10, verbose=False, line_format = '{:>3} {:<25} {:>7} {:>7} {:>7} {:>8} {:>6} {:>10} {:>10}' if verbose and log_directory: print(line_format.format(*headings)) - for test in get_tests(driver): - if benchmarks and test not in benchmarks: - continue + for test in benchmarks: test_output = instrument_test(driver, test, num_samples) if test_output[0] == 'Totals': continue @@ -191,7 +197,7 @@ def submit(args): file = os.path.join(args.tests, "Benchmark_" + optset) try: res = run_benchmarks( - file, benchmarks=args.benchmark, + file, benchmarks=get_tests(file, args), num_samples=args.iterations) data['Tests'].extend(parse_results(res, optset)) except subprocess.CalledProcessError as e: @@ -215,7 +221,7 @@ def run(args): optset = args.optimization file = os.path.join(args.tests, "Benchmark_" + optset) run_benchmarks( - file, benchmarks=args.benchmarks, + file, benchmarks=get_tests(file, args), num_samples=args.iterations, verbose=True, log_directory=args.output_dir, swift_repo=args.swift_repo) @@ -318,17 +324,40 @@ def positive_int(value): def main(): - parser = argparse.ArgumentParser(description='Swift benchmarks driver') - subparsers = parser.add_subparsers() + parser = argparse.ArgumentParser( + epilog='Example: ./Benchmark_Driver run -i 5 -f Array' + ) + subparsers = parser.add_subparsers( + title='Swift benchmark driver commands', + help='See COMMAND -h for additional arguments', metavar='') - submit_parser = subparsers.add_parser( - 'submit', - help='run benchmarks and submit results to LNT') - submit_parser.add_argument( + parent_parser = argparse.ArgumentParser(add_help=False) + benchmarks_group = parent_parser.add_mutually_exclusive_group() + benchmarks_group.add_argument( + 'benchmark', + default=[], + help='benchmark to run (default: all)', nargs='*', metavar="BENCHMARK") + benchmarks_group.add_argument( + '-f', '--filter', + help='run all tests whose name starts with PREFIX', metavar="PREFIX") + parent_parser.add_argument( '-t', '--tests', help='directory containing Benchmark_O{,none,unchecked} ' + '(default: DRIVER_DIR)', default=DRIVER_DIR) + + submit_parser = subparsers.add_parser( + 'submit', + help='Run benchmarks and submit results to LNT', + parents=[parent_parser]) + submit_parser.add_argument( + '-o', '--optimization', nargs='+', + help='optimization levels to use (default: O Onone Ounchecked)', + default=['O', 'Onone', 'Ounchecked']) + submit_parser.add_argument( + '-i', '--iterations', + help='number of times to run each test (default: 10)', + type=positive_int, default=10) submit_parser.add_argument( '-m', '--machine', required=True, help='LNT machine name') @@ -338,48 +367,32 @@ def main(): submit_parser.add_argument( '-l', '--lnt_host', required=True, help='LNT host to submit results to') - submit_parser.add_argument( - '-i', '--iterations', - help='number of times to run each test (default: 10)', - type=positive_int, default=10) - submit_parser.add_argument( - '-o', '--optimization', nargs='+', - help='optimization levels to use (default: O Onone Ounchecked)', - default=['O', 'Onone', 'Ounchecked']) - submit_parser.add_argument( - 'benchmark', - help='benchmark to run (default: all)', nargs='*') submit_parser.set_defaults(func=submit) run_parser = subparsers.add_parser( 'run', - help='run benchmarks and output results to stdout') + help='Run benchmarks and output results to stdout', + parents=[parent_parser]) run_parser.add_argument( - '-t', '--tests', - 
help='directory containing Benchmark_O{,none,unchecked} ' + - '(default: DRIVER_DIR)', - default=DRIVER_DIR) + '-o', '--optimization', + metavar='OPT', + choices=['O', 'Onone', 'Ounchecked'], + help='optimization level to use (default: O)', default='O') run_parser.add_argument( '-i', '--iterations', help='number of times to run each test (default: 1)', type=positive_int, default=1) - run_parser.add_argument( - '-o', '--optimization', - help='optimization level to use (default: O)', default='O') run_parser.add_argument( '--output-dir', help='log results to directory (default: no logging)') run_parser.add_argument( '--swift-repo', help='absolute path to Swift source repo for branch comparison') - run_parser.add_argument( - 'benchmarks', - help='benchmark to run (default: all)', nargs='*') run_parser.set_defaults(func=run) compare_parser = subparsers.add_parser( 'compare', - help='compare benchmark results') + help='Compare benchmark results') compare_parser.add_argument( '--log-dir', required=True, help='directory containing benchmark logs') From baefb6665794b1fb0638a74a6a8cd50ef61202ae Mon Sep 17 00:00:00 2001 From: Pavol Vaskovic Date: Sat, 15 Apr 2017 05:59:27 +0200 Subject: [PATCH 03/10] Fix SR-4590 compare_perf_tests.py fails when new benchmarks are added --- benchmark/scripts/Benchmark_Driver | 9 ++++--- benchmark/scripts/compare_perf_tests.py | 33 +++++++++++-------------- 2 files changed, 19 insertions(+), 23 deletions(-) diff --git a/benchmark/scripts/Benchmark_Driver b/benchmark/scripts/Benchmark_Driver index 246815d8e47b3..3ffdb15749f84 100755 --- a/benchmark/scripts/Benchmark_Driver +++ b/benchmark/scripts/Benchmark_Driver @@ -82,7 +82,8 @@ def submit_to_lnt(data, url): def instrument_test(driver_path, test, num_samples): """Run a test and instrument its peak memory use""" test_output_raw = subprocess.check_output( - ['time', '-lp', driver_path, test, '--num-samples='+str(num_samples)], + ['time', '-lp', driver_path, test, '--num-samples=' + + str(num_samples)], stderr=subprocess.STDOUT ) peak_memory = re.match('\s*(\d+)\s*maximum resident set size', @@ -351,9 +352,9 @@ def main(): help='Run benchmarks and submit results to LNT', parents=[parent_parser]) submit_parser.add_argument( - '-o', '--optimization', nargs='+', - help='optimization levels to use (default: O Onone Ounchecked)', - default=['O', 'Onone', 'Ounchecked']) + '-o', '--optimization', nargs='+', + help='optimization levels to use (default: O Onone Ounchecked)', + default=['O', 'Onone', 'Ounchecked']) submit_parser.add_argument( '-i', '--iterations', help='number of times to run each test (default: 10)', diff --git a/benchmark/scripts/compare_perf_tests.py b/benchmark/scripts/compare_perf_tests.py index 755d05498c58c..8203e166a7016 100755 --- a/benchmark/scripts/compare_perf_tests.py +++ b/benchmark/scripts/compare_perf_tests.py @@ -128,29 +128,24 @@ def main(): RATIO_MAX = 1 + float(args.delta_threshold) for row in old_data: - if (len(row) > 7 and row[MIN].isdigit()): - if row[TESTNAME] in old_results: - if old_results[row[TESTNAME]] > int(row[MIN]): - old_results[row[TESTNAME]] = int(row[MIN]) - if old_max_results[row[TESTNAME]] < int(row[MAX]): - old_max_results[row[TESTNAME]] = int(row[MAX]) - else: - old_results[row[TESTNAME]] = int(row[MIN]) - old_max_results[row[TESTNAME]] = int(row[MAX]) + if (len(row) > 8): # skip Totals row + old_results[row[TESTNAME]] = int(row[MIN]) + old_max_results[row[TESTNAME]] = int(row[MAX]) for row in new_data: - if (len(row) > 7 and row[MIN].isdigit()): - if row[TESTNAME] in 
new_results: - if int(new_results[row[TESTNAME]]) > int(row[MIN]): - new_results[row[TESTNAME]] = int(row[MIN]) - if new_max_results[row[TESTNAME]] < int(row[MAX]): - new_max_results[row[TESTNAME]] = int(row[MAX]) - else: - new_results[row[TESTNAME]] = int(row[MIN]) - new_max_results[row[TESTNAME]] = int(row[MAX]) + if (len(row) > 8): # skip Totals row + new_results[row[TESTNAME]] = int(row[MIN]) + new_max_results[row[TESTNAME]] = int(row[MAX]) ratio_total = 0 - for key in new_results.keys(): + + new_tests = set(new_results.keys()) + old_tests = set(old_results.keys()) + # added_tests = new_tests.difference(old_tests) + # removed_tests = old_tests.difference(new_tests) + comparable_tests = new_tests.intersection(old_tests) + + for key in comparable_tests: ratio = (old_results[key] + 0.001) / (new_results[key] + 0.001) ratio_list[key] = round(ratio, 2) ratio_total *= ratio From 72efbd677ebfc2e56e191537eb300851bec872a9 Mon Sep 17 00:00:00 2001 From: Pavol Vaskovic Date: Tue, 18 Apr 2017 00:28:27 +0200 Subject: [PATCH 04/10] Addressed python style issue raised during review --- benchmark/scripts/Benchmark_Driver | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/benchmark/scripts/Benchmark_Driver b/benchmark/scripts/Benchmark_Driver index 3ffdb15749f84..1bad9be7b3805 100755 --- a/benchmark/scripts/Benchmark_Driver +++ b/benchmark/scripts/Benchmark_Driver @@ -82,8 +82,8 @@ def submit_to_lnt(data, url): def instrument_test(driver_path, test, num_samples): """Run a test and instrument its peak memory use""" test_output_raw = subprocess.check_output( - ['time', '-lp', driver_path, test, '--num-samples=' + - str(num_samples)], + ['time', '-lp', driver_path, test, + '--num-samples={}'.format(num_samples)], stderr=subprocess.STDOUT ) peak_memory = re.match('\s*(\d+)\s*maximum resident set size', @@ -96,12 +96,10 @@ def get_tests(driver_path, args): """Return a list of available performance tests""" tests = subprocess.check_output([driver_path, '--list']).split()[2:] if args.filter: - prefix = args.filter - tests = filter(lambda name: name.startswith(prefix), tests) - elif args.benchmark: - benchmarks = set(args.benchmark) - tests = sorted(list(set(tests).intersection(benchmarks))) - return tests + return filter(lambda name: name.startswith(args.filter), tests) + if not args.benchmark: + return tests + return sorted(list(set(tests).intersection(set(args.benchmark)))) def get_current_git_branch(git_repo_path): From 324a7408e7797059d21496bcea9c47b515be4f52 Mon Sep 17 00:00:00 2001 From: Pavol Vaskovic Date: Tue, 18 Apr 2017 05:34:12 +0200 Subject: [PATCH 05/10] Refactored Markdown formatting --- benchmark/scripts/compare_perf_tests.py | 289 ++++++++++-------------- 1 file changed, 122 insertions(+), 167 deletions(-) diff --git a/benchmark/scripts/compare_perf_tests.py b/benchmark/scripts/compare_perf_tests.py index 8203e166a7016..71527511aea5d 100755 --- a/benchmark/scripts/compare_perf_tests.py +++ b/benchmark/scripts/compare_perf_tests.py @@ -19,79 +19,8 @@ import csv import sys -TESTNAME = 1 -SAMPLES = 2 -MIN = 3 -MAX = 4 -MEAN = 5 -SD = 6 -MEDIAN = 7 - -HTML = """ - - - - - - -{0} - -""" - -HTML_TABLE = """ - - - - - - - - - {5} -
{0}{1}{2}{3}{4}
-""" - -HTML_ROW = """ - - {0} - {1} - {2} - {3} - {5} - -""" - -MARKDOWN_ROW = "{0} | {1} | {2} | {3} | {4} \n" -HEADER_SPLIT = "---" -MARKDOWN_DETAIL = """ -
- {0} ({1}) - {2} -
-""" - -PAIN_DETAIL = """ -{0}: {1}""" - -RATIO_MIN = None -RATIO_MAX = None - def main(): - global RATIO_MIN - global RATIO_MAX - - old_results = {} - new_results = {} - old_max_results = {} - new_max_results = {} - ratio_list = {} - delta_list = {} - unknown_list = {} - complete_perf_list = [] - increased_perf_list = [] - decreased_perf_list = [] - normal_perf_list = [] parser = argparse.ArgumentParser(description="Compare Performance tests.") parser.add_argument('--old-file', @@ -101,7 +30,8 @@ def main(): help='New performance test suite (csv file)', required=True) parser.add_argument('--format', - help='Supported format git, html and markdown', + choices=['markdown', 'git', 'html'], + help='Output format. Default is markdown.', default="markdown") parser.add_argument('--output', help='Output file name') parser.add_argument('--changes-only', @@ -111,33 +41,20 @@ def main(): parser.add_argument('--old-branch', help='Name of the old branch', default="OLD_MIN") parser.add_argument('--delta-threshold', - help='delta threshold', default="0.05") + help='Delta threshold. Default 0.05.', default="0.05") args = parser.parse_args() - old_file = args.old_file - new_file = args.new_file - new_branch = args.new_branch old_branch = args.old_branch - old_data = csv.reader(open(old_file)) - new_data = csv.reader(open(new_file)) - + global RATIO_MIN + global RATIO_MAX RATIO_MIN = 1 - float(args.delta_threshold) RATIO_MAX = 1 + float(args.delta_threshold) - for row in old_data: - if (len(row) > 8): # skip Totals row - old_results[row[TESTNAME]] = int(row[MIN]) - old_max_results[row[TESTNAME]] = int(row[MAX]) - - for row in new_data: - if (len(row) > 8): # skip Totals row - new_results[row[TESTNAME]] = int(row[MIN]) - new_max_results[row[TESTNAME]] = int(row[MAX]) - - ratio_total = 0 + (old_results, old_max_results) = load_tests_CSV(args.old_file) + (new_results, new_max_results) = load_tests_CSV(args.new_file) new_tests = set(new_results.keys()) old_tests = set(old_results.keys()) @@ -145,10 +62,13 @@ def main(): # removed_tests = old_tests.difference(new_tests) comparable_tests = new_tests.intersection(old_tests) + ratio_list = {} + delta_list = {} + unknown_list = {} + for key in comparable_tests: ratio = (old_results[key] + 0.001) / (new_results[key] + 0.001) ratio_list[key] = round(ratio, 2) - ratio_total *= ratio delta = (((float(new_results[key] + 0.001) / (old_results[key] + 0.001)) - 1) * 100) delta_list[key] = round(delta, 2) @@ -168,69 +88,55 @@ def main(): """ Create markdown formatted table """ - test_name_width = max_width(ratio_list, title='TEST', key_len=True) - new_time_width = max_width(new_results, title=new_branch) - old_time_width = max_width(old_results, title=old_branch) - delta_width = max_width(delta_list, title='DELTA') - - markdown_table_header = "\n" + MARKDOWN_ROW.format( - "TEST".ljust(test_name_width), - old_branch.ljust(old_time_width), - new_branch.ljust(new_time_width), - "DELTA".ljust(delta_width), - "SPEEDUP".ljust(2)) - markdown_table_header += MARKDOWN_ROW.format( - HEADER_SPLIT.ljust(test_name_width), - HEADER_SPLIT.ljust(old_time_width), - HEADER_SPLIT.ljust(new_time_width), - HEADER_SPLIT.ljust(delta_width), - HEADER_SPLIT.ljust(2)) - markdown_regression = "" - for i, key in enumerate(decreased_perf_list): - ratio = "{0:.2f}x".format(ratio_list[key]) - if i == 0: - markdown_regression = markdown_table_header - markdown_regression += MARKDOWN_ROW.format( - key.ljust(test_name_width), - str(old_results[key]).ljust(old_time_width), - 
str(new_results[key]).ljust(new_time_width), - ("{0:+.1f}%".format(delta_list[key])).ljust(delta_width), - "**{0}{1}**".format(str(ratio).ljust(2), unknown_list[key])) - - markdown_improvement = "" - for i, key in enumerate(increased_perf_list): - ratio = "{0:.2f}x".format(ratio_list[key]) - if i == 0: - markdown_improvement = markdown_table_header - markdown_improvement += MARKDOWN_ROW.format( - key.ljust(test_name_width), - str(old_results[key]).ljust(old_time_width), - str(new_results[key]).ljust(new_time_width), - ("{0:+.1f}%".format(delta_list[key])).ljust(delta_width), - "**{0}{1}**".format(str(ratio).ljust(2), unknown_list[key])) - - markdown_normal = "" - for i, key in enumerate(normal_perf_list): - ratio = "{0:.2f}x".format(ratio_list[key]) - if i == 0: - markdown_normal = markdown_table_header - markdown_normal += MARKDOWN_ROW.format( - key.ljust(test_name_width), - str(old_results[key]).ljust(old_time_width), - str(new_results[key]).ljust(new_time_width), - ("{0:+.1f}%".format(delta_list[key])).ljust(delta_width), - "{0}{1}".format(str(ratio).ljust(2), unknown_list[key])) - - markdown_data = MARKDOWN_DETAIL.format("Regression", - len(decreased_perf_list), - markdown_regression, "open") - markdown_data += MARKDOWN_DETAIL.format("Improvement", - len(increased_perf_list), - markdown_improvement, "") + + def max_width(items, title, key_len=False): + def length(key): + return len(str(key)) if key_len else len(str(items[key])) + return max(len(title), max(map(length, items.keys()))) + + widths = ( # column widths + max_width(ratio_list, 'TEST', key_len=True), + max_width(new_results, str(new_branch)), + max_width(old_results, str(old_branch)), + max_width(delta_list, 'DELTA (%)'), + 2 + ) + + def justify_columns(contents): + return tuple(map(lambda (w, c): c.ljust(w), zip(widths, contents))) + + def add_row(contents): + return MARKDOWN_ROW.format(* justify_columns(contents)) + + header = ("TEST", old_branch, new_branch, "DELTA", "SPEEDUP") + markdown_table_header = "\n" + add_row(header) + markdown_table_header += add_row(tuple([HEADER_SPLIT] * len(header))) + + def markdown_table(perf_list, strong): + markdown_table = markdown_table_header + for key in perf_list: + ratio = "{0:.2f}x".format(ratio_list[key]) + markdown_table += add_row( + ( + key, str(old_results[key]), str(new_results[key]), + "{0:+.1f}%".format(delta_list[key]), + ("**{0}{1}**" if strong else "{0}{1}") + .format(str(ratio), unknown_list[key]) + ) + ) + return markdown_table + + markdown_regression = markdown_table(decreased_perf_list, True) + markdown_improvement = markdown_table(increased_perf_list, True) + markdown_normal = markdown_table(normal_perf_list, False) + + markdown_data = MARKDOWN_DETAIL.format( + "Regression", len(decreased_perf_list), markdown_regression, "open") + markdown_data += MARKDOWN_DETAIL.format( + "Improvement", len(increased_perf_list), markdown_improvement, "") if not args.changes_only: - markdown_data += MARKDOWN_DETAIL.format("No Changes", - len(normal_perf_list), - markdown_normal, "") + markdown_data += MARKDOWN_DETAIL.format( + "No Changes", len(normal_perf_list), markdown_normal, "") if args.format: if args.format.lower() != "markdown": @@ -266,6 +172,24 @@ def main(): sys.exit(1) +def load_tests_CSV(filename): + TESTNAME = 1 + # SAMPLES = 2 + MIN = 3 + MAX = 4 + # MEAN = 5 + # SD = 6 + # MEDIAN = 7 + + results = {} + max_results = {} + for row in csv.reader(open(filename)): + if (len(row) > 8): # skip Totals row + results[row[TESTNAME]] = int(row[MIN]) + max_results[row[TESTNAME]] = 
int(row[MAX]) + return (results, max_results) + + def convert_to_html(ratio_list, old_results, new_results, delta_list, unknown_list, old_branch, new_branch, changes_only): (complete_perf_list, @@ -347,20 +271,51 @@ def sort_ratio_list(ratio_list, changes_only=False): decreased_perf_list, sorted_normal_perf_list) -def max_width(items, title, key_len=False): - """ - Returns the max length of string in the list - """ - width = len(str(title)) - for key in items.keys(): - if key_len: - if width < len(str(key)): - width = len(str(key)) - else: - if width < len(str(items[key])): - width = len(str(items[key])) - return width +HTML = """ + + + + + + +{0} + +""" + +HTML_TABLE = """ + + + + + + + + + {5} +
{0}{1}{2}{3}{4}
+""" + +HTML_ROW = """ + + {0} + {1} + {2} + {3} + {5} + +""" +MARKDOWN_ROW = "{0} | {1} | {2} | {3} | {4} \n" +HEADER_SPLIT = "---" +MARKDOWN_DETAIL = """ +
+ {0} ({1}) + {2} +
+""" + +PAIN_DETAIL = """ +{0}: {1}""" if __name__ == "__main__": - sys.exit(main()) + sys.exit(main()) From 458c64f5c43ef179b4e46b5f3ada88bfefd338af Mon Sep 17 00:00:00 2001 From: Pavol Vaskovic Date: Wed, 19 Apr 2017 22:57:09 +0200 Subject: [PATCH 06/10] Refactoring: Condensed HTML formatting, extracted Markdown formatting and list computations --- benchmark/scripts/compare_perf_tests.py | 353 ++++++++++++++---------- 1 file changed, 208 insertions(+), 145 deletions(-) diff --git a/benchmark/scripts/compare_perf_tests.py b/benchmark/scripts/compare_perf_tests.py index 71527511aea5d..0e7764ad4cef0 100755 --- a/benchmark/scripts/compare_perf_tests.py +++ b/benchmark/scripts/compare_perf_tests.py @@ -20,6 +20,110 @@ import sys +class PerformanceTestResult: + def __init__(self, csv_row): + # csv_row[0] is just an ordinal number of the test - skip that + self.name = csv_row[1] # Name of the performance test + self.samples = int(csv_row[2]) # Number of measurement samples taken + self.min = int(csv_row[3]) # Minimum runtime (ms) + self.max = int(csv_row[4]) # Maximum runtime (ms) + self.mean = int(csv_row[5]) # Mean (average) runtime (ms) + self.sd = int(csv_row[6]) # Standard Deviation (ms) + self.median = int(csv_row[7]) # Median runtime (ms) + + +class ResultComparison: + def __init__(self, old, new): + self.old = old + self.new = new + + # Speedup ratio + self.ratio = round((old.min + 0.001) / (new.min + 0.001), 2) + + # Test runtime improvement in % + ratio = (new.min + 0.001) / (old.min + 0.001) + self.delta = round(((ratio - 1) * 100), 2) + + self.is_dubious = ( + "(?)" if ((old.min < new.min and new.min < old.max) or + (new.min < old.min and old.min < new.max)) + else "") + + +class TestComparator: + def __init__(self, old_file, new_file, delta_threshold, changes_only): + self.delta_threshold = float(delta_threshold) # TODO remove + self.changes_only = changes_only + + def load_from_CSV(filename): + def skip_totals(row): + return len(row) > 8 + tests = map(PerformanceTestResult, + filter(skip_totals, csv.reader(open(filename)))) + names = map(lambda t: t.name, tests) + return dict(zip(names, tests)) + + self.old_results = load_from_CSV(old_file) + self.new_results = load_from_CSV(new_file) + self.old_tests = set(self.old_results.keys()) + self.new_tests = set(self.new_results.keys()) + self.added_tests = self.new_tests.difference(self.old_tests) + self.removed_tests = self.old_tests.difference(self.new_tests) + self.comparable_tests = self.new_tests.intersection(self.old_tests) + + def extract(property, list): + return lambda name: (name, getattr(list[name], property)) + + old, new = [sorted(list(s)) for s in [self.old_tests, self.new_tests]] + self.old_min_results = dict(map(extract("min", self.old_results), old)) + self.old_max_results = dict(map(extract("max", self.old_results), old)) + self.new_min_results = dict(map(extract("min", self.new_results), new)) + self.new_max_results = dict(map(extract("max", self.new_results), new)) + + def compare(property): + return lambda name: (name, getattr(self.compare(name), property)) + + self.ratio_list = dict(map(compare("ratio"), self.comparable_tests)) + self.delta_list = dict(map(compare("delta"), self.comparable_tests)) + self.unknown_list = dict(map(compare("is_dubious"), + self.comparable_tests)) + + def has_decreased((_, speedup_ratio)): + return speedup_ratio < (1 - delta_threshold) + + def has_increased((_, speedup_ratio)): + return speedup_ratio > (1 + delta_threshold) + + def partition(l, p): + return reduce(lambda x, y: x[not 
p(y)].append(y) or x, l, ([], [])) + + tests_by_speedup = sorted(self.ratio_list.items(), key=lambda x: x[1]) + decreased, not_decreased = partition(tests_by_speedup, has_decreased) + increased, unchanged = partition(not_decreased, has_increased) + + def get_name((name, _)): + return name + + self.decreased_perf_list = map(get_name, decreased) + self.increased_perf_list = map(get_name, increased) + + # following double sorting is required to replicate same sort order as + # orignal script, for purposes of validation via diff, while refactoring + unchanged = dict(unchanged) + unchanged = sorted(unchanged.items(), key=lambda x: x[1], reverse=True) + + self.normal_perf_list = map(get_name, unchanged) + + changes = self.decreased_perf_list + self.increased_perf_list + + self.complete_perf_list = (changes if self.changes_only else + changes + self.normal_perf_list) + + + def compare(self, name): + return ResultComparison(self.old_results[name], self.new_results[name]) + + def main(): parser = argparse.ArgumentParser(description="Compare Performance tests.") @@ -53,41 +157,85 @@ def main(): RATIO_MIN = 1 - float(args.delta_threshold) RATIO_MAX = 1 + float(args.delta_threshold) - (old_results, old_max_results) = load_tests_CSV(args.old_file) - (new_results, new_max_results) = load_tests_CSV(args.new_file) - - new_tests = set(new_results.keys()) - old_tests = set(old_results.keys()) - # added_tests = new_tests.difference(old_tests) - # removed_tests = old_tests.difference(new_tests) - comparable_tests = new_tests.intersection(old_tests) - - ratio_list = {} - delta_list = {} - unknown_list = {} - - for key in comparable_tests: - ratio = (old_results[key] + 0.001) / (new_results[key] + 0.001) - ratio_list[key] = round(ratio, 2) - delta = (((float(new_results[key] + 0.001) / - (old_results[key] + 0.001)) - 1) * 100) - delta_list[key] = round(delta, 2) - if ((old_results[key] < new_results[key] and - new_results[key] < old_max_results[key]) or - (new_results[key] < old_results[key] and - old_results[key] < new_max_results[key])): - unknown_list[key] = "(?)" + comparator = TestComparator(args.old_file, args.new_file, + float(args.delta_threshold), args.changes_only) + + old_results = comparator.old_min_results + old_max_results = comparator.old_max_results + new_results = comparator.new_min_results + new_max_results = comparator.new_max_results + + ratio_list = comparator.ratio_list + delta_list = comparator.delta_list + unknown_list = comparator.unknown_list + + decreased_perf_list = comparator.decreased_perf_list + increased_perf_list = comparator.increased_perf_list + normal_perf_list = comparator.normal_perf_list + complete_perf_list = comparator.complete_perf_list + + (markdown_regression, + markdown_improvement, + markdown_normal) = convert_to_markdown(comparator, + old_branch, new_branch, + args.changes_only) + + markdown_data = MARKDOWN_DETAIL.format( + "Regression", len(decreased_perf_list), markdown_regression, "open") + markdown_data += MARKDOWN_DETAIL.format( + "Improvement", len(increased_perf_list), markdown_improvement, "") + if not args.changes_only: + markdown_data += MARKDOWN_DETAIL.format( + "No Changes", len(normal_perf_list), markdown_normal, "") + + if args.format: + if args.format.lower() != "markdown": + pain_data = PAIN_DETAIL.format("Regression", markdown_regression) + pain_data += PAIN_DETAIL.format("Improvement", + markdown_improvement) + if not args.changes_only: + pain_data += PAIN_DETAIL.format("No Changes", markdown_normal) + + print(pain_data.replace("|", " 
").replace("-", " ")) + else: + print(markdown_data) + + if args.format: + if args.format.lower() == "html": + """ + Create HTML formatted table + """ + html_data = convert_to_html(comparator, old_branch, new_branch, + args.changes_only) + + if args.output: + write_to_file(args.output, html_data) else: - unknown_list[key] = "" + print("Error: missing --output flag.") + sys.exit(1) + elif args.format.lower() == "markdown": + if args.output: + write_to_file(args.output, markdown_data) + elif args.format.lower() != "git": + print("{0} is unknown format.".format(args.format)) + sys.exit(1) - (complete_perf_list, - increased_perf_list, - decreased_perf_list, - normal_perf_list) = sort_ratio_list(ratio_list, args.changes_only) +def convert_to_markdown(comparator, old_branch, new_branch, changes_only): """ Create markdown formatted table """ + old_results = comparator.old_min_results + new_results = comparator.new_min_results + + ratio_list = comparator.ratio_list + delta_list = comparator.delta_list + unknown_list = comparator.unknown_list + + decreased_perf_list = comparator.decreased_perf_list + increased_perf_list = comparator.increased_perf_list + normal_perf_list = comparator.normal_perf_list + complete_perf_list = comparator.complete_perf_list def max_width(items, title, key_len=False): def length(key): @@ -130,100 +278,46 @@ def markdown_table(perf_list, strong): markdown_improvement = markdown_table(increased_perf_list, True) markdown_normal = markdown_table(normal_perf_list, False) - markdown_data = MARKDOWN_DETAIL.format( - "Regression", len(decreased_perf_list), markdown_regression, "open") - markdown_data += MARKDOWN_DETAIL.format( - "Improvement", len(increased_perf_list), markdown_improvement, "") - if not args.changes_only: - markdown_data += MARKDOWN_DETAIL.format( - "No Changes", len(normal_perf_list), markdown_normal, "") + return (markdown_regression, markdown_improvement, markdown_normal) - if args.format: - if args.format.lower() != "markdown": - pain_data = PAIN_DETAIL.format("Regression", markdown_regression) - pain_data += PAIN_DETAIL.format("Improvement", - markdown_improvement) - if not args.changes_only: - pain_data += PAIN_DETAIL.format("No Changes", markdown_normal) - print(pain_data.replace("|", " ").replace("-", " ")) - else: - print(markdown_data) - - if args.format: - if args.format.lower() == "html": - """ - Create HTML formatted table - """ - html_data = convert_to_html(ratio_list, old_results, new_results, - delta_list, unknown_list, old_branch, - new_branch, args.changes_only) - - if args.output: - write_to_file(args.output, html_data) - else: - print("Error: missing --output flag.") - sys.exit(1) - elif args.format.lower() == "markdown": - if args.output: - write_to_file(args.output, markdown_data) - elif args.format.lower() != "git": - print("{0} is unknown format.".format(args.format)) - sys.exit(1) +def convert_to_html(comparator, old_branch, new_branch, changes_only): + old_results = comparator.old_min_results + new_results = comparator.new_min_results + ratio_list = comparator.ratio_list + delta_list = comparator.delta_list + unknown_list = comparator.unknown_list -def load_tests_CSV(filename): - TESTNAME = 1 - # SAMPLES = 2 - MIN = 3 - MAX = 4 - # MEAN = 5 - # SD = 6 - # MEDIAN = 7 + decreased_perf_list = comparator.decreased_perf_list + increased_perf_list = comparator.increased_perf_list + normal_perf_list = comparator.normal_perf_list + complete_perf_list = comparator.complete_perf_list - results = {} - max_results = {} - for row in 
csv.reader(open(filename)): - if (len(row) > 8): # skip Totals row - results[row[TESTNAME]] = int(row[MIN]) - max_results[row[TESTNAME]] = int(row[MAX]) - return (results, max_results) + def add_row(name, old, new, delta, speedup, speedup_color): + return HTML_ROW.format(name, old, new, delta, speedup_color, speedup) + def separator_header(title): + return add_row("{0}:".format(title), + "", "", "", "", "black") -def convert_to_html(ratio_list, old_results, new_results, delta_list, - unknown_list, old_branch, new_branch, changes_only): - (complete_perf_list, - increased_perf_list, - decreased_perf_list, - normal_perf_list) = sort_ratio_list(ratio_list, changes_only) + def results_table(title, list, speedup_color): + html_rows = separator_header(title) + for key in list: + html_rows += add_row(key, old_results[key], new_results[key], + "{0:+.1f}%".format(delta_list[key]), + "{0:.2f}x {1}".format(ratio_list[key], + unknown_list[key]), + speedup_color) + return html_rows html_rows = "" - for key in complete_perf_list: - if ratio_list[key] < RATIO_MIN: - color = "red" - elif ratio_list[key] > RATIO_MAX: - color = "green" - else: - color = "black" - if len(decreased_perf_list) > 0 and key == decreased_perf_list[0]: - html_rows += HTML_ROW.format( - "Regression:", - "", "", "", "black", "", "") - if len(increased_perf_list) > 0 and key == increased_perf_list[0]: - html_rows += HTML_ROW.format( - "Improvement:", - "", "", "", "black", "", "") - if len(normal_perf_list) > 0 and key == normal_perf_list[0]: - html_rows += HTML_ROW.format( - "No Changes:", - "", "", "", "black", "", "") - - html_rows += HTML_ROW.format(key, old_results[key], - new_results[key], - "{0:+.1f}%".format(delta_list[key]), - color, - "{0:.2f}x {1}".format(ratio_list[key], - unknown_list[key])) + if len(decreased_perf_list) > 0: + html_rows += results_table('Regression', decreased_perf_list, 'red') + if len(increased_perf_list) > 0: + html_rows += results_table('Improvement', increased_perf_list, 'green') + if len(normal_perf_list) > 0: + html_rows += results_table('No Changes', normal_perf_list, 'black') html_table = HTML_TABLE.format("TEST", old_branch, new_branch, "DELTA", "SPEEDUP", html_rows) @@ -240,37 +334,6 @@ def write_to_file(file_name, data): file.close -def sort_ratio_list(ratio_list, changes_only=False): - """ - Return 3 sorted list improvement, regression and normal. 
- """ - decreased_perf_list = [] - increased_perf_list = [] - sorted_normal_perf_list = [] - normal_perf_list = {} - - for key, v in sorted(ratio_list.items(), key=lambda x: x[1]): - if ratio_list[key] < RATIO_MIN: - decreased_perf_list.append(key) - elif ratio_list[key] > RATIO_MAX: - increased_perf_list.append(key) - else: - normal_perf_list[key] = v - - for key, v in sorted(normal_perf_list.items(), key=lambda x: x[1], - reverse=True): - sorted_normal_perf_list.append(key) - - if changes_only: - complete_perf_list = decreased_perf_list + increased_perf_list - else: - complete_perf_list = (decreased_perf_list + increased_perf_list + - sorted_normal_perf_list) - - return (complete_perf_list, increased_perf_list, - decreased_perf_list, sorted_normal_perf_list) - - HTML = """ From 0d90b97d9cc47d6ca3fa2165b43d77a30912c8da Mon Sep 17 00:00:00 2001 From: Pavol Vaskovic Date: Fri, 21 Apr 2017 20:23:39 +0200 Subject: [PATCH 07/10] Fix SR-4659 Benchmark logs should be tied to tested tree version --- benchmark/scripts/Benchmark_Driver | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/benchmark/scripts/Benchmark_Driver b/benchmark/scripts/Benchmark_Driver index 1bad9be7b3805..a3d43fd1c12e1 100755 --- a/benchmark/scripts/Benchmark_Driver +++ b/benchmark/scripts/Benchmark_Driver @@ -108,6 +108,12 @@ def get_current_git_branch(git_repo_path): ['git', '-C', git_repo_path, 'rev-parse', '--abbrev-ref', 'HEAD'], stderr=subprocess.STDOUT).strip() +def get_git_head_ID(git_repo_path): + """Return the short identifier for the HEAD commit of the repo + `git_repo_path`""" + return subprocess.check_output( + ['git', '-C', git_repo_path, 'rev-parse', + '--short', 'HEAD'], stderr=subprocess.STDOUT).strip() def log_results(log_directory, driver, formatted_output, swift_repo=None): """Log `formatted_output` to a branch specific directory in @@ -117,6 +123,10 @@ def log_results(log_directory, driver, formatted_output, swift_repo=None): branch = get_current_git_branch(swift_repo) except (OSError, subprocess.CalledProcessError): branch = None + try: + head_ID = '-' + get_git_head_ID(swift_repo) + except (OSError, subprocess.CalledProcessError): + head_ID = '' timestamp = time.strftime("%Y%m%d%H%M%S", time.localtime()) if branch: output_directory = os.path.join(log_directory, branch) @@ -128,7 +138,7 @@ def log_results(log_directory, driver, formatted_output, swift_repo=None): except OSError: pass log_file = os.path.join(output_directory, - driver_name + '-' + timestamp + '.log') + driver_name + '-' + timestamp + head_ID + '.log') print('Logging results to: %s' % log_file) with open(log_file, 'w') as f: f.write(formatted_output) From 09dd0ed13ca2933ec6bdb7c482c5d873c6e2be1e Mon Sep 17 00:00:00 2001 From: Pavol Vaskovic Date: Fri, 21 Apr 2017 21:37:21 +0200 Subject: [PATCH 08/10] Benchmark_Driver - Report totals for all columns --- benchmark/scripts/Benchmark_Driver | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/benchmark/scripts/Benchmark_Driver b/benchmark/scripts/Benchmark_Driver index a3d43fd1c12e1..96518b96e4ff4 100755 --- a/benchmark/scripts/Benchmark_Driver +++ b/benchmark/scripts/Benchmark_Driver @@ -157,6 +157,7 @@ def run_benchmarks(driver, benchmarks=[], num_samples=10, verbose=False, line_format = '{:>3} {:<25} {:>7} {:>7} {:>7} {:>8} {:>6} {:>10} {:>10}' if verbose and log_directory: print(line_format.format(*headings)) + totals = tuple([0] * 7) for test in benchmarks: test_output = instrument_test(driver, test, num_samples) if test_output[0] == 
'Totals': @@ -167,16 +168,12 @@ def run_benchmarks(driver, benchmarks=[], num_samples=10, verbose=False, else: print(','.join(test_output)) output.append(test_output) - (samples, _min, _max, mean) = map(int, test_output[2:6]) + totals = map(sum, zip(totals, map(int, test_output[3:]))) total_tests += 1 - total_min += _min - total_max += _max - total_mean += mean if not output: return formatted_output = '\n'.join([','.join(l) for l in output]) - totals = map(str, ['Totals', total_tests, total_min, total_max, - total_mean, '0', '0', '0']) + totals = map(str, ['Totals', total_tests] + totals) totals_output = '\n\n' + ','.join(totals) if verbose: if log_directory: From 3c59bdf4da824a587e59846f4fc565b01156bf05 Mon Sep 17 00:00:00 2001 From: Pavol Vaskovic Date: Sun, 23 Apr 2017 04:00:33 +0200 Subject: [PATCH 09/10] Refactoring compare_perf_test.py complete Created ReportFormatter. The script still produces same output as original legacy version. --- benchmark/scripts/compare_perf_tests.py | 447 ++++++++++-------------- 1 file changed, 194 insertions(+), 253 deletions(-) diff --git a/benchmark/scripts/compare_perf_tests.py b/benchmark/scripts/compare_perf_tests.py index 0e7764ad4cef0..15b11ae79ed2d 100755 --- a/benchmark/scripts/compare_perf_tests.py +++ b/benchmark/scripts/compare_perf_tests.py @@ -36,24 +36,30 @@ class ResultComparison: def __init__(self, old, new): self.old = old self.new = new + self.name = old.name # Test name, convenience accessor - # Speedup ratio + # Speedup ratio FIXME no need to round here - we do that in values() self.ratio = round((old.min + 0.001) / (new.min + 0.001), 2) # Test runtime improvement in % ratio = (new.min + 0.001) / (old.min + 0.001) - self.delta = round(((ratio - 1) * 100), 2) + self.delta = round(((ratio - 1) * 100), 2) # FIXME no need to round - self.is_dubious = ( + self.is_dubious = ( # FIXME this is legacy "(?)" if ((old.min < new.min and new.min < old.max) or (new.min < old.min and old.min < new.max)) else "") + # Tuple of values formatted for display in results table: + # (name, old value, new value, delta [%], speedup ratio) + def values(self): + return (self.name, str(self.old.min), str(self.new.min), + "{0:+.1f}%".format(self.delta), + "{0:.2f}x {1}".format(self.ratio, self.is_dubious)) + class TestComparator: def __init__(self, old_file, new_file, delta_threshold, changes_only): - self.delta_threshold = float(delta_threshold) # TODO remove - self.changes_only = changes_only def load_from_CSV(filename): def skip_totals(row): @@ -63,65 +69,197 @@ def skip_totals(row): names = map(lambda t: t.name, tests) return dict(zip(names, tests)) - self.old_results = load_from_CSV(old_file) - self.new_results = load_from_CSV(new_file) - self.old_tests = set(self.old_results.keys()) - self.new_tests = set(self.new_results.keys()) - self.added_tests = self.new_tests.difference(self.old_tests) - self.removed_tests = self.old_tests.difference(self.new_tests) - self.comparable_tests = self.new_tests.intersection(self.old_tests) - - def extract(property, list): - return lambda name: (name, getattr(list[name], property)) - - old, new = [sorted(list(s)) for s in [self.old_tests, self.new_tests]] - self.old_min_results = dict(map(extract("min", self.old_results), old)) - self.old_max_results = dict(map(extract("max", self.old_results), old)) - self.new_min_results = dict(map(extract("min", self.new_results), new)) - self.new_max_results = dict(map(extract("max", self.new_results), new)) - - def compare(property): - return lambda name: (name, 
getattr(self.compare(name), property)) - - self.ratio_list = dict(map(compare("ratio"), self.comparable_tests)) - self.delta_list = dict(map(compare("delta"), self.comparable_tests)) - self.unknown_list = dict(map(compare("is_dubious"), - self.comparable_tests)) + old_results = load_from_CSV(old_file) + new_results = load_from_CSV(new_file) + old_tests = set(old_results.keys()) + new_tests = set(new_results.keys()) + # added_tests = new_tests.difference(old_tests) + # removed_tests = old_tests.difference(new_tests) + comparable_tests = new_tests.intersection(old_tests) - def has_decreased((_, speedup_ratio)): - return speedup_ratio < (1 - delta_threshold) + def compare(name): + return ResultComparison(old_results[name], new_results[name]) - def has_increased((_, speedup_ratio)): - return speedup_ratio > (1 + delta_threshold) + comparisons = map(compare, comparable_tests) def partition(l, p): return reduce(lambda x, y: x[not p(y)].append(y) or x, l, ([], [])) - tests_by_speedup = sorted(self.ratio_list.items(), key=lambda x: x[1]) - decreased, not_decreased = partition(tests_by_speedup, has_decreased) - increased, unchanged = partition(not_decreased, has_increased) + # # When done refactoring, and about to change sort order, use this: + # # TODO use standard deviation - SD + # decreased, not_decreased = partition( + # comparisons, lambda c: c.ratio < (1 - delta_threshold)) + # increased, unchanged = partition( + # not_decreased, lambda c: c.ratio > (1 + delta_threshold)) + # # TODO sort decreased ascending, increased descending, + # # unchanged alphabetically + + # To enable validation using diff during refactoring, this replicates + # the "semi-stable" sort that due to rounding of ratio to 2 decimals + # heavily depends on the internal implementation of dictionary. (!!!) + ratio_map = dict(map(lambda c: (c.name, c.ratio), comparisons)) + tests_by_speedup = sorted(ratio_map.items(), key=lambda x: x[1]) + + decreased, not_decreased = partition( + tests_by_speedup, lambda x: x[1] < (1 - delta_threshold)) + increased, unchanged = partition( + not_decreased, lambda x: x[1] > (1 + delta_threshold)) + # extra mangling for unchanged... + u = dict(unchanged) + unchanged = sorted(u.items(), key=lambda x: x[1], reverse=True) + + # sorted partitions + comp_map = dict(zip(map(lambda c: c.name, comparisons), comparisons)) + self.decreased = map(lambda t: comp_map[t[0]], decreased) + self.increased = map(lambda t: comp_map[t[0]], increased) + self.unchanged = map(lambda t: comp_map[t[0]], unchanged) + # TODO add alphabetically sorted lists for added, removed + + +class ReportFormatter: + def __init__(self, comparator, old_branch, new_branch, changes_only): + self.comparator = comparator + self.old_branch = old_branch + self.new_branch = new_branch + self.changes_only = changes_only - def get_name((name, _)): - return name + MARKDOWN_DETAIL = """ +
+ {0} ({2}) + {1} +
+""" + GIT_DETAIL = """ +{0}: {1}""" - self.decreased_perf_list = map(get_name, decreased) - self.increased_perf_list = map(get_name, increased) + def markdown(self): + return self.__formatted_text( + ROW='{0} | {1} | {2} | {3} | {4} \n', + HEADER_SEPARATOR='---', + DETAIL=self.MARKDOWN_DETAIL) + + def git(self): + return self.__formatted_text( + ROW='{0} {1} {2} {3} {4} \n', + HEADER_SEPARATOR=' ', + DETAIL=self.GIT_DETAIL).replace('-', ' ') # FIXME legacy replace + + def __column_widths(self, header): + changed = self.comparator.decreased + self.comparator.increased + comparisons = (changed if self.changes_only else + changed + self.comparator.unchanged) + values = map(lambda c: c.values(), comparisons) + + def col_widths(contents): + return map(len, contents) + + def max_widths(maximum, contents): + return tuple(map(max, zip(maximum, col_widths(contents)))) + + widths = reduce(max_widths, values, col_widths(header)) + widths = widths[:-1] + (2,) # FIXME legacy formatting + return widths + + def __formatted_text(self, ROW, HEADER_SEPARATOR, DETAIL): + header = ( + 'TEST', self.old_branch, self.new_branch, 'DELTA', 'SPEEDUP') + + widths = self.__column_widths(header) + + def justify_columns(contents): + return tuple(map(lambda (w, c): c.ljust(w), zip(widths, contents))) + + def row(contents): + return ROW.format(*justify_columns(contents)) + + header = '\n' + row(header) + row(tuple([HEADER_SEPARATOR] * 5)) + + def format_result(r, strong): + # FIXME this mangling just reproduces different formatting + # between HTML and Markdown versions of the legacy script + r = r[:-1] + (r[-1].replace(' ', ''), ) + return (r if not strong else + r[:-1] + ('**{0}**'.format(r[-1]), )) + + def results(title, comp_list, is_strong=False, is_open=False): + rows = [ + row(format_result(result_comparison.values(), is_strong)) + for result_comparison in comp_list + ] + return ('' if not rows else + DETAIL.format(*[ + title, + (header + ''.join(rows)), + len(comp_list), + ('open' if is_open else '') + ])) + + return ''.join([ + results('Regression', self.comparator.decreased, True, True), + results('Improvement', self.comparator.increased, True), + results('No Changes', self.comparator.unchanged), + ]) + + HTML = """ + + + + + + +{0} + +""" - # following double sorting is required to replicate same sort order as - # orignal script, for purposes of validation via diff, while refactoring - unchanged = dict(unchanged) - unchanged = sorted(unchanged.items(), key=lambda x: x[1], reverse=True) + HTML_TABLE = """ + + + + + + + + + {5} +
{0}{1}{2}{3}{4}
+""" + + HTML_ROW = """ + + {0} + {1} + {2} + {3} + {5} + +""" - self.normal_perf_list = map(get_name, unchanged) + def html(self): - changes = self.decreased_perf_list + self.increased_perf_list + def row(name, old, new, delta, speedup, speedup_color): + return self.HTML_ROW.format( + name, old, new, delta, speedup_color, speedup) - self.complete_perf_list = (changes if self.changes_only else - changes + self.normal_perf_list) + def header(title): + return row("{0}:".format(title), + "", "", "", "", "black") + def results(title, comp_list, speedup_color): + rows = [ + row(*(result_comparison.values() + (speedup_color,))) + for result_comparison in comp_list + ] + return ('' if not rows else + header(title) + ''.join(rows)) - def compare(self, name): - return ResultComparison(self.old_results[name], self.new_results[name]) + return self.HTML.format(self.HTML_TABLE.format( + "TEST", self.old_branch, self.new_branch, "DELTA", "SPEEDUP", + ''.join([ + results('Regression', self.comparator.decreased, 'red'), + results('Improvement', self.comparator.increased, 'green'), + ('' if self.changes_only else + results('No Changes', self.comparator.unchanged, 'black')) + ]))) def main(): @@ -148,183 +286,32 @@ def main(): help='Delta threshold. Default 0.05.', default="0.05") args = parser.parse_args() - - new_branch = args.new_branch - old_branch = args.old_branch - - global RATIO_MIN - global RATIO_MAX - RATIO_MIN = 1 - float(args.delta_threshold) - RATIO_MAX = 1 + float(args.delta_threshold) - comparator = TestComparator(args.old_file, args.new_file, float(args.delta_threshold), args.changes_only) - - old_results = comparator.old_min_results - old_max_results = comparator.old_max_results - new_results = comparator.new_min_results - new_max_results = comparator.new_max_results - - ratio_list = comparator.ratio_list - delta_list = comparator.delta_list - unknown_list = comparator.unknown_list - - decreased_perf_list = comparator.decreased_perf_list - increased_perf_list = comparator.increased_perf_list - normal_perf_list = comparator.normal_perf_list - complete_perf_list = comparator.complete_perf_list - - (markdown_regression, - markdown_improvement, - markdown_normal) = convert_to_markdown(comparator, - old_branch, new_branch, - args.changes_only) - - markdown_data = MARKDOWN_DETAIL.format( - "Regression", len(decreased_perf_list), markdown_regression, "open") - markdown_data += MARKDOWN_DETAIL.format( - "Improvement", len(increased_perf_list), markdown_improvement, "") - if not args.changes_only: - markdown_data += MARKDOWN_DETAIL.format( - "No Changes", len(normal_perf_list), markdown_normal, "") + formatter = ReportFormatter(comparator, args.old_branch, args.new_branch, + args.changes_only) if args.format: if args.format.lower() != "markdown": - pain_data = PAIN_DETAIL.format("Regression", markdown_regression) - pain_data += PAIN_DETAIL.format("Improvement", - markdown_improvement) - if not args.changes_only: - pain_data += PAIN_DETAIL.format("No Changes", markdown_normal) - - print(pain_data.replace("|", " ").replace("-", " ")) + print(formatter.git()) else: - print(markdown_data) + print(formatter.markdown()) if args.format: if args.format.lower() == "html": - """ - Create HTML formatted table - """ - html_data = convert_to_html(comparator, old_branch, new_branch, - args.changes_only) - if args.output: - write_to_file(args.output, html_data) + write_to_file(args.output, formatter.html()) else: print("Error: missing --output flag.") sys.exit(1) elif args.format.lower() == "markdown": if 
args.output: - write_to_file(args.output, markdown_data) + write_to_file(args.output, formatter.markdown()) elif args.format.lower() != "git": print("{0} is unknown format.".format(args.format)) sys.exit(1) -def convert_to_markdown(comparator, old_branch, new_branch, changes_only): - """ - Create markdown formatted table - """ - old_results = comparator.old_min_results - new_results = comparator.new_min_results - - ratio_list = comparator.ratio_list - delta_list = comparator.delta_list - unknown_list = comparator.unknown_list - - decreased_perf_list = comparator.decreased_perf_list - increased_perf_list = comparator.increased_perf_list - normal_perf_list = comparator.normal_perf_list - complete_perf_list = comparator.complete_perf_list - - def max_width(items, title, key_len=False): - def length(key): - return len(str(key)) if key_len else len(str(items[key])) - return max(len(title), max(map(length, items.keys()))) - - widths = ( # column widths - max_width(ratio_list, 'TEST', key_len=True), - max_width(new_results, str(new_branch)), - max_width(old_results, str(old_branch)), - max_width(delta_list, 'DELTA (%)'), - 2 - ) - - def justify_columns(contents): - return tuple(map(lambda (w, c): c.ljust(w), zip(widths, contents))) - - def add_row(contents): - return MARKDOWN_ROW.format(* justify_columns(contents)) - - header = ("TEST", old_branch, new_branch, "DELTA", "SPEEDUP") - markdown_table_header = "\n" + add_row(header) - markdown_table_header += add_row(tuple([HEADER_SPLIT] * len(header))) - - def markdown_table(perf_list, strong): - markdown_table = markdown_table_header - for key in perf_list: - ratio = "{0:.2f}x".format(ratio_list[key]) - markdown_table += add_row( - ( - key, str(old_results[key]), str(new_results[key]), - "{0:+.1f}%".format(delta_list[key]), - ("**{0}{1}**" if strong else "{0}{1}") - .format(str(ratio), unknown_list[key]) - ) - ) - return markdown_table - - markdown_regression = markdown_table(decreased_perf_list, True) - markdown_improvement = markdown_table(increased_perf_list, True) - markdown_normal = markdown_table(normal_perf_list, False) - - return (markdown_regression, markdown_improvement, markdown_normal) - - -def convert_to_html(comparator, old_branch, new_branch, changes_only): - old_results = comparator.old_min_results - new_results = comparator.new_min_results - - ratio_list = comparator.ratio_list - delta_list = comparator.delta_list - unknown_list = comparator.unknown_list - - decreased_perf_list = comparator.decreased_perf_list - increased_perf_list = comparator.increased_perf_list - normal_perf_list = comparator.normal_perf_list - complete_perf_list = comparator.complete_perf_list - - def add_row(name, old, new, delta, speedup, speedup_color): - return HTML_ROW.format(name, old, new, delta, speedup_color, speedup) - - def separator_header(title): - return add_row("{0}:".format(title), - "", "", "", "", "black") - - def results_table(title, list, speedup_color): - html_rows = separator_header(title) - for key in list: - html_rows += add_row(key, old_results[key], new_results[key], - "{0:+.1f}%".format(delta_list[key]), - "{0:.2f}x {1}".format(ratio_list[key], - unknown_list[key]), - speedup_color) - return html_rows - - html_rows = "" - if len(decreased_perf_list) > 0: - html_rows += results_table('Regression', decreased_perf_list, 'red') - if len(increased_perf_list) > 0: - html_rows += results_table('Improvement', increased_perf_list, 'green') - if len(normal_perf_list) > 0: - html_rows += results_table('No Changes', normal_perf_list, 'black') - - 
html_table = HTML_TABLE.format("TEST", old_branch, new_branch, - "DELTA", "SPEEDUP", html_rows) - html_data = HTML.format(html_table) - return html_data - - def write_to_file(file_name, data): """ Write data to given file @@ -334,51 +321,5 @@ def write_to_file(file_name, data): file.close -HTML = """ - - - - - - -{0} - -""" - -HTML_TABLE = """ - - - - - - - - - {5} -
{0}{1}{2}{3}{4}
-""" - -HTML_ROW = """ - - {0} - {1} - {2} - {3} - {5} - -""" - -MARKDOWN_ROW = "{0} | {1} | {2} | {3} | {4} \n" -HEADER_SPLIT = "---" -MARKDOWN_DETAIL = """ -
- {0} ({1}) - {2} -
-""" - -PAIN_DETAIL = """ -{0}: {1}""" - if __name__ == "__main__": sys.exit(main()) From 75f13452d94f300e822a9abe57bd26bee36e6d5c Mon Sep 17 00:00:00 2001 From: Pavol Vaskovic Date: Mon, 24 Apr 2017 23:10:14 +0200 Subject: [PATCH 10/10] Reproduce legacy behavior for --change-only Also: use single quote strings --- benchmark/scripts/compare_perf_tests.py | 49 +++++++++++++------------ 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/benchmark/scripts/compare_perf_tests.py b/benchmark/scripts/compare_perf_tests.py index 15b11ae79ed2d..9b8e862cdabad 100755 --- a/benchmark/scripts/compare_perf_tests.py +++ b/benchmark/scripts/compare_perf_tests.py @@ -30,6 +30,7 @@ def __init__(self, csv_row): self.mean = int(csv_row[5]) # Mean (average) runtime (ms) self.sd = int(csv_row[6]) # Standard Deviation (ms) self.median = int(csv_row[7]) # Median runtime (ms) + self.max_rss = int(csv_row[8]) # Maximum Resident Set Size (B) class ResultComparison: @@ -46,16 +47,16 @@ def __init__(self, old, new): self.delta = round(((ratio - 1) * 100), 2) # FIXME no need to round self.is_dubious = ( # FIXME this is legacy - "(?)" if ((old.min < new.min and new.min < old.max) or + '(?)' if ((old.min < new.min and new.min < old.max) or (new.min < old.min and old.min < new.max)) - else "") + else '') # Tuple of values formatted for display in results table: # (name, old value, new value, delta [%], speedup ratio) def values(self): return (self.name, str(self.old.min), str(self.new.min), - "{0:+.1f}%".format(self.delta), - "{0:.2f}x {1}".format(self.ratio, self.is_dubious)) + '{0:+.1f}%'.format(self.delta), + '{0:.2f}x {1}'.format(self.ratio, self.is_dubious)) class TestComparator: @@ -146,8 +147,9 @@ def git(self): def __column_widths(self, header): changed = self.comparator.decreased + self.comparator.increased - comparisons = (changed if self.changes_only else - changed + self.comparator.unchanged) + comparisons = changed + self.comparator.unchanged # FIXME legacy + # comparisons = (changed if self.changes_only else + # changed + self.comparator.unchanged) values = map(lambda c: c.values(), comparisons) def col_widths(contents): @@ -157,7 +159,7 @@ def max_widths(maximum, contents): return tuple(map(max, zip(maximum, col_widths(contents)))) widths = reduce(max_widths, values, col_widths(header)) - widths = widths[:-1] + (2,) # FIXME legacy formatting + widths = widths[:-2] + (6, 2) # FIXME legacy formatting return widths def __formatted_text(self, ROW, HEADER_SEPARATOR, DETAIL): @@ -197,7 +199,8 @@ def results(title, comp_list, is_strong=False, is_open=False): return ''.join([ results('Regression', self.comparator.decreased, True, True), results('Improvement', self.comparator.increased, True), - results('No Changes', self.comparator.unchanged), + ('' if self.changes_only else + results('No Changes', self.comparator.unchanged)) ]) HTML = """ @@ -241,8 +244,8 @@ def row(name, old, new, delta, speedup, speedup_color): name, old, new, delta, speedup_color, speedup) def header(title): - return row("{0}:".format(title), - "", "", "", "", "black") + return row('{0}:'.format(title), + '', '', '', '', 'black') def results(title, comp_list, speedup_color): rows = [ @@ -253,7 +256,7 @@ def results(title, comp_list, speedup_color): header(title) + ''.join(rows)) return self.HTML.format(self.HTML_TABLE.format( - "TEST", self.old_branch, self.new_branch, "DELTA", "SPEEDUP", + 'TEST', self.old_branch, self.new_branch, 'DELTA', 'SPEEDUP', ''.join([ results('Regression', self.comparator.decreased, 'red'), 
results('Improvement', self.comparator.increased, 'green'), @@ -264,7 +267,7 @@ def results(title, comp_list, speedup_color): def main(): - parser = argparse.ArgumentParser(description="Compare Performance tests.") + parser = argparse.ArgumentParser(description='Compare Performance tests.') parser.add_argument('--old-file', help='Baseline performance test suite (csv file)', required=True) @@ -279,11 +282,11 @@ def main(): parser.add_argument('--changes-only', help='Output only affected tests', action='store_true') parser.add_argument('--new-branch', - help='Name of the new branch', default="NEW_MIN") + help='Name of the new branch', default='NEW_MIN') parser.add_argument('--old-branch', - help='Name of the old branch', default="OLD_MIN") + help='Name of the old branch', default='OLD_MIN') parser.add_argument('--delta-threshold', - help='Delta threshold. Default 0.05.', default="0.05") + help='Delta threshold. Default 0.05.', default='0.05') args = parser.parse_args() comparator = TestComparator(args.old_file, args.new_file, @@ -292,23 +295,23 @@ def main(): args.changes_only) if args.format: - if args.format.lower() != "markdown": + if args.format.lower() != 'markdown': print(formatter.git()) else: print(formatter.markdown()) if args.format: - if args.format.lower() == "html": + if args.format.lower() == 'html': if args.output: write_to_file(args.output, formatter.html()) else: - print("Error: missing --output flag.") + print('Error: missing --output flag.') sys.exit(1) - elif args.format.lower() == "markdown": + elif args.format.lower() == 'markdown': if args.output: write_to_file(args.output, formatter.markdown()) - elif args.format.lower() != "git": - print("{0} is unknown format.".format(args.format)) + elif args.format.lower() != 'git': + print('{0} is unknown format.'.format(args.format)) sys.exit(1) @@ -316,10 +319,10 @@ def write_to_file(file_name, data): """ Write data to given file """ - file = open(file_name, "w") + file = open(file_name, 'w') file.write(data) file.close -if __name__ == "__main__": +if __name__ == '__main__': sys.exit(main())
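
Note (editor's addition, not part of the patch series): after PATCH 10, main() in
compare_perf_tests.py simply wires together the two classes introduced in PATCH 09:
TestComparator loads and partitions the CSV logs, and ReportFormatter renders the
comparison as markdown, git-style plain text, or HTML. The sketch below mirrors that
wiring for programmatic use; it assumes the patched compare_perf_tests.py is importable
as a module, and the 'old.csv'/'new.csv' paths are hypothetical stand-ins for logs
produced by the driver (e.g. ./Benchmark_Driver run -i 5 -f Array, the example given in
the driver's argparse epilog).

    # Illustrative sketch only; mirrors main() in the patched compare_perf_tests.py.
    # 'old.csv' and 'new.csv' are hypothetical Benchmark_Driver CSV log files.
    from compare_perf_tests import TestComparator, ReportFormatter

    comparator = TestComparator('old.csv', 'new.csv',
                                delta_threshold=0.05, changes_only=False)
    formatter = ReportFormatter(comparator, old_branch='OLD_MIN',
                                new_branch='NEW_MIN', changes_only=False)

    # Same output the script prints for --format markdown; formatter.git() and
    # formatter.html() produce the other two report formats.
    print(formatter.markdown())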