From b7eb476f948a645fbb6ef559c1ef8ba8afe6de38 Mon Sep 17 00:00:00 2001
From: Pavol Vaskovic
Date: Sat, 7 Jul 2018 09:33:19 +0200
Subject: [PATCH 01/10] [benchmark] Moved tests for benchmark scripts

Moved `lit` test that runs unit tests for benchmark scripts from
`validation-test/Python` to the `test/benchmark` directory.

Run the tests for benchmark infrastructure with a single lit invocation:
````
swift-source$ ./llvm/utils/lit/lit.py -sv ${SWIFT_BUILD_DIR}/test-macosx-x86_64/benchmark
````

Documented the invocation of benchmark infrastructure tests in README.md.
---
 benchmark/README.md                      | 18 ++++++++++++++++++
 .../benchmark}/benchmark-scripts.test-sh |  0
 2 files changed, 18 insertions(+)
 rename {validation-test/Python => test/benchmark}/benchmark-scripts.test-sh (100%)

diff --git a/benchmark/README.md b/benchmark/README.md
index bb1722eafec49..4d1f70847fed0 100644
--- a/benchmark/README.md
+++ b/benchmark/README.md
@@ -186,3 +186,21 @@ public func run_YourTestName(N: Int) {
 
 The current set of tags are defined by the `BenchmarkCategory` enum in
 `TestsUtils.swift` .
+
+Testing the Benchmark Drivers
+-----------------------------
+When working on tests, after the initial build
+````
+swift-source$ ./swift/utils/build-script -R -B
+````
+you can rebuild just the benchmarks:
+````
+swift-source$ export SWIFT_BUILD_DIR=`pwd`/build/Ninja-ReleaseAssert/swift-macosx-x86_64
+swift-source$ ninja -C ${SWIFT_BUILD_DIR} swift-benchmark-macosx-x86_64
+````
+
+When modifying the testing infrastructure, you should verify that your changes
+pass all the tests:
+````
+swift-source$ ./llvm/utils/lit/lit.py -sv ${SWIFT_BUILD_DIR}/test-macosx-x86_64/benchmark
+````
diff --git a/validation-test/Python/benchmark-scripts.test-sh b/test/benchmark/benchmark-scripts.test-sh
similarity index 100%
rename from validation-test/Python/benchmark-scripts.test-sh
rename to test/benchmark/benchmark-scripts.test-sh

From 7438ffd0bb0fe6c13e9dc1382e2f64b9bb3a91c6 Mon Sep 17 00:00:00 2001
From: Pavol Vaskovic
Date: Wed, 11 Jul 2018 23:16:57 +0200
Subject: [PATCH 02/10] [benchmark] Tests for Benchmark_O
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Added `lit` substitution for running the Benchmark_O binary.

Introduced `lit` tests for `Benchmark_O` to demonstrate bugs:
* listing benchmarks’ tags
* running with `--num-iters=1`
---
 test/benchmark/Benchmark_O.test-sh | 15 +++++++++++++++
 test/lit.cfg                       |  2 ++
 test/lit.site.cfg.in               |  3 +++
 3 files changed, 20 insertions(+)
 create mode 100644 test/benchmark/Benchmark_O.test-sh

diff --git a/test/benchmark/Benchmark_O.test-sh b/test/benchmark/Benchmark_O.test-sh
new file mode 100644
index 0000000000000..62a896da66385
--- /dev/null
+++ b/test/benchmark/Benchmark_O.test-sh
@@ -0,0 +1,15 @@
+// REQUIRES: OS=macosx
+// REQUIRES: asserts
+// REQUIRES: benchmark
+// REQUIRES: CMAKE_GENERATOR=Ninja
+
+// RUN: %Benchmark_O --list | %FileCheck %s --check-prefix LISTTAGS
+// LISTTAGS: AngryPhonebook,[
+// LISTTAGS-SAME: TestsUtils.BenchmarkCategory.
+// LISTTAGS-SAME: String
+// LISTTAGS-SAME: ]
+
+// RUN: %Benchmark_O AngryPhonebook --num-iters=1 \
+// RUN: | %FileCheck %s --check-prefix NUMITERS1
+// NUMITERS1: AngryPhonebook,1
+// NUMITERS1-SAME: 0,0,0,0,0
diff --git a/test/lit.cfg b/test/lit.cfg
index 10a0f0dd2cf7f..830996f48cd93 100644
--- a/test/lit.cfg
+++ b/test/lit.cfg
@@ -272,6 +272,7 @@ config.complete_test = inferSwiftBinary('complete-test')
 config.swift_api_digester = inferSwiftBinary('swift-api-digester')
 config.swift_refactor = inferSwiftBinary('swift-refactor')
 config.swift_demangle_yamldump = inferSwiftBinary('swift-demangle-yamldump')
+config.benchmark_o = inferSwiftBinary('Benchmark_O')
 
 config.swift_utils = make_path(config.swift_src_root, 'utils')
 config.line_directive = make_path(config.swift_utils, 'line-directive')
diff --git a/test/lit.site.cfg.in b/test/lit.site.cfg.in
index 52f935e1095a3..5d4017d092845 100644
--- a/test/lit.site.cfg.in
+++ b/test/lit.site.cfg.in
@@ -88,6 +88,9 @@ config.available_features.add("CMAKE_GENERATOR=@CMAKE_GENERATOR@")
 if "@SWIFT_ENABLE_SOURCEKIT_TESTS@" == "TRUE":
     config.available_features.add('sourcekit')
 
+if "@SWIFT_BUILD_PERF_TESTSUITE@" == "TRUE":
+    config.available_features.add('benchmark')
+
 if "@SWIFT_ENABLE_GUARANTEED_NORMAL_ARGUMENTS@" == "TRUE":
     config.available_features.add('plus_zero_runtime')
 else:

From 4c4c6a24090a59033ca1a37caa47a67717340b3d Mon Sep 17 00:00:00 2001
From: Pavol Vaskovic
Date: Sat, 7 Jul 2018 04:38:37 +0200
Subject: [PATCH 03/10] [benchmark] Fix: Better tags in benchmark list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When listing benchmarks with the `--list` parameter, present the tags in a
format that is actually accepted by the `--tags` and `--skip-tags` parameters.

Changes the `--list` output from
````
Enabled Tests,Tags
AngryPhonebook,[TestsUtils.BenchmarkCategory.validation, TestsUtils.BenchmarkCategory.api, TestsUtils.BenchmarkCategory.String]
...
````
into
````
Enabled Tests,Tags
AngryPhonebook,[String, api, validation]
…
````
---
 benchmark/utils/DriverUtils.swift  |  6 +++---
 benchmark/utils/TestsUtils.swift   | 12 ++++++++++++
 test/benchmark/Benchmark_O.test-sh |  4 ++--
 3 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/benchmark/utils/DriverUtils.swift b/benchmark/utils/DriverUtils.swift
index bf7b4d56a5653..bdb7755b83eda 100644
--- a/benchmark/utils/DriverUtils.swift
+++ b/benchmark/utils/DriverUtils.swift
@@ -69,7 +69,7 @@ struct Test {
 
   /// The benchmark categories that this test belongs to. Used for filtering.
   var tags: [BenchmarkCategory] {
-    return benchInfo.tags
+    return benchInfo.tags.sorted()
   }
 
   /// An optional initialization function for a benchmark that is run before
@@ -181,7 +181,7 @@ struct TestConfig {
 
     // We support specifying multiple tags by splitting on comma, i.e.:
     //
-    //  --tags=array,set
+    //  --tags=Array,Dictionary
     //
     // FIXME: If we used Error instead of .fail, then we could have a cleaner
     // impl here using map on x and tags.formUnion.
@@ -200,7 +200,7 @@ struct TestConfig {
 
     // We support specifying multiple tags by splitting on comma, i.e.:
     //
-    //  --skip-tags=array,set
+    //  --skip-tags=Array,Set,unstable,skip
     //
     // FIXME: If we used Error instead of .fail, then we could have a cleaner
     // impl here using map on x and tags.formUnion.
diff --git a/benchmark/utils/TestsUtils.swift b/benchmark/utils/TestsUtils.swift
index 594375c67a6b9..7a876b3d4187e 100644
--- a/benchmark/utils/TestsUtils.swift
+++ b/benchmark/utils/TestsUtils.swift
@@ -70,6 +70,18 @@ public enum BenchmarkCategory : String {
   case skip
 }
 
+extension BenchmarkCategory : CustomStringConvertible {
+  public var description: String {
+    return self.rawValue
+  }
+}
+
+extension BenchmarkCategory : Comparable {
+  public static func < (lhs: BenchmarkCategory, rhs: BenchmarkCategory) -> Bool {
+    return lhs.rawValue < rhs.rawValue
+  }
+}
+
 public struct BenchmarkPlatformSet : OptionSet {
   public let rawValue: Int
 
diff --git a/test/benchmark/Benchmark_O.test-sh b/test/benchmark/Benchmark_O.test-sh
index 62a896da66385..2e5f4bad98b90 100644
--- a/test/benchmark/Benchmark_O.test-sh
+++ b/test/benchmark/Benchmark_O.test-sh
@@ -5,8 +5,8 @@
 
 // RUN: %Benchmark_O --list | %FileCheck %s --check-prefix LISTTAGS
 // LISTTAGS: AngryPhonebook,[
-// LISTTAGS-SAME: TestsUtils.BenchmarkCategory.
-// LISTTAGS-SAME: String
+// LISTTAGS-NOT: TestsUtils.BenchmarkCategory.
+// LISTTAGS-SAME: String,
 // LISTTAGS-SAME: ]
 
 // RUN: %Benchmark_O AngryPhonebook --num-iters=1 \

From 8b03980cb0a48f0c8bf5870286484a1a03cf4c14 Mon Sep 17 00:00:00 2001
From: Pavol Vaskovic
Date: Wed, 11 Jul 2018 22:23:32 +0200
Subject: [PATCH 04/10] [benchmark] DRY: Call setup and teardown only once

---
 benchmark/utils/DriverUtils.swift | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/benchmark/utils/DriverUtils.swift b/benchmark/utils/DriverUtils.swift
index bdb7755b83eda..b220bc07817da 100644
--- a/benchmark/utils/DriverUtils.swift
+++ b/benchmark/utils/DriverUtils.swift
@@ -382,14 +382,13 @@ func runBench(_ test: Test, _ c: TestConfig) -> BenchResults? {
 
   let sampler = SampleRunner()
   for s in 0..<c.numSamples {
+    test.setUpFunction?()
     let time_per_sample: UInt64 = 1_000_000_000 * UInt64(c.sampleTime)
 
     var scale : UInt
     var elapsed_time : UInt64 = 0
     if c.fixedNumIters == 0 {
-      test.setUpFunction?()
       elapsed_time = sampler.run(test.name, fn: testFn, num_iters: 1)
-      test.tearDownFunction?()
 
       if elapsed_time > 0 {
         scale = UInt(time_per_sample / elapsed_time)
@@ -413,9 +412,7 @@ func runBench(_ test: Test, _ c: TestConfig) -> BenchResults? {
       if c.verbose {
         print(" Measuring with scale \(scale).")
       }
-      test.setUpFunction?()
       elapsed_time = sampler.run(test.name, fn: testFn, num_iters: scale)
-      test.tearDownFunction?()
     } else {
       scale = 1
     }
@@ -424,6 +421,7 @@ func runBench(_ test: Test, _ c: TestConfig) -> BenchResults? {
     if c.verbose {
       print(" Sample \(s),\(samples[s])")
     }
+    test.tearDownFunction?()
   }
 
   let (mean, sd) = internalMeanSD(samples)

From d40ddabcd5c28140e6ee6bd474a33195c00b24f7 Mon Sep 17 00:00:00 2001
From: Pavol Vaskovic
Date: Sat, 7 Jul 2018 01:47:54 +0200
Subject: [PATCH 05/10] [benchmark] Fix: running with `--num-iters=1`

Fixed a bug where the `elapsed_time` was always 0 when `--num-iters=1` was
specified.
---
 benchmark/utils/DriverUtils.swift  | 3 +++
 test/benchmark/Benchmark_O.test-sh | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/benchmark/utils/DriverUtils.swift b/benchmark/utils/DriverUtils.swift
index b220bc07817da..017a7580dabea 100644
--- a/benchmark/utils/DriverUtils.swift
+++ b/benchmark/utils/DriverUtils.swift
@@ -401,6 +401,9 @@ func runBench(_ test: Test, _ c: TestConfig) -> BenchResults? {
     } else {
       // Compute the scaling factor if a fixed c.fixedNumIters is not specified.
       scale = c.fixedNumIters
+      if scale == 1 {
+        elapsed_time = sampler.run(test.name, fn: testFn, num_iters: 1)
+      }
     }
     // Make integer overflow less likely on platforms where Int is 32 bits wide.
     // FIXME: Switch BenchmarkInfo to use Int64 for the iteration scale, or fix
diff --git a/test/benchmark/Benchmark_O.test-sh b/test/benchmark/Benchmark_O.test-sh
index 2e5f4bad98b90..2de4127e08732 100644
--- a/test/benchmark/Benchmark_O.test-sh
+++ b/test/benchmark/Benchmark_O.test-sh
@@ -12,4 +12,4 @@
 // RUN: %Benchmark_O AngryPhonebook --num-iters=1 \
 // RUN: | %FileCheck %s --check-prefix NUMITERS1
 // NUMITERS1: AngryPhonebook,1
-// NUMITERS1-SAME: 0,0,0,0,0
+// NUMITERS1-NOT: 0,0,0,0,0

From cf1d78be6b95827f238d578290fcf6d476f504fa Mon Sep 17 00:00:00 2001
From: Pavol Vaskovic
Date: Sat, 7 Jul 2018 05:15:31 +0200
Subject: [PATCH 06/10] [benchmark] Test running skip-tag-marked benchmark

Added test: It should be possible to run a benchmark by name, even if its
tags match the default skip-tags.

Added verification tests for benchmark filtering with the `tags` and
`skip-tags` parameters.
---
 test/benchmark/Benchmark_O.test-sh | 31 +++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/test/benchmark/Benchmark_O.test-sh b/test/benchmark/Benchmark_O.test-sh
index 2de4127e08732..ea9daeaadfda0 100644
--- a/test/benchmark/Benchmark_O.test-sh
+++ b/test/benchmark/Benchmark_O.test-sh
@@ -10,6 +10,35 @@
 // RUN: %Benchmark_O AngryPhonebook --num-iters=1 \
-// RUN: | %FileCheck %s --check-prefix NUMITERS1
+// RUN:   | %FileCheck %s --check-prefix NUMITERS1
 // NUMITERS1: AngryPhonebook,1
 // NUMITERS1-NOT: 0,0,0,0,0
+
+// Should run benchmark by name, even if its tags match the default skip-tags
+// (unstable,skip). Ackermann is marked unstable
+// RUN: %Benchmark_O Ackermann | %FileCheck %s --check-prefix NAMEDSKIP
+// NAMEDSKIP-NOT: Ackermann
+// RUN: %Benchmark_O Ackermann --skip-tags= \
+// RUN:   | %FileCheck %s --check-prefix NAMEDSKIPWORKAROUND
+// NAMEDSKIPWORKAROUND: Ackermann
+
+// RUN: %Benchmark_O --list --tags=Dictionary,Array \
+// RUN:   | %FileCheck %s --check-prefix ANDTAGS
+// ANDTAGS: TwoSum
+// ANDTAGS-NOT: Array2D
+// ANDTAGS-NOT: DictionarySwap
+
+// RUN: %Benchmark_O --list --tags=algorithm --skip-tags=validation \
+// RUN:   | %FileCheck %s --check-prefix TAGSANDSKIPTAGS
+// TAGSANDSKIPTAGS: Ackermann
+// TAGSANDSKIPTAGS: DictOfArraysToArrayOfDicts
+// TAGSANDSKIPTAGS: Fibonacci
+// TAGSANDSKIPTAGS: RomanNumbers
+
+// RUN: %Benchmark_O --list --tags=algorithm \
+// RUN:   --skip-tags=validation,Dictionary,String \
+// RUN:   | %FileCheck %s --check-prefix ORSKIPTAGS
+// ORSKIPTAGS: Ackermann
+// ORSKIPTAGS-NOT: DictOfArraysToArrayOfDicts
+// ORSKIPTAGS: Fibonacci
+// ORSKIPTAGS-NOT: RomanNumbers

From 2d004970fd6a63e9a4df4f533673d739ebedb6ee Mon Sep 17 00:00:00 2001
From: Pavol Vaskovic
Date: Mon, 9 Jul 2018 17:41:27 +0200
Subject: [PATCH 07/10] [benchmark] Fix: Running skip-tag-marked benchmark

Also updated benchmark documentation with a more detailed description of tag
handling.
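
The intended filtering semantics, described in the README change below, can be
sketched with plain `Set` operations (a simplified model for illustration only,
not the driver code itself; the concrete tag values here are made up):
````
let benchmarkTags: Set<String> = ["String", "api", "validation"]
let tags: Set<String> = ["api", "String"]         // e.g. --tags=api,String
let skipTags: Set<String> = ["unstable", "skip"]  // the default --skip-tags

// --tags is a logical AND: the benchmark must carry all requested tags.
// --skip-tags is a logical OR: the benchmark must carry none of them.
let shouldRun = benchmarkTags.isSuperset(of: tags) &&
  benchmarkTags.isDisjoint(with: skipTags)
print(shouldRun) // true
````
Running a benchmark by its explicit name bypasses the tag check entirely,
which is what this fix restores.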
---
 benchmark/README.md                | 13 ++++++++++++-
 benchmark/utils/DriverUtils.swift  | 44 ++++++++++----------------------------
 benchmark/utils/TestsUtils.swift   |  4 ++--
 test/benchmark/Benchmark_O.test-sh |  5 +----
 4 files changed, 27 insertions(+), 39 deletions(-)

diff --git a/benchmark/README.md b/benchmark/README.md
index 4d1f70847fed0..9f7400ef10154 100644
--- a/benchmark/README.md
+++ b/benchmark/README.md
@@ -86,13 +86,24 @@ Using the Benchmark Driver
 * `--num-samples`
     * Control the number of samples to take for each test
 * `--list`
-    * Print a list of available tests
+    * Print a list of available tests matching specified criteria
+* `--tags`
+    * Run tests that are labeled with the specified [tags](https://github.com/apple/swift/blob/master/benchmark/utils/TestsUtils.swift#L19)
+      (comma separated list); multiple tags are interpreted as logical AND, i.e.
+      run only tests that are labeled with all the supplied tags
+* `--skip-tags`
+    * Don't run tests that are labeled with any of the specified tags (comma
+      separated list); default value: `skip,unstable`; to get a complete list of
+      tests, specify empty `--skip-tags=`
+
 
 ### Examples
 
 * `$ ./Benchmark_O --num-iters=1 --num-samples=1`
 * `$ ./Benchmark_Onone --list`
 * `$ ./Benchmark_Osize Ackermann`
+* `$ ./Benchmark_O --tags=Dictionary`
+* `$ ./Benchmark_O --skip-tags=unstable,skip,validation`
 
 ### Note
 As a shortcut, you can also refer to benchmarks by their ordinal numbers.
diff --git a/benchmark/utils/DriverUtils.swift b/benchmark/utils/DriverUtils.swift
index 017a7580dabea..cc1a468ee4290 100644
--- a/benchmark/utils/DriverUtils.swift
+++ b/benchmark/utils/DriverUtils.swift
@@ -227,39 +227,19 @@ struct TestConfig {
   }
 
   mutating func findTestsToRun() {
-    let benchmarkNameFilter = Set(filters)
-
-    // t is needed so we don't capture an ivar of a mutable inout self.
-    let t = tags
-    let st = skipTags
-    let filteredTests = Array(registeredBenchmarks.filter { benchInfo in
-      if !t.isSubset(of: benchInfo.tags) {
-        return false
-      }
-
-      if !st.isDisjoint(with: benchInfo.tags) {
-        return false
-      }
-
-      // If the user did not specified a benchmark name filter and our tags are
-      // a subset of the specified tags by the user, return true. We want to run
-      // this test.
-      if benchmarkNameFilter.isEmpty {
-        return true
-      }
-
-      // Otherwise, we need to check if our benchInfo's name is in the benchmark
-      // name filter list. If it isn't, then we shouldn't process it.
-      return benchmarkNameFilter.contains(benchInfo.name)
-    }).sorted()
-
-    if (filteredTests.isEmpty) {
-      return
-    }
-
-    tests = filteredTests.enumerated().map {
-      Test(benchInfo: $0.element, index: $0.offset + 1)
-    }
+    registeredBenchmarks.sort()
+    let benchmarkNames = Set(filters)
+    // needed so we don't capture an ivar of a mutable inout self.
+    let (_tags, _skipTags) = (tags, skipTags)
+
+    tests = registeredBenchmarks.filter { benchmark in
+      if benchmarkNames.isEmpty {
+        return benchmark.tags.isSuperset(of: _tags) &&
+          benchmark.tags.isDisjoint(with: _skipTags)
+      } else {
+        return benchmarkNames.contains(benchmark.name)
+      }
+    }.enumerated().map { Test(benchInfo: $0.element, index: $0.offset + 1) }
   }
 }
 
diff --git a/benchmark/utils/TestsUtils.swift b/benchmark/utils/TestsUtils.swift
index 7a876b3d4187e..1330853443eaa 100644
--- a/benchmark/utils/TestsUtils.swift
+++ b/benchmark/utils/TestsUtils.swift
@@ -123,7 +123,7 @@ public struct BenchmarkInfo {
   /// A set of category tags that describe this benchmark. This is used by the
   /// harness to allow for easy slicing of the set of benchmarks along tag
   /// boundaries, e.x.: run all string benchmarks or ref count benchmarks, etc.
-  public var tags: [BenchmarkCategory]
+  public var tags: Set<BenchmarkCategory>
 
   /// The platforms that this benchmark supports. This is an OptionSet.
   private var unsupportedPlatforms: BenchmarkPlatformSet
@@ -158,7 +158,7 @@ public struct BenchmarkInfo {
               unsupportedPlatforms: BenchmarkPlatformSet = []) {
     self.name = name
     self._runFunction = runFunction
-    self.tags = tags
+    self.tags = Set(tags)
     self._setUpFunction = setUpFunction
     self._tearDownFunction = tearDownFunction
     self.unsupportedPlatforms = unsupportedPlatforms
diff --git a/test/benchmark/Benchmark_O.test-sh b/test/benchmark/Benchmark_O.test-sh
index ea9daeaadfda0..551cd156a3750 100644
--- a/test/benchmark/Benchmark_O.test-sh
+++ b/test/benchmark/Benchmark_O.test-sh
@@ -17,10 +17,7 @@
 // Should run benchmark by name, even if its tags match the default skip-tags
 // (unstable,skip). Ackermann is marked unstable
 // RUN: %Benchmark_O Ackermann | %FileCheck %s --check-prefix NAMEDSKIP
-// NAMEDSKIP-NOT: Ackermann
-// RUN: %Benchmark_O Ackermann --skip-tags= \
-// RUN:   | %FileCheck %s --check-prefix NAMEDSKIPWORKAROUND
-// NAMEDSKIPWORKAROUND: Ackermann
+// NAMEDSKIP: Ackermann
 
 // RUN: %Benchmark_O --list --tags=Dictionary,Array \

From d82c99666996837bfe9412b3a6541bfb3ef680e9 Mon Sep 17 00:00:00 2001
From: Pavol Vaskovic
Date: Mon, 9 Jul 2018 21:19:09 +0200
Subject: [PATCH 08/10] [benchmark] Fixed Benchmark_Driver running tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixed failure in `get_tests` which depended on the removed `Benchmark_O
--run-all` option for listing all tests (not just the pre-commit set).

Fix: Restored the ability to run tests by ordinal number from
`Benchmark_Driver` after the support for this was removed from `Benchmark_O`.

Added tests that verify the output format of `Benchmark_O --list` and the
support for the `--skip-tags=` option, which effectively replaced the old
`--run-all` option. Other tools, like `Benchmark_Driver`, depend on it.

Added integration tests for the dependency between `Benchmark_Driver` and
`Benchmark_O`. Running the pre-commit test set isn't tested explicitly here.
It would take too long and it is run fairly frequently by CI bots, so if that
breaks, we'll know soon enough.
---
 benchmark/scripts/Benchmark_Driver      | 32 ++++++++++++++-----------
 test/benchmark/Benchmark_Driver.test-sh | 22 +++++++++++++++++
 test/benchmark/Benchmark_O.test-sh      |  9 +++++++
 test/lit.cfg                            |  2 ++
 4 files changed, 51 insertions(+), 14 deletions(-)
 create mode 100644 test/benchmark/Benchmark_Driver.test-sh

diff --git a/benchmark/scripts/Benchmark_Driver b/benchmark/scripts/Benchmark_Driver
index cc918aec19e68..987691b471e18 100755
--- a/benchmark/scripts/Benchmark_Driver
+++ b/benchmark/scripts/Benchmark_Driver
@@ -118,28 +118,32 @@ def instrument_test(driver_path, test, num_samples):
     return avg_test_output
 
 
-BENCHMARK_OUTPUT_RE = re.compile('([^,]+),')
-
-
 def get_tests(driver_path, args):
     """Return a list of available performance tests"""
     driver = ([driver_path, '--list'])
+    # Use tab delimiter for easier parsing to override the default comma.
+    # (The third 'column' is always comma-separated list of tags in square
+    # brackets -- currently unused here.)
+    driver.append('--delim=\t')
     if args.benchmarks or args.filters:
-        driver.append('--run-all')
-    tests = []
-    for l in subprocess.check_output(driver).split("\n")[1:]:
-        m = BENCHMARK_OUTPUT_RE.match(l)
-        if m is None:
-            continue
-        tests.append(m.group(1))
+        driver.append('--skip-tags=')  # list all tests, don't skip any tags
+    names = [
+        line.split('\t')[0] for line in
+        subprocess.check_output(driver).split('\n')[1:-1]
+    ]
     if args.filters:
         regexes = [re.compile(pattern) for pattern in args.filters]
         return sorted(list(set([name for pattern in regexes
-                                for name in tests if pattern.match(name)])))
+                                for name in names if pattern.match(name)])))
     if not args.benchmarks:
-        return tests
-    tests.extend(map(str, range(1, len(tests) + 1)))  # ordinal numbers
-    return sorted(list(set(tests).intersection(set(args.benchmarks))))
+        return names
+    benchmarks = set(args.benchmarks)
+    indices = [str(i) for i in range(1, len(names) + 1)]
+    index_to_name = dict(zip(indices, names))
+    indexed_names = [index_to_name[i]
+                     for i in benchmarks.intersection(set(indices))]
+    return sorted(list(
+        benchmarks.intersection(set(names)).union(indexed_names)))
 
 
 def get_current_git_branch(git_repo_path):
diff --git a/test/benchmark/Benchmark_Driver.test-sh b/test/benchmark/Benchmark_Driver.test-sh
new file mode 100644
index 0000000000000..dec24a369da2b
--- /dev/null
+++ b/test/benchmark/Benchmark_Driver.test-sh
@@ -0,0 +1,22 @@
+// REQUIRES: OS=macosx
+// REQUIRES: asserts
+// REQUIRES: benchmark
+// REQUIRES: CMAKE_GENERATOR=Ninja
+
+// Integration tests between Benchmark_Driver and Benchmark_O
+// TODO: Keep the "run just once" check and move the rest into unit tests for
+// Benchmark_Driver, as they are redundant and unnecessarily slow.
+
+// RUN: %Benchmark_Driver run Ackermann | %FileCheck %s --check-prefix RUNNAMED
+// RUNNAMED: Ackermann
+
+// RUN: %Benchmark_Driver run 1 | %FileCheck %s --check-prefix RUNBYNUMBER
+// RUNBYNUMBER: Ackermann
+
+// RUN: %Benchmark_Driver run 1 Ackermann 1 \
+// RUN:   | %FileCheck %s --check-prefix RUNJUSTONCE
+// RUNJUSTONCE-LABEL: Ackermann
+// RUNJUSTONCE-NOT: Ackermann
+
+// RUN: %Benchmark_Driver run -f Acker | %FileCheck %s --check-prefix RUNFILTER
+// RUNFILTER: Ackermann
diff --git a/test/benchmark/Benchmark_O.test-sh b/test/benchmark/Benchmark_O.test-sh
index 551cd156a3750..b6f6624dc21eb 100644
--- a/test/benchmark/Benchmark_O.test-sh
+++ b/test/benchmark/Benchmark_O.test-sh
@@ -39,3 +39,12 @@
 // ORSKIPTAGS-NOT: DictOfArraysToArrayOfDicts
 // ORSKIPTAGS: Fibonacci
 // ORSKIPTAGS-NOT: RomanNumbers
+
+// RUN: %Benchmark_O --list | %FileCheck %s --check-prefix LISTPRECOMMIT
+// LISTPRECOMMIT: Enabled Tests,Tags
+// LISTPRECOMMIT-NOT: Ackermann
+// LISTPRECOMMIT: AngryPhonebook
+
+// RUN: %Benchmark_O --list --skip-tags= | %FileCheck %s --check-prefix LISTALL
+// LISTALL: Ackermann
+// LISTALL: AngryPhonebook
diff --git a/test/lit.cfg b/test/lit.cfg
index 830996f48cd93..46f5e7c56fd37 100644
--- a/test/lit.cfg
+++ b/test/lit.cfg
@@ -273,6 +273,7 @@ config.swift_api_digester = inferSwiftBinary('swift-api-digester')
 config.swift_refactor = inferSwiftBinary('swift-refactor')
 config.swift_demangle_yamldump = inferSwiftBinary('swift-demangle-yamldump')
 config.benchmark_o = inferSwiftBinary('Benchmark_O')
+config.benchmark_driver = inferSwiftBinary('Benchmark_Driver')
 
 config.swift_utils = make_path(config.swift_src_root, 'utils')
 config.line_directive = make_path(config.swift_utils, 'line-directive')
@@ -368,6 +369,7 @@ config.substitutions.append( ('%llvm-dwarfdump', config.llvm_dwarfdump) )
 config.substitutions.append( ('%llvm-dis', config.llvm_dis) )
 config.substitutions.append( ('%swift-demangle-yamldump', config.swift_demangle_yamldump) )
 config.substitutions.append( ('%Benchmark_O', config.benchmark_o) )
+config.substitutions.append( ('%Benchmark_Driver', config.benchmark_driver) )
 
 # This must come after all substitutions containing "%swift".
 config.substitutions.append(

From 7f894268b2548f040c5322d6913961eb87265a93 Mon Sep 17 00:00:00 2001
From: Pavol Vaskovic
Date: Mon, 9 Jul 2018 23:58:05 +0200
Subject: [PATCH 09/10] [benchmark] Restore running benchmarks by numbers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reintroduced a feature lost during `BenchmarkInfo` modernization: All
registered benchmarks are ordered alphabetically and assigned an index. This
number can be used as a shortcut to invoke the test instead of its full name.
(Adding and removing tests from the suite will naturally reassign the indices,
but they are stable for a given build.)

The `--list` parameter now prints the test *number*, *name* and *tags*
separated by the delimiter.

The `--list` output format is modified from:
````
Enabled Tests,Tags
AngryPhonebook,[String, api, validation]
...
````
to this:
````
\#,Test,[Tags]
2,AngryPhonebook,[String, api, validation]
…
````
(There isn’t actually a backslash before the `#`; git was eating the whole
line without it.)

Note: Test number 1 is Ackermann, which is marked as “skip”, so it’s not
listed with the default `skip-tags` value.

Fixes the issue where running tests via `Benchmark_Driver` always reported
each test as number 1. Each test is run independently, so every invocation
was “first”. Restoring test numbers resolves this issue back to the original
state: The number reported in the first column when executing the tests is
its ordinal number in the Swift Benchmark Suite.
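
The index assignment can be sketched in isolation like so (a stand-alone
example with made-up benchmark names, mirroring the
`Dictionary(uniqueKeysWithValues:)` approach used in the driver change below):
````
// Sort the registered benchmark names alphabetically, then number them from 1.
let registered = ["TwoSum", "Ackermann", "AngryPhonebook"]
let names = registered.sorted()
let indices = Dictionary(uniqueKeysWithValues: zip(names, 1...))

for name in names {
  print("\(indices[name]!),\(name)")  // e.g. "1,Ackermann"
}
````
A benchmark can then be selected either by its name or by the matching index,
and `--list` prints the index in the first column.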
---
 benchmark/scripts/Benchmark_Driver |  8 ++++----
 benchmark/utils/DriverUtils.swift  | 15 +++++++++------
 test/benchmark/Benchmark_O.test-sh |  4 ++--
 3 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/benchmark/scripts/Benchmark_Driver b/benchmark/scripts/Benchmark_Driver
index 987691b471e18..92f12c627855a 100755
--- a/benchmark/scripts/Benchmark_Driver
+++ b/benchmark/scripts/Benchmark_Driver
@@ -127,10 +127,11 @@ def get_tests(driver_path, args):
     driver.append('--delim=\t')
     if args.benchmarks or args.filters:
         driver.append('--skip-tags=')  # list all tests, don't skip any tags
-    names = [
-        line.split('\t')[0] for line in
+    index_name_pairs = [
+        line.split('\t')[:2] for line in
         subprocess.check_output(driver).split('\n')[1:-1]
     ]
+    indices, names = zip(*index_name_pairs)  # unzip list of pairs into 2 lists
     if args.filters:
         regexes = [re.compile(pattern) for pattern in args.filters]
         return sorted(list(set([name for pattern in regexes
@@ -138,8 +139,7 @@ def get_tests(driver_path, args):
     if not args.benchmarks:
         return names
     benchmarks = set(args.benchmarks)
-    indices = [str(i) for i in range(1, len(names) + 1)]
-    index_to_name = dict(zip(indices, names))
+    index_to_name = dict(index_name_pairs)
     indexed_names = [index_to_name[i]
                      for i in benchmarks.intersection(set(indices))]
     return sorted(list(
diff --git a/benchmark/utils/DriverUtils.swift b/benchmark/utils/DriverUtils.swift
index cc1a468ee4290..5675c767ecd9b 100644
--- a/benchmark/utils/DriverUtils.swift
+++ b/benchmark/utils/DriverUtils.swift
@@ -228,18 +228,21 @@ struct TestConfig {
 
   mutating func findTestsToRun() {
     registeredBenchmarks.sort()
-    let benchmarkNames = Set(filters)
+    let indices = Dictionary(uniqueKeysWithValues:
+      zip(registeredBenchmarks.map{$0.name}, 1...))
+    let benchmarkNamesOrIndices = Set(filters)
     // needed so we don't capture an ivar of a mutable inout self.
     let (_tags, _skipTags) = (tags, skipTags)
 
     tests = registeredBenchmarks.filter { benchmark in
-      if benchmarkNames.isEmpty {
+      if benchmarkNamesOrIndices.isEmpty {
         return benchmark.tags.isSuperset(of: _tags) &&
           benchmark.tags.isDisjoint(with: _skipTags)
       } else {
-        return benchmarkNames.contains(benchmark.name)
+        return benchmarkNamesOrIndices.contains(benchmark.name) ||
+          benchmarkNamesOrIndices.contains(String(indices[benchmark.name]!))
       }
-    }.enumerated().map { Test(benchInfo: $0.element, index: $0.offset + 1) }
+    }.map { Test(benchInfo: $0, index: indices[$0.name]!) }
   }
 }
 
@@ -478,9 +481,9 @@ public func main() {
     fatalError("\(msg)")
   case .listTests:
     config.findTestsToRun()
-    print("Enabled Tests\(config.delim)Tags")
+    print("#\(config.delim)Test\(config.delim)[Tags]")
     for t in config.tests {
-      print("\(t.name)\(config.delim)\(t.tags)")
+      print("\(t.index)\(config.delim)\(t.name)\(config.delim)\(t.tags)")
     }
   case .run:
     config.findTestsToRun()
diff --git a/test/benchmark/Benchmark_O.test-sh b/test/benchmark/Benchmark_O.test-sh
index b6f6624dc21eb..7e991e9240c2d 100644
--- a/test/benchmark/Benchmark_O.test-sh
+++ b/test/benchmark/Benchmark_O.test-sh
@@ -41,9 +41,9 @@
 // ORSKIPTAGS-NOT: RomanNumbers
 
 // RUN: %Benchmark_O --list | %FileCheck %s --check-prefix LISTPRECOMMIT
-// LISTPRECOMMIT: Enabled Tests,Tags
+// LISTPRECOMMIT: #,Test,[Tags]
 // LISTPRECOMMIT-NOT: Ackermann
-// LISTPRECOMMIT: AngryPhonebook
+// LISTPRECOMMIT: {{[0-9]+}},AngryPhonebook
 
 // RUN: %Benchmark_O --list --skip-tags= | %FileCheck %s --check-prefix LISTALL
 // LISTALL: Ackermann
 // LISTALL: AngryPhonebook

From c288f7553e0ae9ef380ad598d49e387edc17cdcb Mon Sep 17 00:00:00 2001
From: Pavol Vaskovic
Date: Mon, 9 Jul 2018 22:17:24 +0200
Subject: [PATCH 10/10] [benchmark] Updated docs with info on test numbers

* Description of listing and running tests by ordinal numbers
---
 benchmark/README.md | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/benchmark/README.md b/benchmark/README.md
index 9f7400ef10154..6d1e436401ed3 100644
--- a/benchmark/README.md
+++ b/benchmark/README.md
@@ -107,12 +107,17 @@ Using the Benchmark Driver
 
 ### Note
 As a shortcut, you can also refer to benchmarks by their ordinal numbers.
-The regular `--list` option does not provide these, but you can run:
-* `$ ./Benchmark_O --list --run-all | tail -n +2 | nl`
-You can use ordinal numbers instead of test names like this:
+These are printed out together with the benchmark names and tags using the
+`--list` parameter. For a complete list of all available performance tests, run:
+* `$ ./Benchmark_O --list --skip-tags=`
+
+You can use test numbers instead of test names like this:
 * `$ ./Benchmark_O 1 42`
 * `$ ./Benchmark_Driver run 1 42`
 
+Test numbers are not stable in the long run: adding and removing tests from the
+benchmark suite will reorder them, but they are stable for a given build.
+
 Using the Harness Generator
 ---------------------------