diff --git a/benchmark/README.md b/benchmark/README.md index bb1722eafec49..6d1e436401ed3 100644 --- a/benchmark/README.md +++ b/benchmark/README.md @@ -86,22 +86,38 @@ Using the Benchmark Driver * `--num-samples` * Control the number of samples to take for each test * `--list` - * Print a list of available tests + * Print a list of available tests matching specified criteria +* `--tags` + * Run tests that are labeled with specified [tags](https://github.com/apple/swift/blob/master/benchmark/utils/TestsUtils.swift#L19) + (comma separated list); multiple tags are interpreted as logical AND, i.e. + run only tests that are labeled with all the supplied tags +* `--skip-tags` + * Don't run tests that are labeled with any of the specified tags (comma + separated list); default value: `skip,unstable`; to get a complete list of + tests, specify an empty `--skip-tags=` + ### Examples * `$ ./Benchmark_O --num-iters=1 --num-samples=1` * `$ ./Benchmark_Onone --list` * `$ ./Benchmark_Osize Ackermann` +* `$ ./Benchmark_O --tags=Dictionary` +* `$ ./Benchmark_O --skip-tags=unstable,skip,validation` ### Note As a shortcut, you can also refer to benchmarks by their ordinal numbers. -The regular `--list` option does not provide these, but you can run: -* `$ ./Benchmark_O --list --run-all | tail -n +2 | nl` -You can use ordinal numbers instead of test names like this: +These are printed out together with benchmark names and tags using the +`--list` parameter. For a complete list of all available performance tests, run +* `$ ./Benchmark_O --list --skip-tags=` + +You can use test numbers instead of test names like this: * `$ ./Benchmark_O 1 42` * `$ ./Benchmark_Driver run 1 42` +Test numbers are not stable in the long run: adding and removing tests from the +benchmark suite will reorder them, but they are stable for a given build. + Using the Harness Generator --------------------------- @@ -186,3 +202,21 @@ public func run_YourTestName(N: Int) { The current set of tags are defined by the `BenchmarkCategory` enum in `TestsUtils.swift` . + +Testing the Benchmark Drivers +----------------------------- +When working on tests, after the initial build +```` +swift-source$ ./swift/utils/build-script -R -B +```` +you can rebuild just the benchmarks: +```` +swift-source$ export SWIFT_BUILD_DIR=`pwd`/build/Ninja-ReleaseAssert/swift-macosx-x86_64 +swift-source$ ninja -C ${SWIFT_BUILD_DIR} swift-benchmark-macosx-x86_64 +```` + +When modifying the testing infrastructure, you should verify that your changes +pass all the tests: +```` +swift-source$ ./llvm/utils/lit/lit.py -sv ${SWIFT_BUILD_DIR}/test-macosx-x86_64/benchmark +```` diff --git a/benchmark/scripts/Benchmark_Driver b/benchmark/scripts/Benchmark_Driver index cc918aec19e68..92f12c627855a 100755 --- a/benchmark/scripts/Benchmark_Driver +++ b/benchmark/scripts/Benchmark_Driver @@ -118,28 +118,32 @@ def instrument_test(driver_path, test, num_samples): return avg_test_output -BENCHMARK_OUTPUT_RE = re.compile('([^,]+),') - - def get_tests(driver_path, args): """Return a list of available performance tests""" driver = ([driver_path, '--list']) + # Override the default comma delimiter with a tab for easier parsing. + # (The third 'column' is always a comma-separated list of tags in square + # brackets -- currently unused here.)
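The comment above explains why the driver switches `--list` output to a tab delimiter (the matching `driver.append('--delim=\t')` call follows below). Here is a rough, self-contained sketch of how such tab-delimited output can be split into test numbers and names; the sample lines and their tag formatting are invented for illustration, not captured output:

```python
# Minimal sketch: split tab-delimited `--list` output into (number, name) pairs.
# SAMPLE_OUTPUT is invented; real data would come from `Benchmark_O --list --delim=\t`.
SAMPLE_OUTPUT = (
    "#\tTest\t[Tags]\n"
    "1\tAckermann\t[algorithm, unstable]\n"
    "2\tAngryPhonebook\t[String, api, validation]\n"
)

def parse_list_output(output):
    """Return (number, name) pairs, dropping the header and the trailing blank line."""
    lines = output.split("\n")[1:-1]
    return [tuple(line.split("\t")[:2]) for line in lines]

if __name__ == "__main__":
    print(parse_list_output(SAMPLE_OUTPUT))
    # [('1', 'Ackermann'), ('2', 'AngryPhonebook')]
```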
+ driver.append('--delim=\t') if args.benchmarks or args.filters: - driver.append('--run-all') - tests = [] - for l in subprocess.check_output(driver).split("\n")[1:]: - m = BENCHMARK_OUTPUT_RE.match(l) - if m is None: - continue - tests.append(m.group(1)) + driver.append('--skip-tags=') # list all tests, don't skip any tags + index_name_pairs = [ + line.split('\t')[:2] for line in + subprocess.check_output(driver).split('\n')[1:-1] + ] + indices, names = zip(*index_name_pairs) # unzip list of pairs into 2 lists if args.filters: regexes = [re.compile(pattern) for pattern in args.filters] return sorted(list(set([name for pattern in regexes - for name in tests if pattern.match(name)]))) + for name in names if pattern.match(name)]))) if not args.benchmarks: - return tests - tests.extend(map(str, range(1, len(tests) + 1))) # ordinal numbers - return sorted(list(set(tests).intersection(set(args.benchmarks)))) + return names + benchmarks = set(args.benchmarks) + index_to_name = dict(index_name_pairs) + indexed_names = [index_to_name[i] + for i in benchmarks.intersection(set(indices))] + return sorted(list( + benchmarks.intersection(set(names)).union(indexed_names))) def get_current_git_branch(git_repo_path): diff --git a/benchmark/utils/DriverUtils.swift b/benchmark/utils/DriverUtils.swift index bf7b4d56a5653..5675c767ecd9b 100644 --- a/benchmark/utils/DriverUtils.swift +++ b/benchmark/utils/DriverUtils.swift @@ -69,7 +69,7 @@ struct Test { /// The benchmark categories that this test belongs to. Used for filtering. var tags: [BenchmarkCategory] { - return benchInfo.tags + return benchInfo.tags.sorted() } /// An optional initialization function for a benchmark that is run before @@ -181,7 +181,7 @@ struct TestConfig { // We support specifying multiple tags by splitting on comma, i.e.: // - // --tags=array,set + // --tags=Array,Dictionary // // FIXME: If we used Error instead of .fail, then we could have a cleaner // impl here using map on x and tags.formUnion. @@ -200,7 +200,7 @@ struct TestConfig { // We support specifying multiple tags by splitting on comma, i.e.: // - // --skip-tags=array,set + // --skip-tags=Array,Set,unstable,skip // // FIXME: If we used Error instead of .fail, then we could have a cleaner // impl here using map on x and tags.formUnion. @@ -227,39 +227,22 @@ struct TestConfig { } mutating func findTestsToRun() { - let benchmarkNameFilter = Set(filters) - - // t is needed so we don't capture an ivar of a mutable inout self. - let t = tags - let st = skipTags - let filteredTests = Array(registeredBenchmarks.filter { benchInfo in - if !t.isSubset(of: benchInfo.tags) { - return false - } - - if !st.isDisjoint(with: benchInfo.tags) { - return false - } - - // If the user did not specified a benchmark name filter and our tags are - // a subset of the specified tags by the user, return true. We want to run - // this test. - if benchmarkNameFilter.isEmpty { - return true + registeredBenchmarks.sort() + let indices = Dictionary(uniqueKeysWithValues: + zip(registeredBenchmarks.map{$0.name}, 1...)) + let benchmarkNamesOrIndices = Set(filters) + // needed so we don't capture an ivar of a mutable inout self. 
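The rewritten `get_tests` above accepts test names as well as ordinal numbers and resolves both against the parsed `--list` output. A standalone sketch of that resolution logic follows (the pairs and the requested set are invented); the Swift changes to `findTestsToRun` then continue below.

```python
# Standalone sketch of resolving requested benchmarks given by name or by number.
# The data below is invented; in the driver it comes from the `--list` output.
index_name_pairs = [("1", "Ackermann"), ("2", "AngryPhonebook"), ("3", "TwoSum")]
requested = {"2", "TwoSum", "NoSuchBenchmark"}  # mix of numbers and names

indices, names = zip(*index_name_pairs)
index_to_name = dict(index_name_pairs)

by_name = set(names) & requested                                   # requested by name
by_number = {index_to_name[i] for i in requested & set(indices)}   # requested by number

print(sorted(by_name | by_number))  # ['AngryPhonebook', 'TwoSum']
```

Unknown names or numbers simply fall out of the intersections, which matches the driver's behavior of silently ignoring them.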
+ let (_tags, _skipTags) = (tags, skipTags) + + tests = registeredBenchmarks.filter { benchmark in + if benchmarkNamesOrIndices.isEmpty { + return benchmark.tags.isSuperset(of: _tags) && + benchmark.tags.isDisjoint(with: _skipTags) + } else { + return benchmarkNamesOrIndices.contains(benchmark.name) || + benchmarkNamesOrIndices.contains(String(indices[benchmark.name]!)) } - - // Otherwise, we need to check if our benchInfo's name is in the benchmark - // name filter list. If it isn't, then we shouldn't process it. - return benchmarkNameFilter.contains(benchInfo.name) - }).sorted() - - if (filteredTests.isEmpty) { - return - } - - tests = filteredTests.enumerated().map { - Test(benchInfo: $0.element, index: $0.offset + 1) - } + }.map { Test(benchInfo: $0, index: indices[$0.name]!) } } } @@ -382,14 +365,13 @@ func runBench(_ test: Test, _ c: TestConfig) -> BenchResults? { let sampler = SampleRunner() for s in 0.. 0 { scale = UInt(time_per_sample / elapsed_time) @@ -402,6 +384,9 @@ func runBench(_ test: Test, _ c: TestConfig) -> BenchResults? { } else { // Compute the scaling factor if a fixed c.fixedNumIters is not specified. scale = c.fixedNumIters + if scale == 1 { + elapsed_time = sampler.run(test.name, fn: testFn, num_iters: 1) + } } // Make integer overflow less likely on platforms where Int is 32 bits wide. // FIXME: Switch BenchmarkInfo to use Int64 for the iteration scale, or fix @@ -413,9 +398,7 @@ func runBench(_ test: Test, _ c: TestConfig) -> BenchResults? { if c.verbose { print(" Measuring with scale \(scale).") } - test.setUpFunction?() elapsed_time = sampler.run(test.name, fn: testFn, num_iters: scale) - test.tearDownFunction?() } else { scale = 1 } @@ -424,6 +407,7 @@ func runBench(_ test: Test, _ c: TestConfig) -> BenchResults? { if c.verbose { print(" Sample \(s),\(samples[s])") } + test.tearDownFunction?() } let (mean, sd) = internalMeanSD(samples) @@ -497,9 +481,9 @@ public func main() { fatalError("\(msg)") case .listTests: config.findTestsToRun() - print("Enabled Tests\(config.delim)Tags") + print("#\(config.delim)Test\(config.delim)[Tags]") for t in config.tests { - print("\(t.name)\(config.delim)\(t.tags)") + print("\(t.index)\(config.delim)\(t.name)\(config.delim)\(t.tags)") } case .run: config.findTestsToRun() diff --git a/benchmark/utils/TestsUtils.swift b/benchmark/utils/TestsUtils.swift index 594375c67a6b9..1330853443eaa 100644 --- a/benchmark/utils/TestsUtils.swift +++ b/benchmark/utils/TestsUtils.swift @@ -70,6 +70,18 @@ public enum BenchmarkCategory : String { case skip } +extension BenchmarkCategory : CustomStringConvertible { + public var description: String { + return self.rawValue + } +} + +extension BenchmarkCategory : Comparable { + public static func < (lhs: BenchmarkCategory, rhs: BenchmarkCategory) -> Bool { + return lhs.rawValue < rhs.rawValue + } +} + public struct BenchmarkPlatformSet : OptionSet { public let rawValue: Int @@ -111,7 +123,7 @@ public struct BenchmarkInfo { /// A set of category tags that describe this benchmark. This is used by the /// harness to allow for easy slicing of the set of benchmarks along tag /// boundaries, e.x.: run all string benchmarks or ref count benchmarks, etc. - public var tags: [BenchmarkCategory] + public var tags: Set<BenchmarkCategory> /// The platforms that this benchmark supports. This is an OptionSet.
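Storing `tags` as a `Set<BenchmarkCategory>` (the declaration continues just below) is what lets `findTestsToRun` above express the filtering directly with `isSuperset(of:)` and `isDisjoint(with:)`: `--tags` acts as a logical AND, `--skip-tags` as a logical OR. A small Python illustration of those semantics, with invented benchmark and tag data:

```python
# Illustration of the tag-filtering semantics only; the benchmarks and their
# tags below are invented, not the real suite.
benchmarks = {
    "TwoSum":         {"validation", "api", "Dictionary", "Array", "algorithm"},
    "Array2D":        {"validation", "api", "Array"},
    "DictionarySwap": {"validation", "api", "Dictionary"},
    "Ackermann":      {"algorithm", "unstable"},
}

def tests_to_run(tags, skip_tags):
    """--tags: every requested tag must be present; --skip-tags: none may be."""
    return sorted(name for name, t in benchmarks.items()
                  if t >= tags and not (t & skip_tags))

print(tests_to_run({"Dictionary", "Array"}, set()))   # ['TwoSum']
print(tests_to_run(set(), {"unstable", "skip"}))      # ['Array2D', 'DictionarySwap', 'TwoSum']
```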
private var unsupportedPlatforms: BenchmarkPlatformSet @@ -146,7 +158,7 @@ public struct BenchmarkInfo { unsupportedPlatforms: BenchmarkPlatformSet = []) { self.name = name self._runFunction = runFunction - self.tags = tags + self.tags = Set(tags) self._setUpFunction = setUpFunction self._tearDownFunction = tearDownFunction self.unsupportedPlatforms = unsupportedPlatforms diff --git a/test/benchmark/Benchmark_Driver.test-sh b/test/benchmark/Benchmark_Driver.test-sh new file mode 100644 index 0000000000000..dec24a369da2b --- /dev/null +++ b/test/benchmark/Benchmark_Driver.test-sh @@ -0,0 +1,22 @@ +// REQUIRES: OS=macosx +// REQUIRES: asserts +// REQUIRES: benchmark +// REQUIRES: CMAKE_GENERATOR=Ninja + +// Integration tests between Benchmark_Driver and Benchmark_O +// TODO: Keep the "run just once" check and move the rest into unit tests for +// Benchmark_Driver, as they are redundant and unnecessarily slow. + +// RUN: %Benchmark_Driver run Ackermann | %FileCheck %s --check-prefix RUNNAMED +// RUNNAMED: Ackermann + +// RUN: %Benchmark_Driver run 1 | %FileCheck %s --check-prefix RUNBYNUMBER +// RUNBYNUMBER: Ackermann + +// RUN: %Benchmark_Driver run 1 Ackermann 1 \ +// RUN: | %FileCheck %s --check-prefix RUNJUSTONCE +// RUNJUSTONCE-LABEL: Ackermann +// RUNJUSTONCE-NOT: Ackermann + +// RUN: %Benchmark_Driver run -f Acker | %FileCheck %s --check-prefix RUNFILTER +// RUNFILTER: Ackermann diff --git a/test/benchmark/Benchmark_O.test-sh b/test/benchmark/Benchmark_O.test-sh new file mode 100644 index 0000000000000..7e991e9240c2d --- /dev/null +++ b/test/benchmark/Benchmark_O.test-sh @@ -0,0 +1,50 @@ +// REQUIRES: OS=macosx +// REQUIRES: asserts +// REQUIRES: benchmark +// REQUIRES: CMAKE_GENERATOR=Ninja + +// RUN: %Benchmark_O --list | %FileCheck %s --check-prefix LISTTAGS +// LISTTAGS: AngryPhonebook,[ +// LISTTAGS-NOT: TestsUtils.BenchmarkCategory. +// LISTTAGS-SAME: String, +// LISTTAGS-SAME: ] + +// RUN: %Benchmark_O AngryPhonebook --num-iters=1 \ +// RUN: | %FileCheck %s --check-prefix NUMITERS1 +// NUMITERS1: AngryPhonebook,1 +// NUMITERS1-NOT: 0,0,0,0,0 + +// Should run benchmark by name, even if its tags match the default skip-tags +// (unstable,skip). 
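The new lit tests here drive the real `Benchmark_Driver` and `Benchmark_O` binaries end to end; the `Benchmark_O` checks continue below, with Ackermann serving as the unstable example. For a quick manual check outside of lit, a throwaway script along these lines exercises the same `--skip-tags` default; the `BENCHMARK_O` path is a placeholder assumption, adjust it to your build:

```python
# Rough manual check mirroring the lit tests: with the default skip tags the
# unstable Ackermann benchmark is hidden from --list, while an explicit empty
# --skip-tags= lists everything. BENCHMARK_O is a placeholder path.
import subprocess

BENCHMARK_O = "./bin/Benchmark_O"  # adjust to your build directory

def listed_tests(extra_args=()):
    """Names reported by `Benchmark_O --list`, using a tab delimiter."""
    cmd = [BENCHMARK_O, "--list", "--delim=\t"] + list(extra_args)
    out = subprocess.check_output(cmd).decode()
    return [line.split("\t")[1] for line in out.splitlines()[1:] if line]

if __name__ == "__main__":
    assert "Ackermann" not in listed_tests()              # hidden by the skip,unstable default
    assert "Ackermann" in listed_tests(["--skip-tags="])  # listed when nothing is skipped
```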
Ackermann is marked unstable +// RUN: %Benchmark_O Ackermann | %FileCheck %s --check-prefix NAMEDSKIP +// NAMEDSKIP: Ackermann + +// RUN: %Benchmark_O --list --tags=Dictionary,Array \ +// RUN: | %FileCheck %s --check-prefix ANDTAGS +// ANDTAGS: TwoSum +// ANDTAGS-NOT: Array2D +// ANDTAGS-NOT: DictionarySwap + +// RUN: %Benchmark_O --list --tags=algorithm --skip-tags=validation \ +// RUN: | %FileCheck %s --check-prefix TAGSANDSKIPTAGS +// TAGSANDSKIPTAGS: Ackermann +// TAGSANDSKIPTAGS: DictOfArraysToArrayOfDicts +// TAGSANDSKIPTAGS: Fibonacci +// TAGSANDSKIPTAGS: RomanNumbers + +// RUN: %Benchmark_O --list --tags=algorithm \ +// RUN: --skip-tags=validation,Dictionary,String \ +// RUN: | %FileCheck %s --check-prefix ORSKIPTAGS +// ORSKIPTAGS: Ackermann +// ORSKIPTAGS-NOT: DictOfArraysToArrayOfDicts +// ORSKIPTAGS: Fibonacci +// ORSKIPTAGS-NOT: RomanNumbers + +// RUN: %Benchmark_O --list | %FileCheck %s --check-prefix LISTPRECOMMIT +// LISTPRECOMMIT: #,Test,[Tags] +// LISTPRECOMMIT-NOT: Ackermann +// LISTPRECOMMIT: {{[0-9]+}},AngryPhonebook + +// RUN: %Benchmark_O --list --skip-tags= | %FileCheck %s --check-prefix LISTALL +// LISTALL: Ackermann +// LISTALL: AngryPhonebook diff --git a/validation-test/Python/benchmark-scripts.test-sh b/test/benchmark/benchmark-scripts.test-sh similarity index 100% rename from validation-test/Python/benchmark-scripts.test-sh rename to test/benchmark/benchmark-scripts.test-sh diff --git a/test/lit.cfg b/test/lit.cfg index 10a0f0dd2cf7f..46f5e7c56fd37 100644 --- a/test/lit.cfg +++ b/test/lit.cfg @@ -272,6 +272,8 @@ config.complete_test = inferSwiftBinary('complete-test') config.swift_api_digester = inferSwiftBinary('swift-api-digester') config.swift_refactor = inferSwiftBinary('swift-refactor') config.swift_demangle_yamldump = inferSwiftBinary('swift-demangle-yamldump') +config.benchmark_o = inferSwiftBinary('Benchmark_O') +config.benchmark_driver = inferSwiftBinary('Benchmark_Driver') config.swift_utils = make_path(config.swift_src_root, 'utils') config.line_directive = make_path(config.swift_utils, 'line-directive') @@ -366,6 +368,8 @@ config.substitutions.append( ('%swift-llvm-opt', config.swift_llvm_opt) ) config.substitutions.append( ('%llvm-dwarfdump', config.llvm_dwarfdump) ) config.substitutions.append( ('%llvm-dis', config.llvm_dis) ) config.substitutions.append( ('%swift-demangle-yamldump', config.swift_demangle_yamldump) ) +config.substitutions.append( ('%Benchmark_O', config.benchmark_o) ) +config.substitutions.append( ('%Benchmark_Driver', config.benchmark_driver) ) # This must come after all substitutions containing "%swift". config.substitutions.append( diff --git a/test/lit.site.cfg.in b/test/lit.site.cfg.in index 52f935e1095a3..5d4017d092845 100644 --- a/test/lit.site.cfg.in +++ b/test/lit.site.cfg.in @@ -88,6 +88,9 @@ config.available_features.add("CMAKE_GENERATOR=@CMAKE_GENERATOR@") if "@SWIFT_ENABLE_SOURCEKIT_TESTS@" == "TRUE": config.available_features.add('sourcekit') +if "@SWIFT_BUILD_PERF_TESTSUITE@" == "TRUE": + config.available_features.add('benchmark') + if "@SWIFT_ENABLE_GUARANTEED_NORMAL_ARGUMENTS@" == "TRUE": config.available_features.add('plus_zero_runtime') else:
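The `lit.cfg` and `lit.site.cfg.in` changes above wire the new tests into the test runner: the `benchmark` feature is only available when the performance suite was built (`SWIFT_BUILD_PERF_TESTSUITE`), so tests marked `REQUIRES: benchmark` are reported as unsupported otherwise, and `%Benchmark_O` / `%Benchmark_Driver` expand to the binaries located by `inferSwiftBinary`. As a toy illustration of what such a substitution does to a RUN line (the paths are invented, and lit's real substitution machinery is more involved):

```python
# Toy model of lit substitution: RUN lines are textually expanded before they
# are executed. The paths are invented; in lit.cfg they come from inferSwiftBinary().
substitutions = [
    ("%Benchmark_Driver", "/build/swift/benchmark/scripts/Benchmark_Driver"),
    ("%Benchmark_O", "/build/swift/bin/Benchmark_O"),
]

def expand(run_line):
    for pattern, replacement in substitutions:
        run_line = run_line.replace(pattern, replacement)
    return run_line

print(expand("%Benchmark_Driver run 1 Ackermann 1"))
# /build/swift/benchmark/scripts/Benchmark_Driver run 1 Ackermann 1
```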