From b7eb476f948a645fbb6ef559c1ef8ba8afe6de38 Mon Sep 17 00:00:00 2001
From: Pavol Vaskovic
Date: Sat, 7 Jul 2018 09:33:19 +0200
Subject: [PATCH 01/10] [benchmark] Moved tests for benchmark scripts

Moved `lit` test that runs unit tests for benchmark scripts from
`validation-test/Python` to the `test/benchmark` directory.

Run the tests for benchmark infrastructure with a single lit invocation:
````
swift-source$ ./llvm/utils/lit/lit.py -sv ${SWIFT_BUILD_DIR}/test-macosx-x86_64/benchmark
````

Documented the invocation of benchmark infrastructure tests in README.md.
---
 benchmark/README.md                      | 18 ++++++++++++++++++
 .../benchmark}/benchmark-scripts.test-sh |  0
 2 files changed, 18 insertions(+)
 rename {validation-test/Python => test/benchmark}/benchmark-scripts.test-sh (100%)

diff --git a/benchmark/README.md b/benchmark/README.md
index bb1722eafec49..4d1f70847fed0 100644
--- a/benchmark/README.md
+++ b/benchmark/README.md
@@ -186,3 +186,21 @@ public func run_YourTestName(N: Int) {
 
 The current set of tags are defined by the `BenchmarkCategory` enum in
 `TestsUtils.swift` .
+
+Testing the Benchmark Drivers
+-----------------------------
+When working on tests, after the initial build
+````
+swift-source$ ./swift/utils/build-script -R -B
+````
+you can rebuild just the benchmarks:
+````
+swift-source$ export SWIFT_BUILD_DIR=`pwd`/build/Ninja-ReleaseAssert/swift-macosx-x86_64
+swift-source$ ninja -C ${SWIFT_BUILD_DIR} swift-benchmark-macosx-x86_64
+````
+
+When modifying the testing infrastructure, you should verify that your changes
+pass all the tests:
+````
+swift-source$ ./llvm/utils/lit/lit.py -sv ${SWIFT_BUILD_DIR}/test-macosx-x86_64/benchmark
+````
diff --git a/validation-test/Python/benchmark-scripts.test-sh b/test/benchmark/benchmark-scripts.test-sh
similarity index 100%
rename from validation-test/Python/benchmark-scripts.test-sh
rename to test/benchmark/benchmark-scripts.test-sh

From 7438ffd0bb0fe6c13e9dc1382e2f64b9bb3a91c6 Mon Sep 17 00:00:00 2001
From: Pavol Vaskovic
Date: Wed, 11 Jul 2018 23:16:57 +0200
Subject: [PATCH 02/10] [benchmark] Tests for Benchmark_O
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Added `lit` substitution for running the Benchmark_O binary.

Introduced `lit` tests for `Benchmark_O` to demonstrate bugs:
* listing benchmarks’ tags
* running with `--num-iters=1`
---
 test/benchmark/Benchmark_O.test-sh | 15 +++++++++++++++
 test/lit.cfg                       |  2 ++
 test/lit.site.cfg.in               |  3 +++
 3 files changed, 20 insertions(+)
 create mode 100644 test/benchmark/Benchmark_O.test-sh

diff --git a/test/benchmark/Benchmark_O.test-sh b/test/benchmark/Benchmark_O.test-sh
new file mode 100644
index 0000000000000..62a896da66385
--- /dev/null
+++ b/test/benchmark/Benchmark_O.test-sh
@@ -0,0 +1,15 @@
+// REQUIRES: OS=macosx
+// REQUIRES: asserts
+// REQUIRES: benchmark
+// REQUIRES: CMAKE_GENERATOR=Ninja
+
+// RUN: %Benchmark_O --list | %FileCheck %s --check-prefix LISTTAGS
+// LISTTAGS: AngryPhonebook,[
+// LISTTAGS-SAME: TestsUtils.BenchmarkCategory.
+// LISTTAGS-SAME: String
+// LISTTAGS-SAME: ]
+
+// RUN: %Benchmark_O AngryPhonebook --num-iters=1 \
+// RUN: | %FileCheck %s --check-prefix NUMITERS1
+// NUMITERS1: AngryPhonebook,1
+// NUMITERS1-SAME: 0,0,0,0,0
diff --git a/test/lit.cfg b/test/lit.cfg
index 10a0f0dd2cf7f..830996f48cd93 100644
--- a/test/lit.cfg
+++ b/test/lit.cfg
@@ -272,6 +272,7 @@ config.complete_test = inferSwiftBinary('complete-test')
 config.swift_api_digester = inferSwiftBinary('swift-api-digester')
 config.swift_refactor = inferSwiftBinary('swift-refactor')
 config.swift_demangle_yamldump = inferSwiftBinary('swift-demangle-yamldump')
+config.benchmark_o = inferSwiftBinary('Benchmark_O')
 
 config.swift_utils = make_path(config.swift_src_root, 'utils')
 config.line_directive = make_path(config.swift_utils, 'line-directive')
diff --git a/test/lit.site.cfg.in b/test/lit.site.cfg.in
index 52f935e1095a3..5d4017d092845 100644
--- a/test/lit.site.cfg.in
+++ b/test/lit.site.cfg.in
@@ -88,6 +88,9 @@ config.available_features.add("CMAKE_GENERATOR=@CMAKE_GENERATOR@")
 if "@SWIFT_ENABLE_SOURCEKIT_TESTS@" == "TRUE":
     config.available_features.add('sourcekit')
 
+if "@SWIFT_BUILD_PERF_TESTSUITE@" == "TRUE":
+    config.available_features.add('benchmark')
+
 if "@SWIFT_ENABLE_GUARANTEED_NORMAL_ARGUMENTS@" == "TRUE":
     config.available_features.add('plus_zero_runtime')
 else:

From 4c4c6a24090a59033ca1a37caa47a67717340b3d Mon Sep 17 00:00:00 2001
From: Pavol Vaskovic
Date: Sat, 7 Jul 2018 04:38:37 +0200
Subject: [PATCH 03/10] [benchmark] Fix: Better tags in benchmark list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When listing benchmarks with the `--list` parameter, present the tags in a
format that is actually accepted by the `--tags` and `--skip-tags` parameters.

Changes the `--list` output from
````
Enabled Tests,Tags
AngryPhonebook,[TestsUtils.BenchmarkCategory.validation, TestsUtils.BenchmarkCategory.api, TestsUtils.BenchmarkCategory.String]
...
````
into
````
Enabled Tests,Tags
AngryPhonebook,[String, api, validation]
…
````
---
 benchmark/utils/DriverUtils.swift  |  6 +++---
 benchmark/utils/TestsUtils.swift   | 12 ++++++++++++
 test/benchmark/Benchmark_O.test-sh |  4 ++--
 3 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/benchmark/utils/DriverUtils.swift b/benchmark/utils/DriverUtils.swift
index bf7b4d56a5653..bdb7755b83eda 100644
--- a/benchmark/utils/DriverUtils.swift
+++ b/benchmark/utils/DriverUtils.swift
@@ -69,7 +69,7 @@ struct Test {
 
   /// The benchmark categories that this test belongs to. Used for filtering.
   var tags: [BenchmarkCategory] {
-    return benchInfo.tags
+    return benchInfo.tags.sorted()
   }
 
   /// An optional initialization function for a benchmark that is run before
@@ -181,7 +181,7 @@ struct TestConfig {
 
     // We support specifying multiple tags by splitting on comma, i.e.:
     //
-    //  --tags=array,set
+    //  --tags=Array,Dictionary
     //
     // FIXME: If we used Error instead of .fail, then we could have a cleaner
     // impl here using map on x and tags.formUnion.
@@ -200,7 +200,7 @@ struct TestConfig {
 
     // We support specifying multiple tags by splitting on comma, i.e.:
     //
-    //  --skip-tags=array,set
+    //  --skip-tags=Array,Set,unstable,skip
     //
     // FIXME: If we used Error instead of .fail, then we could have a cleaner
     // impl here using map on x and tags.formUnion.
diff --git a/benchmark/utils/TestsUtils.swift b/benchmark/utils/TestsUtils.swift
index 594375c67a6b9..7a876b3d4187e 100644
--- a/benchmark/utils/TestsUtils.swift
+++ b/benchmark/utils/TestsUtils.swift
@@ -70,6 +70,18 @@ public enum BenchmarkCategory : String {
   case skip
 }
 
+extension BenchmarkCategory : CustomStringConvertible {
+  public var description: String {
+    return self.rawValue
+  }
+}
+
+extension BenchmarkCategory : Comparable {
+  public static func < (lhs: BenchmarkCategory, rhs: BenchmarkCategory) -> Bool {
+    return lhs.rawValue < rhs.rawValue
+  }
+}
+
 public struct BenchmarkPlatformSet : OptionSet {
   public let rawValue: Int
 
diff --git a/test/benchmark/Benchmark_O.test-sh b/test/benchmark/Benchmark_O.test-sh
index 62a896da66385..2e5f4bad98b90 100644
--- a/test/benchmark/Benchmark_O.test-sh
+++ b/test/benchmark/Benchmark_O.test-sh
@@ -5,8 +5,8 @@
 
 // RUN: %Benchmark_O --list | %FileCheck %s --check-prefix LISTTAGS
 // LISTTAGS: AngryPhonebook,[
-// LISTTAGS-SAME: TestsUtils.BenchmarkCategory.
-// LISTTAGS-SAME: String
+// LISTTAGS-NOT: TestsUtils.BenchmarkCategory.
+// LISTTAGS-SAME: String,
 // LISTTAGS-SAME: ]
 
 // RUN: %Benchmark_O AngryPhonebook --num-iters=1 \

From 8b03980cb0a48f0c8bf5870286484a1a03cf4c14 Mon Sep 17 00:00:00 2001
From: Pavol Vaskovic
Date: Wed, 11 Jul 2018 22:23:32 +0200
Subject: [PATCH 04/10] [benchmark] DRY: Call setup and teardown only once

---
 benchmark/utils/DriverUtils.swift | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/benchmark/utils/DriverUtils.swift b/benchmark/utils/DriverUtils.swift
index bdb7755b83eda..b220bc07817da 100644
--- a/benchmark/utils/DriverUtils.swift
+++ b/benchmark/utils/DriverUtils.swift
@@ -382,14 +382,13 @@ func runBench(_ test: Test, _ c: TestConfig) -> BenchResults? {
 
   let sampler = SampleRunner()
   for s in 0..<c.numSamples {
+    test.setUpFunction?()
     let time_per_sample: UInt64 = 1_000_000_000 * UInt64(c.sampleTime)
 
     var scale : UInt
     var elapsed_time : UInt64 = 0
     if c.fixedNumIters == 0 {
-      test.setUpFunction?()
       elapsed_time = sampler.run(test.name, fn: testFn, num_iters: 1)
-      test.tearDownFunction?()
 
       if elapsed_time > 0 {
         scale = UInt(time_per_sample / elapsed_time)
@@ -413,9 +412,7 @@ func runBench(_ test: Test, _ c: TestConfig) -> BenchResults? {
       if c.verbose {
         print(" Measuring with scale \(scale).")
       }
-      test.setUpFunction?()
       elapsed_time = sampler.run(test.name, fn: testFn, num_iters: scale)
-      test.tearDownFunction?()
     } else {
       scale = 1
     }
@@ -424,6 +421,7 @@ func runBench(_ test: Test, _ c: TestConfig) -> BenchResults? {
     if c.verbose {
       print(" Sample \(s),\(samples[s])")
     }
+    test.tearDownFunction?()
   }
 
   let (mean, sd) = internalMeanSD(samples)

From d40ddabcd5c28140e6ee6bd474a33195c00b24f7 Mon Sep 17 00:00:00 2001
From: Pavol Vaskovic
Date: Sat, 7 Jul 2018 01:47:54 +0200
Subject: [PATCH 05/10] [benchmark] Fix: running with `--num-iters=1`

Fixed a bug where the `elapsed_time` was always 0 when `--num-iters=1` was
specified.
---
 benchmark/utils/DriverUtils.swift  | 3 +++
 test/benchmark/Benchmark_O.test-sh | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/benchmark/utils/DriverUtils.swift b/benchmark/utils/DriverUtils.swift
index b220bc07817da..017a7580dabea 100644
--- a/benchmark/utils/DriverUtils.swift
+++ b/benchmark/utils/DriverUtils.swift
@@ -401,6 +401,9 @@ func runBench(_ test: Test, _ c: TestConfig) -> BenchResults? {
     } else {
       // Compute the scaling factor if a fixed c.fixedNumIters is not specified.
       scale = c.fixedNumIters
+      if scale == 1 {
+        elapsed_time = sampler.run(test.name, fn: testFn, num_iters: 1)
+      }
     }
     // Make integer overflow less likely on platforms where Int is 32 bits wide.
     // FIXME: Switch BenchmarkInfo to use Int64 for the iteration scale, or fix
diff --git a/test/benchmark/Benchmark_O.test-sh b/test/benchmark/Benchmark_O.test-sh
index 2e5f4bad98b90..2de4127e08732 100644
--- a/test/benchmark/Benchmark_O.test-sh
+++ b/test/benchmark/Benchmark_O.test-sh
@@ -12,4 +12,4 @@
 // RUN: %Benchmark_O AngryPhonebook --num-iters=1 \
 // RUN: | %FileCheck %s --check-prefix NUMITERS1
 // NUMITERS1: AngryPhonebook,1
-// NUMITERS1-SAME: 0,0,0,0,0
+// NUMITERS1-NOT: 0,0,0,0,0

From cf1d78be6b95827f238d578290fcf6d476f504fa Mon Sep 17 00:00:00 2001
From: Pavol Vaskovic
Date: Sat, 7 Jul 2018 05:15:31 +0200
Subject: [PATCH 06/10] [benchmark] Test running skip-tag-marked benchmark

Added test: It should be possible to run a benchmark by name, even if its
tags match the default skip-tags.

Added verification tests for benchmark filtering with the `tags` and
`skip-tags` parameters.
---
 test/benchmark/Benchmark_O.test-sh | 31 +++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/test/benchmark/Benchmark_O.test-sh b/test/benchmark/Benchmark_O.test-sh
index 2de4127e08732..ea9daeaadfda0 100644
--- a/test/benchmark/Benchmark_O.test-sh
+++ b/test/benchmark/Benchmark_O.test-sh
@@ -10,6 +10,35 @@
 // RUN: %Benchmark_O AngryPhonebook --num-iters=1 \
-// RUN: | %FileCheck %s --check-prefix NUMITERS1
+// RUN:   | %FileCheck %s --check-prefix NUMITERS1
 // NUMITERS1: AngryPhonebook,1
 // NUMITERS1-NOT: 0,0,0,0,0
+
+// Should run benchmark by name, even if its tags match the default skip-tags
+// (unstable,skip). Ackermann is marked unstable
+// RUN: %Benchmark_O Ackermann | %FileCheck %s --check-prefix NAMEDSKIP
+// NAMEDSKIP-NOT: Ackermann
+// RUN: %Benchmark_O Ackermann --skip-tags= \
+// RUN:   | %FileCheck %s --check-prefix NAMEDSKIPWORKAROUND
+// NAMEDSKIPWORKAROUND: Ackermann
+
+// RUN: %Benchmark_O --list --tags=Dictionary,Array \
+// RUN:   | %FileCheck %s --check-prefix ANDTAGS
+// ANDTAGS: TwoSum
+// ANDTAGS-NOT: Array2D
+// ANDTAGS-NOT: DictionarySwap
+
+// RUN: %Benchmark_O --list --tags=algorithm --skip-tags=validation \
+// RUN:   | %FileCheck %s --check-prefix TAGSANDSKIPTAGS
+// TAGSANDSKIPTAGS: Ackermann
+// TAGSANDSKIPTAGS: DictOfArraysToArrayOfDicts
+// TAGSANDSKIPTAGS: Fibonacci
+// TAGSANDSKIPTAGS: RomanNumbers
+
+// RUN: %Benchmark_O --list --tags=algorithm \
+// RUN:   --skip-tags=validation,Dictionary,String \
+// RUN:   | %FileCheck %s --check-prefix ORSKIPTAGS
+// ORSKIPTAGS: Ackermann
+// ORSKIPTAGS-NOT: DictOfArraysToArrayOfDicts
+// ORSKIPTAGS: Fibonacci
+// ORSKIPTAGS-NOT: RomanNumbers

From 2d004970fd6a63e9a4df4f533673d739ebedb6ee Mon Sep 17 00:00:00 2001
From: Pavol Vaskovic
Date: Mon, 9 Jul 2018 17:41:27 +0200
Subject: [PATCH 07/10] [benchmark] Fix: Running skip-tag-marked benchmark

Also updated benchmark documentation with a more detailed description of tag
handling.
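
The intended filtering semantics, described in the README change below, can be
sketched with plain `Set` operations (a simplified model for illustration only,
not the driver code itself; the concrete tag values here are made up):
````
let benchmarkTags: Set<String> = ["String", "api", "validation"]
let tags: Set<String> = ["api", "String"]         // e.g. --tags=api,String
let skipTags: Set<String> = ["unstable", "skip"]  // the default --skip-tags

// --tags is a logical AND: the benchmark must carry all requested tags.
// --skip-tags is a logical OR: the benchmark must carry none of them.
let shouldRun = benchmarkTags.isSuperset(of: tags) &&
  benchmarkTags.isDisjoint(with: skipTags)
print(shouldRun) // true
````
Running a benchmark by its explicit name bypasses the tag check entirely,
which is what this fix restores.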
---
 benchmark/README.md                | 13 ++++++++++++-
 benchmark/utils/DriverUtils.swift  | 44 ++++++++++----------------------------
 benchmark/utils/TestsUtils.swift   |  4 ++--
 test/benchmark/Benchmark_O.test-sh |  5 +----
 4 files changed, 27 insertions(+), 39 deletions(-)

diff --git a/benchmark/README.md b/benchmark/README.md
index 4d1f70847fed0..9f7400ef10154 100644
--- a/benchmark/README.md
+++ b/benchmark/README.md
@@ -86,13 +86,24 @@ Using the Benchmark Driver
 * `--num-samples`
     * Control the number of samples to take for each test
 * `--list`
-    * Print a list of available tests
+    * Print a list of available tests matching specified criteria
+* `--tags`
+    * Run tests that are labeled with the specified [tags](https://github.com/apple/swift/blob/master/benchmark/utils/TestsUtils.swift#L19)
+      (comma separated list); multiple tags are interpreted as logical AND, i.e.
+      run only tests that are labeled with all the supplied tags
+* `--skip-tags`
+    * Don't run tests that are labeled with any of the specified tags (comma
+      separated list); default value: `skip,unstable`; to get a complete list of
+      tests, specify empty `--skip-tags=`
+
 
 ### Examples
 
 * `$ ./Benchmark_O --num-iters=1 --num-samples=1`
 * `$ ./Benchmark_Onone --list`
 * `$ ./Benchmark_Osize Ackermann`
+* `$ ./Benchmark_O --tags=Dictionary`
+* `$ ./Benchmark_O --skip-tags=unstable,skip,validation`
 
 ### Note
 As a shortcut, you can also refer to benchmarks by their ordinal numbers.
diff --git a/benchmark/utils/DriverUtils.swift b/benchmark/utils/DriverUtils.swift
index 017a7580dabea..cc1a468ee4290 100644
--- a/benchmark/utils/DriverUtils.swift
+++ b/benchmark/utils/DriverUtils.swift
@@ -227,39 +227,19 @@ struct TestConfig {
   }
 
   mutating func findTestsToRun() {
-    let benchmarkNameFilter = Set(filters)
-
-    // t is needed so we don't capture an ivar of a mutable inout self.
-    let t = tags
-    let st = skipTags
-    let filteredTests = Array(registeredBenchmarks.filter { benchInfo in
-      if !t.isSubset(of: benchInfo.tags) {
-        return false
-      }
-
-      if !st.isDisjoint(with: benchInfo.tags) {
-        return false
-      }
-
-      // If the user did not specified a benchmark name filter and our tags are
-      // a subset of the specified tags by the user, return true. We want to run
-      // this test.
-      if benchmarkNameFilter.isEmpty {
-        return true
-      }
-
-      // Otherwise, we need to check if our benchInfo's name is in the benchmark
-      // name filter list. If it isn't, then we shouldn't process it.
-      return benchmarkNameFilter.contains(benchInfo.name)
-    }).sorted()
-
-    if (filteredTests.isEmpty) {
-      return
-    }
-
-    tests = filteredTests.enumerated().map {
-      Test(benchInfo: $0.element, index: $0.offset + 1)
-    }
+    registeredBenchmarks.sort()
+    let benchmarkNames = Set(filters)
+    // needed so we don't capture an ivar of a mutable inout self.
+    let (_tags, _skipTags) = (tags, skipTags)
+
+    tests = registeredBenchmarks.filter { benchmark in
+      if benchmarkNames.isEmpty {
+        return benchmark.tags.isSuperset(of: _tags) &&
+          benchmark.tags.isDisjoint(with: _skipTags)
+      } else {
+        return benchmarkNames.contains(benchmark.name)
+      }
+    }.enumerated().map { Test(benchInfo: $0.element, index: $0.offset + 1) }
   }
 }
 
diff --git a/benchmark/utils/TestsUtils.swift b/benchmark/utils/TestsUtils.swift
index 7a876b3d4187e..1330853443eaa 100644
--- a/benchmark/utils/TestsUtils.swift
+++ b/benchmark/utils/TestsUtils.swift
@@ -123,7 +123,7 @@ public struct BenchmarkInfo {
   /// A set of category tags that describe this benchmark. This is used by the
   /// harness to allow for easy slicing of the set of benchmarks along tag
   /// boundaries, e.x.: run all string benchmarks or ref count benchmarks, etc.
-  public var tags: [BenchmarkCategory]
+  public var tags: Set<BenchmarkCategory>
 
   /// The platforms that this benchmark supports. This is an OptionSet.
   private var unsupportedPlatforms: BenchmarkPlatformSet
@@ -158,7 +158,7 @@ public struct BenchmarkInfo {
               unsupportedPlatforms: BenchmarkPlatformSet = []) {
     self.name = name
     self._runFunction = runFunction
-    self.tags = tags
+    self.tags = Set(tags)
     self._setUpFunction = setUpFunction
     self._tearDownFunction = tearDownFunction
     self.unsupportedPlatforms = unsupportedPlatforms
diff --git a/test/benchmark/Benchmark_O.test-sh b/test/benchmark/Benchmark_O.test-sh
index ea9daeaadfda0..551cd156a3750 100644
--- a/test/benchmark/Benchmark_O.test-sh
+++ b/test/benchmark/Benchmark_O.test-sh
@@ -17,10 +17,7 @@
 // Should run benchmark by name, even if its tags match the default skip-tags
 // (unstable,skip). Ackermann is marked unstable
 // RUN: %Benchmark_O Ackermann | %FileCheck %s --check-prefix NAMEDSKIP
-// NAMEDSKIP-NOT: Ackermann
-// RUN: %Benchmark_O Ackermann --skip-tags= \
-// RUN:   | %FileCheck %s --check-prefix NAMEDSKIPWORKAROUND
-// NAMEDSKIPWORKAROUND: Ackermann
+// NAMEDSKIP: Ackermann
 
 // RUN: %Benchmark_O --list --tags=Dictionary,Array \

From d82c99666996837bfe9412b3a6541bfb3ef680e9 Mon Sep 17 00:00:00 2001
From: Pavol Vaskovic
Date: Mon, 9 Jul 2018 21:19:09 +0200
Subject: [PATCH 08/10] [benchmark] Fixed Benchmark_Driver running tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixed failure in `get_tests` which depended on the removed `Benchmark_O
--run-all` option for listing all tests (not just the pre-commit set).

Fix: Restored the ability to run tests by ordinal number from
`Benchmark_Driver` after the support for this was removed from `Benchmark_O`.

Added tests that verify the output format of `Benchmark_O --list` and the
support for the `--skip-tags=` option, which effectively replaced the old
`--run-all` option. Other tools, like `Benchmark_Driver`, depend on it.

Added integration tests for the dependency between `Benchmark_Driver` and
`Benchmark_O`. Running the pre-commit test set isn't tested explicitly here.
It would take too long and it is run fairly frequently by CI bots, so if that
breaks, we'll know soon enough.
---
 benchmark/scripts/Benchmark_Driver      | 32 ++++++++++++++-----------
 test/benchmark/Benchmark_Driver.test-sh | 22 +++++++++++++++++
 test/benchmark/Benchmark_O.test-sh      |  9 +++++++
 test/lit.cfg                            |  2 ++
 4 files changed, 51 insertions(+), 14 deletions(-)
 create mode 100644 test/benchmark/Benchmark_Driver.test-sh

diff --git a/benchmark/scripts/Benchmark_Driver b/benchmark/scripts/Benchmark_Driver
index cc918aec19e68..987691b471e18 100755
--- a/benchmark/scripts/Benchmark_Driver
+++ b/benchmark/scripts/Benchmark_Driver
@@ -118,28 +118,32 @@ def instrument_test(driver_path, test, num_samples):
     return avg_test_output
 
 
-BENCHMARK_OUTPUT_RE = re.compile('([^,]+),')
-
-
 def get_tests(driver_path, args):
     """Return a list of available performance tests"""
     driver = ([driver_path, '--list'])
+    # Use tab delimiter for easier parsing to override the default comma.
+    # (The third 'column' is always comma-separated list of tags in square
+    # brackets -- currently unused here.)
+    driver.append('--delim=\t')
     if args.benchmarks or args.filters:
-        driver.append('--run-all')
-    tests = []
-    for l in subprocess.check_output(driver).split("\n")[1:]:
-        m = BENCHMARK_OUTPUT_RE.match(l)
-        if m is None:
-            continue
-        tests.append(m.group(1))
+        driver.append('--skip-tags=')  # list all tests, don't skip any tags
+    names = [
+        line.split('\t')[0] for line in
+        subprocess.check_output(driver).split('\n')[1:-1]
+    ]
     if args.filters:
         regexes = [re.compile(pattern) for pattern in args.filters]
         return sorted(list(set([name for pattern in regexes
-                                for name in tests if pattern.match(name)])))
+                                for name in names if pattern.match(name)])))
     if not args.benchmarks:
-        return tests
-    tests.extend(map(str, range(1, len(tests) + 1)))  # ordinal numbers
-    return sorted(list(set(tests).intersection(set(args.benchmarks))))
+        return names
+    benchmarks = set(args.benchmarks)
+    indices = [str(i) for i in range(1, len(names) + 1)]
+    index_to_name = dict(zip(indices, names))
+    indexed_names = [index_to_name[i]
+                     for i in benchmarks.intersection(set(indices))]
+    return sorted(list(
+        benchmarks.intersection(set(names)).union(indexed_names)))
 
 
 def get_current_git_branch(git_repo_path):
diff --git a/test/benchmark/Benchmark_Driver.test-sh b/test/benchmark/Benchmark_Driver.test-sh
new file mode 100644
index 0000000000000..dec24a369da2b
--- /dev/null
+++ b/test/benchmark/Benchmark_Driver.test-sh
@@ -0,0 +1,22 @@
+// REQUIRES: OS=macosx
+// REQUIRES: asserts
+// REQUIRES: benchmark
+// REQUIRES: CMAKE_GENERATOR=Ninja
+
+// Integration tests between Benchmark_Driver and Benchmark_O
+// TODO: Keep the "run just once" check and move the rest into unit tests for
+// Benchmark_Driver, as they are redundant and unnecessarily slow.
+
+// RUN: %Benchmark_Driver run Ackermann | %FileCheck %s --check-prefix RUNNAMED
+// RUNNAMED: Ackermann
+
+// RUN: %Benchmark_Driver run 1 | %FileCheck %s --check-prefix RUNBYNUMBER
+// RUNBYNUMBER: Ackermann
+
+// RUN: %Benchmark_Driver run 1 Ackermann 1 \
+// RUN:   | %FileCheck %s --check-prefix RUNJUSTONCE
+// RUNJUSTONCE-LABEL: Ackermann
+// RUNJUSTONCE-NOT: Ackermann
+
+// RUN: %Benchmark_Driver run -f Acker | %FileCheck %s --check-prefix RUNFILTER
+// RUNFILTER: Ackermann
diff --git a/test/benchmark/Benchmark_O.test-sh b/test/benchmark/Benchmark_O.test-sh
index 551cd156a3750..b6f6624dc21eb 100644
--- a/test/benchmark/Benchmark_O.test-sh
+++ b/test/benchmark/Benchmark_O.test-sh
@@ -39,3 +39,12 @@
 // ORSKIPTAGS-NOT: DictOfArraysToArrayOfDicts
 // ORSKIPTAGS: Fibonacci
 // ORSKIPTAGS-NOT: RomanNumbers
+
+// RUN: %Benchmark_O --list | %FileCheck %s --check-prefix LISTPRECOMMIT
+// LISTPRECOMMIT: Enabled Tests,Tags
+// LISTPRECOMMIT-NOT: Ackermann
+// LISTPRECOMMIT: AngryPhonebook
+
+// RUN: %Benchmark_O --list --skip-tags= | %FileCheck %s --check-prefix LISTALL
+// LISTALL: Ackermann
+// LISTALL: AngryPhonebook
diff --git a/test/lit.cfg b/test/lit.cfg
index 830996f48cd93..46f5e7c56fd37 100644
--- a/test/lit.cfg
+++ b/test/lit.cfg
@@ -273,6 +273,7 @@ config.swift_api_digester = inferSwiftBinary('swift-api-digester')
 config.swift_refactor = inferSwiftBinary('swift-refactor')
 config.swift_demangle_yamldump = inferSwiftBinary('swift-demangle-yamldump')
 config.benchmark_o = inferSwiftBinary('Benchmark_O')
+config.benchmark_driver = inferSwiftBinary('Benchmark_Driver')
 
 config.swift_utils = make_path(config.swift_src_root, 'utils')
 config.line_directive = make_path(config.swift_utils, 'line-directive')
@@ -368,6 +369,7 @@ config.substitutions.append( ('%llvm-dwarfdump', config.llvm_dwarfdump) )
 config.substitutions.append( ('%llvm-dis', config.llvm_dis) )
 config.substitutions.append( ('%swift-demangle-yamldump', config.swift_demangle_yamldump) )
 config.substitutions.append( ('%Benchmark_O', config.benchmark_o) )
+config.substitutions.append( ('%Benchmark_Driver', config.benchmark_driver) )
 
 # This must come after all substitutions containing "%swift".
 config.substitutions.append(

From 7f894268b2548f040c5322d6913961eb87265a93 Mon Sep 17 00:00:00 2001
From: Pavol Vaskovic
Date: Mon, 9 Jul 2018 23:58:05 +0200
Subject: [PATCH 09/10] [benchmark] Restore running benchmarks by numbers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reintroduced a feature lost during `BenchmarkInfo` modernization: All
registered benchmarks are ordered alphabetically and assigned an index. This
number can be used as a shortcut to invoke the test instead of its full name.
(Adding and removing tests from the suite will naturally reassign the indices,
but they are stable for a given build.)

The `--list` parameter now prints the test *number*, *name* and *tags*
separated by the delimiter.

The `--list` output format is modified from:
````
Enabled Tests,Tags
AngryPhonebook,[String, api, validation]
...
````
to this:
````
\#,Test,[Tags]
2,AngryPhonebook,[String, api, validation]
…
````
(There isn’t actually a backslash before the `#`; git was eating the whole
line without it.)

Note: Test number 1 is Ackermann, which is marked as “skip”, so it’s not
listed with the default `skip-tags` value.

Fixes the issue where running tests via `Benchmark_Driver` always reported
each test as number 1. Each test is run independently, so every invocation
was “first”. Restoring test numbers resolves this issue back to the original
state: The number reported in the first column when executing the tests is
its ordinal number in the Swift Benchmark Suite.
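
The index assignment can be sketched in isolation like so (a stand-alone
example with made-up benchmark names, mirroring the
`Dictionary(uniqueKeysWithValues:)` approach used in the driver change below):
````
// Sort the registered benchmark names alphabetically, then number them from 1.
let registered = ["TwoSum", "Ackermann", "AngryPhonebook"]
let names = registered.sorted()
let indices = Dictionary(uniqueKeysWithValues: zip(names, 1...))

for name in names {
  print("\(indices[name]!),\(name)")  // e.g. "1,Ackermann"
}
````
A benchmark can then be selected either by its name or by the matching index,
and `--list` prints the index in the first column.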
---
 benchmark/scripts/Benchmark_Driver |  8 ++++----
 benchmark/utils/DriverUtils.swift  | 15 +++++++++------
 test/benchmark/Benchmark_O.test-sh |  4 ++--
 3 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/benchmark/scripts/Benchmark_Driver b/benchmark/scripts/Benchmark_Driver
index 987691b471e18..92f12c627855a 100755
--- a/benchmark/scripts/Benchmark_Driver
+++ b/benchmark/scripts/Benchmark_Driver
@@ -127,10 +127,11 @@ def get_tests(driver_path, args):
     driver.append('--delim=\t')
     if args.benchmarks or args.filters:
         driver.append('--skip-tags=')  # list all tests, don't skip any tags
-    names = [
-        line.split('\t')[0] for line in
+    index_name_pairs = [
+        line.split('\t')[:2] for line in
         subprocess.check_output(driver).split('\n')[1:-1]
     ]
+    indices, names = zip(*index_name_pairs)  # unzip list of pairs into 2 lists
     if args.filters:
         regexes = [re.compile(pattern) for pattern in args.filters]
         return sorted(list(set([name for pattern in regexes
@@ -138,8 +139,7 @@ def get_tests(driver_path, args):
     if not args.benchmarks:
         return names
     benchmarks = set(args.benchmarks)
-    indices = [str(i) for i in range(1, len(names) + 1)]
-    index_to_name = dict(zip(indices, names))
+    index_to_name = dict(index_name_pairs)
     indexed_names = [index_to_name[i]
                      for i in benchmarks.intersection(set(indices))]
     return sorted(list(
diff --git a/benchmark/utils/DriverUtils.swift b/benchmark/utils/DriverUtils.swift
index cc1a468ee4290..5675c767ecd9b 100644
--- a/benchmark/utils/DriverUtils.swift
+++ b/benchmark/utils/DriverUtils.swift
@@ -228,18 +228,21 @@ struct TestConfig {
 
   mutating func findTestsToRun() {
     registeredBenchmarks.sort()
-    let benchmarkNames = Set(filters)
+    let indices = Dictionary(uniqueKeysWithValues:
+      zip(registeredBenchmarks.map{$0.name}, 1...))
+    let benchmarkNamesOrIndices = Set(filters)
     // needed so we don't capture an ivar of a mutable inout self.
     let (_tags, _skipTags) = (tags, skipTags)
 
     tests = registeredBenchmarks.filter { benchmark in
-      if benchmarkNames.isEmpty {
+      if benchmarkNamesOrIndices.isEmpty {
         return benchmark.tags.isSuperset(of: _tags) &&
           benchmark.tags.isDisjoint(with: _skipTags)
       } else {
-        return benchmarkNames.contains(benchmark.name)
+        return benchmarkNamesOrIndices.contains(benchmark.name) ||
+          benchmarkNamesOrIndices.contains(String(indices[benchmark.name]!))
       }
-    }.enumerated().map { Test(benchInfo: $0.element, index: $0.offset + 1) }
+    }.map { Test(benchInfo: $0, index: indices[$0.name]!) }
   }
 }
 
@@ -478,9 +481,9 @@ public func main() {
     fatalError("\(msg)")
   case .listTests:
     config.findTestsToRun()
-    print("Enabled Tests\(config.delim)Tags")
+    print("#\(config.delim)Test\(config.delim)[Tags]")
     for t in config.tests {
-      print("\(t.name)\(config.delim)\(t.tags)")
+      print("\(t.index)\(config.delim)\(t.name)\(config.delim)\(t.tags)")
     }
   case .run:
     config.findTestsToRun()
diff --git a/test/benchmark/Benchmark_O.test-sh b/test/benchmark/Benchmark_O.test-sh
index b6f6624dc21eb..7e991e9240c2d 100644
--- a/test/benchmark/Benchmark_O.test-sh
+++ b/test/benchmark/Benchmark_O.test-sh
@@ -41,9 +41,9 @@
 // ORSKIPTAGS-NOT: RomanNumbers
 
 // RUN: %Benchmark_O --list | %FileCheck %s --check-prefix LISTPRECOMMIT
-// LISTPRECOMMIT: Enabled Tests,Tags
+// LISTPRECOMMIT: #,Test,[Tags]
 // LISTPRECOMMIT-NOT: Ackermann
-// LISTPRECOMMIT: AngryPhonebook
+// LISTPRECOMMIT: {{[0-9]+}},AngryPhonebook
 
 // RUN: %Benchmark_O --list --skip-tags= | %FileCheck %s --check-prefix LISTALL
 // LISTALL: Ackermann
 // LISTALL: AngryPhonebook

From c288f7553e0ae9ef380ad598d49e387edc17cdcb Mon Sep 17 00:00:00 2001
From: Pavol Vaskovic
Date: Mon, 9 Jul 2018 22:17:24 +0200
Subject: [PATCH 10/10] [benchmark] Updated docs with info on test numbers

* Description of listing and running tests by ordinal numbers
---
 benchmark/README.md | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/benchmark/README.md b/benchmark/README.md
index 9f7400ef10154..6d1e436401ed3 100644
--- a/benchmark/README.md
+++ b/benchmark/README.md
@@ -107,12 +107,17 @@ Using the Benchmark Driver
 
 ### Note
 As a shortcut, you can also refer to benchmarks by their ordinal numbers.
-The regular `--list` option does not provide these, but you can run:
-* `$ ./Benchmark_O --list --run-all | tail -n +2 | nl`
-You can use ordinal numbers instead of test names like this:
+These are printed out together with the benchmark names and tags using the
+`--list` parameter. For a complete list of all available performance tests, run:
+* `$ ./Benchmark_O --list --skip-tags=`
+
+You can use test numbers instead of test names like this:
 * `$ ./Benchmark_O 1 42`
 * `$ ./Benchmark_Driver run 1 42`
 
+Test numbers are not stable in the long run: adding and removing tests from the
+benchmark suite will reorder them, but they are stable for a given build.
+
 Using the Harness Generator
 ---------------------------