Skip to content

Commit

Permalink
feat(minor): Support thresholds for absolute checks (#223)
Browse files Browse the repository at this point in the history
For benchmarks that are not completely stable (e.g. in syscall or malloc counts) due to the use of async or networking, it is desirable to be able to specify some leeway even for the absolute checks derived from thresholds.

Co-authored-by: dimlio <122263440+dimlio@users.noreply.github.com>
  • Loading branch information
hassila and dimlio committed Jan 11, 2024
1 parent 02b4da9 commit 3bb4e7d
Show file tree
Hide file tree
Showing 17 changed files with 253 additions and 229 deletions.
42 changes: 31 additions & 11 deletions Plugins/BenchmarkTool/BenchmarkTool+Baselines.swift
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,12 @@ struct BenchmarkIdentifier: Codable, Hashable {
}
}

/// Convenience accessor for the identity used to key a benchmark in
/// baseline and threshold lookups.
extension Benchmark {
    /// A `BenchmarkIdentifier` composed of this benchmark's target and name.
    var benchmarkIdentifier: BenchmarkIdentifier {
        BenchmarkIdentifier(target: target, name: name)
    }
}

public extension Sequence where Iterator.Element: Hashable {
func unique() -> [Iterator.Element] {
var seen: Set<Iterator.Element> = []
Expand Down Expand Up @@ -386,19 +392,25 @@ extension BenchmarkTool {
}

extension BenchmarkBaseline {
func thresholdsForBenchmarks(_ benchmarks: [Benchmark], name: String, target: String, metric: BenchmarkMetric) -> BenchmarkThresholds {
func thresholdsForBenchmarks(
_ benchmarks: [Benchmark],
name: String,
target: String,
metric: BenchmarkMetric,
defaultThresholds: BenchmarkThresholds = BenchmarkThresholds.default
) -> BenchmarkThresholds {
let benchmark = benchmarks.filter { $0.name == name && $0.target == target }.first

guard let benchmark else {
return BenchmarkThresholds.default
return defaultThresholds
}

guard let thresholds = benchmark.configuration.thresholds else {
return BenchmarkThresholds.default
return defaultThresholds
}

guard let threshold = thresholds[metric] else {
return BenchmarkThresholds.default
return defaultThresholds
}

return threshold
Expand Down Expand Up @@ -445,20 +457,28 @@ extension BenchmarkBaseline: Equatable {
return allDeviationResults
}

public func failsAbsoluteThresholdChecks(benchmarks: [Benchmark]) -> BenchmarkResult.ThresholdDeviations {
public func failsAbsoluteThresholdChecks(benchmarks: [Benchmark],
p90Thresholds: [BenchmarkIdentifier :
[BenchmarkMetric: BenchmarkThresholds.AbsoluteThreshold]]) -> BenchmarkResult.ThresholdDeviations {
var allDeviationResults = BenchmarkResult.ThresholdDeviations()

for (lhsBenchmarkIdentifier, lhsBenchmarkResults) in results {
for lhsBenchmarkResult in lhsBenchmarkResults {
let thresholds = thresholdsForBenchmarks(benchmarks,
name: lhsBenchmarkIdentifier.name,
target: lhsBenchmarkIdentifier.target,
metric: lhsBenchmarkResult.metric)

let deviationResults = lhsBenchmarkResult.deviationsAgainstAbsoluteThresholds(thresholds,
name: lhsBenchmarkIdentifier.name,
target: lhsBenchmarkIdentifier.target)
allDeviationResults.append(deviationResults)
metric: lhsBenchmarkResult.metric,
defaultThresholds: BenchmarkThresholds.strict)

if let p90Thresholds = p90Thresholds[lhsBenchmarkIdentifier] {
if let p90Thresholds = p90Thresholds[lhsBenchmarkResult.metric] {
let deviationResults = lhsBenchmarkResult.deviationsAgainstAbsoluteThresholds(thresholds: thresholds,
p90Threshold: p90Thresholds,
name: lhsBenchmarkIdentifier.name,
target: lhsBenchmarkIdentifier.target)
allDeviationResults.append(deviationResults)
}
}
}
}

Expand Down
57 changes: 28 additions & 29 deletions Plugins/BenchmarkTool/BenchmarkTool+Operations.swift
Original file line number Diff line number Diff line change
Expand Up @@ -165,27 +165,18 @@ extension BenchmarkTool {
}
}

var p90Thresholds: [BenchmarkIdentifier : [BenchmarkMetric: BenchmarkThresholds.AbsoluteThreshold]] = [:]

if let benchmarkPath = checkAbsolutePath { // load statically defined thresholds for .p90
var thresholdsFound = false
benchmarks.forEach { benchmark in
let thresholds = BenchmarkTool.makeBenchmarkThresholds(path: benchmarkPath,
moduleName: benchmark.target,
benchmarkName: benchmark.name)
var transformed: [BenchmarkMetric: BenchmarkThresholds] = [:]
if let thresholds {
thresholdsFound = true
thresholds.forEach { key, value in
if let metric = BenchmarkMetric(argument: key) {
let absoluteThreshold: BenchmarkThresholds.AbsoluteThresholds = [.p90: value]
transformed[metric] = BenchmarkThresholds(absolute: absoluteThreshold)
}
}
if transformed.isEmpty == false {
benchmark.configuration.thresholds = transformed
}
if let thresholds = BenchmarkTool.makeBenchmarkThresholds(
path: benchmarkPath,
benchmarkIdentifier: benchmark.benchmarkIdentifier) {
p90Thresholds[benchmark.benchmarkIdentifier] = thresholds
}
}
if !thresholdsFound {

if p90Thresholds.isEmpty {
if benchmarks.count == 0 {
failBenchmark("No benchmarks matching filter selection, failing threshold check.",
exitCode: .thresholdRegression)
Expand All @@ -196,23 +187,31 @@ extension BenchmarkTool {
}
print("")

let deviationResults = currentBaseline.failsAbsoluteThresholdChecks(benchmarks: benchmarks)
let deviationResults = currentBaseline.failsAbsoluteThresholdChecks(benchmarks: benchmarks,
p90Thresholds: p90Thresholds)

if deviationResults.regressions.isEmpty {
if deviationResults.improvements.isEmpty {
print("Baseline '\(baselineName)' is EQUAL to the defined absolute baseline thresholds. (--check-absolute)")
if deviationResults.regressions.isEmpty && deviationResults.improvements.isEmpty {
print("Baseline '\(baselineName)' is EQUAL to the defined absolute baseline thresholds. (--check-absolute)")
} else {
if !deviationResults.regressions.isEmpty {
prettyPrintDeviation(baselineName: "p90 threshold",
comparingBaselineName: baselineName,
deviationResults: deviationResults.regressions,
deviationTitle: "Deviations worse than threshold")
}
if !deviationResults.improvements.isEmpty {
prettyPrintDeviation(baselineName: "p90 threshold",
comparingBaselineName: baselineName,
deviationResults: deviationResults.improvements,
deviationTitle: "Deviations better than threshold")
}
if !deviationResults.regressions.isEmpty {
failBenchmark("New baseline '\(baselineName)' is WORSE than the defined absolute baseline thresholds. (--check-absolute)",
exitCode: .thresholdRegression)
} else {
prettyPrintAbsoluteDeviation(baselineName: baselineName,
deviationResults: deviationResults.improvements)

failBenchmark("New baseline '\(baselineName)' is BETTER than the defined absolute baseline thresholds. (--check-absolute)",
exitCode: .thresholdImprovement)
}
} else {
prettyPrintAbsoluteDeviation(baselineName: baselineName,
deviationResults: deviationResults.regressions)
failBenchmark("New baseline '\(baselineName)' is WORSE than the defined absolute baseline thresholds. (--check-absolute)",
exitCode: .thresholdRegression)
}
} else {
guard benchmarkBaselines.count == 2 else {
Expand Down
48 changes: 3 additions & 45 deletions Plugins/BenchmarkTool/BenchmarkTool+PrettyPrinting.swift
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,8 @@ extension BenchmarkTool {

func prettyPrintDeviation(baselineName: String,
comparingBaselineName: String,
deviationResults: [BenchmarkResult.ThresholdDeviation]) {
deviationResults: [BenchmarkResult.ThresholdDeviation],
deviationTitle: String = "Threshold deviations") {
guard quiet == false else { return }

let metrics = deviationResults.map(\.metric).unique()
Expand All @@ -384,7 +385,7 @@ extension BenchmarkTool {
namesAndTargets.forEach { nameAndTarget in

printMarkdown("```")
"Threshold deviations for \(nameAndTarget.name):\(nameAndTarget.target)".printAsHeader(addWhiteSpace: false)
"\(deviationTitle) for \(nameAndTarget.name):\(nameAndTarget.target)".printAsHeader(addWhiteSpace: false)
printMarkdown("```")

metrics.forEach { metric in
Expand Down Expand Up @@ -435,47 +436,4 @@ extension BenchmarkTool {
}
}
}

func prettyPrintAbsoluteDeviation(baselineName: String,
deviationResults: [BenchmarkResult.ThresholdDeviation]) {
guard quiet == false else { return }

let metrics = deviationResults.map(\.metric).unique()
// Get a unique set of all name/target pairs that have threshold deviations, sorted lexically:
let namesAndTargets = deviationResults.map { NameAndTarget(name: $0.name, target: $0.target) }
.unique().sorted { ($0.target, $0.name) < ($1.target, $1.name) }

namesAndTargets.forEach { nameAndTarget in

printMarkdown("```")
"Absolute threshold deviations for \(nameAndTarget.name):\(nameAndTarget.target)".printAsHeader(addWhiteSpace: false)
printMarkdown("```")

metrics.forEach { metric in

let absoluteResults = deviationResults.filter { $0.name == nameAndTarget.name &&
$0.target == nameAndTarget.target &&
$0.metric == metric &&
$0.relative == false
}
let width = 40
let percentileWidth = 15

// The baseValue is the new baseline that we're using as the comparison base, so...
if absoluteResults.isEmpty == false {
let absoluteTable = TextTable<BenchmarkResult.ThresholdDeviation> {
[Column(title: "\(metric.description) (\(metric.countable ? $0.units.description : $0.units.timeDescription), Δ)",
value: $0.percentile, width: width, align: .left),
Column(title: "Threshold", value: $0.comparisonValue, width: percentileWidth, align: .right),
Column(title: "\(baselineName)", value: $0.baseValue, width: percentileWidth, align: .right),
Column(title: "Threshold Abs", value: $0.differenceThreshold, width: percentileWidth, align: .right)]
}

printMarkdown("```")
absoluteTable.print(absoluteResults, style: Style.fancy)
printMarkdown("```")
}
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,20 +30,22 @@ extension BenchmarkTool {
/// `String("\(#fileID)".prefix(while: { $0 != "/" }))`
/// - benchmarkName: The name of the benchmark
/// - Returns: A dictionary with static benchmark thresholds per metric or nil if the file could not be found or read
static func makeBenchmarkThresholds(path: String,
moduleName: String,
benchmarkName: String) -> [String: BenchmarkThresholds.AbsoluteThreshold]? {
static func makeBenchmarkThresholds(
path: String,
benchmarkIdentifier: BenchmarkIdentifier
) -> [BenchmarkMetric : BenchmarkThresholds.AbsoluteThreshold]? {
var path = FilePath(path)
if path.isAbsolute {
path.append("\(moduleName).\(benchmarkName).p90.json")
path.append("\(benchmarkIdentifier.target).\(benchmarkIdentifier.name).p90.json")
} else {
var cwdPath = FilePath(FileManager.default.currentDirectoryPath)
cwdPath.append(path.components)
cwdPath.append("\(moduleName).\(benchmarkName).p90.json")
cwdPath.append("\(benchmarkIdentifier.target).\(benchmarkIdentifier.name).p90.json")
path = cwdPath
}

var p90Thresholds: [String: BenchmarkThresholds.AbsoluteThreshold]?
var p90Thresholds: [BenchmarkMetric: BenchmarkThresholds.AbsoluteThreshold] = [:]
var p90ThresholdsRaw: [String: BenchmarkThresholds.AbsoluteThreshold]?

do {
let fileDescriptor = try FileDescriptor.open(path, .readOnly, options: [], permissions: .ownerRead)
Expand All @@ -64,7 +66,18 @@ extension BenchmarkTool {
readBytes.append(contentsOf: nextBytes)
}

p90Thresholds = try JSONDecoder().decode([String: BenchmarkThresholds.AbsoluteThreshold].self, from: Data(readBytes))
p90ThresholdsRaw = try JSONDecoder().decode(
[String: BenchmarkThresholds.AbsoluteThreshold].self,
from: Data(readBytes)
)

if let p90ThresholdsRaw {
p90ThresholdsRaw.forEach { metric, threshold in
if let metric = BenchmarkMetric(argument: metric) {
p90Thresholds[metric] = threshold
}
}
}
} catch {
print("Failed to read file at \(path) [\(error)] \(Errno(rawValue: errno).description)")
}
Expand All @@ -77,6 +90,6 @@ extension BenchmarkTool {
print("Failed to open file \(path), errno = [\(errno)] \(Errno(rawValue: errno).description)")
}
}
return p90Thresholds
return p90Thresholds.isEmpty ? nil : p90Thresholds
}
}
15 changes: 15 additions & 0 deletions Sources/Benchmark/BenchmarkClock.swift
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ import Glibc
#error("Unsupported Platform")
#endif

#if swift(>=5.8)
@_documentation(visibility: internal)
#endif
public struct BenchmarkClock {
/// A continuous point in time used for `BenchmarkClock`.
public struct Instant: Codable, Sendable {
Expand All @@ -35,6 +38,9 @@ public struct BenchmarkClock {
public init() {}
}

#if swift(>=5.8)
@_documentation(visibility: internal)
#endif
public extension Clock where Self == BenchmarkClock {
/// A clock that measures time that always increments but does not stop
/// incrementing while the system is asleep.
Expand All @@ -44,6 +50,9 @@ public extension Clock where Self == BenchmarkClock {
static var internalUTC: BenchmarkClock { BenchmarkClock() }
}

#if swift(>=5.8)
@_documentation(visibility: internal)
#endif
extension BenchmarkClock: Clock {
/// The current continuous instant.
public var now: BenchmarkClock.Instant {
Expand Down Expand Up @@ -114,6 +123,9 @@ extension BenchmarkClock: Clock {
}
}

#if swift(>=5.8)
@_documentation(visibility: internal)
#endif
extension BenchmarkClock.Instant: InstantProtocol {
public static var now: BenchmarkClock.Instant { BenchmarkClock.now }

Expand Down Expand Up @@ -177,6 +189,9 @@ extension BenchmarkClock.Instant: InstantProtocol {
}
}

#if swift(>=5.8)
@_documentation(visibility: internal)
#endif
public extension Duration {
func nanoseconds() -> Int64 {
(components.seconds * 1_000_000_000) + (components.attoseconds / 1_000_000_000)
Expand Down
6 changes: 3 additions & 3 deletions Sources/Benchmark/BenchmarkMetric+Defaults.swift
Original file line number Diff line number Diff line change
Expand Up @@ -121,16 +121,16 @@ public extension BenchmarkMetric {

// Nicer convenience extension for Array so one can write `.extended` instead of `BenchmarkMetric.extended`
public extension [BenchmarkMetric] {
/// A sutiable set of metrics for microbenchmarks that are CPU-oriented only.
/// A suitable set of metrics for microbenchmarks that are CPU-oriented only.
///
/// The defaults include ``wallClock`` and ``throughput``
/// The defaults include ``BenchmarkMetric/wallClock`` and ``BenchmarkMetric/throughput``
static var microbenchmark: [BenchmarkMetric] {
BenchmarkMetric.microbenchmark
}

/// The default collection of metrics used for a benchmark.
///
/// The defaults include ``wallClock``, ``cpuTotal``, ``mallocCountTotal``, ``throughput``, and ``peakMemoryResident``.
/// The defaults include ``BenchmarkMetric/wallClock``, ``BenchmarkMetric/cpuTotal``, ``BenchmarkMetric/mallocCountTotal``, ``BenchmarkMetric/throughput``, and ``BenchmarkMetric/peakMemoryResident``.
static var `default`: [BenchmarkMetric] {
BenchmarkMetric.default
}
Expand Down

0 comments on commit 3bb4e7d

Please sign in to comment.