[llvm-exegesis] Add middle half repetition mode (#77020)

This patch adds two new repetition modes to llvm-exegesis, namely the loop and duplicate variants of what I am terming the middle half repetition mode. The middle half repetition mode essentially runs each measurement twice, once with twice the number of iterations of the other. These two measurements are then aggregated by taking their difference. This subtracts away any setup/overhead that is unrelated to the code in the snippet, providing more accurate results. Using this mode on a couple of toy examples, I am able to get exact (integer) throughput values on all of them, in contrast to the default duplicate/loop repetition modes, which show a little bit of noise on the snippet value.
llvm · Jan 30, 2024 · d8b61d7 · d8b61d7
1 parent 6a21e00
commit d8b61d7
Show file tree

Hide file tree

Showing 10 changed files with 271 additions and 45 deletions.
diff --git a/llvm/docs/CommandGuide/llvm-exegesis.rst b/llvm/docs/CommandGuide/llvm-exegesis.rst
@@ -301,7 +301,7 @@ OPTIONS
   enabled can help determine the effects of the frontend and can be used to
   improve latency and throughput estimates.
 
-.. option:: --repetition-mode=[duplicate|loop|min]
+.. option:: --repetition-mode=[duplicate|loop|min|middle-half-duplicate|middle-half-loop]
 
  Specify the repetition mode. `duplicate` will create a large, straight line
  basic block with `num-repetitions` instructions (repeating the snippet
@@ -314,7 +314,11 @@ OPTIONS
  that cache decoded instructions, but consumes a register for counting
  iterations. If performing an analysis over many opcodes, it may be best to
  instead use the `min` mode, which will run each other mode,
- and produce the minimal measured result.
+ and produce the minimal measured result. The middle half repetition modes
+ will either duplicate or run the snippet in a loop depending upon the specific
+ mode. The middle half repetition modes will run two benchmarks, one twice the
+ length of the first one, and then take the difference between them to get
+ values without overhead.
 
 .. option:: --num-repetitions=<Number of repetitions>
 

diff --git a/llvm/test/tools/llvm-exegesis/X86/latency/middle-half.s b/llvm/test/tools/llvm-exegesis/X86/latency/middle-half.s
@@ -0,0 +1,8 @@
+# REQUIRES: exegesis-can-measure-latency, x86_64-linux
+
+# Check that we can use the middle-half repetition mode without crashing
+
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mode=latency -opcode-name=ADD64rr -repetition-mode=middle-half-duplicate | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mode=latency -opcode-name=ADD64rr -repetition-mode=middle-half-loop | FileCheck %s
+
+# CHECK: - { key: latency, value: {{[0-9.]*}}, per_snippet_value: {{[0-9.]*}}
diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h
@@ -90,7 +90,7 @@ struct BenchmarkMeasure {
   static BenchmarkMeasure
   Create(std::string Key, double Value,
          std::map<ValidationEvent, int64_t> ValCounters) {
-    return {Key, Value, Value, ValCounters};
+    return {Key, Value, Value, Value, ValCounters};
   }
   std::string Key;
   // This is the per-instruction value, i.e. measured quantity scaled per
@@ -99,6 +99,8 @@ struct BenchmarkMeasure {
   // This is the per-snippet value, i.e. measured quantity for one repetition of
   // the whole snippet.
   double PerSnippetValue;
+  // This is the raw value collected from the full execution.
+  double RawValue;
   // These are the validation counter values.
   std::map<ValidationEvent, int64_t> ValidationCounters;
 };
@@ -115,7 +117,13 @@ struct Benchmark {
   // The number of instructions inside the repeated snippet. For example, if a
   // snippet of 3 instructions is repeated 4 times, this is 12.
   unsigned NumRepetitions = 0;
-  enum RepetitionModeE { Duplicate, Loop, AggregateMin };
+  enum RepetitionModeE {
+    Duplicate,
+    Loop,
+    AggregateMin,
+    MiddleHalfDuplicate,
+    MiddleHalfLoop
+  };
   // Note that measurements are per instruction.
   std::vector<BenchmarkMeasure> Measurements;
   std::string Error;

diff --git a/llvm/tools/llvm-exegesis/lib/CMakeLists.txt b/llvm/tools/llvm-exegesis/lib/CMakeLists.txt
@@ -64,6 +64,7 @@ add_llvm_library(LLVMExegesis
   PerfHelper.cpp
   RegisterAliasing.cpp
   RegisterValue.cpp
+  ResultAggregator.cpp
   SchedClassResolution.cpp
   SerialSnippetGenerator.cpp
   SnippetFile.cpp

diff --git a/llvm/tools/llvm-exegesis/lib/ResultAggregator.cpp b/llvm/tools/llvm-exegesis/lib/ResultAggregator.cpp
@@ -0,0 +1,95 @@
+//===-- ResultAggregator.cpp ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "ResultAggregator.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+#include <memory>
+
+namespace llvm {
+namespace exegesis {
+
+/// Aggregator that performs no aggregation at all: both hooks are deliberately
+/// empty, so \p Result is left exactly as it was measured. This is the
+/// aggregator handed out for the plain duplicate and loop repetition modes.
+class DefaultResultAggregator : public ResultAggregator {
+  /// Intentionally a no-op; \p OtherResults is ignored.
+  void AggregateResults(Benchmark &Result,
+                        ArrayRef<Benchmark> OtherResults) const override {}
+
+  /// Intentionally a no-op; \p Measurement is not modified.
+  void AggregateMeasurement(BenchmarkMeasure &Measurement,
+                            const BenchmarkMeasure &NewMeasurement,
+                            const Benchmark &Result) const override {}
+};
+
+/// Aggregator for the `min` repetition mode: every value of the running
+/// measurement is replaced by the smaller of itself and the corresponding
+/// value of the new measurement.
+class MinimumResultAggregator : public ResultAggregator {
+  /// Lowers each value of \p Measurement to the minimum of its current value
+  /// and the matching value in \p NewMeasurement. \p Result is not consulted.
+  void AggregateMeasurement(BenchmarkMeasure &Measurement,
+                            const BenchmarkMeasure &NewMeasurement,
+                            const Benchmark &Result) const override {
+    // The three values are minimised independently of one another.
+    const auto KeepSmaller = [](auto &Current, const auto &Candidate) {
+      Current = std::min(Current, Candidate);
+    };
+
+    KeepSmaller(Measurement.PerInstructionValue,
+                NewMeasurement.PerInstructionValue);
+    KeepSmaller(Measurement.PerSnippetValue, NewMeasurement.PerSnippetValue);
+    KeepSmaller(Measurement.RawValue, NewMeasurement.RawValue);
+  }
+};
+
+/// Aggregator for the middle-half repetition modes. The running measurement
+/// is replaced by the difference between the new run and the running one, and
+/// the scaled values are then recomputed from that difference.
+class MiddleHalfResultAggregator : public ResultAggregator {
+  /// Overwrites \p Measurement with values derived from
+  /// `NewMeasurement.RawValue - Measurement.RawValue`.
+  ///
+  /// The per-instruction value is that difference divided by
+  /// `Result.NumRepetitions`; the per-snippet value is the difference divided
+  /// by the number of whole snippet repetitions, i.e.
+  /// `ceil(NumRepetitions / number of instructions in Result.Key)`.
+  void AggregateMeasurement(BenchmarkMeasure &Measurement,
+                            const BenchmarkMeasure &NewMeasurement,
+                            const Benchmark &Result) const override {
+    const double RawDifference =
+        NewMeasurement.RawValue - Measurement.RawValue;
+    // Round up so that a partially executed trailing snippet still counts as
+    // one repetition.
+    const double SnippetRepetitions =
+        std::ceil(Result.NumRepetitions /
+                  static_cast<double>(Result.Key.Instructions.size()));
+
+    Measurement.RawValue = RawDifference;
+    Measurement.PerInstructionValue = RawDifference / Result.NumRepetitions;
+    Measurement.PerSnippetValue = RawDifference / SnippetRepetitions;
+  }
+};
+
+/// Folds every benchmark in \p OtherResults into \p Result.
+///
+/// The assembled snippet bytes of each other result are always appended to
+/// \p Result. Measurements are combined pairwise through AggregateMeasurement,
+/// but only when both \p Result and the other result actually carry
+/// measurements.
+void ResultAggregator::AggregateResults(
+    Benchmark &Result, ArrayRef<Benchmark> OtherResults) const {
+  for (const Benchmark &OtherResult : OtherResults) {
+    append_range(Result.AssembledSnippet, OtherResult.AssembledSnippet);
+
+    // Aggregate measurements, but only if all measurements succeeded. The
+    // caller clears Result.Measurements when a run failed; without the check
+    // on Result the size assertion below would fire whenever OtherResult
+    // still holds measurements.
+    if (Result.Measurements.empty() || OtherResult.Measurements.empty())
+      continue;
+
+    assert(OtherResult.Measurements.size() == Result.Measurements.size() &&
+           "Expected to have an identical number of measurements");
+
+    for (auto I : zip(Result.Measurements, OtherResult.Measurements)) {
+      BenchmarkMeasure &Measurement = std::get<0>(I);
+      const BenchmarkMeasure &NewMeasurement = std::get<1>(I);
+
+      assert(Measurement.Key == NewMeasurement.Key &&
+             "Expected measurements to be symmetric");
+
+      AggregateMeasurement(Measurement, NewMeasurement, Result);
+    }
+  }
+}
+
+/// Returns the aggregator that matches \p RepetitionMode:
+///  - Duplicate / Loop: DefaultResultAggregator (no aggregation),
+///  - AggregateMin: MinimumResultAggregator,
+///  - MiddleHalfDuplicate / MiddleHalfLoop: MiddleHalfResultAggregator.
+std::unique_ptr<ResultAggregator>
+ResultAggregator::CreateAggregator(Benchmark::RepetitionModeE RepetitionMode) {
+  switch (RepetitionMode) {
+  case Benchmark::RepetitionModeE::Duplicate:
+  case Benchmark::RepetitionModeE::Loop:
+    return std::make_unique<DefaultResultAggregator>();
+  case Benchmark::RepetitionModeE::AggregateMin:
+    return std::make_unique<MinimumResultAggregator>();
+  case Benchmark::RepetitionModeE::MiddleHalfDuplicate:
+  case Benchmark::RepetitionModeE::MiddleHalfLoop:
+    return std::make_unique<MiddleHalfResultAggregator>();
+  }
+  // The switch covers every enumerator, but some compilers still warn that
+  // control can reach the end of a non-void function; make the impossible
+  // path explicit instead of falling off the end.
+  llvm_unreachable("Unknown Benchmark::RepetitionModeE enum");
+}
+
+} // namespace exegesis
+} // namespace llvm
diff --git a/llvm/tools/llvm-exegesis/lib/ResultAggregator.h b/llvm/tools/llvm-exegesis/lib/ResultAggregator.h
@@ -0,0 +1,35 @@
+//===-- ResultAggregator.h --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Defines result aggregators that are used to aggregate the results from
+/// multiple full benchmark runs.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_EXEGESIS_RESULTAGGREGATOR_H
+#define LLVM_TOOLS_LLVM_EXEGESIS_RESULTAGGREGATOR_H
+
+#include "BenchmarkResult.h"
+#include "llvm/ADT/ArrayRef.h"
+
+#include <memory>
+
+namespace llvm {
+namespace exegesis {
+
+/// Combines the results of several full benchmark runs of the same snippet
+/// into a single Benchmark. Concrete aggregators are obtained through
+/// CreateAggregator and customise the combination by overriding
+/// AggregateMeasurement (and, if needed, AggregateResults).
+class ResultAggregator {
+public:
+  /// Creates the aggregator appropriate for \p RepetitionMode.
+  static std::unique_ptr<ResultAggregator>
+  CreateAggregator(Benchmark::RepetitionModeE RepetitionMode);
+
+  /// Merges each benchmark in \p OtherResults into \p Result. The base
+  /// implementation appends the assembled snippet of every other result and
+  /// calls AggregateMeasurement for each pair of corresponding measurements.
+  virtual void AggregateResults(Benchmark &Result,
+                                ArrayRef<Benchmark> OtherResults) const;
+
+  /// Combines a single pair of measurements.
+  ///
+  /// \param Measurement    the running measurement, updated in place.
+  /// \param NewMeasurement the measurement being folded into it.
+  /// \param Result         the benchmark that owns \p Measurement.
+  virtual void AggregateMeasurement(BenchmarkMeasure &Measurement,
+                                    const BenchmarkMeasure &NewMeasurement,
+                                    const Benchmark &Result) const = 0;
+
+  virtual ~ResultAggregator() = default;
+};
+
+} // namespace exegesis
+} // namespace llvm
+
+#endif // LLVM_TOOLS_LLVM_EXEGESIS_RESULTAGGREGATOR_H
diff --git a/llvm/tools/llvm-exegesis/lib/SnippetRepetitor.cpp b/llvm/tools/llvm-exegesis/lib/SnippetRepetitor.cpp
@@ -141,8 +141,10 @@ SnippetRepetitor::Create(Benchmark::RepetitionModeE Mode,
                          const LLVMState &State) {
   switch (Mode) {
   case Benchmark::Duplicate:
+  case Benchmark::MiddleHalfDuplicate:
     return std::make_unique<DuplicateSnippetRepetitor>(State);
   case Benchmark::Loop:
+  case Benchmark::MiddleHalfLoop:
     return std::make_unique<LoopSnippetRepetitor>(State);
   case Benchmark::AggregateMin:
     break;

diff --git a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
@@ -20,6 +20,7 @@
 #include "lib/LlvmState.h"
 #include "lib/PerfHelper.h"
 #include "lib/ProgressMeter.h"
+#include "lib/ResultAggregator.h"
 #include "lib/SnippetFile.h"
 #include "lib/SnippetRepetitor.h"
 #include "lib/Target.h"
@@ -106,10 +107,13 @@ static cl::opt<exegesis::Benchmark::RepetitionModeE> RepetitionMode(
     cl::values(
         clEnumValN(exegesis::Benchmark::Duplicate, "duplicate",
                    "Duplicate the snippet"),
-        clEnumValN(exegesis::Benchmark::Loop, "loop",
-                   "Loop over the snippet"),
+        clEnumValN(exegesis::Benchmark::Loop, "loop", "Loop over the snippet"),
         clEnumValN(exegesis::Benchmark::AggregateMin, "min",
-                   "All of the above and take the minimum of measurements")),
+                   "All of the above and take the minimum of measurements"),
+        clEnumValN(exegesis::Benchmark::MiddleHalfDuplicate,
+                   "middle-half-duplicate", "Middle half duplicate mode"),
+        clEnumValN(exegesis::Benchmark::MiddleHalfLoop, "middle-half-loop",
+                   "Middle half loop mode")),
     cl::init(exegesis::Benchmark::Duplicate));
 
 static cl::opt<bool> BenchmarkMeasurementsPrintProgress(
@@ -421,30 +425,39 @@ static void runBenchmarkConfigurations(
   std::optional<ProgressMeter<>> Meter;
   if (BenchmarkMeasurementsPrintProgress)
     Meter.emplace(Configurations.size());
+
+  SmallVector<unsigned, 2> MinInstructions = {NumRepetitions};
+  if (RepetitionMode == Benchmark::MiddleHalfDuplicate ||
+      RepetitionMode == Benchmark::MiddleHalfLoop)
+    MinInstructions.push_back(NumRepetitions * 2);
+
   for (const BenchmarkCode &Conf : Configurations) {
     ProgressMeter<>::ProgressMeterStep MeterStep(Meter ? &*Meter : nullptr);
     SmallVector<Benchmark, 2> AllResults;
 
     for (const std::unique_ptr<const SnippetRepetitor> &Repetitor :
          Repetitors) {
-      auto RC = ExitOnErr(Runner.getRunnableConfiguration(
-          Conf, NumRepetitions, LoopBodySize, *Repetitor));
-      std::optional<StringRef> DumpFile;
-      if (DumpObjectToDisk.getNumOccurrences())
-        DumpFile = DumpObjectToDisk;
-      auto [Err, BenchmarkResult] =
-          Runner.runConfiguration(std::move(RC), DumpFile);
-      if (Err) {
-        // Errors from executing the snippets are fine.
-        // All other errors are a framework issue and should fail.
-        if (!Err.isA<SnippetExecutionFailure>()) {
-          errs() << "llvm-exegesis error: " << toString(std::move(Err));
-          exit(1);
+      for (unsigned IterationRepetitions : MinInstructions) {
+        auto RC = ExitOnErr(Runner.getRunnableConfiguration(
+            Conf, IterationRepetitions, LoopBodySize, *Repetitor));
+        std::optional<StringRef> DumpFile;
+        if (DumpObjectToDisk.getNumOccurrences())
+          DumpFile = DumpObjectToDisk;
+        auto [Err, BenchmarkResult] =
+            Runner.runConfiguration(std::move(RC), DumpFile);
+        if (Err) {
+          // Errors from executing the snippets are fine.
+          // All other errors are a framework issue and should fail.
+          if (!Err.isA<SnippetExecutionFailure>()) {
+            llvm::errs() << "llvm-exegesis error: " << toString(std::move(Err));
+            exit(1);
+          }
+          BenchmarkResult.Error = toString(std::move(Err));
         }
-        BenchmarkResult.Error = toString(std::move(Err));
+        AllResults.push_back(std::move(BenchmarkResult));
       }
-      AllResults.push_back(std::move(BenchmarkResult));
     }
+
     Benchmark &Result = AllResults.front();
 
     // If any of our measurements failed, pretend they all have failed.
@@ -454,29 +467,10 @@ static void runBenchmarkConfigurations(
         }))
       Result.Measurements.clear();
 
-    if (RepetitionMode == Benchmark::RepetitionModeE::AggregateMin) {
-      for (const Benchmark &OtherResult :
-           ArrayRef<Benchmark>(AllResults).drop_front()) {
-        append_range(Result.AssembledSnippet, OtherResult.AssembledSnippet);
-        // Aggregate measurements, but only if all measurements succeeded.
-        if (Result.Measurements.empty())
-          continue;
-        assert(OtherResult.Measurements.size() == Result.Measurements.size() &&
-               "Expected to have identical number of measurements.");
-        for (auto I : zip(Result.Measurements, OtherResult.Measurements)) {
-          BenchmarkMeasure &Measurement = std::get<0>(I);
-          const BenchmarkMeasure &NewMeasurement = std::get<1>(I);
-          assert(Measurement.Key == NewMeasurement.Key &&
-                 "Expected measurements to be symmetric");
-
-          Measurement.PerInstructionValue =
-              std::min(Measurement.PerInstructionValue,
-                       NewMeasurement.PerInstructionValue);
-          Measurement.PerSnippetValue = std::min(
-              Measurement.PerSnippetValue, NewMeasurement.PerSnippetValue);
-        }
-      }
-    }
+    std::unique_ptr<ResultAggregator> ResultAgg =
+        ResultAggregator::CreateAggregator(RepetitionMode);
+    ResultAgg->AggregateResults(Result,
+                                ArrayRef<Benchmark>(AllResults).drop_front());
 
     // With dummy counters, measurements are rather meaningless,
     // so drop them altogether.

diff --git a/llvm/unittests/tools/llvm-exegesis/CMakeLists.txt b/llvm/unittests/tools/llvm-exegesis/CMakeLists.txt
@@ -17,6 +17,7 @@ set(exegesis_sources
   ClusteringTest.cpp
   ProgressMeterTest.cpp
   RegisterValueTest.cpp
+  ResultAggregatorTest.cpp
   )
 
 set(exegesis_link_libraries LLVMExegesis)