continuous benchmarks based on load tests (#23849)
This PR sets up continuous benchmarking based on the collector load tests
using the
[github-action-benchmark](https://github.com/benchmark-action/github-action-benchmark)
project. This gives us charts of historical benchmark data from prior runs
of the load tests. You can see an example of the output in the screenshot
below, or at this link:
https://mwear.github.io/opentelemetry-collector-contrib/loadtest/.

<img width="1107" alt="Screenshot 2023-06-29 at 12 07 05 PM"
src="https://github.com/open-telemetry/opentelemetry-collector-contrib/assets/2513372/dab3f0b5-8c7a-4d39-a44f-6959ffac7889">

github-action-benchmark does not need an external datastore, but it does
require GitHub Pages to be enabled. It writes results as JSON to the GitHub
Pages branch. For this reason, the action is set up to run on commits to
main, but not on the pull requests themselves, since PR authors do not have
permission to write to the `gh-pages` branch.

This will require enabling GitHub Pages, and it assumes that `/docs` will
be the folder the site is built from. Currently the action writes the
benchmarks to `docs/benchmarks/loadtests`, and the URL for the benchmarks
will be
https://open-telemetry.github.io/opentelemetry-collector-contrib/benchmarks/loadtests/.
We can change these locations if we'd like.

For example:

<img width="732" alt="Screenshot 2023-06-29 at 12 08 10 PM"
src="https://github.com/open-telemetry/opentelemetry-collector-contrib/assets/2513372/2d8f465e-e6ec-41d4-a3bd-f102fa87ca40">

There are two PRs related to this work. This PR updates the testbed to
write load test results in the format expected by
github-action-benchmark, and updates the load tests workflow to report
the data. The [second
PR](#23850)
is for the `gh-pages` branch; it includes a customized version of the
github-action-benchmark template to render the results.
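
For reference, the `customSmallerIsBetter` tool used below expects a flat JSON array of entries with `name`, `unit`, `value`, and optional `range`/`extra` fields; the `benchmarkResult` struct added in this PR mirrors that shape. A minimal sketch of producing entries in that format (the test name and values are purely illustrative, not real results):

```go
package main

import (
	"encoding/json"
	"fmt"
)

// entry mirrors the benchmarkResult struct added to testbed/testbed/results.go in this PR.
type entry struct {
	Name  string  `json:"name"`
	Unit  string  `json:"unit"`
	Value float64 `json:"value"`
	Range string  `json:"range,omitempty"`
	Extra string  `json:"extra,omitempty"`
}

func main() {
	// Illustrative values only; the testbed emits one set of entries per load test.
	entries := []entry{
		{Name: "cpu_percentage_avg", Unit: "%", Value: 24.3, Extra: "Trace10kSPS/OTLP - Cpu Percentage"},
		{Name: "ram_mib_max", Unit: "MiB", Value: 92, Extra: "Trace10kSPS/OTLP - RAM (MiB)"},
	}
	// Prints an indented JSON array, the same shape that saveBenchmarks writes to benchmarks.json.
	out, _ := json.MarshalIndent(entries, "", "  ")
	fmt.Println(string(out))
}
```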

While working on this, I ran the load tests and collected historical data
while leaving the rest of the codebase unchanged, and there is variation in
the results depending on the runner that picks up the job. In fact, all of
the results in the
[example](https://mwear.github.io/opentelemetry-collector-contrib/loadtest/)
were produced from the same codebase. We will need future work to stabilize
the load test results; one option to consider is
[self-hosted
runners](https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/about-self-hosted-runners).
Once the run times are stable, we can enable additional alerts through the
benchmark action.


**Link to tracking Issue:** 
#9121, #20718

**Testing:**
I tested that this works as expected on my repo. Benchmarks are reported
once PRs are merged to main, but not on the PRs themselves.

---------

Co-authored-by: Alex Boten <aboten@lightstep.com>
mwear and Alex Boten committed Jul 5, 2023
1 parent 67fb043 commit 24c80cc
Showing 2 changed files with 91 additions and 4 deletions.
27 changes: 27 additions & 0 deletions .github/workflows/load-tests.yml
@@ -105,6 +105,33 @@ jobs:
        uses: actions/upload-artifact@v3
        with:
          path: ./*.tar
      - run: cp testbed/tests/results/benchmarks.json testbed/tests/results/${{steps.filename.outputs.name}}.json
      - name: Upload benchmarks.json
        uses: actions/upload-artifact@v3
        with:
          name: benchmark-results
          path: testbed/tests/results/${{steps.filename.outputs.name}}.json

      - name: GitHub Issue Generator
        if: ${{ failure() && github.ref == 'refs/heads/main' }}
        run: issuegenerator $TEST_RESULTS

  update-benchmarks:
    runs-on: ubuntu-latest
    needs: [loadtest]
    if: github.event_name != 'pull_request'
    steps:
      - uses: actions/checkout@v3
      - uses: actions/download-artifact@v3
        with:
          name: benchmark-results
          path: results
      - run: jq -s 'map(.[])' results/*.json > output.json
      - uses: benchmark-action/github-action-benchmark@v1
        with:
          tool: 'customSmallerIsBetter'
          output-file-path: output.json
          gh-pages-branch: gh-pages
          github-token: ${{ secrets.GITHUB_TOKEN }}
          benchmark-data-dir-path: "docs/benchmarks/loadtests"
          auto-push: true
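
For anyone unfamiliar with the `jq` step above: each `results/*.json` artifact is a JSON array of benchmark entries, and `jq -s 'map(.[])'` slurps all of them and flattens them into a single array for the action. A rough Go equivalent of that merge step, shown purely to illustrate the transformation (paths taken from the workflow above):

```go
package main

import (
	"encoding/json"
	"log"
	"os"
	"path/filepath"
)

// Merge every per-job benchmark file (each a JSON array) into one array,
// mirroring what `jq -s 'map(.[])' results/*.json > output.json` does.
func main() {
	files, err := filepath.Glob("results/*.json")
	if err != nil {
		log.Fatal(err)
	}
	merged := []json.RawMessage{}
	for _, f := range files {
		data, err := os.ReadFile(f)
		if err != nil {
			log.Fatal(err)
		}
		var entries []json.RawMessage
		if err := json.Unmarshal(data, &entries); err != nil {
			log.Fatalf("%s: %v", f, err)
		}
		merged = append(merged, entries...)
	}
	out, err := json.MarshalIndent(merged, "", "  ")
	if err != nil {
		log.Fatal(err)
	}
	if err := os.WriteFile("output.json", out, 0o600); err != nil {
		log.Fatal(err)
	}
}
```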
68 changes: 64 additions & 4 deletions testbed/testbed/results.go
@@ -4,6 +4,7 @@
package testbed // import "github.com/open-telemetry/opentelemetry-collector-contrib/testbed/testbed"

import (
	"encoding/json"
	"fmt"
	"io"
	"log"
@@ -22,13 +23,25 @@ type TestResultsSummary interface {
	Save()
}

// benchmarkResult holds the results of a benchmark to be stored by benchmark-action. See
// https://github.com/benchmark-action/github-action-benchmark#examples for more details on the
// format
type benchmarkResult struct {
	Name  string  `json:"name"`
	Unit  string  `json:"unit"`
	Value float64 `json:"value"`
	Range string  `json:"range,omitempty"`
	Extra string  `json:"extra,omitempty"`
}

// PerformanceResults implements the TestResultsSummary interface with fields suitable for reporting
// performance test results.
type PerformanceResults struct {
	resultsDir       string
	resultsFile      *os.File
	perTestResults   []*PerformanceTestResult
	benchmarkResults []*benchmarkResult
	totalDuration    time.Duration
}

// PerformanceTestResult reports the results of a single performance test.
@@ -48,6 +61,7 @@ type PerformanceTestResult struct {
func (r *PerformanceResults) Init(resultsDir string) {
	r.resultsDir = resultsDir
	r.perTestResults = []*PerformanceTestResult{}
	r.benchmarkResults = []*benchmarkResult{}

	// Create resultsSummary file
	if err := os.MkdirAll(resultsDir, os.FileMode(0755)); err != nil {
@@ -72,6 +86,7 @@ func (r *PerformanceResults) Save() {
	_, _ = io.WriteString(r.resultsFile,
		fmt.Sprintf("\nTotal duration: %.0fs\n", r.totalDuration.Seconds()))
	r.resultsFile.Close()
	r.saveBenchmarks()
}

// Add results for one test.
@@ -80,6 +95,7 @@ func (r *PerformanceResults) Add(_ string, result interface{}) {
	if !ok {
		return
	}

	_, _ = io.WriteString(r.resultsFile,
		fmt.Sprintf("%-40s|%-6s|%7.0fs|%8.1f|%8.1f|%11d|%11d|%10d|%14d|%s\n",
			testResult.testName,
@@ -95,6 +111,50 @@ func (r *PerformanceResults) Add(_ string, result interface{}) {
		),
	)
	r.totalDuration += testResult.duration

	// individual benchmark results
	cpuChartName := fmt.Sprintf("%s - Cpu Percentage", testResult.testName)
	memoryChartName := fmt.Sprintf("%s - RAM (MiB)", testResult.testName)
	droppedSpansChartName := fmt.Sprintf("%s - Dropped Span Count", testResult.testName)

	r.benchmarkResults = append(r.benchmarkResults, &benchmarkResult{
		Name:  "cpu_percentage_avg",
		Value: testResult.cpuPercentageAvg,
		Unit:  "%",
		Extra: cpuChartName,
	})
	r.benchmarkResults = append(r.benchmarkResults, &benchmarkResult{
		Name:  "cpu_percentage_max",
		Value: testResult.cpuPercentageMax,
		Unit:  "%",
		Extra: cpuChartName,
	})
	r.benchmarkResults = append(r.benchmarkResults, &benchmarkResult{
		Name:  "ram_mib_avg",
		Value: float64(testResult.ramMibAvg),
		Unit:  "MiB",
		Extra: memoryChartName,
	})
	r.benchmarkResults = append(r.benchmarkResults, &benchmarkResult{
		Name:  "ram_mib_max",
		Value: float64(testResult.ramMibMax),
		Unit:  "MiB",
		Extra: memoryChartName,
	})
	r.benchmarkResults = append(r.benchmarkResults, &benchmarkResult{
		Name:  "dropped_span_count",
		Value: float64(testResult.sentSpanCount - testResult.receivedSpanCount),
		Unit:  "spans",
		Extra: droppedSpansChartName,
	})
}

// saveBenchmarks writes benchmarks to file as json to be stored by
// benchmark-action
func (r *PerformanceResults) saveBenchmarks() {
	path := path.Join(r.resultsDir, "benchmarks.json")
	j, _ := json.MarshalIndent(r.benchmarkResults, "", " ")
	_ = os.WriteFile(path, j, 0600)
}
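
As a usage sketch, the flow that produces `benchmarks.json` is `Init` → `Add` → `Save`. Something like the following test should exercise it, assuming it lives inside the `testbed` package (the types are unexported) and setting only the `PerformanceTestResult` fields visible in this diff; the test name and numbers are made up:

```go
package testbed

import (
	"os"
	"path"
	"testing"
	"time"
)

func TestBenchmarksJSONIsWritten(t *testing.T) {
	dir := t.TempDir()

	r := &PerformanceResults{}
	r.Init(dir)

	// Only fields referenced in this diff are set; everything else stays at its zero value.
	r.Add("Trace10kSPS/OTLP", &PerformanceTestResult{
		testName:          "Trace10kSPS/OTLP",
		duration:          45 * time.Second,
		cpuPercentageAvg:  24.3,
		cpuPercentageMax:  31.0,
		ramMibAvg:         68,
		ramMibMax:         92,
		sentSpanCount:     10000,
		receivedSpanCount: 9990, // the dropped_span_count entry will be 10
	})
	r.Save() // closes the results file and writes <dir>/benchmarks.json

	if _, err := os.Stat(path.Join(dir, "benchmarks.json")); err != nil {
		t.Fatalf("expected benchmarks.json to be written: %v", err)
	}
}
```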

// CorrectnessResults implements the TestResultsSummary interface with fields suitable for reporting data translation
