From 6756120fbdaaa4f093e5ff92b553d48625a32a2d Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Wed, 1 Oct 2025 15:11:29 -0700
Subject: [PATCH 1/4] Move uploading of benchmark results to a separate workflow

Signed-off-by: Huy Do <huydhn@gmail.com>
---
 .github/workflows/benchmark.yml               | 84 +++++++++++--------
 .github/workflows/benchmark_dispatch.yml      | 37 ++++++--
 .../workflows/compute-benchmark-matrix.yml    | 24 ++++++
 3 files changed, 107 insertions(+), 38 deletions(-)
 create mode 100644 .github/workflows/compute-benchmark-matrix.yml

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index 87cd26f02..ec4ca331d 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -21,30 +21,18 @@ on:
       alias:
         required: true
         type: string
-      max-runners:
+      num-shards:
+        required: true
+        type: number
+        description: "Number of shards benchmark is running on"
+      shard:
         required: true
         type: number
-        description: "Maximum parallel runners to determine shards"
+        description: "Zero-based index of the shard this job runs"
 
 jobs:
-  compute-matrix:
-    runs-on: ubuntu-latest
-    outputs:
-      matrix: ${{ steps.gen.outputs.matrix }}
-    steps:
-      - id: gen
-        run: |
-          n="${{ inputs.max-runners }}"
-          shards=$(seq 0 $((n-1)) | paste -sd, -)
-          echo "matrix={\"shard\": [${shards}], \"num_shards\": [${n}]}" >> $GITHUB_OUTPUT
-
   benchmark:
-    name: benchmark-${{ inputs.runtime-version }}-py${{ inputs.python-version }}-${{ inputs.alias }}
-    needs: compute-matrix
-
-    strategy:
-      fail-fast: false
-      matrix: ${{ fromJSON(needs.compute-matrix.outputs.matrix) }}
+    name: benchmark-${{ inputs.runtime-version }}-shard${{ inputs.shard }}-py${{ inputs.python-version }}-${{ inputs.alias }}
 
     container:
       image: ${{ inputs.image }}
@@ -59,6 +47,11 @@ jobs:
       run:
         shell: bash -l {0}
 
+    outputs:
+      benchmark-metadata: ${{ steps.gather-benchmark-metadata.outputs.benchmark-metadata }}
+      runners-info: ${{ steps.gather-runners-info.outputs.runners-info }}
+      dependencies: ${{ steps.gather-dependencies.outputs.dependencies }}
+
     steps:
       - name: Check out code
         uses: actions/checkout@v4
@@ -129,8 +122,8 @@ jobs:
           source .venv/bin/activate
 
           KERNELS=("softmax" "geglu" "swiglu" "jsd" "welford" "kl_div" "int4_gemm" "layer_norm" "layer_norm-bwd" "rms_norm" "rms_norm-bwd" "cross_entropy")
-          NUMSHARDS=${{ matrix.num_shards }}
-          SHARD=${{ matrix.shard }}
+          NUMSHARDS=${{ inputs.num-shards }}
+          SHARD=${{ inputs.shard }}
 
           SHARD_KERNELS=()
           for ((i=0; i<${#KERNELS[@]}; i++)); do
@@ -203,19 +196,44 @@ jobs:
           fi
           cat "$TEST_REPORTS_DIR/helionbench.json"
 
-      - name: Authenticate with AWS
-        uses: aws-actions/configure-aws-credentials@v4
+      - name: Gather benchmark metadata
+        id: gather-benchmark-metadata
+        # TODO: Switch to main once https://github.com/pytorch/test-infra/pull/7269 lands
+        uses: pytorch/test-infra/.github/actions/gather-benchmark-metadata@upload-benchmark-results-v2
         with:
-          role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results
-          # The max duration enforced by the server side
-          role-duration-seconds: 18000
-          aws-region: us-east-1
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          venv: .venv/bin/activate
 
-      - name: Upload the benchmark results to OSS benchmark database for the dashboard
-        uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
+      - name: Gather runners info
+        id: gather-runners-info
+        # TODO: Switch to main once https://github.com/pytorch/test-infra/pull/7269 lands
+        uses: pytorch/test-infra/.github/actions/gather-runners-info@upload-benchmark-results-v2
         with:
-          benchmark-results-dir: test/test-reports
-          dry-run: false
-          schema-version: v3
-          github-token: ${{ secrets.GITHUB_TOKEN }}
-          venv: ".venv/bin/activate"
+          venv: .venv/bin/activate
+
+      - name: Gather dependencies
+        id: gather-dependencies
+        # TODO: Switch to main once https://github.com/pytorch/test-infra/pull/7269 lands
+        uses: pytorch/test-infra/.github/actions/gather-dependencies@upload-benchmark-results-v2
+        with:
+          venv: .venv/bin/activate
+
+      - name: Upload the benchmark results to GitHub
+        uses: actions/upload-artifact@v4
+        with:
+          name: benchmark-results-${{ inputs.alias }}-${{ inputs.shard }}
+          path: test/test-reports
+
+  upload-benchmark-results:
+    needs: benchmark
+    uses:
+    permissions:
+      id-token: write
+      contents: read
+    with:
+      benchmark-artifact: benchmark-results-${{ inputs.alias }}-${{ inputs.shard }}
+      benchmark-metadata: ${{ needs.benchmark.outputs.benchmark-metadata }}
+      runners-info: ${{ needs.benchmark.outputs.runners-info }}
+      dependencies: ${{ needs.benchmark.outputs.dependencies }}
+      schema-version: v3
+      dry-run: false
diff --git a/.github/workflows/benchmark_dispatch.yml b/.github/workflows/benchmark_dispatch.yml
index 7f5be9d16..266ff65a9 100644
--- a/.github/workflows/benchmark_dispatch.yml
+++ b/.github/workflows/benchmark_dispatch.yml
@@ -22,9 +22,18 @@ on:
     - cron: '0 8 * * *'  # Runs at midnight PST (8 AM UTC)
 
 jobs:
-  run-h100:
+  gen-matrix-h100:
     if: ${{ github.event.inputs.run_h100 == 'true' || github.event_name == 'schedule' }}
+    uses: ./.github/workflows/compute-benchmark-matrix.yml
+    with:
+      max-runners: 12
+
+  run-h100:
+    needs: gen-matrix-h100
     uses: ./.github/workflows/benchmark.yml
+    strategy:
+      fail-fast: false  # a failing shard must not cancel the other shards
+      matrix: ${{ fromJSON(needs.gen-matrix-h100.outputs.matrix) }}
     permissions:
       id-token: write
       contents: read
@@ -35,11 +44,21 @@ jobs:
       runtime-version: cu129
       container-options: --gpus all
       alias: h100
+      num-shards: ${{ matrix.num_shards }}
+      shard: ${{ matrix.shard }}
+
+  gen-matrix-b200:
+    uses: ./.github/workflows/compute-benchmark-matrix.yml
+    if: ${{ github.event.inputs.run_h100 == 'true' || github.event_name == 'schedule' }}
+    with:
       max-runners: 12
 
   run-b200:
-    if: ${{ github.event.inputs.run_b200 == 'true' || github.event_name == 'schedule' }}
+    needs: gen-matrix-b200
     uses: ./.github/workflows/benchmark.yml
+    strategy:
+      fail-fast: false  # a failing shard must not cancel the other shards
+      matrix: ${{ fromJSON(needs.gen-matrix-b200.outputs.matrix) }}
     permissions:
       id-token: write
       contents: read
@@ -50,11 +69,21 @@ jobs:
       runtime-version: cu129
       container-options: --gpus all
       alias: b200
-      max-runners: 12
+      num-shards: ${{ matrix.num_shards }}
+      shard: ${{ matrix.shard }}
 
-  run-mi325x:
+  gen-matrix-mi325x:
+    uses: ./.github/workflows/compute-benchmark-matrix.yml
     if: ${{ github.event.inputs.run_mi325x == 'true' || github.event_name == 'schedule' }}
+    with:
+      max-runners: 6
+
+  run-mi325x:
+    needs: gen-matrix-mi325x
     uses: ./.github/workflows/benchmark.yml
+    strategy:
+      fail-fast: false  # a failing shard must not cancel the other shards
+      matrix: ${{ fromJSON(needs.gen-matrix-mi325x.outputs.matrix) }}
     permissions:
       id-token: write
       contents: read
@@ -65,4 +94,5 @@ jobs:
       runtime-version: rocm7.0
       container-options: --device=/dev/kfd --device=/dev/dri
       alias: mi325x
-      max-runners: 6
+      num-shards: ${{ matrix.num_shards }}
+      shard: ${{ matrix.shard }}
diff --git a/.github/workflows/compute-benchmark-matrix.yml b/.github/workflows/compute-benchmark-matrix.yml
new file mode 100644
index 000000000..160210814
--- /dev/null
+++ b/.github/workflows/compute-benchmark-matrix.yml
@@ -0,0 +1,38 @@
+name: Compute Benchmark Matrix
+
+# Reusable workflow that turns a runner count into a sharding matrix.
+# For max-runners = N it emits the JSON string
+#   {"shard": [0,...,N-1], "num_shards": [N]}
+# which callers expand with fromJSON() into one job per shard.
+on:
+  workflow_call:
+    inputs:
+      max-runners:
+        required: true
+        type: string
+        description: "Number of shards (parallel runners) to generate"
+    outputs:
+      matrix:
+        description: "The generated matrix for sharding"
+        value: ${{ jobs.gen.outputs.matrix }}
+
+jobs:
+  gen:
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.gen.outputs.matrix }}
+    steps:
+      - id: gen
+        env:
+          # Pass the input through the environment instead of interpolating
+          # it into the script text.
+          MAX_RUNNERS: ${{ inputs.max-runners }}
+        run: |
+          n="${MAX_RUNNERS}"
+          # Zero or non-numeric input would produce an unusable shard list.
+          if ! [[ "${n}" =~ ^[1-9][0-9]*$ ]]; then
+            echo "::error::max-runners must be a positive integer, got '${n}'"
+            exit 1
+          fi
+          shards=$(seq 0 $((n-1)) | paste -sd, -)
+          echo "matrix={\"shard\": [${shards}], \"num_shards\": [${n}]}" >> "$GITHUB_OUTPUT"

From ced4c3a376d45bcf2b4ceb28ae860804ac909a63 Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Wed, 1 Oct 2025 15:15:32 -0700
Subject: [PATCH 2/4] Use the right workflow

Signed-off-by: Huy Do <huydhn@gmail.com>
---
 .github/workflows/benchmark.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index ec4ca331d..5f836cd7e 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -226,7 +226,8 @@ jobs:
 
   upload-benchmark-results:
     needs: benchmark
-    uses:
+    # TODO: Switch to main once https://github.com/pytorch/test-infra/pull/7269 lands
+    uses: pytorch/test-infra/.github/workflows/upload_benchmark_results.yml@upload-benchmark-results-v2
     permissions:
       id-token: write
       contents: read

From 9f17465f6f26ed48d5d8116c6719a59ef95a7955 Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Wed, 1 Oct 2025 15:22:02 -0700
Subject: [PATCH 3/4] Typo

Signed-off-by: Huy Do <huydhn@gmail.com>
---
 .github/workflows/benchmark_dispatch.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/benchmark_dispatch.yml b/.github/workflows/benchmark_dispatch.yml
index 266ff65a9..fa351adc2 100644
--- a/.github/workflows/benchmark_dispatch.yml
+++ b/.github/workflows/benchmark_dispatch.yml
@@ -48,7 +48,7 @@ jobs:
 
   gen-matrix-b200:
     uses: ./.github/workflows/compute-benchmark-matrix.yml
-    if: ${{ github.event.inputs.run_h100 == 'true' || github.event_name == 'schedule' }}
+    if: ${{ github.event.inputs.run_b200 == 'true' || github.event_name == 'schedule' }}
     with:
       max-runners: 12
 

From 9c56f3c59b4ff01ef340040e591fb5a497e7d4b9 Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Wed, 1 Oct 2025 17:18:16 -0700
Subject: [PATCH 4/4] Ready to land

Signed-off-by: Huy Do <huydhn@gmail.com>
---
 .github/workflows/benchmark.yml | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index 5f836cd7e..1beb3e10e 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -198,23 +198,20 @@ jobs:
 
       - name: Gather benchmark metadata
         id: gather-benchmark-metadata
-        # TODO: Switch to main once https://github.com/pytorch/test-infra/pull/7269 lands
-        uses: pytorch/test-infra/.github/actions/gather-benchmark-metadata@upload-benchmark-results-v2
+        uses: pytorch/test-infra/.github/actions/gather-benchmark-metadata@main
         with:
           github-token: ${{ secrets.GITHUB_TOKEN }}
           venv: .venv/bin/activate
 
       - name: Gather runners info
         id: gather-runners-info
-        # TODO: Switch to main once https://github.com/pytorch/test-infra/pull/7269 lands
-        uses: pytorch/test-infra/.github/actions/gather-runners-info@upload-benchmark-results-v2
+        uses: pytorch/test-infra/.github/actions/gather-runners-info@main
         with:
           venv: .venv/bin/activate
 
       - name: Gather dependencies
         id: gather-dependencies
-        # TODO: Switch to main once https://github.com/pytorch/test-infra/pull/7269 lands
-        uses: pytorch/test-infra/.github/actions/gather-dependencies@upload-benchmark-results-v2
+        uses: pytorch/test-infra/.github/actions/gather-dependencies@main
         with:
           venv: .venv/bin/activate
 
@@ -226,8 +223,7 @@ jobs:
 
   upload-benchmark-results:
     needs: benchmark
-    # TODO: Switch to main once https://github.com/pytorch/test-infra/pull/7269 lands
-    uses: pytorch/test-infra/.github/workflows/upload_benchmark_results.yml@upload-benchmark-results-v2
+    uses: pytorch/test-infra/.github/workflows/upload_benchmark_results.yml@main
     permissions:
       id-token: write
       contents: read